Source code for pas.jobmgr
"""
Remote JobMgr management utilities.
The following functions provide some wrappers around remote shell commands to
easily start, stop, restart or otherwise interact with a POP-C++ job manager.
"""
import time
from pas import shell
from pas.conf import role
from pas.conf import settings
def start():
    """
    Starts a JobMgr instance on the remote node(s) selected by the current
    context.

    The log files listed in ``settings.LOG_FILES`` are removed first, then the
    ``SXXpopc start`` command is issued. Both commands run with sudo.

    This function is meant for starting the node(s) of a single context. To
    bring up a whole system use the startall function, which starts the
    masters before the slaves and pauses after each group so that the slaves
    can register correctly with the master.
    """
    # Remove the log files left behind by a previous run
    log_files = " ".join(settings.LOG_FILES)
    cleanup_command = 'rm -f {0}'.format(log_files)
    shell.remote(cleanup_command, sudo=True)

    # A pty has to be requested here: without it the remote invocation hangs
    # and never returns
    shell.remote('SXXpopc start', pty=True, sudo=True)
def stop():
    """
    Stops a currently running JobMgr instance on the remote node(s) selected
    by the current context.

    This function is meant for stopping the node(s) of a single context. To
    shut down a whole system use the stopall function, which stops the slaves
    before the master and pauses after each group.
    """
    shutdown_command = 'SXXpopc stop'
    shell.remote(shutdown_command, sudo=True)
def restart():
    """
    Stops and immediately restarts a JobMgr instance on the remote node(s)
    selected by the current context, using one single remote command.

    This function is meant for restarting the node(s) of a single context. To
    restart a whole system use the restartall function, which goes through
    stopall and startall and therefore honours the configured delays between
    the node groups.
    """
    # Both steps are chained in one shell command line so that only a single
    # remote invocation is needed
    restart_command = 'SXXpopc stop ; SXXpopc start'

    # The pty is requested because the start half of the command hangs
    # without one
    shell.remote(restart_command, pty=True, sudo=True)
def kill():
    """
    Kills ALL running job managers (and the related search nodes) on the
    hosts provided by the current context or, by default, on all known hosts.

    The processes are selected by name through ``pkill``, executed with sudo.
    Warnings raised by the remote command are ignored, presumably so that a
    host without any matching process does not interrupt the operation.
    """
    # NOTE(review): pkill treats its pattern as an extended regular
    # expression, not as a glob, so ``popc_*`` matches every process whose
    # name contains "popc" -- confirm that this is the intended selection.
    kill_command = 'pkill "jobmgr|popc_*"'

    with shell.ignore_warnings():
        shell.remote(kill_command, sudo=True)
def startall():
    """
    Starts all nodes of the system, one role at a time, pausing after each
    role so that the nodes can register properly to their parent JobMgr.

    The length of the pause is read from ``settings.STARTUP_DELAY``.
    """
    # The order matters: the masters have to be up and running before any
    # slave tries to register to them.
    for role_name in ('master', 'slaves'):
        with shell.workon(role(role_name)):
            start()

        # Wait some (configurable) time; one or two seconds should be enough.
        # The pause after the masters lets them complete their setup, the one
        # after the slaves leaves room for programs executed right afterwards.
        time.sleep(settings.STARTUP_DELAY)
def stopall():
    """
    Stops all nodes of the system, one role at a time (slaves first, then the
    master), pausing after each role.

    The length of the pause is read from ``settings.SHUTDOWN_DELAY``.

    Note that the delays are not as important here as they are in the
    startall function and could probably be omitted safely. They are kept to
    stay compatible with future versions of the JobMgr, which may want to
    execute some cleanup code before terminating. In the meantime the delay
    can simply be set to 0 in the settings.
    """
    # Reverse order with respect to startall: slaves go down before the master
    for role_name in ('slaves', 'master'):
        with shell.workon(role(role_name)):
            stop()

        time.sleep(settings.SHUTDOWN_DELAY)
def restartall():
    """
    Restarts every node of the system by calling stopall followed by
    startall.

    Because both of these functions introduce delays between the node groups,
    the stop and the start do not happen within one single remote command, as
    is instead the case for the restart function.
    """
    # Bring the whole system down first (slaves, then master)...
    stopall()

    # ...and then back up again (master, then slaves)
    startall()
def killall():
    """
    Alias for the kill function, provided for symmetry with the other
    ``*all`` functions; no role-specific treatment is needed when killing.
    """
    # Plain delegation: kill takes care of everything
    kill()