diff --git a/README.txt b/README.txt index 3ef6005..88a7290 100644 --- a/README.txt +++ b/README.txt @@ -68,8 +68,15 @@ action if/when the application dies. With no configuration from you, NSSM will try to restart itself if it notices that the application died but you didn't send it a stop signal. NSSM will -keep trying, pausing 30 seconds between each attempt, until the service is -successfully started or you send it a stop signal. +keep trying, pausing between each attempt, until the service is successfully +started or you send it a stop signal. + +NSSM will pause an increasingly longer time between subsequent restart attempts +if the service fails to start in a timely manner, up to a maximum of 256 seconds. +This is so it does not consume an excessive amount of CPU time trying to start +a failed application over and over again. If you identify the cause of the +failure and don't want to wait you can use the Windows service console to +send a continue signal to NSSM and it will retry within a few seconds. NSSM will look in the registry under HKLM\SYSTEM\CurrentControlSet\Services\\Parameters\AppExit for @@ -161,6 +168,7 @@ Thanks to Joel Reingold for spotting a command line truncation bug. Thanks to Arve Knudsen for spotting that child processes of the monitored application could be left running on service shutdown, and that a missing registry value for AppDirectory confused NSSM. +Thanks to Peter Wagemans and Laszlo Kereszt for suggesting throttling restarts. Licence ------- diff --git a/messages.mc b/messages.mc index c92daa3..413eab3 100644 --- a/messages.mc +++ b/messages.mc @@ -259,3 +259,18 @@ Failed to enumerate running threads when terminating service %1: %2 . +MessageId = +1 +SymbolicName = NSSM_EVENT_THROTTLED +Severity = Warning +Language = English +Service %1 ran for less than %2 milliseconds. +Restart will be delayed by %3 milliseconds. +. 
+ +MessageId = +1 +SymbolicName = NSSM_EVENT_RESET_THROTTLE +Severity = Informational +Language = English +Request to resume service %1. Throttling of restart attempts will be reset. +. + diff --git a/nssm.h b/nssm.h index c29ec81..af8380e 100644 --- a/nssm.h +++ b/nssm.h @@ -32,6 +32,12 @@ int str_equiv(const char *, const char *); #define VALUE_LENGTH 16383 #define SERVICE_NAME_LENGTH KEY_LENGTH - 55 +/* + Throttle the restart of the service if it stops before this many + milliseconds have elapsed since startup. +*/ +#define NSSM_RESET_THROTTLE_RESTART 1500 + /* How many milliseconds to wait for the application to die after posting to its windows' message queues. @@ -43,4 +49,7 @@ int str_equiv(const char *, const char *); */ #define NSSM_KILL_THREADS_GRACE_PERIOD 1500 +/* Margin of error for service status wait hints in milliseconds. */ +#define NSSM_WAITHINT_MARGIN 2000 + #endif diff --git a/service.cpp b/service.cpp index 8e56cf7..1010666 100644 --- a/service.cpp +++ b/service.cpp @@ -10,10 +10,20 @@ char exe[EXE_LENGTH]; char flags[CMD_LENGTH]; char dir[MAX_PATH]; bool stopping; +CRITICAL_SECTION throttle_section; /* entered by throttle_restart() around its timed wait */ +CONDITION_VARIABLE throttle_condition; /* woken on SERVICE_CONTROL_CONTINUE to end that wait early */ static enum { NSSM_EXIT_RESTART, NSSM_EXIT_IGNORE, NSSM_EXIT_REALLY, NSSM_EXIT_UNCLEAN } exit_actions; static const char *exit_action_strings[] = { "Restart", "Ignore", "Exit", "Suicide", 0 }; +static unsigned long throttle; /* restart throttle level; zeroed after a clean start or a continue request */ +/* Restart delay in milliseconds for the current level: 1000 doubled (throttle - 1) times, and 1000 when throttle is 0 or 1. */ +static inline int throttle_milliseconds() { + /* Doubling loop rather than pow(), which operates on doubles. 
*/ + int ret = 1; for (unsigned long i = 1; i < throttle; i++) ret *= 2; /* ret ends up as 2^(throttle - 1), or 1 when throttle is 0 */ + return ret * 1000; +} + /* Connect to the service manager */ SC_HANDLE open_service_manager() { SC_HANDLE ret = OpenSCManager(0, SERVICES_ACTIVE_DATABASE, SC_MANAGER_ALL_ACCESS); @@ -148,11 +158,11 @@ void WINAPI service_main(unsigned long argc, char **argv) { /* Initialise status */ ZeroMemory(&service_status, sizeof(service_status)); service_status.dwServiceType = SERVICE_WIN32_OWN_PROCESS | SERVICE_INTERACTIVE_PROCESS; - service_status.dwControlsAccepted = SERVICE_ACCEPT_SHUTDOWN | SERVICE_ACCEPT_STOP; + service_status.dwControlsAccepted = SERVICE_ACCEPT_SHUTDOWN | SERVICE_ACCEPT_STOP | SERVICE_ACCEPT_PAUSE_CONTINUE; /* the control handler uses continue requests to reset the throttle */ service_status.dwWin32ExitCode = NO_ERROR; service_status.dwServiceSpecificExitCode = 0; service_status.dwCheckPoint = 0; - service_status.dwWaitHint = 1000; + service_status.dwWaitHint = NSSM_WAITHINT_MARGIN; /* Signal we AREN'T running the server */ process_handle = 0; @@ -177,6 +187,7 @@ void WINAPI service_main(unsigned long argc, char **argv) { } service_status.dwCurrentState = SERVICE_START_PENDING; + service_status.dwWaitHint = NSSM_RESET_THROTTLE_RESTART + NSSM_WAITHINT_MARGIN; /* start_service() can wait NSSM_RESET_THROTTLE_RESTART ms for a clean startup */ SetServiceStatus(service_handle, &service_status); /* Try to create the exit action parameters; we don't care if it fails */ @@ -184,6 +195,9 @@ void WINAPI service_main(unsigned long argc, char **argv) { set_service_recovery(service_name); + /* Used for signalling a resume if the service pauses when throttled. 
*/ + InitializeCriticalSection(&throttle_section); /* NOTE(review): no InitializeConditionVariable(&throttle_condition) call appears in this patch; the variable is only zero-initialised as a global - confirm that is intended */ + monitor_service(); } @@ -230,6 +244,22 @@ unsigned long WINAPI service_control_handler(unsigned long control, unsigned lon case SERVICE_CONTROL_STOP: stop_service(0, true, true); return NO_ERROR; + /* A continue request zeroes the throttle and wakes throttle_restart() if it is waiting on throttle_condition. */ + case SERVICE_CONTROL_CONTINUE: + throttle = 0; + WakeConditionVariable(&throttle_condition); + service_status.dwCurrentState = SERVICE_CONTINUE_PENDING; + service_status.dwWaitHint = throttle_milliseconds() + NSSM_WAITHINT_MARGIN; /* throttle was just zeroed, so throttle_milliseconds() is 1000 here */ + log_event(EVENTLOG_INFORMATION_TYPE, NSSM_EVENT_RESET_THROTTLE, service_name, 0); + SetServiceStatus(service_handle, &service_status); + return NO_ERROR; + + case SERVICE_CONTROL_PAUSE: + /* + We don't accept pause messages but it isn't possible to register + only for continue messages so we have to handle this case. + */ + return ERROR_CALL_NOT_IMPLEMENTED; } /* Unknown control */ @@ -257,6 +287,9 @@ int start_service() { log_event(EVENTLOG_ERROR_TYPE, NSSM_EVENT_OUT_OF_MEMORY, "command line", "start_service", 0); return stop_service(2, true, true); } + /* Returns at once while throttle is 0 (only counting the attempt); otherwise delays before the process is created. */ + throttle_restart(); + if (! CreateProcess(0, cmd, 0, 0, false, 0, 0, dir, &si, &pi)) { log_event(EVENTLOG_ERROR_TYPE, NSSM_EVENT_CREATEPROCESS_FAILED, service_name, exe, GetLastError(), 0); return stop_service(3, true, true); @@ -268,6 +301,9 @@ int start_service() { service_status.dwCurrentState = SERVICE_RUNNING; SetServiceStatus(service_handle, &service_status); + /* Wait for a clean startup. 
*/ + if (WaitForSingleObject(process_handle, NSSM_RESET_THROTTLE_RESTART) == WAIT_TIMEOUT) throttle = 0; /* WAIT_TIMEOUT: the process handle was not signalled within the threshold, so the throttle is cleared */ + return 0; } @@ -281,6 +317,7 @@ int stop_service(unsigned long exitcode, bool graceful, bool default_action) { /* Signal we are stopping */ if (graceful) { service_status.dwCurrentState = SERVICE_STOP_PENDING; + service_status.dwWaitHint = NSSM_KILL_WINDOW_GRACE_PERIOD + NSSM_KILL_THREADS_GRACE_PERIOD + NSSM_WAITHINT_MARGIN; /* both kill grace periods plus the margin */ SetServiceStatus(service_handle, &service_status); } @@ -383,3 +420,26 @@ void CALLBACK end_service(void *arg, unsigned char why) { break; } } +/* Called by start_service() before CreateProcess(): bumps the throttle level and, unless it was 0, logs NSSM_EVENT_THROTTLED, reports SERVICE_PAUSED and waits throttle_milliseconds() or until woken. */ +void throttle_restart() { + /* throttle is 0 on the first start or after a reset: count the attempt but don't delay. */ + if (! throttle++) return; + + int ms = throttle_milliseconds(); /* read before the cap below, so the delay can reach 256000 ms (throttle == 9) */ + + if (throttle > 7) throttle = 8; /* cap the level so the delay stops growing */ + + char threshold[8], milliseconds[8]; /* decimal strings for the event log; 256000 needs 7 bytes with the terminator */ + _snprintf(threshold, sizeof(threshold), "%d", NSSM_RESET_THROTTLE_RESTART); + _snprintf(milliseconds, sizeof(milliseconds), "%d", ms); + log_event(EVENTLOG_WARNING_TYPE, NSSM_EVENT_THROTTLED, service_name, threshold, milliseconds, 0); + + EnterCriticalSection(&throttle_section); + + service_status.dwCurrentState = SERVICE_PAUSED; /* reported as paused for the duration of the wait */ + SetServiceStatus(service_handle, &service_status); + + SleepConditionVariableCS(&throttle_condition, &throttle_section, ms); /* timed wait; WakeConditionVariable() in the control handler ends it early */ + + LeaveCriticalSection(&throttle_section); +} diff --git a/service.h b/service.h index fb52c16..6914661 100644 --- a/service.h +++ b/service.h @@ -16,5 +16,6 @@ int monitor_service(); int start_service(); int stop_service(unsigned long, bool, bool); void CALLBACK end_service(void *, unsigned char); +void throttle_restart(); #endif