Change JobEngine stop code and add an extra check for threads removed during watchdog timeout checks
parent
3029080d9b
commit
baf8e762a6
|
@ -40,6 +40,8 @@ namespace OpenSim.Framework.Monitoring
|
||||||
|
|
||||||
public int LogLevel { get; set; }
|
public int LogLevel { get; set; }
|
||||||
|
|
||||||
|
private object JobLock = new object();
|
||||||
|
|
||||||
public string Name { get; private set; }
|
public string Name { get; private set; }
|
||||||
|
|
||||||
public string LoggingName { get; private set; }
|
public string LoggingName { get; private set; }
|
||||||
|
@ -95,7 +97,7 @@ namespace OpenSim.Framework.Monitoring
|
||||||
|
|
||||||
public void Start()
|
public void Start()
|
||||||
{
|
{
|
||||||
lock (this)
|
lock (JobLock)
|
||||||
{
|
{
|
||||||
if (IsRunning)
|
if (IsRunning)
|
||||||
return;
|
return;
|
||||||
|
@ -119,43 +121,22 @@ namespace OpenSim.Framework.Monitoring
|
||||||
|
|
||||||
public void Stop()
|
public void Stop()
|
||||||
{
|
{
|
||||||
lock (this)
|
lock (JobLock)
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
if (!IsRunning)
|
if (!IsRunning)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
m_log.DebugFormat("[JobEngine] Stopping {0}", Name);
|
||||||
|
|
||||||
IsRunning = false;
|
IsRunning = false;
|
||||||
|
|
||||||
int requestsLeft = m_jobQueue.Count;
|
m_finishedProcessingAfterStop.Reset();
|
||||||
|
if(m_jobQueue.Count <= 0)
|
||||||
if (requestsLeft <= 0)
|
|
||||||
{
|
|
||||||
m_cancelSource.Cancel();
|
m_cancelSource.Cancel();
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
m_log.InfoFormat("[{0}]: Waiting to write {1} events after stop.", LoggingName, requestsLeft);
|
|
||||||
|
|
||||||
while (requestsLeft > 0)
|
m_finishedProcessingAfterStop.WaitOne(RequestProcessTimeoutOnStop);
|
||||||
{
|
|
||||||
if (!m_finishedProcessingAfterStop.WaitOne(RequestProcessTimeoutOnStop))
|
|
||||||
{
|
|
||||||
// After timeout no events have been written
|
|
||||||
if (requestsLeft == m_jobQueue.Count)
|
|
||||||
{
|
|
||||||
m_log.WarnFormat(
|
|
||||||
"[{0}]: No requests processed after {1} ms wait. Discarding remaining {2} requests",
|
|
||||||
LoggingName, RequestProcessTimeoutOnStop, requestsLeft);
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
requestsLeft = m_jobQueue.Count;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
finally
|
finally
|
||||||
{
|
{
|
||||||
|
@ -244,48 +225,51 @@ namespace OpenSim.Framework.Monitoring
|
||||||
|
|
||||||
private void ProcessRequests()
|
private void ProcessRequests()
|
||||||
{
|
{
|
||||||
try
|
while(IsRunning || m_jobQueue.Count > 0)
|
||||||
{
|
{
|
||||||
while (IsRunning || m_jobQueue.Count > 0)
|
try
|
||||||
{
|
{
|
||||||
try
|
CurrentJob = m_jobQueue.Take(m_cancelSource.Token);
|
||||||
{
|
|
||||||
CurrentJob = m_jobQueue.Take(m_cancelSource.Token);
|
|
||||||
}
|
|
||||||
catch (ObjectDisposedException e)
|
|
||||||
{
|
|
||||||
// If we see this whilst not running then it may be due to a race where this thread checks
|
|
||||||
// IsRunning after the stopping thread sets it to false and disposes of the cancellation source.
|
|
||||||
if (IsRunning)
|
|
||||||
throw e;
|
|
||||||
else
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (LogLevel >= 1)
|
|
||||||
m_log.DebugFormat("[{0}]: Processing job {1}", LoggingName, CurrentJob.Name);
|
|
||||||
|
|
||||||
try
|
|
||||||
{
|
|
||||||
CurrentJob.Action();
|
|
||||||
}
|
|
||||||
catch (Exception e)
|
|
||||||
{
|
|
||||||
m_log.Error(
|
|
||||||
string.Format(
|
|
||||||
"[{0}]: Job {1} failed, continuing. Exception ", LoggingName, CurrentJob.Name), e);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (LogLevel >= 1)
|
|
||||||
m_log.DebugFormat("[{0}]: Processed job {1}", LoggingName, CurrentJob.Name);
|
|
||||||
|
|
||||||
CurrentJob = null;
|
|
||||||
}
|
}
|
||||||
}
|
catch(ObjectDisposedException e)
|
||||||
catch (OperationCanceledException)
|
{
|
||||||
{
|
// If we see this whilst not running then it may be due to a race where this thread checks
|
||||||
|
// IsRunning after the stopping thread sets it to false and disposes of the cancellation source.
|
||||||
|
if(IsRunning)
|
||||||
|
throw e;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
m_log.DebugFormat("[JobEngine] {0} stopping ignoring {1} jobs in queue",
|
||||||
|
Name,m_jobQueue.Count);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch(OperationCanceledException)
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(LogLevel >= 1)
|
||||||
|
m_log.DebugFormat("[{0}]: Processing job {1}",LoggingName,CurrentJob.Name);
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
CurrentJob.Action();
|
||||||
|
}
|
||||||
|
catch(Exception e)
|
||||||
|
{
|
||||||
|
m_log.Error(
|
||||||
|
string.Format(
|
||||||
|
"[{0}]: Job {1} failed, continuing. Exception ",LoggingName,CurrentJob.Name),e);
|
||||||
|
}
|
||||||
|
|
||||||
|
if(LogLevel >= 1)
|
||||||
|
m_log.DebugFormat("[{0}]: Processed job {1}",LoggingName,CurrentJob.Name);
|
||||||
|
|
||||||
|
CurrentJob = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Watchdog.RemoveThread(false);
|
||||||
m_finishedProcessingAfterStop.Set();
|
m_finishedProcessingAfterStop.Set();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -333,39 +333,45 @@ namespace OpenSim.Framework.Monitoring
|
||||||
{
|
{
|
||||||
List<ThreadWatchdogInfo> callbackInfos = null;
|
List<ThreadWatchdogInfo> callbackInfos = null;
|
||||||
|
|
||||||
|
// get a copy since we may change m_threads
|
||||||
|
List<ThreadWatchdogInfo> threadsInfo;
|
||||||
lock (m_threads)
|
lock (m_threads)
|
||||||
{
|
threadsInfo = m_threads.Values.ToList();
|
||||||
// get a copy since we may change m_threads
|
|
||||||
List<ThreadWatchdogInfo> threadsInfo = m_threads.Values.ToList();
|
|
||||||
foreach (ThreadWatchdogInfo threadInfo in threadsInfo)
|
|
||||||
{
|
|
||||||
if (threadInfo.Thread.ThreadState == ThreadState.Stopped)
|
|
||||||
{
|
|
||||||
RemoveThread(threadInfo.Thread.ManagedThreadId);
|
|
||||||
|
|
||||||
if (callbackInfos == null)
|
foreach (ThreadWatchdogInfo threadInfo in threadsInfo)
|
||||||
|
{
|
||||||
|
lock (m_threads)
|
||||||
|
{
|
||||||
|
if(!m_threads.ContainsValue(threadInfo))
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(threadInfo.Thread.ThreadState == ThreadState.Stopped)
|
||||||
|
{
|
||||||
|
RemoveThread(threadInfo.Thread.ManagedThreadId);
|
||||||
|
|
||||||
|
if(callbackInfos == null)
|
||||||
|
callbackInfos = new List<ThreadWatchdogInfo>();
|
||||||
|
|
||||||
|
callbackInfos.Add(threadInfo);
|
||||||
|
}
|
||||||
|
else if(!threadInfo.IsTimedOut && now - threadInfo.LastTick >= threadInfo.Timeout)
|
||||||
|
{
|
||||||
|
threadInfo.IsTimedOut = true;
|
||||||
|
|
||||||
|
if(threadInfo.AlarmIfTimeout)
|
||||||
|
{
|
||||||
|
if(callbackInfos == null)
|
||||||
callbackInfos = new List<ThreadWatchdogInfo>();
|
callbackInfos = new List<ThreadWatchdogInfo>();
|
||||||
|
|
||||||
callbackInfos.Add(threadInfo);
|
// Send a copy of the watchdog info to prevent race conditions where the watchdog
|
||||||
}
|
// thread updates the monitoring info after an alarm has been sent out.
|
||||||
else if (!threadInfo.IsTimedOut && now - threadInfo.LastTick >= threadInfo.Timeout)
|
callbackInfos.Add(new ThreadWatchdogInfo(threadInfo));
|
||||||
{
|
|
||||||
threadInfo.IsTimedOut = true;
|
|
||||||
|
|
||||||
if (threadInfo.AlarmIfTimeout)
|
|
||||||
{
|
|
||||||
if (callbackInfos == null)
|
|
||||||
callbackInfos = new List<ThreadWatchdogInfo>();
|
|
||||||
|
|
||||||
// Send a copy of the watchdog info to prevent race conditions where the watchdog
|
|
||||||
// thread updates the monitoring info after an alarm has been sent out.
|
|
||||||
callbackInfos.Add(new ThreadWatchdogInfo(threadInfo));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (callbackInfos != null)
|
if(callbackInfos != null)
|
||||||
foreach (ThreadWatchdogInfo callbackInfo in callbackInfos)
|
foreach (ThreadWatchdogInfo callbackInfo in callbackInfos)
|
||||||
callback(callbackInfo);
|
callback(callbackInfo);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue