diff --git a/OpenSim/Framework/Util.cs b/OpenSim/Framework/Util.cs
index 6be172246c..2de596e8f5 100644
--- a/OpenSim/Framework/Util.cs
+++ b/OpenSim/Framework/Util.cs
@@ -51,6 +51,7 @@ using Nwc.XmlRpc;
using OpenMetaverse;
using OpenMetaverse.StructuredData;
using Amib.Threading;
+using System.Collections.Concurrent;
namespace OpenSim.Framework
{
@@ -148,6 +149,9 @@ namespace OpenSim.Framework
///
private static SmartThreadPool m_ThreadPool;
+ // Watchdog timer that aborts threads that have timed-out
+ private static Timer m_threadPoolWatchdog;
+
// Unix-epoch starts at January 1st 1970, 00:00:00 UTC. And all our times in the server are (or at least should be) in UTC.
public static readonly DateTime UnixEpoch =
DateTime.ParseExact("1970-01-01 00:00:00 +0", "yyyy-MM-dd hh:mm:ss z", DateTimeFormatInfo.InvariantInfo).ToUniversalTime();
@@ -1883,6 +1887,7 @@ namespace OpenSim.Framework
startInfo.MinWorkerThreads = minThreads;
m_ThreadPool = new SmartThreadPool(startInfo);
+ m_threadPoolWatchdog = new Timer(ThreadPoolWatchdog, null, 0, 1000);
}
public static int FireAndForgetCount()
@@ -1907,16 +1912,117 @@ namespace OpenSim.Framework
}
+ ///
+ /// Additional information about threads in the main thread pool. Used to time how long the
+ /// thread has been running, and abort it if it has timed-out.
+ ///
+ private class ThreadInfo
+ {
+ public long ThreadFuncNum { get; set; }
+ public string StackTrace { get; set; }
+ private string context;
+
+ public IWorkItemResult WorkItem { get; set; }
+ public Thread Thread { get; set; }
+ public bool Running { get; set; }
+ public bool Aborted { get; set; }
+ private int started;
+
+ public ThreadInfo(long threadFuncNum, string context)
+ {
+ ThreadFuncNum = threadFuncNum;
+ this.context = context;
+ Thread = null;
+ Running = false;
+ Aborted = false;
+ }
+
+ public void Started()
+ {
+ Thread = Thread.CurrentThread;
+ started = EnvironmentTickCount();
+ Running = true;
+ }
+
+ public void Ended()
+ {
+ Running = false;
+ }
+
+ public int Elapsed()
+ {
+ return EnvironmentTickCountSubtract(started);
+ }
+
+ public void Abort()
+ {
+ Aborted = true;
+ WorkItem.Cancel(true);
+ }
+
+ ///
+ /// Returns the thread's stack trace.
+ ///
+ ///
+ /// May return one of two stack traces. First, tries to get the thread's active stack
+ /// trace. But this can fail, so as a fallback this method will return the stack
+ /// trace that was active when the task was queued.
+ ///
+ public string GetStackTrace()
+ {
+ string ret = (context == null) ? "" : ("(" + context + ") ");
+
+ StackTrace activeStackTrace = Util.GetStackTrace(Thread);
+ if (activeStackTrace != null)
+ ret += activeStackTrace.ToString();
+ else if (StackTrace != null)
+ ret += "(Stack trace when queued) " + StackTrace;
+ // else, no stack trace available
+
+ return ret;
+ }
+ }
+
+
private static long nextThreadFuncNum = 0;
private static long numQueuedThreadFuncs = 0;
private static long numRunningThreadFuncs = 0;
private static Int32 threadFuncOverloadMode = 0;
+ // Maps (ThreadFunc number -> Thread)
+ private static ConcurrentDictionary activeThreads = new ConcurrentDictionary();
+
+ private static readonly int THREAD_TIMEOUT = 60 * 1000;
+
+ ///
+ /// Finds threads in the main thread pool that have timed-out, and aborts them.
+ ///
+ private static void ThreadPoolWatchdog(object state)
+ {
+ foreach (KeyValuePair entry in activeThreads)
+ {
+ ThreadInfo t = entry.Value;
+ if (t.Running && !t.Aborted && (t.Elapsed() >= THREAD_TIMEOUT))
+ {
+ m_log.WarnFormat("Timeout in threadfunc {0} ({1}) {2}", t.ThreadFuncNum, t.Thread.Name, t.GetStackTrace());
+ t.Abort();
+
+ ThreadInfo dummy;
+ activeThreads.TryRemove(entry.Key, out dummy);
+
+ // It's possible that the thread won't abort. To make sure the thread pool isn't
+ // depleted, increase the pool size.
+ m_ThreadPool.MaxThreads++;
+ }
+ }
+ }
+
+
public static void FireAndForget(System.Threading.WaitCallback callback, object obj)
{
FireAndForget(callback, obj, null);
}
-
+
public static void FireAndForget(System.Threading.WaitCallback callback, object obj, string context)
{
WaitCallback realCallback;
@@ -1924,6 +2030,7 @@ namespace OpenSim.Framework
bool loggingEnabled = (DebugFlags & DebugFlagsEnum.LogThreadPool) != 0;
long threadFuncNum = Interlocked.Increment(ref nextThreadFuncNum);
+ ThreadInfo threadInfo = new ThreadInfo(threadFuncNum, context);
if (FireAndForgetMethod == FireAndForgetMethod.RegressionTest)
{
@@ -1939,6 +2046,8 @@ namespace OpenSim.Framework
{
long numQueued1 = Interlocked.Decrement(ref numQueuedThreadFuncs);
long numRunning1 = Interlocked.Increment(ref numRunningThreadFuncs);
+ threadInfo.Started();
+ activeThreads[threadFuncNum] = threadInfo;
try
{
@@ -1949,15 +2058,22 @@ namespace OpenSim.Framework
callback(o);
}
+ catch (ThreadAbortException e)
+ {
+ m_log.Error(string.Format("Aborted threadfunc {0} ", threadFuncNum), e);
+ }
catch (Exception e)
{
- m_log.Error("[UTIL]: FireAndForget thread terminated with error ", e);
+ m_log.Error(string.Format("[UTIL]: Util STP threadfunc {0} terminated with error ", threadFuncNum), e);
}
finally
{
Interlocked.Decrement(ref numRunningThreadFuncs);
+ threadInfo.Ended();
+ ThreadInfo dummy;
+ activeThreads.TryRemove(threadFuncNum, out dummy);
if (loggingEnabled || (threadFuncOverloadMode == 1))
- m_log.Debug("Exit threadfunc " + threadFuncNum);
+ m_log.DebugFormat("Exit threadfunc {0} ({1})", threadFuncNum, FormatDuration(threadInfo.Elapsed()));
}
};
}
@@ -1979,10 +2095,13 @@ namespace OpenSim.Framework
if (loggingEnabled || (threadFuncOverloadMode == 1))
{
+ string full, partial;
+ GetFireAndForgetStackTrace(out full, out partial);
m_log.DebugFormat("Queue threadfunc {0} (Queued {1}, Running {2}) {3}{4}",
threadFuncNum, numQueued, numRunningThreadFuncs,
(context == null) ? "" : ("(" + context + ") "),
- GetFireAndForgetStackTrace(loggingEnabled));
+ loggingEnabled ? full : partial);
+ threadInfo.StackTrace = full;
}
switch (FireAndForgetMethod)
@@ -2004,7 +2123,7 @@ namespace OpenSim.Framework
case FireAndForgetMethod.SmartThreadPool:
if (m_ThreadPool == null)
InitThreadPool(2, 15);
- m_ThreadPool.QueueWorkItem((cb, o) => cb(o), realCallback, obj);
+ threadInfo.WorkItem = m_ThreadPool.QueueWorkItem((cb, o) => cb(o), realCallback, obj);
break;
case FireAndForgetMethod.Thread:
Thread thread = new Thread(delegate(object o) { realCallback(o); });
@@ -2017,6 +2136,8 @@ namespace OpenSim.Framework
catch (Exception)
{
Interlocked.Decrement(ref numQueuedThreadFuncs);
+ ThreadInfo dummy;
+ activeThreads.TryRemove(threadFuncNum, out dummy);
throw;
}
}
@@ -2024,8 +2145,9 @@ namespace OpenSim.Framework
///
/// Returns a stack trace for a thread added using FireAndForget().
///
- /// True: return full stack trace; False: return only the first frame
- private static string GetFireAndForgetStackTrace(bool full)
+ /// Will contain the full stack trace
+ /// Will contain only the first frame of the stack trace
+ private static void GetFireAndForgetStackTrace(out string full, out string partial)
{
string src = Environment.StackTrace;
string[] lines = src.Split(new string[] { Environment.NewLine }, StringSplitOptions.None);
@@ -2034,6 +2156,7 @@ namespace OpenSim.Framework
bool started = false;
bool first = true;
+ partial = "";
for (int i = 0; i < lines.Length; i++)
{
@@ -2051,21 +2174,102 @@ namespace OpenSim.Framework
{
line = line.TrimStart();
first = false;
+ partial = line;
}
- bool last = (i == lines.Length - 1) || !full;
+ bool last = (i == lines.Length - 1);
if (last)
dest.Append(line);
else
dest.AppendLine(line);
-
- if (!full)
- break;
}
- return dest.ToString();
+ full = dest.ToString();
}
+#pragma warning disable 0618
+ ///
+ /// Return the stack trace of a different thread.
+ ///
+ ///
+ /// This is complicated because the thread needs to be paused in order to get its stack
+ /// trace. And pausing another thread can cause a deadlock. This method attempts to
+ /// avoid deadlock by using a short timeout (200ms), after which it gives up and
+ /// returns 'null' instead of the stack trace.
+ ///
+ /// Take from: http://stackoverflow.com/a/14935378
+ ///
+ /// WARNING: this doesn't work in Mono. See https://bugzilla.novell.com/show_bug.cgi?id=571691
+ ///
+ ///
+ /// The stack trace, or null if failed to get it
+ private static StackTrace GetStackTrace(Thread targetThread)
+ {
+ if (IsPlatformMono)
+ {
+ // This doesn't work in Mono
+ return null;
+ }
+
+ ManualResetEventSlim fallbackThreadReady = new ManualResetEventSlim();
+ ManualResetEventSlim exitedSafely = new ManualResetEventSlim();
+
+ try
+ {
+ new Thread(delegate()
+ {
+ fallbackThreadReady.Set();
+ while (!exitedSafely.Wait(200))
+ {
+ try
+ {
+ targetThread.Resume();
+ }
+ catch (Exception)
+ {
+ // Whatever happens, do never stop to resume the main-thread regularly until the main-thread has exited safely.
+ }
+ }
+ }).Start();
+
+ fallbackThreadReady.Wait();
+ // From here, you have about 200ms to get the stack-trace
+
+ targetThread.Suspend();
+
+ StackTrace trace = null;
+ try
+ {
+ trace = new StackTrace(targetThread, true);
+ }
+ catch (ThreadStateException)
+ {
+ //failed to get stack trace, since the fallback-thread resumed the thread
+ //possible reasons:
+ //1.) This thread was just too slow
+ //2.) A deadlock ocurred
+ //Automatic retry seems too risky here, so just return null.
+ }
+
+ try
+ {
+ targetThread.Resume();
+ }
+ catch (ThreadStateException)
+ {
+ // Thread is running again already
+ }
+
+ return trace;
+ }
+ finally
+ {
+ // Signal the fallack-thread to stop
+ exitedSafely.Set();
+ }
+ }
+#pragma warning restore 0618
+
///
/// Get information about the current state of the smart thread pool.
///
@@ -2149,6 +2353,60 @@ namespace OpenSim.Framework
return tcA - tcB;
}
+ ///
+ /// Formats a duration (given in milliseconds).
+ ///
+ public static string FormatDuration(int ms)
+ {
+ TimeSpan span = new TimeSpan(ms * TimeSpan.TicksPerMillisecond);
+
+ string str = "";
+ string suffix = null;
+
+ int hours = (int)span.TotalHours;
+ if (hours > 0)
+ {
+ str += hours.ToString(str.Length == 0 ? "0" : "00");
+ suffix = "hours";
+ }
+
+ if ((hours > 0) || (span.Minutes > 0))
+ {
+ if (str.Length > 0)
+ str += ":";
+ str += span.Minutes.ToString(str.Length == 0 ? "0" : "00");
+ if (suffix == null)
+ suffix = "min";
+ }
+
+ if ((hours > 0) || (span.Minutes > 0) || (span.Seconds > 0))
+ {
+ if (str.Length > 0)
+ str += ":";
+ str += span.Seconds.ToString(str.Length == 0 ? "0" : "00");
+ if (suffix == null)
+ suffix = "sec";
+ }
+
+ if (suffix == null)
+ suffix = "ms";
+
+ if (span.TotalMinutes < 1)
+ {
+ int ms1 = span.Milliseconds;
+ if (str.Length > 0)
+ {
+ ms1 /= 100;
+ str += ".";
+ }
+ str += ms1.ToString("0");
+ }
+
+ str += " " + suffix;
+
+ return str;
+ }
+
///
/// Prints the call stack at any given point. Useful for debugging.
///