Allow the thread watchdog to accept an alarm method that is invoked if the timeout is breached.

This alarm can then invoke this to log extra information.
This is used in LLUDPServer to show which client was being processed when incoming and outgoing udp watchdog alarms are triggered.
0.7.4.1
Justin Clark-Casey (justincc) 2012-06-07 02:44:13 +01:00
parent 514dd85199
commit 98b46d48fe
4 changed files with 96 additions and 25 deletions

View File

@ -66,6 +66,7 @@ namespace OpenSim.Framework.Servers.HttpServer
ThreadPriority.Normal, ThreadPriority.Normal,
false, false,
true, true,
null,
int.MaxValue); int.MaxValue);
} }
@ -75,6 +76,7 @@ namespace OpenSim.Framework.Servers.HttpServer
ThreadPriority.Normal, ThreadPriority.Normal,
false, false,
true, true,
null,
1000 * 60 * 10); 1000 * 60 * 10);
} }

View File

@ -42,7 +42,7 @@ namespace OpenSim.Framework
const double WATCHDOG_INTERVAL_MS = 2500.0d; const double WATCHDOG_INTERVAL_MS = 2500.0d;
/// <summary>Maximum timeout in milliseconds before a thread is considered dead</summary> /// <summary>Maximum timeout in milliseconds before a thread is considered dead</summary>
const int WATCHDOG_TIMEOUT_MS = 5000; public const int WATCHDOG_TIMEOUT_MS = 5000;
[System.Diagnostics.DebuggerDisplay("{Thread.Name}")] [System.Diagnostics.DebuggerDisplay("{Thread.Name}")]
public class ThreadWatchdogInfo public class ThreadWatchdogInfo
@ -58,7 +58,7 @@ namespace OpenSim.Framework
public int FirstTick { get; private set; } public int FirstTick { get; private set; }
/// <summary> /// <summary>
/// First time this heartbeat update was invoked /// Last time this heartbeat update was invoked
/// </summary> /// </summary>
public int LastTick { get; set; } public int LastTick { get; set; }
@ -77,6 +77,11 @@ namespace OpenSim.Framework
/// </summary> /// </summary>
public bool AlarmIfTimeout { get; set; } public bool AlarmIfTimeout { get; set; }
/// <summary>
/// Method execute if alarm goes off. If null then no alarm method is fired.
/// </summary>
public Func<string> AlarmMethod { get; set; }
public ThreadWatchdogInfo(Thread thread, int timeout) public ThreadWatchdogInfo(Thread thread, int timeout)
{ {
Thread = thread; Thread = thread;
@ -87,16 +92,10 @@ namespace OpenSim.Framework
} }
/// <summary> /// <summary>
/// This event is called whenever a tracked thread is stopped or /// This event is called whenever a tracked thread is
/// has not called UpdateThread() in time /// stopped or has not called UpdateThread() in time<
/// </summary> /// /summary>
/// <param name="thread">The thread that has been identified as dead</param> public static event Action<ThreadWatchdogInfo> OnWatchdogTimeout;
/// <param name="lastTick">The last time this thread called UpdateThread()</param>
public delegate void WatchdogTimeout(Thread thread, int lastTick);
/// <summary>This event is called whenever a tracked thread is
/// stopped or has not called UpdateThread() in time</summary>
public static event WatchdogTimeout OnWatchdogTimeout;
private static readonly ILog m_log = LogManager.GetLogger(System.Reflection.MethodBase.GetCurrentMethod().DeclaringType); private static readonly ILog m_log = LogManager.GetLogger(System.Reflection.MethodBase.GetCurrentMethod().DeclaringType);
private static Dictionary<int, ThreadWatchdogInfo> m_threads; private static Dictionary<int, ThreadWatchdogInfo> m_threads;
@ -123,7 +122,7 @@ namespace OpenSim.Framework
public static Thread StartThread( public static Thread StartThread(
ThreadStart start, string name, ThreadPriority priority, bool isBackground, bool alarmIfTimeout) ThreadStart start, string name, ThreadPriority priority, bool isBackground, bool alarmIfTimeout)
{ {
return StartThread(start, name, priority, isBackground, alarmIfTimeout, WATCHDOG_TIMEOUT_MS); return StartThread(start, name, priority, isBackground, alarmIfTimeout, null, WATCHDOG_TIMEOUT_MS);
} }
/// <summary> /// <summary>
@ -135,17 +134,24 @@ namespace OpenSim.Framework
/// <param name="isBackground">True to run this thread as a background /// <param name="isBackground">True to run this thread as a background
/// thread, otherwise false</param> /// thread, otherwise false</param>
/// <param name="alarmIfTimeout">Trigger an alarm function is we have timed out</param> /// <param name="alarmIfTimeout">Trigger an alarm function is we have timed out</param>
/// <param name="alarmMethod">
/// Alarm method to call if alarmIfTimeout is true and there is a timeout.
/// Normally, this will just return some useful debugging information.
/// </param>
/// <param name="timeout">Number of milliseconds to wait until we issue a warning about timeout.</param> /// <param name="timeout">Number of milliseconds to wait until we issue a warning about timeout.</param>
/// <returns>The newly created Thread object</returns> /// <returns>The newly created Thread object</returns>
public static Thread StartThread( public static Thread StartThread(
ThreadStart start, string name, ThreadPriority priority, bool isBackground, bool alarmIfTimeout, int timeout) ThreadStart start, string name, ThreadPriority priority, bool isBackground,
bool alarmIfTimeout, Func<string> alarmMethod, int timeout)
{ {
Thread thread = new Thread(start); Thread thread = new Thread(start);
thread.Name = name; thread.Name = name;
thread.Priority = priority; thread.Priority = priority;
thread.IsBackground = isBackground; thread.IsBackground = isBackground;
ThreadWatchdogInfo twi = new ThreadWatchdogInfo(thread, timeout) { AlarmIfTimeout = alarmIfTimeout }; ThreadWatchdogInfo twi
= new ThreadWatchdogInfo(thread, timeout)
{ AlarmIfTimeout = alarmIfTimeout, AlarmMethod = alarmMethod };
m_log.DebugFormat( m_log.DebugFormat(
"[WATCHDOG]: Started tracking thread {0}, ID {1}", twi.Thread.Name, twi.Thread.ManagedThreadId); "[WATCHDOG]: Started tracking thread {0}, ID {1}", twi.Thread.Name, twi.Thread.ManagedThreadId);
@ -258,7 +264,7 @@ namespace OpenSim.Framework
/// <param name="e"></param> /// <param name="e"></param>
private static void WatchdogTimerElapsed(object sender, System.Timers.ElapsedEventArgs e) private static void WatchdogTimerElapsed(object sender, System.Timers.ElapsedEventArgs e)
{ {
WatchdogTimeout callback = OnWatchdogTimeout; Action<ThreadWatchdogInfo> callback = OnWatchdogTimeout;
if (callback != null) if (callback != null)
{ {
@ -296,7 +302,7 @@ namespace OpenSim.Framework
if (callbackInfos != null) if (callbackInfos != null)
foreach (ThreadWatchdogInfo callbackInfo in callbackInfos) foreach (ThreadWatchdogInfo callbackInfo in callbackInfos)
callback(callbackInfo.Thread, callbackInfo.LastTick); callback(callbackInfo);
} }
m_watchdogTimer.Start(); m_watchdogTimer.Start();

View File

@ -438,12 +438,16 @@ namespace OpenSim
} }
} }
private void WatchdogTimeoutHandler(System.Threading.Thread thread, int lastTick) private void WatchdogTimeoutHandler(Watchdog.ThreadWatchdogInfo twi)
{ {
int now = Environment.TickCount & Int32.MaxValue; int now = Environment.TickCount & Int32.MaxValue;
m_log.ErrorFormat("[WATCHDOG]: Timeout detected for thread \"{0}\". ThreadState={1}. Last tick was {2}ms ago", m_log.ErrorFormat(
thread.Name, thread.ThreadState, now - lastTick); "[WATCHDOG]: Timeout detected for thread \"{0}\". ThreadState={1}. Last tick was {2}ms ago. {3}",
twi.Thread.Name,
twi.Thread.ThreadState,
now - twi.LastTick,
twi.AlarmMethod != null ? string.Format("Data: {0}", twi.AlarmMethod()) : "");
} }
#region Console Commands #region Console Commands

View File

@ -163,6 +163,16 @@ namespace OpenSim.Region.ClientStack.LindenUDP
private int m_malformedCount = 0; // Guard against a spamming attack private int m_malformedCount = 0; // Guard against a spamming attack
/// <summary>
/// Record current outgoing client for monitoring purposes.
/// </summary>
private IClientAPI m_currentOutgoingClient;
/// <summary>
/// Recording current incoming client for monitoring purposes.
/// </summary>
private IClientAPI m_currentIncomingClient;
public LLUDPServer(IPAddress listenIP, ref uint port, int proxyPortOffsetParm, bool allow_alternate_port, IConfigSource configSource, AgentCircuitManager circuitManager) public LLUDPServer(IPAddress listenIP, ref uint port, int proxyPortOffsetParm, bool allow_alternate_port, IConfigSource configSource, AgentCircuitManager circuitManager)
: base(listenIP, (int)port) : base(listenIP, (int)port)
{ {
@ -244,19 +254,56 @@ namespace OpenSim.Region.ClientStack.LindenUDP
if (m_scene == null) if (m_scene == null)
throw new InvalidOperationException("[LLUDPSERVER]: Cannot LLUDPServer.Start() without an IScene reference"); throw new InvalidOperationException("[LLUDPSERVER]: Cannot LLUDPServer.Start() without an IScene reference");
m_log.Info("[LLUDPSERVER]: Starting the LLUDP server in " + (m_asyncPacketHandling ? "asynchronous" : "synchronous") + " mode"); m_log.InfoFormat(
"[LLUDPSERVER]: Starting the LLUDP server in {0} mode",
m_asyncPacketHandling ? "asynchronous" : "synchronous");
base.Start(m_recvBufferSize, m_asyncPacketHandling); base.Start(m_recvBufferSize, m_asyncPacketHandling);
// Start the packet processing threads // Start the packet processing threads
Watchdog.StartThread( Watchdog.StartThread(
IncomingPacketHandler, "Incoming Packets (" + m_scene.RegionInfo.RegionName + ")", ThreadPriority.Normal, false, true); IncomingPacketHandler,
string.Format("Incoming Packets ({0})", m_scene.RegionInfo.RegionName),
ThreadPriority.Normal,
false,
true,
GetWatchdogIncomingAlarmData,
Watchdog.WATCHDOG_TIMEOUT_MS);
Watchdog.StartThread( Watchdog.StartThread(
OutgoingPacketHandler, "Outgoing Packets (" + m_scene.RegionInfo.RegionName + ")", ThreadPriority.Normal, false, true); OutgoingPacketHandler,
string.Format("Outgoing Packets ({0})", m_scene.RegionInfo.RegionName),
ThreadPriority.Normal,
false,
true,
GetWatchdogOutgoingAlarmData,
Watchdog.WATCHDOG_TIMEOUT_MS);
m_elapsedMSSinceLastStatReport = Environment.TickCount; m_elapsedMSSinceLastStatReport = Environment.TickCount;
} }
/// <summary>
/// If the outgoing UDP thread times out, then return client that was being processed to help with debugging.
/// </summary>
/// <returns></returns>
private string GetWatchdogIncomingAlarmData()
{
return string.Format(
"Client is {0}",
m_currentIncomingClient != null ? m_currentIncomingClient.Name : "none");
}
/// <summary>
/// If the outgoing UDP thread times out, then return client that was being processed to help with debugging.
/// </summary>
/// <returns></returns>
private string GetWatchdogOutgoingAlarmData()
{
return string.Format(
"Client is {0}",
m_currentOutgoingClient != null ? m_currentOutgoingClient.Name : "none");
}
public new void Stop() public new void Stop()
{ {
m_log.Info("[LLUDPSERVER]: Shutting down the LLUDP server for " + m_scene.RegionInfo.RegionName); m_log.Info("[LLUDPSERVER]: Shutting down the LLUDP server for " + m_scene.RegionInfo.RegionName);
@ -1173,6 +1220,8 @@ namespace OpenSim.Region.ClientStack.LindenUDP
// client. m_packetSent will be set to true if a packet is sent // client. m_packetSent will be set to true if a packet is sent
m_scene.ForEachClient(clientPacketHandler); m_scene.ForEachClient(clientPacketHandler);
m_currentOutgoingClient = null;
// If nothing was sent, sleep for the minimum amount of time before a // If nothing was sent, sleep for the minimum amount of time before a
// token bucket could get more tokens // token bucket could get more tokens
if (!m_packetSent) if (!m_packetSent)
@ -1191,6 +1240,8 @@ namespace OpenSim.Region.ClientStack.LindenUDP
private void ClientOutgoingPacketHandler(IClientAPI client) private void ClientOutgoingPacketHandler(IClientAPI client)
{ {
m_currentOutgoingClient = client;
try try
{ {
if (client is LLClientView) if (client is LLClientView)
@ -1216,8 +1267,8 @@ namespace OpenSim.Region.ClientStack.LindenUDP
} }
catch (Exception ex) catch (Exception ex)
{ {
m_log.Error("[LLUDPSERVER]: OutgoingPacketHandler iteration for " + client.Name + m_log.Error(
" threw an exception: " + ex.Message, ex); string.Format("[LLUDPSERVER]: OutgoingPacketHandler iteration for {0} threw ", client.Name), ex);
} }
} }
@ -1243,6 +1294,8 @@ namespace OpenSim.Region.ClientStack.LindenUDP
{ {
nticks++; nticks++;
watch1.Start(); watch1.Start();
m_currentOutgoingClient = client;
try try
{ {
if (client is LLClientView) if (client is LLClientView)
@ -1344,6 +1397,8 @@ namespace OpenSim.Region.ClientStack.LindenUDP
// Make sure this client is still alive // Make sure this client is still alive
if (m_scene.TryGetClient(udpClient.AgentID, out client)) if (m_scene.TryGetClient(udpClient.AgentID, out client))
{ {
m_currentIncomingClient = client;
try try
{ {
// Process this packet // Process this packet
@ -1361,6 +1416,10 @@ namespace OpenSim.Region.ClientStack.LindenUDP
m_log.ErrorFormat("[LLUDPSERVER]: Client packet handler for {0} for packet {1} threw an exception", udpClient.AgentID, packet.Type); m_log.ErrorFormat("[LLUDPSERVER]: Client packet handler for {0} for packet {1} threw an exception", udpClient.AgentID, packet.Type);
m_log.Error(e.Message, e); m_log.Error(e.Message, e);
} }
finally
{
m_currentIncomingClient = null;
}
} }
else else
{ {