From ed21576ce04a88783d9b1c12ff452ac68dabe48b Mon Sep 17 00:00:00 2001 From: "Justin Clark-Casey (justincc)" Date: Thu, 7 Jun 2012 02:44:13 +0100 Subject: [PATCH] Allow the thread watchdog to accept an alarm method that is invoked if the timeout is breached. The timeout handler can then invoke this method to log extra information. This is used in LLUDPServer to show which client was being processed when incoming and outgoing UDP watchdog alarms are triggered. --- .../HttpServer/PollServiceRequestManager.cs | 2 + OpenSim/Framework/Watchdog.cs | 40 ++++++----- OpenSim/Region/Application/OpenSim.cs | 10 ++- .../ClientStack/Linden/UDP/LLUDPServer.cs | 69 +++++++++++++++++-- 4 files changed, 96 insertions(+), 25 deletions(-) diff --git a/OpenSim/Framework/Servers/HttpServer/PollServiceRequestManager.cs b/OpenSim/Framework/Servers/HttpServer/PollServiceRequestManager.cs index 0062d4ef15..f96fd1fa20 100644 --- a/OpenSim/Framework/Servers/HttpServer/PollServiceRequestManager.cs +++ b/OpenSim/Framework/Servers/HttpServer/PollServiceRequestManager.cs @@ -66,6 +66,7 @@ namespace OpenSim.Framework.Servers.HttpServer ThreadPriority.Normal, false, true, + null, int.MaxValue); } @@ -75,6 +76,7 @@ namespace OpenSim.Framework.Servers.HttpServer ThreadPriority.Normal, false, true, + null, 1000 * 60 * 10); } diff --git a/OpenSim/Framework/Watchdog.cs b/OpenSim/Framework/Watchdog.cs index e93e50e2ca..7552cd15b6 100644 --- a/OpenSim/Framework/Watchdog.cs +++ b/OpenSim/Framework/Watchdog.cs @@ -42,7 +42,7 @@ namespace OpenSim.Framework const double WATCHDOG_INTERVAL_MS = 2500.0d; /// Maximum timeout in milliseconds before a thread is considered dead - const int WATCHDOG_TIMEOUT_MS = 5000; + public const int WATCHDOG_TIMEOUT_MS = 5000; [System.Diagnostics.DebuggerDisplay("{Thread.Name}")] public class ThreadWatchdogInfo @@ -58,7 +58,7 @@ namespace OpenSim.Framework public int FirstTick { get; private set; } /// - /// First time this heartbeat update was invoked + /// Last time this heartbeat update was
invoked /// public int LastTick { get; set; } @@ -77,6 +77,11 @@ namespace OpenSim.Framework /// public bool AlarmIfTimeout { get; set; } + /// + /// Method to execute if the alarm goes off. If null then no alarm method is fired. + /// + public Func AlarmMethod { get; set; } + public ThreadWatchdogInfo(Thread thread, int timeout) { Thread = thread; @@ -87,16 +92,10 @@ namespace OpenSim.Framework } /// - /// This event is called whenever a tracked thread is stopped or - /// has not called UpdateThread() in time - /// - /// The thread that has been identified as dead - /// The last time this thread called UpdateThread() - public delegate void WatchdogTimeout(Thread thread, int lastTick); - - /// This event is called whenever a tracked thread is - /// stopped or has not called UpdateThread() in time - public static event WatchdogTimeout OnWatchdogTimeout; + /// This event is called whenever a tracked thread is + /// stopped or has not called UpdateThread() in time + /// </summary> + public static event Action OnWatchdogTimeout; private static readonly ILog m_log = LogManager.GetLogger(System.Reflection.MethodBase.GetCurrentMethod().DeclaringType); private static Dictionary m_threads; @@ -123,7 +122,7 @@ namespace OpenSim.Framework public static Thread StartThread( ThreadStart start, string name, ThreadPriority priority, bool isBackground, bool alarmIfTimeout) { - return StartThread(start, name, priority, isBackground, alarmIfTimeout, WATCHDOG_TIMEOUT_MS); + return StartThread(start, name, priority, isBackground, alarmIfTimeout, null, WATCHDOG_TIMEOUT_MS); } /// @@ -135,17 +134,24 @@ namespace OpenSim.Framework /// True to run this thread as a background /// thread, otherwise false /// Trigger an alarm function is we have timed out + /// + /// Alarm method to call if alarmIfTimeout is true and there is a timeout. + /// Normally, this will just return some useful debugging information. + /// /// Number of milliseconds to wait until we issue a warning about timeout.
/// The newly created Thread object public static Thread StartThread( - ThreadStart start, string name, ThreadPriority priority, bool isBackground, bool alarmIfTimeout, int timeout) + ThreadStart start, string name, ThreadPriority priority, bool isBackground, + bool alarmIfTimeout, Func alarmMethod, int timeout) { Thread thread = new Thread(start); thread.Name = name; thread.Priority = priority; thread.IsBackground = isBackground; - ThreadWatchdogInfo twi = new ThreadWatchdogInfo(thread, timeout) { AlarmIfTimeout = alarmIfTimeout }; + ThreadWatchdogInfo twi + = new ThreadWatchdogInfo(thread, timeout) + { AlarmIfTimeout = alarmIfTimeout, AlarmMethod = alarmMethod }; m_log.DebugFormat( "[WATCHDOG]: Started tracking thread {0}, ID {1}", twi.Thread.Name, twi.Thread.ManagedThreadId); @@ -258,7 +264,7 @@ namespace OpenSim.Framework /// private static void WatchdogTimerElapsed(object sender, System.Timers.ElapsedEventArgs e) { - WatchdogTimeout callback = OnWatchdogTimeout; + Action callback = OnWatchdogTimeout; if (callback != null) { @@ -296,7 +302,7 @@ namespace OpenSim.Framework if (callbackInfos != null) foreach (ThreadWatchdogInfo callbackInfo in callbackInfos) - callback(callbackInfo.Thread, callbackInfo.LastTick); + callback(callbackInfo); } m_watchdogTimer.Start(); diff --git a/OpenSim/Region/Application/OpenSim.cs b/OpenSim/Region/Application/OpenSim.cs index ca0432fb6c..c3590abba0 100644 --- a/OpenSim/Region/Application/OpenSim.cs +++ b/OpenSim/Region/Application/OpenSim.cs @@ -438,12 +438,16 @@ namespace OpenSim } } - private void WatchdogTimeoutHandler(System.Threading.Thread thread, int lastTick) + private void WatchdogTimeoutHandler(Watchdog.ThreadWatchdogInfo twi) { int now = Environment.TickCount & Int32.MaxValue; - m_log.ErrorFormat("[WATCHDOG]: Timeout detected for thread \"{0}\". ThreadState={1}. Last tick was {2}ms ago", - thread.Name, thread.ThreadState, now - lastTick); + m_log.ErrorFormat( + "[WATCHDOG]: Timeout detected for thread \"{0}\". 
ThreadState={1}. Last tick was {2}ms ago. {3}", + twi.Thread.Name, + twi.Thread.ThreadState, + now - twi.LastTick, + twi.AlarmMethod != null ? string.Format("Data: {0}", twi.AlarmMethod()) : ""); } #region Console Commands diff --git a/OpenSim/Region/ClientStack/Linden/UDP/LLUDPServer.cs b/OpenSim/Region/ClientStack/Linden/UDP/LLUDPServer.cs index 32ba590718..e37adb86b8 100644 --- a/OpenSim/Region/ClientStack/Linden/UDP/LLUDPServer.cs +++ b/OpenSim/Region/ClientStack/Linden/UDP/LLUDPServer.cs @@ -163,6 +163,16 @@ namespace OpenSim.Region.ClientStack.LindenUDP private int m_malformedCount = 0; // Guard against a spamming attack + /// + /// Record current outgoing client for monitoring purposes. + /// + private IClientAPI m_currentOutgoingClient; + + /// + /// Record current incoming client for monitoring purposes. + /// + private IClientAPI m_currentIncomingClient; + public LLUDPServer(IPAddress listenIP, ref uint port, int proxyPortOffsetParm, bool allow_alternate_port, IConfigSource configSource, AgentCircuitManager circuitManager) : base(listenIP, (int)port) { @@ -244,19 +254,56 @@ namespace OpenSim.Region.ClientStack.LindenUDP if (m_scene == null) throw new InvalidOperationException("[LLUDPSERVER]: Cannot LLUDPServer.Start() without an IScene reference"); - m_log.Info("[LLUDPSERVER]: Starting the LLUDP server in " + (m_asyncPacketHandling ? "asynchronous" : "synchronous") + " mode"); + m_log.InfoFormat( + "[LLUDPSERVER]: Starting the LLUDP server in {0} mode", + m_asyncPacketHandling ?
"asynchronous" : "synchronous"); base.Start(m_recvBufferSize, m_asyncPacketHandling); // Start the packet processing threads Watchdog.StartThread( - IncomingPacketHandler, "Incoming Packets (" + m_scene.RegionInfo.RegionName + ")", ThreadPriority.Normal, false, true); + IncomingPacketHandler, + string.Format("Incoming Packets ({0})", m_scene.RegionInfo.RegionName), + ThreadPriority.Normal, + false, + true, + GetWatchdogIncomingAlarmData, + Watchdog.WATCHDOG_TIMEOUT_MS); + Watchdog.StartThread( - OutgoingPacketHandler, "Outgoing Packets (" + m_scene.RegionInfo.RegionName + ")", ThreadPriority.Normal, false, true); + OutgoingPacketHandler, + string.Format("Outgoing Packets ({0})", m_scene.RegionInfo.RegionName), + ThreadPriority.Normal, + false, + true, + GetWatchdogOutgoingAlarmData, + Watchdog.WATCHDOG_TIMEOUT_MS); m_elapsedMSSinceLastStatReport = Environment.TickCount; } + /// + /// If the incoming UDP thread times out, then return the client that was being processed to help with debugging. + /// + /// + private string GetWatchdogIncomingAlarmData() + { + return string.Format( + "Client is {0}", + m_currentIncomingClient != null ? m_currentIncomingClient.Name : "none"); + } + + /// + /// If the outgoing UDP thread times out, then return the client that was being processed to help with debugging. + /// + /// + private string GetWatchdogOutgoingAlarmData() + { + return string.Format( + "Client is {0}", + m_currentOutgoingClient != null ? m_currentOutgoingClient.Name : "none"); + } + public new void Stop() { m_log.Info("[LLUDPSERVER]: Shutting down the LLUDP server for " + m_scene.RegionInfo.RegionName); @@ -1173,6 +1220,8 @@ namespace OpenSim.Region.ClientStack.LindenUDP // client.
m_packetSent will be set to true if a packet is sent m_scene.ForEachClient(clientPacketHandler); + m_currentOutgoingClient = null; + // If nothing was sent, sleep for the minimum amount of time before a // token bucket could get more tokens if (!m_packetSent) @@ -1191,6 +1240,8 @@ namespace OpenSim.Region.ClientStack.LindenUDP private void ClientOutgoingPacketHandler(IClientAPI client) { + m_currentOutgoingClient = client; + try { if (client is LLClientView) @@ -1216,8 +1267,8 @@ namespace OpenSim.Region.ClientStack.LindenUDP } catch (Exception ex) { - m_log.Error("[LLUDPSERVER]: OutgoingPacketHandler iteration for " + client.Name + - " threw an exception: " + ex.Message, ex); + m_log.Error( + string.Format("[LLUDPSERVER]: OutgoingPacketHandler iteration for {0} threw ", client.Name), ex); } } @@ -1243,6 +1294,8 @@ namespace OpenSim.Region.ClientStack.LindenUDP { nticks++; watch1.Start(); + m_currentOutgoingClient = client; + try { if (client is LLClientView) @@ -1344,6 +1397,8 @@ namespace OpenSim.Region.ClientStack.LindenUDP // Make sure this client is still alive if (m_scene.TryGetClient(udpClient.AgentID, out client)) { + m_currentIncomingClient = client; + try { // Process this packet @@ -1361,6 +1416,10 @@ namespace OpenSim.Region.ClientStack.LindenUDP m_log.ErrorFormat("[LLUDPSERVER]: Client packet handler for {0} for packet {1} threw an exception", udpClient.AgentID, packet.Type); m_log.Error(e.Message, e); } + finally + { + m_currentIncomingClient = null; + } } else {