From 859b1c5ba41198c57e1e84cf6bbf3e3634cc3d58 Mon Sep 17 00:00:00 2001 From: "Justin Clark-Casey (justincc)" Date: Tue, 9 Sep 2014 18:14:56 +0100 Subject: [PATCH] Add experimental job engine to see if queueing some existing async work during root agent entry to a region improves perf rather than always attempting to execute everything concurrently Job engine is controlled via "debug jobengine start|stop|status". Can only currently be enabled and disabled dynamically at runtime. Currently only applies to code sending initial region data (objects, other avatar data) to a client. --- OpenSim/Framework/Monitoring/JobEngine.cs | 292 ++++++++++++++++++ OpenSim/Framework/Monitoring/Watchdog.cs | 18 ++ .../Region/Framework/Scenes/ScenePresence.cs | 2 +- 3 files changed, 311 insertions(+), 1 deletion(-) create mode 100644 OpenSim/Framework/Monitoring/JobEngine.cs diff --git a/OpenSim/Framework/Monitoring/JobEngine.cs b/OpenSim/Framework/Monitoring/JobEngine.cs new file mode 100644 index 0000000000..b401a5cbe3 --- /dev/null +++ b/OpenSim/Framework/Monitoring/JobEngine.cs @@ -0,0 +1,292 @@ +/* + * Copyright (c) Contributors, http://opensimulator.org/ + * See CONTRIBUTORS.TXT for a full list of copyright holders. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the OpenSimulator Project nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE DEVELOPERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +using System; +using System.Collections.Concurrent; +using System.Reflection; +using System.Threading; +using log4net; +using OpenSim.Framework; + +namespace OpenSim.Framework.Monitoring +{ + public class Job + { + public string Name; + public WaitCallback Callback; + public object O; + + public Job(string name, WaitCallback callback, object o) + { + Name = name; + Callback = callback; + O = o; + } + } + + public class JobEngine + { + private static readonly ILog m_log = LogManager.GetLogger(MethodBase.GetCurrentMethod().DeclaringType); + + public bool IsRunning { get; private set; } + + /// + /// The timeout in milliseconds to wait for at least one event to be written when the recorder is stopping. + /// + public int RequestProcessTimeoutOnStop { get; set; } + + /// + /// Controls whether we need to warn in the log about exceeding the max queue size. + /// + /// + /// This is flipped to false once queue max has been exceeded and back to true when it falls below max, in + /// order to avoid spamming the log with lots of warnings. + /// + private bool m_warnOverMaxQueue = true; + + private BlockingCollection m_requestQueue; + + private CancellationTokenSource m_cancelSource = new CancellationTokenSource(); + + private Stat m_requestsWaitingStat; + + private Job m_currentJob; + + /// + /// Used to signal that we are ready to complete stop. + /// + private ManualResetEvent m_finishedProcessingAfterStop = new ManualResetEvent(false); + + public JobEngine() + { + RequestProcessTimeoutOnStop = 5000; + + MainConsole.Instance.Commands.AddCommand( + "Debug", + false, + "debug jobengine", + "debug jobengine ", + "Start, stop or get status of the job engine.", + "If stopped then all jobs are processed immediately.", + HandleControlCommand); + } + + public void Start() + { + lock (this) + { + if (IsRunning) + return; + + IsRunning = true; + + m_finishedProcessingAfterStop.Reset(); + + m_requestQueue = new BlockingCollection(new ConcurrentQueue(), 5000); + + m_requestsWaitingStat = + new Stat( + "JobsWaiting", + "Number of jobs waiting for processing.", + "", + "", + "server", + "jobengine", + StatType.Pull, + MeasuresOfInterest.None, + stat => stat.Value = m_requestQueue.Count, + StatVerbosity.Debug); + + StatsManager.RegisterStat(m_requestsWaitingStat); + + Watchdog.StartThread( + ProcessRequests, + "JobEngineThread", + ThreadPriority.Normal, + false, + true, + null, + int.MaxValue); + } + } + + public void Stop() + { + lock (this) + { + try + { + if (!IsRunning) + return; + + IsRunning = false; + + int requestsLeft = m_requestQueue.Count; + + if (requestsLeft <= 0) + { + m_cancelSource.Cancel(); + } + else + { + m_log.InfoFormat("[JOB ENGINE]: Waiting to write {0} events after stop.", requestsLeft); + + while (requestsLeft > 0) + { + if (!m_finishedProcessingAfterStop.WaitOne(RequestProcessTimeoutOnStop)) + { + // After timeout no events have been written + if (requestsLeft == m_requestQueue.Count) + { + m_log.WarnFormat( + "[JOB ENGINE]: No requests processed after {0} ms wait. Discarding remaining {1} requests", + RequestProcessTimeoutOnStop, requestsLeft); + + break; + } + } + + requestsLeft = m_requestQueue.Count; + } + } + } + finally + { + m_cancelSource.Dispose(); + StatsManager.DeregisterStat(m_requestsWaitingStat); + m_requestsWaitingStat = null; + m_requestQueue = null; + } + } + } + + public bool QueueRequest(string name, WaitCallback req, object o) + { + m_log.DebugFormat("[JOB ENGINE]: Queued job {0}", name); + + if (m_requestQueue.Count < m_requestQueue.BoundedCapacity) + { + // m_log.DebugFormat( + // "[OUTGOING QUEUE REFILL ENGINE]: Adding request for categories {0} for {1} in {2}", + // categories, client.AgentID, m_udpServer.Scene.Name); + + m_requestQueue.Add(new Job(name, req, o)); + + if (!m_warnOverMaxQueue) + m_warnOverMaxQueue = true; + + return true; + } + else + { + if (m_warnOverMaxQueue) + { +// m_log.WarnFormat( +// "[JOB ENGINE]: Request queue at maximum capacity, not recording request from {0} in {1}", +// client.AgentID, m_udpServer.Scene.Name); + + m_log.WarnFormat("[JOB ENGINE]: Request queue at maximum capacity, not recording job"); + + m_warnOverMaxQueue = false; + } + + return false; + } + } + + private void ProcessRequests() + { + try + { + while (IsRunning || m_requestQueue.Count > 0) + { + m_currentJob = m_requestQueue.Take(m_cancelSource.Token); + + // QueueEmpty callback = req.Client.OnQueueEmpty; + // + // if (callback != null) + // { + // try + // { + // callback(req.Categories); + // } + // catch (Exception e) + // { + // m_log.Error("[OUTGOING QUEUE REFILL ENGINE]: ProcessRequests(" + req.Categories + ") threw an exception: " + e.Message, e); + // } + // } + + m_log.DebugFormat("[JOB ENGINE]: Processing job {0}", m_currentJob.Name); + m_currentJob.Callback.Invoke(m_currentJob.O); + m_log.DebugFormat("[JOB ENGINE]: Processed job {0}", m_currentJob.Name); + m_currentJob = null; + } + } + catch (OperationCanceledException) + { + } + + m_finishedProcessingAfterStop.Set(); + } + + private void HandleControlCommand(string module, string[] args) + { +// if (SceneManager.Instance.CurrentScene != null && SceneManager.Instance.CurrentScene != m_udpServer.Scene) +// return; + + if (args.Length != 3) + { + MainConsole.Instance.Output("Usage: debug jobengine "); + return; + } + + string subCommand = args[2]; + + if (subCommand == "stop") + { + Stop(); + MainConsole.Instance.OutputFormat("Stopped job engine."); + } + else if (subCommand == "start") + { + Start(); + MainConsole.Instance.OutputFormat("Started job engine."); + } + else if (subCommand == "status") + { + MainConsole.Instance.OutputFormat("Job engine running: {0}", IsRunning); + MainConsole.Instance.OutputFormat("Current job {0}", m_currentJob != null ? m_currentJob.Name : "none"); + MainConsole.Instance.OutputFormat( + "Jobs waiting: {0}", IsRunning ? m_requestQueue.Count.ToString() : "n/a"); + } + else + { + MainConsole.Instance.OutputFormat("Unrecognized job engine subcommand {0}", subCommand); + } + } + } +} \ No newline at end of file diff --git a/OpenSim/Framework/Monitoring/Watchdog.cs b/OpenSim/Framework/Monitoring/Watchdog.cs index 0fcb195857..b00aa5c16b 100644 --- a/OpenSim/Framework/Monitoring/Watchdog.cs +++ b/OpenSim/Framework/Monitoring/Watchdog.cs @@ -133,6 +133,8 @@ namespace OpenSim.Framework.Monitoring /// /summary> public static event Action OnWatchdogTimeout; + private static JobEngine m_jobEngine; + /// /// Is this watchdog active? /// @@ -173,6 +175,7 @@ namespace OpenSim.Framework.Monitoring static Watchdog() { + m_jobEngine = new JobEngine(); m_threads = new Dictionary(); m_watchdogTimer = new System.Timers.Timer(WATCHDOG_INTERVAL_MS); m_watchdogTimer.AutoReset = false; @@ -450,5 +453,20 @@ namespace OpenSim.Framework.Monitoring m_watchdogTimer.Start(); } + + public static void RunWhenPossible(string jobType, WaitCallback callback, string name, object obj, bool log = false) + { + if (Util.FireAndForgetMethod == FireAndForgetMethod.RegressionTest) + { + Culture.SetCurrentCulture(); + callback(obj); + return; + } + + if (m_jobEngine.IsRunning) + m_jobEngine.QueueRequest(name, callback, obj); + else + RunInThread(callback, name, obj, log); + } } } \ No newline at end of file diff --git a/OpenSim/Region/Framework/Scenes/ScenePresence.cs b/OpenSim/Region/Framework/Scenes/ScenePresence.cs index 87063c6d4c..2718785127 100644 --- a/OpenSim/Region/Framework/Scenes/ScenePresence.cs +++ b/OpenSim/Region/Framework/Scenes/ScenePresence.cs @@ -3362,7 +3362,7 @@ namespace OpenSim.Region.Framework.Scenes SentInitialDataToClient = true; // Send all scene object to the new client - Watchdog.RunInThread(delegate + Watchdog.RunWhenPossible("SendInitialDataToClient", delegate { // m_log.DebugFormat( // "[SCENE PRESENCE]: Sending initial data to {0} agent {1} in {2}, tp flags {3}",