* Add oar saving timeout

* If an OAR save fails to get responses to all of its asset requests from the asset service, then time out after 60 seconds
* Timeout executes abort, since missing assets in an OAR seems bad
* This means that OAR saves won't hang permanently and can instead be retried if something goes wrong with the asset service
* This is not a solution to mantis 3714.  Hopefully a fix will be along shortly since I can now consistently reproduce that problem
0.6.6-post-fixes
Justin Clarke Casey 2009-06-05 13:48:43 +00:00
parent 9f932a4205
commit bfea077508
5 changed files with 138 additions and 4 deletions

View File

@ -28,18 +28,22 @@
using System; using System;
using System.IO; using System.IO;
using System.Net; using System.Net;
using System.Reflection;
using System.Text; using System.Text;
using System.Xml; using System.Xml;
using System.Xml.Serialization; using System.Xml.Serialization;
using log4net;
namespace OpenSim.Framework.Servers.HttpServer namespace OpenSim.Framework.Servers.HttpServer
{ {
public class AsynchronousRestObjectRequester public class AsynchronousRestObjectRequester
{ {
//private static readonly ILog m_log = LogManager.GetLogger(MethodBase.GetCurrentMethod().DeclaringType);
/// <summary> /// <summary>
/// Perform an asynchronous REST request. /// Perform an asynchronous REST request.
/// </summary> /// </summary>
/// <param name="verb"></param> /// <param name="verb">GET or POST</param>
/// <param name="requestUrl"></param> /// <param name="requestUrl"></param>
/// <param name="obj"></param> /// <param name="obj"></param>
/// <param name="action"></param> /// <param name="action"></param>
@ -52,6 +56,8 @@ namespace OpenSim.Framework.Servers.HttpServer
public static void MakeRequest<TRequest, TResponse>(string verb, public static void MakeRequest<TRequest, TResponse>(string verb,
string requestUrl, TRequest obj, Action<TResponse> action) string requestUrl, TRequest obj, Action<TResponse> action)
{ {
//m_log.DebugFormat("[ASYNC REQUEST]: Starting {0} on {1}", verb, requestUrl);
Type type = typeof (TRequest); Type type = typeof (TRequest);
WebRequest request = WebRequest.Create(requestUrl); WebRequest request = WebRequest.Create(requestUrl);
@ -119,6 +125,8 @@ namespace OpenSim.Framework.Servers.HttpServer
{ {
} }
// m_log.DebugFormat("[ASYNC REQUEST]: Received {0}", deserial.ToString());
action(deserial); action(deserial);
}, null); }, null);
} }

View File

@ -193,7 +193,6 @@ namespace OpenSim
CreatePIDFile(pidFile); CreatePIDFile(pidFile);
userStatsURI = startupConfig.GetString("Stats_URI", String.Empty); userStatsURI = startupConfig.GetString("Stats_URI", String.Empty);
} }
base.StartupSpecific(); base.StartupSpecific();

View File

@ -149,5 +149,13 @@ namespace OpenSim.Region.CoreModules.World.Archiver
if (m_assetsWritten % LOG_ASSET_LOAD_NOTIFICATION_INTERVAL == 0) if (m_assetsWritten % LOG_ASSET_LOAD_NOTIFICATION_INTERVAL == 0)
m_log.InfoFormat("[ARCHIVER]: Added {0} assets to archive", m_assetsWritten); m_log.InfoFormat("[ARCHIVER]: Added {0} assets to archive", m_assetsWritten);
} }
/// <summary>
/// Close the underlying archive writer directly.
/// </summary>
/// <remarks>
/// Only call this if you need to force a close on the underlying writer.
/// </remarks>
public void ForceClose()
{
m_archiveWriter.Close();
}
} }
} }

View File

@ -29,6 +29,7 @@ using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Reflection; using System.Reflection;
using System.Threading; using System.Threading;
using System.Timers;
using log4net; using log4net;
using OpenMetaverse; using OpenMetaverse;
using OpenSim.Framework; using OpenSim.Framework;
@ -44,6 +45,37 @@ namespace OpenSim.Region.CoreModules.World.Archiver
{ {
private static readonly ILog m_log = LogManager.GetLogger(MethodBase.GetCurrentMethod().DeclaringType); private static readonly ILog m_log = LogManager.GetLogger(MethodBase.GetCurrentMethod().DeclaringType);
/// <summary>
/// Lifecycle states of an assets request.
/// </summary>
enum RequestState
{
/// <summary>Value the request state field is initialized with.</summary>
Initial,
/// <summary>Set at the start of Execute().</summary>
Running,
/// <summary>Set when there was nothing to fetch, or once every required reply has been received.</summary>
Completed,
/// <summary>Set by the timeout handler when replies stopped arriving before the request completed.</summary>
Aborted
};
/// <value>
/// Timeout threshold, in milliseconds, if we still need assets or missing asset notifications but have stopped
/// receiving them from the asset service
/// </value>
protected const int TIMEOUT = 60 * 1000;
/// <value>
/// If a timeout does occur, limit the amount of UUID information put to the console.
/// </value>
protected const int MAX_UUID_DISPLAY_ON_TIMEOUT = 3;
/// <value>
/// Timer whose expiry triggers the timeout handling for this request.
/// </value>
protected System.Timers.Timer m_requestCallbackTimer;
/// <value>
/// State of this request
/// </value>
private RequestState m_requestState = RequestState.Initial;
/// <value>
/// Record whether the request has completed.
/// NOTE(review): not referenced in the visible part of this file and appears to overlap with m_requestState;
/// confirm whether it is still needed.
/// </value>
private bool m_requestCompleted;
/// <value> /// <value>
/// uuids to request /// uuids to request
/// </value> /// </value>
@ -85,20 +117,92 @@ namespace OpenSim.Region.CoreModules.World.Archiver
m_assetsRequestCallback = assetsRequestCallback; m_assetsRequestCallback = assetsRequestCallback;
m_assetService = assetService; m_assetService = assetService;
m_repliesRequired = uuids.Count; m_repliesRequired = uuids.Count;
m_requestCallbackTimer = new System.Timers.Timer(TIMEOUT);
m_requestCallbackTimer.AutoReset = false;
m_requestCallbackTimer.Elapsed += new ElapsedEventHandler(OnRequestCallbackTimeout);
} }
/// <summary>
/// Start the request: mark it as running and ask the asset service for every requested uuid.
/// </summary>
/// <remarks>
/// If there is nothing to fetch, the request is completed immediately and the assets request callback is
/// performed without arming the timeout timer.
/// </remarks>
protected internal void Execute()
{
    m_requestState = RequestState.Running;

    m_log.DebugFormat("[ARCHIVER]: AssetsRequest executed looking for {0} assets", m_repliesRequired);

    // We can stop here if there are no assets to fetch
    if (m_repliesRequired == 0)
    {
        m_requestState = RequestState.Completed;
        PerformAssetsRequestCallback();
        return;
    }

    // Arm the timeout before issuing any request.  Each asset callback stops the timer and only restarts it
    // while replies are still outstanding, so arming it after the loop could leave it running (and later
    // firing) for a request whose replies had all arrived before the loop finished.
    m_requestCallbackTimer.Enabled = true;

    foreach (UUID uuid in m_uuids)
    {
        m_assetService.Get(uuid.ToString(), this, AssetRequestCallback);
    }
}
/// <summary>
/// Handle expiry of the request callback timer, i.e. the asset service has stopped sending replies before
/// every requested asset was accounted for.
/// </summary>
/// <remarks>
/// Marks the request as aborted, logs how many assets were never reported on (listing at most
/// MAX_UUID_DISPLAY_ON_TIMEOUT of them) and force-closes the archive.  If the request turns out to have
/// completed already, nothing is aborted and the archive is left untouched.
/// </remarks>
/// <param name="source">Sender of the timer event (unused)</param>
/// <param name="args">Timer event data (unused)</param>
protected void OnRequestCallbackTimeout(object source, ElapsedEventArgs args)
{
    // An early return from inside the try block still runs the finally block, so the forced close must be
    // guarded explicitly; otherwise a request that completed normally would have its archive closed here.
    bool timedOut = true;

    try
    {
        lock (this)
        {
            // Take care of the possibility that this thread started but was paused just outside the lock before
            // the final request came in (assuming that such a thing is possible)
            if (m_requestState == RequestState.Completed)
            {
                timedOut = false;
                return;
            }

            m_requestState = RequestState.Aborted;
        }

        // Calculate which uuids were not found.  This is an expensive way of doing it, but this is a failure
        // case anyway.
        List<UUID> uuids = new List<UUID>();
        foreach (UUID uuid in m_uuids)
        {
            uuids.Add(uuid);
        }

        foreach (UUID uuid in m_foundAssetUuids)
        {
            uuids.Remove(uuid);
        }

        foreach (UUID uuid in m_notFoundAssetUuids)
        {
            uuids.Remove(uuid);
        }

        m_log.ErrorFormat(
            "[ARCHIVER]: Asset service failed to return information about {0} requested assets", uuids.Count);

        // Only name the first few unanswered uuids so that the console is not flooded
        int i = 0;
        foreach (UUID uuid in uuids)
        {
            m_log.ErrorFormat("[ARCHIVER]: No information about asset {0} received", uuid);

            if (++i >= MAX_UUID_DISPLAY_ON_TIMEOUT)
                break;
        }

        if (uuids.Count > MAX_UUID_DISPLAY_ON_TIMEOUT)
            m_log.ErrorFormat(
                "[ARCHIVER]: (... {0} more not shown)", uuids.Count - MAX_UUID_DISPLAY_ON_TIMEOUT);

        m_log.Error("[ARCHIVER]: OAR save aborted.");
    }
    catch (Exception e)
    {
        m_log.ErrorFormat("[ARCHIVER]: Timeout handler exception {0}", e);
    }
    finally
    {
        // Close the archive even if the reporting above threw, but never for a request that had completed
        if (timedOut)
            m_assetsArchiver.ForceClose();
    }
}
/// <summary> /// <summary>
@ -114,6 +218,15 @@ namespace OpenSim.Region.CoreModules.World.Archiver
{ {
//m_log.DebugFormat("[ARCHIVER]: Received callback for asset {0}", id); //m_log.DebugFormat("[ARCHIVER]: Received callback for asset {0}", id);
m_requestCallbackTimer.Stop();
if (m_requestState == RequestState.Aborted)
{
m_log.WarnFormat(
"[ARCHIVER]: Received information about asset {0} after archive save abortion. Ignoring.",
id);
}
if (asset != null) if (asset != null)
{ {
m_foundAssetUuids.Add(asset.FullID); m_foundAssetUuids.Add(asset.FullID);
@ -126,6 +239,8 @@ namespace OpenSim.Region.CoreModules.World.Archiver
if (m_foundAssetUuids.Count + m_notFoundAssetUuids.Count == m_repliesRequired) if (m_foundAssetUuids.Count + m_notFoundAssetUuids.Count == m_repliesRequired)
{ {
m_requestState = RequestState.Completed;
m_log.DebugFormat( m_log.DebugFormat(
"[ARCHIVER]: Successfully added {0} assets ({1} assets notified missing)", "[ARCHIVER]: Successfully added {0} assets ({1} assets notified missing)",
m_foundAssetUuids.Count, m_notFoundAssetUuids.Count); m_foundAssetUuids.Count, m_notFoundAssetUuids.Count);
@ -136,6 +251,10 @@ namespace OpenSim.Region.CoreModules.World.Archiver
newThread.Name = "OpenSimulator archiving thread post assets receipt"; newThread.Name = "OpenSimulator archiving thread post assets receipt";
newThread.Start(); newThread.Start();
} }
else
{
m_requestCallbackTimer.Start();
}
} }
} }
catch (Exception e) catch (Exception e)