OpenSim.Modules.EMail/src/MailKit/MessageThreader.cs

604 lines
18 KiB
C#

//
// MessageThreader.cs
//
// Author: Jeffrey Stedfast <jestedfa@microsoft.com>
//
// Copyright (c) 2013-2020 .NET Foundation and Contributors
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
using System;
using System.Text;
using System.Collections.Generic;
using MimeKit;
using MimeKit.Utils;
using MailKit.Search;
namespace MailKit {
/// <summary>
/// Threads messages according to the algorithms defined in rfc5256.
/// </summary>
/// <remarks>
/// Threads messages according to the algorithms defined in rfc5256.
/// </remarks>
public static class MessageThreader
{
/// <summary>
/// A node in the threading tree.
/// </summary>
/// <remarks>
/// A node either wraps a message summary or, when <see cref="Message"/> is <c>null</c>,
/// acts as a placeholder container. The summary values that this node exposes are read
/// from <see cref="Message"/> when it is set and from the first child node otherwise;
/// every other <see cref="IMessageSummary"/> property returns <c>null</c>.
/// </remarks>
internal class ThreadableNode : IMessageSummary
{
    public readonly List<ThreadableNode> Children = new List<ThreadableNode> ();
    public IMessageSummary Message;
    public ThreadableNode Parent;

    public ThreadableNode (IMessageSummary message)
    {
        Message = message;
    }

    // The summary that supplies this node's values: the wrapped message when there
    // is one, otherwise the first child (indexing throws if there are no children).
    IMessageSummary Representative => Message ?? Children[0];

    public bool HasParent => Parent != null;

    public bool HasChildren => Children.Count > 0;

    public IMailFolder Folder => null;

    public MessageSummaryItems Fields =>
        MessageSummaryItems.UniqueId | MessageSummaryItems.Envelope | MessageSummaryItems.ModSeq | MessageSummaryItems.Size;

    public BodyPart Body => null;

    public BodyPartText TextBody => null;

    public BodyPartText HtmlBody => null;

    public IEnumerable<BodyPartBasic> BodyParts => null;

    public IEnumerable<BodyPartBasic> Attachments => null;

    public string PreviewText => null;

    public Envelope Envelope => Representative.Envelope;

    public string NormalizedSubject => Representative.NormalizedSubject;

    public DateTimeOffset Date => Representative.Date;

    // A placeholder node is never considered a reply, regardless of its children.
    public bool IsReply => Message != null && Message.IsReply;

    public MessageFlags? Flags => null;

    public HashSet<string> Keywords => null;

    [Obsolete]
    public HashSet<string> UserFlags => null;

    public IList<Annotation> Annotations => Representative.Annotations;

    public HeaderList Headers => null;

    public DateTimeOffset? InternalDate => null;

    public uint? Size => Representative.Size;

    public ulong? ModSeq => Representative.ModSeq;

    public MessageIdList References => Representative.References;

    public string EmailId => null;

    [Obsolete]
    public string Id => null;

    public string ThreadId => null;

    public UniqueId UniqueId => Representative.UniqueId;

    public int Index => Representative.Index;

    public ulong? GMailMessageId => null;

    public ulong? GMailThreadId => null;

    public IList<string> GMailLabels => null;
}
/// <summary>
/// Builds the Message-Id table used for threading by references.
/// </summary>
/// <remarks>
/// Every message gets a node keyed by its Message-Id (a generated id is used when the
/// Message-Id is missing or is a duplicate of one already seen). Every id listed in a
/// message's References gets a node as well, created as an empty placeholder if no message
/// with that id has been seen. The references are chained parent-to-child and the message
/// is attached beneath the last reference. A link is never created if it would introduce a
/// cycle.
/// </remarks>
/// <returns>The table mapping message ids to nodes.</returns>
/// <param name="messages">The messages to thread.</param>
/// <exception cref="System.ArgumentException">
/// One or more of the <paramref name="messages"/> has a <c>null</c> Envelope or a
/// <c>null</c> References list.
/// </exception>
static IDictionary<string, ThreadableNode> CreateIdTable (IEnumerable<IMessageSummary> messages)
{
    var ids = new Dictionary<string, ThreadableNode> (StringComparer.OrdinalIgnoreCase);
    ThreadableNode node;

    foreach (var message in messages) {
        // Both the envelope and the references are required here; report a missing value
        // as an ArgumentException rather than failing later with a NullReferenceException.
        if (message.Envelope == null || message.References == null)
            throw new ArgumentException ("One or more messages is missing information needed for threading.", nameof (messages));

        var id = message.Envelope.MessageId;

        if (string.IsNullOrEmpty (id))
            id = MimeUtils.GenerateMessageId ();

        // Note: TryGetValue sets node to null when the id is not in the table.
        if (ids.TryGetValue (id, out node)) {
            if (node.Message == null) {
                // a previously processed message referenced this message
                node.Message = message;
            } else {
                // a duplicate message-id, just create a dummy id and use that
                id = MimeUtils.GenerateMessageId ();
                node = null;
            }
        }

        if (node == null) {
            // create a new node for this message and add it to ids
            node = new ThreadableNode (message);
            ids.Add (id, node);
        }

        ThreadableNode parent = null;

        foreach (var reference in message.References) {
            ThreadableNode referenced;

            if (!ids.TryGetValue (reference, out referenced)) {
                // create a dummy container for the referenced message
                referenced = new ThreadableNode (null);
                ids.Add (reference, referenced);
            }

            // Chain up the references. An existing parent link is never overridden, and the
            // link is skipped if 'referenced' is 'parent' itself or one of its ancestors,
            // since attaching it beneath 'parent' would then close a loop.
            if (parent != null && referenced.Parent == null && !IsSelfOrAncestor (referenced, parent)) {
                parent.Children.Add (referenced);
                referenced.Parent = parent;
            }

            parent = referenced;
        }

        // Don't allow loops: the message cannot be attached beneath itself or beneath any
        // of its own descendants (at any depth, not just its direct children).
        if (parent != null && IsSelfOrAncestor (node, parent))
            parent = null;

        if (node.HasParent) {
            // unlink from our old parent
            node.Parent.Children.Remove (node);
            node.Parent = null;
        }

        if (parent != null) {
            // add it as a child of our new parent
            parent.Children.Add (node);
            node.Parent = parent;
        }
    }

    return ids;
}

/// <summary>
/// Checks whether <paramref name="candidate"/> is <paramref name="node"/> itself or one of its ancestors.
/// </summary>
/// <remarks>
/// Walks the Parent links upward from <paramref name="node"/>. This terminates because
/// <see cref="CreateIdTable"/> consults it before creating any link, so the parent chains
/// it builds never contain a cycle.
/// </remarks>
static bool IsSelfOrAncestor (ThreadableNode candidate, ThreadableNode node)
{
    for (var current = node; current != null; current = current.Parent) {
        if (current == candidate)
            return true;
    }

    return false;
}
/// <summary>
/// Creates a placeholder root node whose children are all of the nodes in the
/// id table that do not have a parent.
/// </summary>
/// <returns>The root node.</returns>
/// <param name="ids">The table of nodes keyed by message id.</param>
static ThreadableNode CreateRoot (IDictionary<string, ThreadableNode> ids)
{
    var root = new ThreadableNode (null);
    var topLevel = root.Children;

    foreach (var candidate in ids.Values) {
        // nodes that already hang off of another node are reachable through it
        if (candidate.HasParent)
            continue;

        topLevel.Add (candidate);
    }

    return root;
}
/// <summary>
/// Removes placeholder nodes (nodes without a message) from the children of the given node.
/// </summary>
/// <remarks>
/// A placeholder without children is dropped. A placeholder with children is replaced by
/// its children when it has a parent or has exactly one child; the promoted children are
/// appended to the end of the list and are therefore visited later by the same loop.
/// Any other node that has children is pruned recursively.
/// </remarks>
/// <param name="root">The node whose children should be pruned.</param>
static void PruneEmptyContainers (ThreadableNode root)
{
    var siblings = root.Children;
    int position = 0;

    while (position < siblings.Count) {
        var node = siblings[position];

        if (node.Message == null) {
            if (!node.HasChildren) {
                // an empty container with no children: remove it and re-examine this slot
                siblings.RemoveAt (position);
                continue;
            }

            if (node.HasParent || node.Children.Count == 1) {
                // Splice the children into the current list in place of the container.
                // Children are not promoted into a parentless (top-level) list unless
                // there is only one of them.
                siblings.RemoveAt (position);

                foreach (var child in node.Children) {
                    child.Parent = node.Parent;
                    siblings.Add (child);
                }

                node.Children.Clear ();
                continue;
            }
        }

        if (node.HasChildren)
            PruneEmptyContainers (node);

        position++;
    }
}
/// <summary>
/// Merges the direct children of the given node that share the same normalized subject.
/// </summary>
/// <remarks>
/// <para>The first pass builds a table that maps each non-empty normalized subject (compared
/// case-insensitively) to one representative child. The second pass removes every other
/// child with that subject from <c>root.Children</c> and merges it with the representative.</para>
/// <para>Throughout this method a "dummy" is a node whose <c>Message</c> is <c>null</c>.</para>
/// </remarks>
/// <param name="root">The node whose children should be grouped.</param>
static void GroupBySubject (ThreadableNode root)
{
var subjects = new Dictionary<string, ThreadableNode> (StringComparer.OrdinalIgnoreCase);
ThreadableNode match;
int count = 0;
// Pass 1: choose the representative node for each subject.
for (int i = 0; i < root.Children.Count; i++) {
var current = root.Children[i];
var subject = current.NormalizedSubject;
// don't thread messages with empty subjects
if (string.IsNullOrEmpty (subject))
continue;
// The current node becomes the representative when the subject has not been seen yet,
// when it is a dummy and the existing representative is not, or when the existing
// representative is a reply and the current node is a non-reply message.
if (!subjects.TryGetValue (subject, out match) ||
(current.Message == null && match.Message != null) ||
(match.Message != null && match.Message.IsReply &&
current.Message != null && !current.Message.IsReply)) {
subjects[subject] = current;
count++;
}
}
// nothing was added to the subject table, so there is nothing to group
if (count == 0)
return;
// Pass 2: fold every non-representative node into the representative for its subject.
for (int i = 0; i < root.Children.Count; i++) {
var current = root.Children[i];
var subject = current.NormalizedSubject;
// don't thread messages with empty subjects
if (string.IsNullOrEmpty (subject))
continue;
match = subjects[subject];
if (match == current)
continue;
// remove the second message with the same subject
// (the post-decrement makes the loop re-visit this index, which now holds the next child)
root.Children.RemoveAt (i--);
// group these messages together...
if (match.Message == null && current.Message == null) {
// If both messages are dummies, append the current message's children
// to the children of the message in the subject table (the children of
// both messages become siblings), and then delete the current message.
match.Children.AddRange (current.Children);
} else if (match.Message == null && current.Message != null) {
// If the message in the subject table is a dummy and the current message
// is not, make the current message a child of the message in the subject
// table (a sibling of its children).
match.Children.Add (current);
} else if (current.Message.IsReply && !match.Message.IsReply) {
// If the current message is a reply or forward and the message in the
// subject table is not, make the current message a child of the message
// in the subject table (a sibling of its children).
match.Children.Add (current);
} else {
// Otherwise, create a new dummy message and make both the current message
// and the message in the subject table children of the dummy. Then replace
// the message in the subject table with the dummy message.
// Note: if we re-use the node already in the subject table and the root, then
// we won't have to insert the new dummy node at the matched node's location
var dummy = match;
// clone the message already in the subject table
match = new ThreadableNode (dummy.Message);
match.Children.AddRange (dummy.Children);
// empty out the old match node (aka the new dummy node)
dummy.Children.Clear ();
dummy.Message = null;
// now add both messages to the dummy
dummy.Children.Add (match);
dummy.Children.Add (current);
}
}
}
/// <summary>
/// Converts the children of the given node into message threads, recursively.
/// </summary>
/// <remarks>
/// The children are sorted in place using the requested ordering before they are converted,
/// and each resulting thread is appended to <paramref name="threads"/>.
/// </remarks>
/// <param name="root">The node whose children should be converted.</param>
/// <param name="threads">The list that receives the threads.</param>
/// <param name="orderBy">The requested sort ordering.</param>
static void GetThreads (ThreadableNode root, IList<MessageThread> threads, IList<OrderBy> orderBy)
{
    var nodes = root.Children;

    nodes.Sort (orderBy);

    foreach (var node in nodes) {
        var thread = new MessageThread (node.Message);

        // fill in the sub-threads before handing the thread to the caller's list
        GetThreads (node, thread.Children, orderBy);
        threads.Add (thread);
    }
}
/// <summary>
/// Threads the messages by their references: builds the id table and root node, prunes
/// placeholder nodes, groups the top-level nodes by subject and converts the result into threads.
/// </summary>
/// <returns>The threaded messages.</returns>
/// <param name="messages">The messages.</param>
/// <param name="orderBy">The requested sort ordering.</param>
static IList<MessageThread> ThreadByReferences (IEnumerable<IMessageSummary> messages, IList<OrderBy> orderBy)
{
    var root = CreateRoot (CreateIdTable (messages));

    PruneEmptyContainers (root);
    GroupBySubject (root);

    var result = new List<MessageThread> ();
    GetThreads (root, result, orderBy);

    return result;
}
/// <summary>
/// Threads the messages by subject only: every message becomes a top-level node, the nodes
/// are grouped by subject and the result is converted into threads.
/// </summary>
/// <returns>The threaded messages.</returns>
/// <param name="messages">The messages.</param>
/// <param name="orderBy">The requested sort ordering.</param>
/// <exception cref="System.ArgumentException">
/// One or more of the <paramref name="messages"/> has a <c>null</c> Envelope.
/// </exception>
static IList<MessageThread> ThreadBySubject (IEnumerable<IMessageSummary> messages, IList<OrderBy> orderBy)
{
    var root = new ThreadableNode (null);

    foreach (var summary in messages) {
        if (summary.Envelope == null)
            throw new ArgumentException ("One or more messages is missing information needed for threading.", nameof (messages));

        root.Children.Add (new ThreadableNode (summary));
    }

    GroupBySubject (root);

    var result = new List<MessageThread> ();
    GetThreads (root, result, orderBy);

    return result;
}
/// <summary>
/// Thread the messages using the specified threading algorithm, ordered by arrival.
/// </summary>
/// <remarks>
/// This is a convenience overload that uses <see cref="OrderBy.Arrival"/> as the only sort ordering.
/// </remarks>
/// <returns>The threaded messages.</returns>
/// <param name="messages">The messages.</param>
/// <param name="algorithm">The threading algorithm.</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="messages"/> is <c>null</c>.
/// </exception>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="algorithm"/> is not a valid threading algorithm.
/// </exception>
/// <exception cref="System.ArgumentException">
/// <paramref name="messages"/> contains one or more items that is missing information needed for threading.
/// </exception>
public static IList<MessageThread> Thread (this IEnumerable<IMessageSummary> messages, ThreadingAlgorithm algorithm)
{
    IList<OrderBy> arrivalOrder = new OrderBy[] { OrderBy.Arrival };

    return Thread (messages, algorithm, arrivalOrder);
}
/// <summary>
/// Thread the messages using the specified threading algorithm and sort
/// the resulting threads using the specified ordering.
/// </summary>
/// <remarks>
/// The arguments are validated and the work is then dispatched to the
/// implementation of the requested algorithm.
/// </remarks>
/// <returns>The threaded messages.</returns>
/// <param name="messages">The messages.</param>
/// <param name="algorithm">The threading algorithm.</param>
/// <param name="orderBy">The requested sort ordering.</param>
/// <exception cref="System.ArgumentNullException">
/// <para><paramref name="messages"/> is <c>null</c>.</para>
/// <para>-or-</para>
/// <para><paramref name="orderBy"/> is <c>null</c>.</para>
/// </exception>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="algorithm"/> is not a valid threading algorithm.
/// </exception>
/// <exception cref="System.ArgumentException">
/// <para><paramref name="messages"/> contains one or more items that is missing information needed for threading or sorting.</para>
/// <para>-or-</para>
/// <para><paramref name="orderBy"/> is an empty list.</para>
/// </exception>
public static IList<MessageThread> Thread (this IEnumerable<IMessageSummary> messages, ThreadingAlgorithm algorithm, IList<OrderBy> orderBy)
{
    if (messages == null)
        throw new ArgumentNullException (nameof (messages));

    if (orderBy == null)
        throw new ArgumentNullException (nameof (orderBy));

    if (orderBy.Count == 0)
        throw new ArgumentException ("No sort order provided.", nameof (orderBy));

    if (algorithm == ThreadingAlgorithm.OrderedSubject)
        return ThreadBySubject (messages, orderBy);

    if (algorithm == ThreadingAlgorithm.References)
        return ThreadByReferences (messages, orderBy);

    throw new ArgumentOutOfRangeException (nameof (algorithm));
}
/// <summary>
/// Checks whether the 4 characters starting at <paramref name="index"/> spell "Fwd:"
/// (the letters are matched in either case).
/// </summary>
/// <remarks>
/// No bounds checking is done here; the characters are read left to right and the
/// check stops at the first mismatch.
/// </remarks>
/// <returns><c>true</c> if the text at the index is a "Fwd:" prefix; otherwise, <c>false</c>.</returns>
/// <param name="subject">The subject text.</param>
/// <param name="index">The index of the first character to examine.</param>
static bool IsForward (string subject, int index)
{
    char first = subject[index];
    if (first != 'F' && first != 'f')
        return false;

    char second = subject[index + 1];
    if (second != 'W' && second != 'w')
        return false;

    char third = subject[index + 2];
    if (third != 'D' && third != 'd')
        return false;

    return subject[index + 3] == ':';
}
/// <summary>
/// Checks whether the 2 characters starting at <paramref name="index"/> spell "Re"
/// (matched in either case).
/// </summary>
/// <remarks>
/// No bounds checking is done here, and whatever follows the two letters is not examined.
/// </remarks>
/// <returns><c>true</c> if the text at the index starts with "Re"; otherwise, <c>false</c>.</returns>
/// <param name="subject">The subject text.</param>
/// <param name="index">The index of the first character to examine.</param>
static bool IsReply (string subject, int index)
{
    char first = subject[index];
    if (first != 'R' && first != 'r')
        return false;

    char second = subject[index + 1];

    return second == 'E' || second == 'e';
}
/// <summary>
/// Advances <paramref name="index"/> past any white space, stopping at the first
/// non-white-space character or at the end of the subject.
/// </summary>
/// <param name="subject">The subject text.</param>
/// <param name="index">The index to advance.</param>
static void SkipWhiteSpace (string subject, ref int index)
{
    for (; index < subject.Length; index++) {
        if (!char.IsWhiteSpace (subject[index]))
            break;
    }
}
/// <summary>
/// Checks whether the character may appear in a mailing-list name: a hyphen,
/// an underscore, or any letter or digit.
/// </summary>
/// <returns><c>true</c> if the character is allowed; otherwise, <c>false</c>.</returns>
/// <param name="c">The character to check.</param>
static bool IsMailingListName (char c)
{
    switch (c) {
    case '-':
    case '_':
        return true;
    default:
        return char.IsLetterOrDigit (c);
    }
}
/// <summary>
/// Advances <paramref name="index"/> past any characters accepted by
/// <see cref="IsMailingListName"/>, stopping at the first other character
/// or at the end of the subject.
/// </summary>
/// <param name="subject">The subject text.</param>
/// <param name="index">The index to advance.</param>
static void SkipMailingListName (string subject, ref int index)
{
    for (; index < subject.Length; index++) {
        if (!IsMailingListName (subject[index]))
            break;
    }
}
/// <summary>
/// Parses a run of ASCII decimal digits starting at <paramref name="index"/>.
/// </summary>
/// <remarks>
/// <para>Only the characters '0' through '9' are accepted. <c>char.IsDigit</c> is
/// deliberately not used because it also accepts other Unicode decimal digits, for which
/// subtracting '0' does not yield the digit's value.</para>
/// <para>If the digits describe a number larger than <see cref="int.MaxValue"/>, the value
/// saturates at <see cref="int.MaxValue"/> instead of overflowing; the remaining digits
/// are still consumed.</para>
/// </remarks>
/// <returns><c>true</c> if at least one digit was consumed; otherwise, <c>false</c>.</returns>
/// <param name="subject">The subject text.</param>
/// <param name="index">The index to start at; on return, the index of the first character after the digits.</param>
/// <param name="value">The parsed value, or 0 if no digits were consumed.</param>
static bool SkipDigits (string subject, ref int index, out int value)
{
    int startIndex = index;

    value = 0;

    while (index < subject.Length) {
        char c = subject[index];

        if (c < '0' || c > '9')
            break;

        int digit = c - '0';

        // (value * 10) + digit fits in an int only while value <= (int.MaxValue - digit) / 10
        if (value > (int.MaxValue - digit) / 10)
            value = int.MaxValue;
        else
            value = (value * 10) + digit;

        index++;
    }

    return index > startIndex;
}
/// <summary>
/// Gets the threadable form of a subject.
/// </summary>
/// <remarks>
/// <para>Strips any leading sequence of "Fwd:", "Re:", "Re[n]:" / "Re(n):" and "[list-name]"
/// prefixes (along with the white space around them), trims trailing white space and
/// collapses each run of white space in the remainder into a single space.</para>
/// <para>The reply depth is increased by one for every "Fwd:" or "Re:" prefix and by <c>n</c>
/// for every "Re[n]:" or "Re(n):" prefix. A remainder equal to "(no subject)", ignoring
/// case, is returned as the empty string.</para>
/// </remarks>
/// <returns>The threadable subject.</returns>
/// <param name="subject">The Subject header value.</param>
/// <param name="replyDepth">The reply depth.</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="subject"/> is <c>null</c>.
/// </exception>
public static string GetThreadableSubject (string subject, out int replyDepth)
{
    if (subject == null)
        throw new ArgumentNullException (nameof (subject));

    int end = subject.Length;
    int start = 0;

    replyDepth = 0;

    while (true) {
        SkipWhiteSpace (subject, ref start);

        int remaining = end - start;

        // every recognized prefix needs at least 3 characters
        if (remaining < 3)
            break;

        if (remaining >= 4 && IsForward (subject, start)) {
            // skip over the "Fwd:" prefix
            start += 4;
            replyDepth++;
            continue;
        }

        if (IsReply (subject, start)) {
            char marker = subject[start + 2];

            if (marker == ':') {
                // skip over the "Re:" prefix
                start += 3;
                replyDepth++;
                continue;
            }

            if (marker == '[' || marker == '(') {
                char closer = marker == '[' ? ']' : ')';
                int cursor = start + 3; // just past "Re[" or "Re("
                int depth;

                // "Re[###]:" or "Re(###):" is a condensed form of repeated "Re:" prefixes
                if (SkipDigits (subject, ref cursor, out depth) && (end - cursor) >= 2 &&
                    subject[cursor] == closer && subject[cursor + 1] == ':') {
                    start = cursor + 2;
                    replyDepth += depth;
                    continue;
                }
            }
        } else if (subject[start] == '[' && char.IsLetterOrDigit (subject[start + 1])) {
            // possibly a mailing-list prefix
            int cursor = start + 2;

            SkipMailingListName (subject, ref cursor);

            if (cursor < end && subject[cursor] == ']') {
                start = cursor + 1;
                continue;
            }
        }

        // nothing more to strip
        break;
    }

    // trim trailing whitespace
    while (end > 0 && char.IsWhiteSpace (subject[end - 1]))
        end--;

    // canonicalize what is left, writing a single space for each run of white space
    var canonical = new StringBuilder ();
    bool inWhiteSpaceRun = false;

    for (int i = start; i < end; i++) {
        char c = subject[i];

        if (!char.IsWhiteSpace (c)) {
            canonical.Append (c);
            inWhiteSpaceRun = false;
            continue;
        }

        if (inWhiteSpaceRun)
            continue;

        canonical.Append (' ');
        inWhiteSpaceRun = true;
    }

    var result = canonical.ToString ();

    return result.Equals ("(no subject)", StringComparison.OrdinalIgnoreCase) ? string.Empty : result;
}
}
}