C # 中的自然排序顺序

有没有人有一个好的资源或者提供一个自然顺序排序的例子在 C # 中用于 FileInfo数组?我正在以自己的方式实现 IComparer接口。

65441 次浏览

最简单的方法就是 P/调用 Windows 中的内置函数,并将其用作 IComparer中的比较函数:

[DllImport("shlwapi.dll", CharSet = CharSet.Unicode)]
private static extern int StrCmpLogicalW(string psz1, string psz2);

迈克尔卡普兰有一些 这个函数如何工作的例子,并作出了改变,使 Vista 更直观地工作。这个函数的优点在于,它将具有与运行它的 Windows 版本相同的行为,但这确实意味着不同版本的 Windows 之间存在差异,所以你需要考虑这是否会给你带来问题。

所以一个完整的实现应该是这样的:

[SuppressUnmanagedCodeSecurity]
internal static class SafeNativeMethods
{
[DllImport("shlwapi.dll", CharSet = CharSet.Unicode)]
public static extern int StrCmpLogicalW(string psz1, string psz2);
}


public sealed class NaturalStringComparer : IComparer<string>
{
public int Compare(string a, string b)
{
return SafeNativeMethods.StrCmpLogicalW(a, b);
}
}


public sealed class NaturalFileInfoNameComparer : IComparer<FileInfo>
{
public int Compare(FileInfo a, FileInfo b)
{
return SafeNativeMethods.StrCmpLogicalW(a.Name, b.Name);
}
}

您确实需要小心——我依稀记得读到过 StrCmpLogicalW 或类似的东西并不是严格传递的,我已经观察到了。NET 的排序方法,如果比较函数打破了这个规则,有时会陷入无限循环。

传递比较总是报告 a < c,如果 a < b 和 b < c。存在一个函数,它执行自然的排序顺序比较,并不总是满足这个条件,但是我想不起来它是 StrCmpLogicalW 还是其他什么。

添加到 Greg Beech 的回答(因为我刚刚在搜索它) ,如果您想从 Linq 中使用它,您可以使用接受 IComparerOrderBy。例如:

var items = new List<MyItem>();


// fill items


var sorted = items.OrderBy(item => item.Name, new NaturalStringComparer());

Linq orderby 的纯 C # 解决方案:

Http://zootfroot.blogspot.com/2009/09/natural-sort-compare-with-linq-orderby.html

public class NaturalSortComparer<T> : IComparer<string>, IDisposable
{
private bool isAscending;


public NaturalSortComparer(bool inAscendingOrder = true)
{
this.isAscending = inAscendingOrder;
}


#region IComparer<string> Members


public int Compare(string x, string y)
{
throw new NotImplementedException();
}


#endregion


#region IComparer<string> Members


int IComparer<string>.Compare(string x, string y)
{
if (x == y)
return 0;


string[] x1, y1;


if (!table.TryGetValue(x, out x1))
{
x1 = Regex.Split(x.Replace(" ", ""), "([0-9]+)");
table.Add(x, x1);
}


if (!table.TryGetValue(y, out y1))
{
y1 = Regex.Split(y.Replace(" ", ""), "([0-9]+)");
table.Add(y, y1);
}


int returnVal;


for (int i = 0; i < x1.Length && i < y1.Length; i++)
{
if (x1[i] != y1[i])
{
returnVal = PartCompare(x1[i], y1[i]);
return isAscending ? returnVal : -returnVal;
}
}


if (y1.Length > x1.Length)
{
returnVal = 1;
}
else if (x1.Length > y1.Length)
{
returnVal = -1;
}
else
{
returnVal = 0;
}


return isAscending ? returnVal : -returnVal;
}


private static int PartCompare(string left, string right)
{
int x, y;
if (!int.TryParse(left, out x))
return left.CompareTo(right);


if (!int.TryParse(right, out y))
return left.CompareTo(right);


return x.CompareTo(y);
}


#endregion


private Dictionary<string, string[]> table = new Dictionary<string, string[]>();


public void Dispose()
{
table.Clear();
table = null;
}
}

没有一个现有的实现看起来很棒,所以我自己编写了一个。其结果与现代版本的文件资源管理器(Windows 7/8)所使用的排序方式几乎完全相同。我看到的唯一区别是1)尽管 Windows 过去(例如 XP)处理任何长度的数字,现在它被限制在19位——我的是无限制的,2) Windows 给出的结果与某些 Unicode 数字集不一致——我的工作正常(尽管它不会从数字上比较代理对的数字; Windows 也不会) ,3)我不能区分不同类型的非主排序权重,如果它们出现在不同的部分(例如“ e-1é”vs“ é1e-”-前后的部分有分音符号和标点符号权重的差异)。

public static int CompareNatural(string strA, string strB) {
return CompareNatural(strA, strB, CultureInfo.CurrentCulture, CompareOptions.IgnoreCase);
}


public static int CompareNatural(string strA, string strB, CultureInfo culture, CompareOptions options) {
CompareInfo cmp = culture.CompareInfo;
int iA = 0;
int iB = 0;
int softResult = 0;
int softResultWeight = 0;
while (iA < strA.Length && iB < strB.Length) {
bool isDigitA = Char.IsDigit(strA[iA]);
bool isDigitB = Char.IsDigit(strB[iB]);
if (isDigitA != isDigitB) {
return cmp.Compare(strA, iA, strB, iB, options);
}
else if (!isDigitA && !isDigitB) {
int jA = iA + 1;
int jB = iB + 1;
while (jA < strA.Length && !Char.IsDigit(strA[jA])) jA++;
while (jB < strB.Length && !Char.IsDigit(strB[jB])) jB++;
int cmpResult = cmp.Compare(strA, iA, jA - iA, strB, iB, jB - iB, options);
if (cmpResult != 0) {
// Certain strings may be considered different due to "soft" differences that are
// ignored if more significant differences follow, e.g. a hyphen only affects the
// comparison if no other differences follow
string sectionA = strA.Substring(iA, jA - iA);
string sectionB = strB.Substring(iB, jB - iB);
if (cmp.Compare(sectionA + "1", sectionB + "2", options) ==
cmp.Compare(sectionA + "2", sectionB + "1", options))
{
return cmp.Compare(strA, iA, strB, iB, options);
}
else if (softResultWeight < 1) {
softResult = cmpResult;
softResultWeight = 1;
}
}
iA = jA;
iB = jB;
}
else {
char zeroA = (char)(strA[iA] - (int)Char.GetNumericValue(strA[iA]));
char zeroB = (char)(strB[iB] - (int)Char.GetNumericValue(strB[iB]));
int jA = iA;
int jB = iB;
while (jA < strA.Length && strA[jA] == zeroA) jA++;
while (jB < strB.Length && strB[jB] == zeroB) jB++;
int resultIfSameLength = 0;
do {
isDigitA = jA < strA.Length && Char.IsDigit(strA[jA]);
isDigitB = jB < strB.Length && Char.IsDigit(strB[jB]);
int numA = isDigitA ? (int)Char.GetNumericValue(strA[jA]) : 0;
int numB = isDigitB ? (int)Char.GetNumericValue(strB[jB]) : 0;
if (isDigitA && (char)(strA[jA] - numA) != zeroA) isDigitA = false;
if (isDigitB && (char)(strB[jB] - numB) != zeroB) isDigitB = false;
if (isDigitA && isDigitB) {
if (numA != numB && resultIfSameLength == 0) {
resultIfSameLength = numA < numB ? -1 : 1;
}
jA++;
jB++;
}
}
while (isDigitA && isDigitB);
if (isDigitA != isDigitB) {
// One number has more digits than the other (ignoring leading zeros) - the longer
// number must be larger
return isDigitA ? 1 : -1;
}
else if (resultIfSameLength != 0) {
// Both numbers are the same length (ignoring leading zeros) and at least one of
// the digits differed - the first difference determines the result
return resultIfSameLength;
}
int lA = jA - iA;
int lB = jB - iB;
if (lA != lB) {
// Both numbers are equivalent but one has more leading zeros
return lA > lB ? -1 : 1;
}
else if (zeroA != zeroB && softResultWeight < 2) {
softResult = cmp.Compare(strA, iA, 1, strB, iB, 1, options);
softResultWeight = 2;
}
iA = jA;
iB = jB;
}
}
if (iA < strA.Length || iB < strB.Length) {
return iA < strA.Length ? 1 : -1;
}
else if (softResult != 0) {
return softResult;
}
return 0;
}

签名与 Comparison<string>委托匹配:

string[] files = Directory.GetFiles(@"C:\");
Array.Sort(files, CompareNatural);

下面是一个用作 IComparer<string>的包装类:

public class CustomComparer<T> : IComparer<T> {
private Comparison<T> _comparison;


public CustomComparer(Comparison<T> comparison) {
_comparison = comparison;
}


public int Compare(T x, T y) {
return _comparison(x, y);
}
}

例如:

string[] files = Directory.EnumerateFiles(@"C:\")
.OrderBy(f => f, new CustomComparer<string>(CompareNatural))
.ToArray();

下面是我用于测试的一组很好的文件名:

Func<string, string> expand = (s) => { int o; while ((o = s.IndexOf('\\')) != -1) { int p = o + 1;
int z = 1; while (s[p] == '0') { z++; p++; } int c = Int32.Parse(s.Substring(p, z));
s = s.Substring(0, o) + new string(s[o - 1], c) + s.Substring(p + z); } return s; };
string encodedFileNames =
"KDEqLW4xMiotbjEzKjAwMDFcMDY2KjAwMlwwMTcqMDA5XDAxNyowMlwwMTcqMDlcMDE3KjEhKjEtISox" +
"LWEqMS4yNT8xLjI1KjEuNT8xLjUqMSoxXDAxNyoxXDAxOCoxXDAxOSoxXDA2NioxXDA2NyoxYSoyXDAx" +
"NyoyXDAxOCo5XDAxNyo5XDAxOCo5XDA2Nio9MSphMDAxdGVzdDAxKmEwMDF0ZXN0aW5nYTBcMzEqYTAw" +
"Mj9hMDAyIGE/YTAwMiBhKmEwMDIqYTAwMmE/YTAwMmEqYTAxdGVzdGluZ2EwMDEqYTAxdnNmcyphMSph" +
"MWEqYTF6KmEyKmIwMDAzcTYqYjAwM3E0KmIwM3E1KmMtZSpjZCpjZipmIDEqZipnP2cgMT9oLW4qaG8t" +
"bipJKmljZS1jcmVhbT9pY2VjcmVhbT9pY2VjcmVhbS0/ajBcNDE/ajAwMWE/ajAxP2shKmsnKmstKmsx" +
"KmthKmxpc3QqbTAwMDNhMDA1YSptMDAzYTAwMDVhKm0wMDNhMDA1Km0wMDNhMDA1YSpuMTIqbjEzKm8t" +
"bjAxMypvLW4xMipvLW40P28tbjQhP28tbjR6P28tbjlhLWI1Km8tbjlhYjUqb24wMTMqb24xMipvbjQ/" +
"b240IT9vbjR6P29uOWEtYjUqb245YWI1Km/CrW4wMTMqb8KtbjEyKnAwMCpwMDEqcDAxwr0hKnAwMcK9" +
"KnAwMcK9YSpwMDHCvcK+KnAwMipwMMK9KnEtbjAxMypxLW4xMipxbjAxMypxbjEyKnItMDAhKnItMDAh" +
"NSpyLTAwIe+8lSpyLTAwYSpyLe+8kFwxIS01KnIt77yQXDEhLe+8lSpyLe+8kFwxISpyLe+8kFwxITUq" +
"ci3vvJBcMSHvvJUqci3vvJBcMWEqci3vvJBcMyE1KnIwMCEqcjAwLTUqcjAwLjUqcjAwNSpyMDBhKnIw" +
"NSpyMDYqcjQqcjUqctmg2aYqctmkKnLZpSpy27Dbtipy27Qqctu1KnLfgN+GKnLfhCpy34UqcuClpuCl" +
"rCpy4KWqKnLgpasqcuCnpuCnrCpy4KeqKnLgp6sqcuCppuCprCpy4KmqKnLgqasqcuCrpuCrrCpy4Kuq" +
"KnLgq6sqcuCtpuCtrCpy4K2qKnLgrasqcuCvpuCvrCpy4K+qKnLgr6sqcuCxpuCxrCpy4LGqKnLgsasq" +
"cuCzpuCzrCpy4LOqKnLgs6sqcuC1puC1rCpy4LWqKnLgtasqcuC5kOC5lipy4LmUKnLguZUqcuC7kOC7" +
"lipy4LuUKnLgu5UqcuC8oOC8pipy4LykKnLgvKUqcuGBgOGBhipy4YGEKnLhgYUqcuGCkOGClipy4YKU" +
"KnLhgpUqcuGfoOGfpipy4Z+kKnLhn6UqcuGgkOGglipy4aCUKnLhoJUqcuGlhuGljCpy4aWKKnLhpYsq" +
"cuGnkOGnlipy4aeUKnLhp5UqcuGtkOGtlipy4a2UKnLhrZUqcuGusOGutipy4a60KnLhrrUqcuGxgOGx" +
"hipy4bGEKnLhsYUqcuGxkOGxlipy4bGUKnLhsZUqcuqYoFwx6pilKnLqmKDqmKUqcuqYoOqYpipy6pik" +
"KnLqmKUqcuqjkOqjlipy6qOUKnLqo5UqcuqkgOqkhipy6qSEKnLqpIUqcuqpkOqplipy6qmUKnLqqZUq" +
"cvCQkqAqcvCQkqUqcvCdn5gqcvCdn50qcu+8kFwxISpy77yQXDEt77yVKnLvvJBcMS7vvJUqcu+8kFwx" +
"YSpy77yQXDHqmKUqcu+8kFwx77yO77yVKnLvvJBcMe+8lSpy77yQ77yVKnLvvJDvvJYqcu+8lCpy77yV" +
"KnNpKnPEsSp0ZXN02aIqdGVzdNmi2aAqdGVzdNmjKnVBZS0qdWFlKnViZS0qdUJlKnVjZS0xw6kqdWNl" +
"McOpLSp1Y2Uxw6kqdWPDqS0xZSp1Y8OpMWUtKnVjw6kxZSp3ZWlhMSp3ZWlhMip3ZWlzczEqd2Vpc3My" +
"KndlaXoxKndlaXoyKndlacOfMSp3ZWnDnzIqeSBhMyp5IGE0KnknYTMqeSdhNCp5K2EzKnkrYTQqeS1h" +
"Myp5LWE0KnlhMyp5YTQqej96IDA1MD96IDIxP3ohMjE/ejIwP3oyMj96YTIxP3rCqTIxP1sxKl8xKsKt" +
"bjEyKsKtbjEzKsSwKg==";
string[] fileNames = Encoding.UTF8.GetString(Convert.FromBase64String(encodedFileNames))
.Replace("*", ".txt?").Split(new[] { "?" }, StringSplitOptions.RemoveEmptyEntries)
.Select(n => expand(n)).ToArray();

我的解决办法是:

void Main()
{
new[] {"a4","a3","a2","a10","b5","b4","b400","1","C1d","c1d2"}.OrderBy(x => x, new NaturalStringComparer()).Dump();
}


public class NaturalStringComparer : IComparer<string>
{
private static readonly Regex _re = new Regex(@"(?<=\D)(?=\d)|(?<=\d)(?=\D)", RegexOptions.Compiled);


public int Compare(string x, string y)
{
x = x.ToLower();
y = y.ToLower();
if(string.Compare(x, 0, y, 0, Math.Min(x.Length, y.Length)) == 0)
{
if(x.Length == y.Length) return 0;
return x.Length < y.Length ? -1 : 1;
}
var a = _re.Split(x);
var b = _re.Split(y);
int i = 0;
while(true)
{
int r = PartCompare(a[i], b[i]);
if(r != 0) return r;
++i;
}
}


private static int PartCompare(string x, string y)
{
int a, b;
if(int.TryParse(x, out a) && int.TryParse(y, out b))
return a.CompareTo(b);
return x.CompareTo(y);
}
}

结果:

1
a2
a3
a4
a10
b4
b5
b400
C1d
c1d2

我只是想补充一点(用我能找到的最简洁的解决方案) :

public static IOrderedEnumerable<T> OrderByAlphaNumeric<T>(this IEnumerable<T> source, Func<T, string> selector)
{
int max = source
.SelectMany(i => Regex.Matches(selector(i), @"\d+").Cast<Match>().Select(m => (int?)m.Value.Length))
.Max() ?? 0;


return source.OrderBy(i => Regex.Replace(selector(i), @"\d+", m => m.Value.PadLeft(max, '0')));
}

上面的代码将字符串中的任何数字填充到所有字符串中所有数字的最大长度,并使用结果字符串进行排序。

转换为(int?)是为了允许没有任何数字的字符串集合(在空可枚举数上的 .Max()抛出一个 InvalidOperationException)。

我们需要一种自然的排序来处理以下模式的文本:

"Test 1-1-1 something"
"Test 1-2-3 something"
...

出于某种原因,当我第一次看到 SO 时,我没有找到这篇文章,而是实现了我们自己的。与这里提出的一些解决方案相比,虽然概念相似,但它可能具有更简单和更容易理解的好处。然而,尽管我确实试图查看性能瓶颈,但它的实现速度仍然比默认的 OrderBy()慢得多。

下面是我实现的扩展方法:

public static class EnumerableExtensions
{
// set up the regex parser once and for all
private static readonly Regex Regex = new Regex(@"\d+|\D+", RegexOptions.Compiled | RegexOptions.Singleline);


// stateless comparer can be built once
private static readonly AggregateComparer Comparer = new AggregateComparer();


public static IEnumerable<T> OrderByNatural<T>(this IEnumerable<T> source, Func<T, string> selector)
{
// first extract string from object using selector
// then extract digit and non-digit groups
Func<T, IEnumerable<IComparable>> splitter =
s => Regex.Matches(selector(s))
.Cast<Match>()
.Select(m => Char.IsDigit(m.Value[0]) ? (IComparable) int.Parse(m.Value) : m.Value);
return source.OrderBy(splitter, Comparer);
}


/// <summary>
/// This comparer will compare two lists of objects against each other
/// </summary>
/// <remarks>Objects in each list are compare to their corresponding elements in the other
/// list until a difference is found.</remarks>
private class AggregateComparer : IComparer<IEnumerable<IComparable>>
{
public int Compare(IEnumerable<IComparable> x, IEnumerable<IComparable> y)
{
return
x.Zip(y, (a, b) => new {a, b})              // walk both lists
.Select(pair => pair.a.CompareTo(pair.b))  // compare each object
.FirstOrDefault(result => result != 0);    // until a difference is found
}
}
}

其思想是将原始字符串分成数字和非数字("\d+|\D+")。由于这是一个潜在的昂贵任务,因此每个条目只执行一次。然后我们使用一个可比对象的比较器(对不起,我找不到更合适的方式来表达它)。它将每个块与另一个字符串中的对应块进行比较。

我希望得到有关如何改进以及主要缺陷是什么的反馈。请注意,可维护性在这一点上对我们很重要,我们目前还没有在极大的数据集中使用它。

Matthews Horsley 的回答是最快的方法,它不会根据程序运行在哪个版本的窗口上而改变行为。但是,通过创建一次正则表达式并使用 RegexOptions,它可以更快。编译完毕。我还添加了插入字符串比较器的选项,这样您可以在需要时忽略大小写,并略微提高了可读性。

    public static IEnumerable<T> OrderByNatural<T>(this IEnumerable<T> items, Func<T, string> selector, StringComparer stringComparer = null)
{
var regex = new Regex(@"\d+", RegexOptions.Compiled);


int maxDigits = items
.SelectMany(i => regex.Matches(selector(i)).Cast<Match>().Select(digitChunk => (int?)digitChunk.Value.Length))
.Max() ?? 0;


return items.OrderBy(i => regex.Replace(selector(i), match => match.Value.PadLeft(maxDigits, '0')), stringComparer ?? StringComparer.CurrentCulture);
}

使用者

var sortedEmployees = employees.OrderByNatural(emp => emp.Name);

这需要450ms 对100,000个字符串进行排序,而对于 default. net 字符串比较需要300ms-非常快!

在前面几个答案的基础上,利用扩展方法,我得出了以下结论,它没有潜在的多个可枚举枚举的警告,也没有与使用多个正则表达式对象有关的性能问题,也没有不必要地调用正则表达式,即是说,它确实使用了 ToList () ,这可以抵消较大集合的好处。

选择器支持泛型类型,允许分配任何委托,源集合中的元素由选择器进行更改,然后用 ToString ()转换为字符串。

    private static readonly Regex _NaturalOrderExpr = new Regex(@"\d+", RegexOptions.Compiled);


public static IEnumerable<TSource> OrderByNatural<TSource, TKey>(
this IEnumerable<TSource> source, Func<TSource, TKey> selector)
{
int max = 0;


var selection = source.Select(
o =>
{
var v = selector(o);
var s = v != null ? v.ToString() : String.Empty;


if (!String.IsNullOrWhiteSpace(s))
{
var mc = _NaturalOrderExpr.Matches(s);


if (mc.Count > 0)
{
max = Math.Max(max, mc.Cast<Match>().Max(m => m.Value.Length));
}
}


return new
{
Key = o,
Value = s
};
}).ToList();


return
selection.OrderBy(
o =>
String.IsNullOrWhiteSpace(o.Value) ? o.Value : _NaturalOrderExpr.Replace(o.Value, m => m.Value.PadLeft(max, '0')))
.Select(o => o.Key);
}


public static IEnumerable<TSource> OrderByDescendingNatural<TSource, TKey>(
this IEnumerable<TSource> source, Func<TSource, TKey> selector)
{
int max = 0;


var selection = source.Select(
o =>
{
var v = selector(o);
var s = v != null ? v.ToString() : String.Empty;


if (!String.IsNullOrWhiteSpace(s))
{
var mc = _NaturalOrderExpr.Matches(s);


if (mc.Count > 0)
{
max = Math.Max(max, mc.Cast<Match>().Max(m => m.Value.Length));
}
}


return new
{
Key = o,
Value = s
};
}).ToList();


return
selection.OrderByDescending(
o =>
String.IsNullOrWhiteSpace(o.Value) ? o.Value : _NaturalOrderExpr.Replace(o.Value, m => m.Value.PadLeft(max, '0')))
.Select(o => o.Key);
}

这是我对同时包含字母和数字字符的字符串进行排序的代码。

首先,这种扩展方法:

public static IEnumerable<string> AlphanumericSort(this IEnumerable<string> me)
{
return me.OrderBy(x => Regex.Replace(x, @"\d+", m => m.Value.PadLeft(50, '0')));
}

然后,像下面这样在代码中的任何地方使用它:

List<string> test = new List<string>() { "The 1st", "The 12th", "The 2nd" };
test = test.AlphanumericSort();

它是如何工作的? 用零代替:

  Original  | Regex Replace |      The      |   Returned
List    | Apply PadLeft |    Sorting    |     List
|               |               |
"The 1st"  |  "The 001st"  |  "The 001st"  |  "The 1st"
"The 12th" |  "The 012th"  |  "The 002nd"  |  "The 2nd"
"The 2nd"  |  "The 002nd"  |  "The 012th"  |  "The 12th"

使用多个数字:

 Alphabetical Sorting | Alphanumeric Sorting
|
"Page 21, Line 42"   | "Page 3, Line 7"
"Page 21, Line 5"    | "Page 3, Line 32"
"Page 3, Line 32"    | "Page 21, Line 5"
"Page 3, Line 7"     | "Page 21, Line 42"

希望能有所帮助。

下面是一个相对简单的示例,它不使用 P/Invoke,并且在执行期间避免任何分配。

请随意使用这里的代码,或者如果更容易的话,还有一个 NuGet 包:

Https://www.nuget.org/packages/naturalsort

Https://github.com/drewnoakes/natural-sort

internal sealed class NaturalStringComparer : IComparer<string>
{
public static NaturalStringComparer Instance { get; } = new NaturalStringComparer();


public int Compare(string x, string y)
{
// sort nulls to the start
if (x == null)
return y == null ? 0 : -1;
if (y == null)
return 1;


var ix = 0;
var iy = 0;


while (true)
{
// sort shorter strings to the start
if (ix >= x.Length)
return iy >= y.Length ? 0 : -1;
if (iy >= y.Length)
return 1;


var cx = x[ix];
var cy = y[iy];


int result;
if (char.IsDigit(cx) && char.IsDigit(cy))
result = CompareInteger(x, y, ref ix, ref iy);
else
result = cx.CompareTo(y[iy]);


if (result != 0)
return result;


ix++;
iy++;
}
}


private static int CompareInteger(string x, string y, ref int ix, ref int iy)
{
var lx = GetNumLength(x, ix);
var ly = GetNumLength(y, iy);


// shorter number first (note, doesn't handle leading zeroes)
if (lx != ly)
return lx.CompareTo(ly);


for (var i = 0; i < lx; i++)
{
var result = x[ix++].CompareTo(y[iy++]);
if (result != 0)
return result;
}


return 0;
}


private static int GetNumLength(string s, int i)
{
var length = 0;
while (i < s.Length && char.IsDigit(s[i++]))
length++;
return length;
}
}

它不会忽略前导零,所以 01排在 2之后。

相应的单元测试:

public class NumericStringComparerTests
{
[Fact]
public void OrdersCorrectly()
{
AssertEqual("", "");
AssertEqual(null, null);
AssertEqual("Hello", "Hello");
AssertEqual("Hello123", "Hello123");
AssertEqual("123", "123");
AssertEqual("123Hello", "123Hello");


AssertOrdered("", "Hello");
AssertOrdered(null, "Hello");
AssertOrdered("Hello", "Hello1");
AssertOrdered("Hello123", "Hello124");
AssertOrdered("Hello123", "Hello133");
AssertOrdered("Hello123", "Hello223");
AssertOrdered("123", "124");
AssertOrdered("123", "133");
AssertOrdered("123", "223");
AssertOrdered("123", "1234");
AssertOrdered("123", "2345");
AssertOrdered("0", "1");
AssertOrdered("123Hello", "124Hello");
AssertOrdered("123Hello", "133Hello");
AssertOrdered("123Hello", "223Hello");
AssertOrdered("123Hello", "1234Hello");
}


private static void AssertEqual(string x, string y)
{
Assert.Equal(0, NaturalStringComparer.Instance.Compare(x, y));
Assert.Equal(0, NaturalStringComparer.Instance.Compare(y, x));
}


private static void AssertOrdered(string x, string y)
{
Assert.Equal(-1, NaturalStringComparer.Instance.Compare(x, y));
Assert.Equal( 1, NaturalStringComparer.Instance.Compare(y, x));
}
}

我实际上已经在 StringComparer上将它作为一个扩展方法来实现,例如:

  • StringComparer.CurrentCulture.WithNaturalSort()
  • StringComparer.OrdinalIgnoreCase.WithNaturalSort().

由此产生的 IComparer<string>可用于所有场合,如 OrderByOrderByDescendingThenByThenByDescendingSortedSet<string>等。你仍然可以轻松地调整大小写敏感性、文化等等。

该实现相当琐碎,即使在大型序列上也应该能够很好地执行。


我还把它作为一个小型的 NuGet 包发表了,所以你可以这样做:

Install-Package NaturalSort.Extension

包含 XML 文档注释和 一系列的测试的代码在 NaturalSort 扩展 < strong > GitHub 存储库 中可用。


整个代码是这样的(如果你还不能使用 C # 7,只需要安装 NuGet 包) :

public static class StringComparerNaturalSortExtension
{
public static IComparer<string> WithNaturalSort(this StringComparer stringComparer) => new NaturalSortComparer(stringComparer);


private class NaturalSortComparer : IComparer<string>
{
public NaturalSortComparer(StringComparer stringComparer)
{
_stringComparer = stringComparer;
}


private readonly StringComparer _stringComparer;
private static readonly Regex NumberSequenceRegex = new Regex(@"(\d+)", RegexOptions.Compiled | RegexOptions.CultureInvariant);
private static string[] Tokenize(string s) => s == null ? new string[] { } : NumberSequenceRegex.Split(s);
private static ulong ParseNumberOrZero(string s) => ulong.TryParse(s, NumberStyles.None, CultureInfo.InvariantCulture, out var result) ? result : 0;


public int Compare(string s1, string s2)
{
var tokens1 = Tokenize(s1);
var tokens2 = Tokenize(s2);


var zipCompare = tokens1.Zip(tokens2, TokenCompare).FirstOrDefault(x => x != 0);
if (zipCompare != 0)
return zipCompare;


var lengthCompare = tokens1.Length.CompareTo(tokens2.Length);
return lengthCompare;
}
        

private int TokenCompare(string token1, string token2)
{
var number1 = ParseNumberOrZero(token1);
var number2 = ParseNumberOrZero(token2);


var numberCompare = number1.CompareTo(number2);
if (numberCompare != 0)
return numberCompare;


var stringCompare = _stringComparer.Compare(token1, token2);
return stringCompare;
}
}
}

下面是一个简单的单行 regex-less LINQ 方法(借用自 python) :

var alphaStrings = new List<string>() { "10","2","3","4","50","11","100","a12","b12" };
var orderedString = alphaStrings.OrderBy(g => new Tuple<int, string>(g.ToCharArray().All(char.IsDigit)? int.Parse(g) : int.MaxValue, g));
// Order Now: ["2","3","4","10","11","50","100","a12","b12"]

受 Michael Parker 解决方案的启发,这里有一个 IComparer实现,你可以使用任何 linq 订购方法:

private class NaturalStringComparer : IComparer<string>
{
public int Compare(string left, string right)
{
int max = new[] { left, right }
.SelectMany(x => Regex.Matches(x, @"\d+").Cast<Match>().Select(y => (int?)y.Value.Length))
.Max() ?? 0;


var leftPadded = Regex.Replace(left, @"\d+", m => m.Value.PadLeft(max, '0'));
var rightPadded = Regex.Replace(right, @"\d+", m => m.Value.PadLeft(max, '0'));


return string.Compare(leftPadded, rightPadded);
}
}

让我解释一下我的问题,以及我是如何解决它的。

问题:-从目录中检索的 FileInfo 对象中基于 FileName 对文件进行排序。

解决方案:-我从 FileInfo 中选择了文件名,并修剪了“。文件名的一部分。现在,只要做李斯特。Sort () ,按照自然排序顺序对文件名进行排序。根据我的测试,我发现。Png 扰乱了排序顺序。看看下面的代码

var imageNameList = new DirectoryInfo(@"C:\Temp\Images").GetFiles("*.png").Select(x =>x.Name.Substring(0, x.Name.Length - 4)).ToList();
imageNameList.Sort();

一个更容易阅读/维护的版本。

public class NaturalStringComparer : IComparer<string>
{
public static NaturalStringComparer Instance { get; } = new NaturalStringComparer();


public int Compare(string x, string y) {
const int LeftIsSmaller = -1;
const int RightIsSmaller = 1;
const int Equal = 0;


var leftString = x;
var rightString = y;


var stringComparer = CultureInfo.CurrentCulture.CompareInfo;


int rightIndex;
int leftIndex;


for (leftIndex = 0, rightIndex = 0;
leftIndex < leftString.Length && rightIndex < rightString.Length;
leftIndex++, rightIndex++) {
var leftChar = leftString[leftIndex];
var rightChar = rightString[leftIndex];


var leftIsNumber = char.IsNumber(leftChar);
var rightIsNumber = char.IsNumber(rightChar);


if (!leftIsNumber && !rightIsNumber) {
var result = stringComparer.Compare(leftString, leftIndex, 1, rightString, leftIndex, 1);
if (result != 0) return result;
} else if (leftIsNumber && !rightIsNumber) {
return LeftIsSmaller;
} else if (!leftIsNumber && rightIsNumber) {
return RightIsSmaller;
} else {
var leftNumberLength = NumberLength(leftString, leftIndex, out var leftNumber);
var rightNumberLength = NumberLength(rightString, rightIndex, out var rightNumber);


if (leftNumberLength < rightNumberLength) {
return LeftIsSmaller;
} else if (leftNumberLength > rightNumberLength) {
return RightIsSmaller;
} else {
if(leftNumber < rightNumber) {
return LeftIsSmaller;
} else if(leftNumber > rightNumber) {
return RightIsSmaller;
}
}
}
}


if (leftString.Length < rightString.Length) {
return LeftIsSmaller;
} else if(leftString.Length > rightString.Length) {
return RightIsSmaller;
}


return Equal;
}


public int NumberLength(string str, int offset, out int number) {
if (string.IsNullOrWhiteSpace(str)) throw new ArgumentNullException(nameof(str));
if (offset >= str.Length) throw new ArgumentOutOfRangeException(nameof(offset), offset, "Offset must be less than the length of the string.");


var currentOffset = offset;


var curChar = str[currentOffset];


if (!char.IsNumber(curChar))
throw new ArgumentException($"'{curChar}' is not a number.", nameof(offset));


int length = 1;


var numberString = string.Empty;


for (currentOffset = offset + 1;
currentOffset < str.Length;
currentOffset++, length++) {


curChar = str[currentOffset];
numberString += curChar;


if (!char.IsNumber(curChar)) {
number = int.Parse(numberString);


return length;
}
}


number = int.Parse(numberString);


return length;
}
}

这是一个针对.NET Core 2.1 +/. NET 5.0 + 的版本,使用 span 来避免分配

public class NaturalSortStringComparer : IComparer<string>
{
public static NaturalSortStringComparer Ordinal { get; } = new NaturalSortStringComparer(StringComparison.Ordinal);
public static NaturalSortStringComparer OrdinalIgnoreCase { get; } = new NaturalSortStringComparer(StringComparison.OrdinalIgnoreCase);
public static NaturalSortStringComparer CurrentCulture { get; } = new NaturalSortStringComparer(StringComparison.CurrentCulture);
public static NaturalSortStringComparer CurrentCultureIgnoreCase { get; } = new NaturalSortStringComparer(StringComparison.CurrentCultureIgnoreCase);
public static NaturalSortStringComparer InvariantCulture { get; } = new NaturalSortStringComparer(StringComparison.InvariantCulture);
public static NaturalSortStringComparer InvariantCultureIgnoreCase { get; } = new NaturalSortStringComparer(StringComparison.InvariantCultureIgnoreCase);


private readonly StringComparison _comparison;


public NaturalSortStringComparer(StringComparison comparison)
{
_comparison = comparison;
}


public int Compare(string x, string y)
{
// Let string.Compare handle the case where x or y is null
if (x is null || y is null)
return string.Compare(x, y, _comparison);


var xSegments = GetSegments(x);
var ySegments = GetSegments(y);


while (xSegments.MoveNext() && ySegments.MoveNext())
{
int cmp;


// If they're both numbers, compare the value
if (xSegments.CurrentIsNumber && ySegments.CurrentIsNumber)
{
var xValue = long.Parse(xSegments.Current);
var yValue = long.Parse(ySegments.Current);
cmp = xValue.CompareTo(yValue);
if (cmp != 0)
return cmp;
}
// If x is a number and y is not, x is "lesser than" y
else if (xSegments.CurrentIsNumber)
{
return -1;
}
// If y is a number and x is not, x is "greater than" y
else if (ySegments.CurrentIsNumber)
{
return 1;
}


// OK, neither are number, compare the segments as text
cmp = xSegments.Current.CompareTo(ySegments.Current, _comparison);
if (cmp != 0)
return cmp;
}


// At this point, either all segments are equal, or one string is shorter than the other


// If x is shorter, it's "lesser than" y
if (x.Length < y.Length)
return -1;
// If x is longer, it's "greater than" y
if (x.Length > y.Length)
return 1;


// If they have the same length, they're equal
return 0;
}


private static StringSegmentEnumerator GetSegments(string s) => new StringSegmentEnumerator(s);


private struct StringSegmentEnumerator
{
private readonly string _s;
private int _start;
private int _length;


public StringSegmentEnumerator(string s)
{
_s = s;
_start = -1;
_length = 0;
CurrentIsNumber = false;
}


public ReadOnlySpan<char> Current => _s.AsSpan(_start, _length);
        

public bool CurrentIsNumber { get; private set; }


public bool MoveNext()
{
var currentPosition = _start >= 0
? _start + _length
: 0;


if (currentPosition >= _s.Length)
return false;


int start = currentPosition;
bool isFirstCharDigit = Char.IsDigit(_s[currentPosition]);


while (++currentPosition < _s.Length && Char.IsDigit(_s[currentPosition]) == isFirstCharDigit)
{
}


_start = start;
_length = currentPosition - start;
CurrentIsNumber = isFirstCharDigit;


return true;
}
}
}