用 LINQ 将集合拆分成 n 个部分?

有没有一个很好的方法来分割一个集合到 n的部分与 LINQ? 当然不一定是平均的。

也就是说,我想将集合划分为子集合,每个子集合包含元素的一个子集,最后一个集合可以是不规则的。

90938 次浏览

编辑: 好吧,看来我误解了这个问题。我把它读作“长度为 n 的片段”而不是“ n 片段”。噢!考虑到删除答案..。

(原答案)

我不相信有一种内置的分区方式,尽管我打算在我的 LINQtoObjects 附件集中写一个。Marc Gravell 有一个 implementation here,尽管我可能会修改它以返回一个只读视图:

public static IEnumerable<IEnumerable<T>> Partition<T>
(this IEnumerable<T> source, int size)
{
T[] array = null;
int count = 0;
foreach (T item in source)
{
if (array == null)
{
array = new T[size];
}
array[count] = item;
count++;
if (count == size)
{
yield return new ReadOnlyCollection<T>(array);
array = null;
count = 0;
}
}
if (array != null)
{
Array.Resize(ref array, count);
yield return new ReadOnlyCollection<T>(array);
}
}

如果这些部分的顺序不是很重要,你可以试试这个:

int[] array = new int[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
int n = 3;


var result =
array.Select((value, index) => new { Value = value, Index = index }).GroupBy(i => i.Index % n, i => i.Value);


// or
var result2 =
from i in array.Select((value, index) => new { Value = value, Index = index })
group i.Value by i.Index % n into g
select g;

然而,由于某些原因,它们不能被强制转换为 IEnumable < IEnumable < int > ..。

纯 linq 和最简单的解决方案如下所示。

static class LinqExtensions
{
public static IEnumerable<IEnumerable<T>> Split<T>(this IEnumerable<T> list, int parts)
{
int i = 0;
var splits = from item in list
group item by i++ % parts into part
select part.AsEnumerable();
return splits;
}
}
int[] items = new int[] { 0,1,2,3,4,5,6,7,8,9, 10 };


int itemIndex = 0;
int groupSize = 2;
int nextGroup = groupSize;


var seqItems = from aItem in items
group aItem by
(itemIndex++ < nextGroup)
?
nextGroup / groupSize
:
(nextGroup += groupSize) / groupSize
into itemGroup
select itemGroup.AsEnumerable();

这是我的代码,简明扼要。

 <Extension()> Public Function Chunk(Of T)(ByVal this As IList(Of T), ByVal size As Integer) As List(Of List(Of T))
Dim result As New List(Of List(Of T))
For i = 0 To CInt(Math.Ceiling(this.Count / size)) - 1
result.Add(New List(Of T)(this.GetRange(i * size, Math.Min(size, this.Count - (i * size)))))
Next
Return result
End Function

我用这个:

public static IEnumerable<IEnumerable<T>> Partition<T>(this IEnumerable<T> instance, int partitionSize)
{
return instance
.Select((value, index) => new { Index = index, Value = value })
.GroupBy(i => i.Index / partitionSize)
.Select(i => i.Select(i2 => i2.Value));
}

I have been using the Partition function I posted earlier quite often. The only bad thing about it was that is wasn't completely streaming. This is not a problem if you work with few elements in your sequence. I needed a new solution when i started working with 100.000+ elements in my sequence.

下面的解决方案要复杂得多(代码也更多!) ,但是它非常有效。

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Collections;


namespace LuvDaSun.Linq
{
public static class EnumerableExtensions
{
public static IEnumerable<IEnumerable<T>> Partition<T>(this IEnumerable<T> enumerable, int partitionSize)
{
/*
return enumerable
.Select((item, index) => new { Item = item, Index = index, })
.GroupBy(item => item.Index / partitionSize)
.Select(group => group.Select(item => item.Item)                )
;
*/


return new PartitioningEnumerable<T>(enumerable, partitionSize);
}


}




class PartitioningEnumerable<T> : IEnumerable<IEnumerable<T>>
{
IEnumerable<T> _enumerable;
int _partitionSize;
public PartitioningEnumerable(IEnumerable<T> enumerable, int partitionSize)
{
_enumerable = enumerable;
_partitionSize = partitionSize;
}


public IEnumerator<IEnumerable<T>> GetEnumerator()
{
return new PartitioningEnumerator<T>(_enumerable.GetEnumerator(), _partitionSize);
}


IEnumerator IEnumerable.GetEnumerator()
{
return GetEnumerator();
}
}




class PartitioningEnumerator<T> : IEnumerator<IEnumerable<T>>
{
IEnumerator<T> _enumerator;
int _partitionSize;
public PartitioningEnumerator(IEnumerator<T> enumerator, int partitionSize)
{
_enumerator = enumerator;
_partitionSize = partitionSize;
}


public void Dispose()
{
_enumerator.Dispose();
}


IEnumerable<T> _current;
public IEnumerable<T> Current
{
get { return _current; }
}
object IEnumerator.Current
{
get { return _current; }
}


public void Reset()
{
_current = null;
_enumerator.Reset();
}


public bool MoveNext()
{
bool result;


if (_enumerator.MoveNext())
{
_current = new PartitionEnumerable<T>(_enumerator, _partitionSize);
result = true;
}
else
{
_current = null;
result = false;
}


return result;
}


}






class PartitionEnumerable<T> : IEnumerable<T>
{
IEnumerator<T> _enumerator;
int _partitionSize;
public PartitionEnumerable(IEnumerator<T> enumerator, int partitionSize)
{
_enumerator = enumerator;
_partitionSize = partitionSize;
}


public IEnumerator<T> GetEnumerator()
{
return new PartitionEnumerator<T>(_enumerator, _partitionSize);
}


IEnumerator IEnumerable.GetEnumerator()
{
return GetEnumerator();
}
}




class PartitionEnumerator<T> : IEnumerator<T>
{
IEnumerator<T> _enumerator;
int _partitionSize;
int _count;
public PartitionEnumerator(IEnumerator<T> enumerator, int partitionSize)
{
_enumerator = enumerator;
_partitionSize = partitionSize;
}


public void Dispose()
{
}


public T Current
{
get { return _enumerator.Current; }
}
object IEnumerator.Current
{
get { return _enumerator.Current; }
}
public void Reset()
{
if (_count > 0) throw new InvalidOperationException();
}


public bool MoveNext()
{
bool result;


if (_count < _partitionSize)
{
if (_count > 0)
{
result = _enumerator.MoveNext();
}
else
{
result = true;
}
_count++;
}
else
{
result = false;
}


return result;
}


}
}

好好享受吧!

好吧,我也加入进来。我的算法的优点是:

  1. 没有昂贵的乘法、除法或模运算符
  2. 所有操作都是 O (1)(见下面的说明)
  3. 适用于 IEnumable < > source (不需要 Count 属性)
  4. 很简单

密码:

public static IEnumerable<IEnumerable<T>>
Section<T>(this IEnumerable<T> source, int length)
{
if (length <= 0)
throw new ArgumentOutOfRangeException("length");


var section = new List<T>(length);


foreach (var item in source)
{
section.Add(item);


if (section.Count == length)
{
yield return section.AsReadOnly();
section = new List<T>(length);
}
}


if (section.Count > 0)
yield return section.AsReadOnly();
}

正如下面的评论中指出的那样,这种方法实际上并没有解决最初的问题,即要求固定长度大致相等的章节数量。也就是说,你仍然可以用我的方法来解决最初的问题,你可以这样称呼它:

myEnum.Section(myEnum.Count() / number_of_sections + 1)

以这种方式使用时,该方法不再是 O (1) ,因为 Count ()操作是 O (N)。

Interesting thread. To get a streaming version of Split/Partition, one can use enumerators and yield sequences from the enumerator using extension methods. Converting imperative code to functional code using yield is a very powerful technique indeed.

First an enumerator extension that turns a count of elements into a lazy sequence:

public static IEnumerable<T> TakeFromCurrent<T>(this IEnumerator<T> enumerator, int count)
{
while (count > 0)
{
yield return enumerator.Current;
if (--count > 0 && !enumerator.MoveNext()) yield break;
}
}

然后是一个可枚举扩展,它分割一个序列:

public static IEnumerable<IEnumerable<T>> Partition<T>(this IEnumerable<T> seq, int partitionSize)
{
var enumerator = seq.GetEnumerator();


while (enumerator.MoveNext())
{
yield return enumerator.TakeFromCurrent(partitionSize);
}
}

最终的结果是一个依赖于非常简单的代码的高效、流化和惰性的实现。

Enjoy!

static class LinqExtensions
{
public static IEnumerable<IEnumerable<T>> Split<T>(this IEnumerable<T> list, int parts)
{
return list.Select((item, index) => new {index, item})
.GroupBy(x => x.index % parts)
.Select(x => x.Select(y => y.item));
}
}

This is memory efficient and defers execution as much as possible (per batch) and operates in linear time O(n)

    public static IEnumerable<IEnumerable<T>> InBatchesOf<T>(this IEnumerable<T> items, int batchSize)
{
List<T> batch = new List<T>(batchSize);
foreach (var item in items)
{
batch.Add(item);


if (batch.Count >= batchSize)
{
yield return batch;
batch = new List<T>();
}
}


if (batch.Count != 0)
{
//can't be batch size or would've yielded above
batch.TrimExcess();
yield return batch;
}
}

这与公认的答案相同,但更简单的表述是:

public static IEnumerable<IEnumerable<T>> Split<T>(this IEnumerable<T> items,
int numOfParts)
{
int i = 0;
return items.GroupBy(x => i++ % numOfParts);
}

上述方法将 IEnumerable<T>分成 N 个大小相同或接近相同的块。

public static IEnumerable<IEnumerable<T>> Partition<T>(this IEnumerable<T> items,
int partitionSize)
{
int i = 0;
return items.GroupBy(x => i++ / partitionSize).ToArray();
}

上面的方法将 IEnumerable<T>分割成所需的固定大小的块,块的总数并不重要——这不是问题所在。

Split方法的问题在于,除了速度较慢之外,它还扰乱了输出,因为分组将以每个位置的第九个 N 的倍数为基础,或者换句话说,你不能按照原来的顺序得到块。

这里的几乎所有答案要么不能保持秩序,要么只是关于分区而不是分割,要么就是完全错误的。试试这个更快的,保持秩序但是更加冗长的方法:

public static IEnumerable<IEnumerable<T>> Split<T>(this ICollection<T> items,
int numberOfChunks)
{
if (numberOfChunks <= 0 || numberOfChunks > items.Count)
throw new ArgumentOutOfRangeException("numberOfChunks");


int sizePerPacket = items.Count / numberOfChunks;
int extra = items.Count % numberOfChunks;


for (int i = 0; i < numberOfChunks - extra; i++)
yield return items.Skip(i * sizePerPacket).Take(sizePerPacket);


int alreadyReturnedCount = (numberOfChunks - extra) * sizePerPacket;
int toReturnCount = extra == 0 ? 0 : (items.Count - numberOfChunks) / extra + 1;
for (int i = 0; i < extra; i++)
yield return items.Skip(alreadyReturnedCount + i * toReturnCount).Take(toReturnCount);
}

Partition操作 给你的等效方法

刚好碰到这个线程,这里的大多数解决方案都涉及到向集合中添加条目,在返回之前有效地实现每个页面。这是不好的两个原因-首先,如果您的页面很大,有一个内存开销填充页面,其次有迭代器,使以前的记录无效,当您前进到下一个(例如,如果您包装一个数据读取器在一个枚举器方法)。

此解决方案使用两个嵌套枚举器方法,以避免将项缓存到临时集合中的任何需要。由于外部迭代器和内部迭代器遍历相同的枚举器,因此它们必须共享相同的枚举器,所以在处理完当前页面之前不要推进外部的枚举器非常重要。也就是说,如果您决定不迭代当前页面的所有过程,当您移动到下一个页面时,这个解决方案将自动迭代到页面边界。

using System.Collections.Generic;


public static class EnumerableExtensions
{
/// <summary>
/// Partitions an enumerable into individual pages of a specified size, still scanning the source enumerable just once
/// </summary>
/// <typeparam name="T">The element type</typeparam>
/// <param name="enumerable">The source enumerable</param>
/// <param name="pageSize">The number of elements to return in each page</param>
/// <returns></returns>
public static IEnumerable<IEnumerable<T>> Partition<T>(this IEnumerable<T> enumerable, int pageSize)
{
var enumerator = enumerable.GetEnumerator();


while (enumerator.MoveNext())
{
var indexWithinPage = new IntByRef { Value = 0 };


yield return SubPartition(enumerator, pageSize, indexWithinPage);


// Continue iterating through any remaining items in the page, to align with the start of the next page
for (; indexWithinPage.Value < pageSize; indexWithinPage.Value++)
{
if (!enumerator.MoveNext())
{
yield break;
}
}
}
}


private static IEnumerable<T> SubPartition<T>(IEnumerator<T> enumerator, int pageSize, IntByRef index)
{
for (; index.Value < pageSize; index.Value++)
{
yield return enumerator.Current;


if (!enumerator.MoveNext())
{
yield break;
}
}
}


private class IntByRef
{
public int Value { get; set; }
}
}

这就是我的方法,列出项目并逐列中断行

  int repat_count=4;


arrItems.ForEach((x, i) => {
if (i % repat_count == 0)
row = tbo.NewElement(el_tr, cls_min_height);
var td = row.NewElement(el_td);
td.innerHTML = x.Name;
});
   protected List<List<int>> MySplit(int MaxNumber, int Divider)
{
List<List<int>> lst = new List<List<int>>();
int ListCount = 0;
int d = MaxNumber / Divider;
lst.Add(new List<int>());
for (int i = 1; i <= MaxNumber; i++)
{
lst[ListCount].Add(i);
if (i != 0 && i % d == 0)
{
ListCount++;
d += MaxNumber / Divider;
lst.Add(new List<int>());
}
}
return lst;
}

我在寻找一个类似于字符串的分割,所以整个 List 是根据一些规则来分割的,不仅仅是第一部分,这是我的解决方案

List<int> sequence = new List<int>();
for (int i = 0; i < 2000; i++)
{
sequence.Add(i);
}
int splitIndex = 900;
List<List<int>> splitted = new List<List<int>>();
while (sequence.Count != 0)
{
splitted.Add(sequence.Take(splitIndex).ToList() );
sequence.RemoveRange(0, Math.Min(splitIndex, sequence.Count));
}

Here is a little tweak for the number of items instead of the number of parts:

public static class MiscExctensions
{
public static IEnumerable<IEnumerable<T>> Split<T>(this IEnumerable<T> list, int nbItems)
{
return (
list
.Select((o, n) => new { o, n })
.GroupBy(g => (int)(g.n / nbItems))
.Select(g => g.Select(x => x.o))
);
}
}

对于这个问题(以及它的表亲)有许多很棒的答案。我自己也需要这样做,并且创建了一个解决方案,该解决方案的设计目标是在源集合可以作为列表处理的场景中提高效率和容错性。它不使用任何延迟迭代,因此它可能不适合可能应用内存压力的未知大小的集合。

static public IList<T[]> GetChunks<T>(this IEnumerable<T> source, int batchsize)
{
IList<T[]> result = null;
if (source != null && batchsize > 0)
{
var list = source as List<T> ?? source.ToList();
if (list.Count > 0)
{
result = new List<T[]>();
for (var index = 0; index < list.Count; index += batchsize)
{
var rangesize = Math.Min(batchsize, list.Count - index);
result.Add(list.GetRange(index, rangesize).ToArray());
}
}
}
return result ?? Enumerable.Empty<T[]>().ToList();
}


static public void TestGetChunks()
{
var ids = Enumerable.Range(1, 163).Select(i => i.ToString());
foreach (var chunk in ids.GetChunks(20))
{
Console.WriteLine("[{0}]", String.Join(",", chunk));
}
}

在这个使用 GetRange 和 Math 的问题系列中,我看到了一些答案。阿敏。但是我相信总的来说,这是一个在错误检查和效率方面更加完整的解决方案。

伟大的答案,对于我的情况下,我测试了接受的答案,似乎不保持秩序。也有伟大的回答 Nawfal 保持秩序。 但在我的场景中,我想用正规化的方法分割余数, 我看到的所有答案都把余数或者开头或者结尾分散开来。

我的答案还需要余数以更规范的方式扩展。

 static class Program
{
static void Main(string[] args)
{
var input = new List<String>();
for (int k = 0; k < 18; ++k)
{
input.Add(k.ToString());
}
var result = splitListIntoSmallerLists(input, 15);
int i = 0;
foreach(var resul in result){
Console.WriteLine("------Segment:" + i.ToString() + "--------");
foreach(var res in resul){
Console.WriteLine(res);
}
i++;
}
Console.ReadLine();
}


private static List<List<T>> splitListIntoSmallerLists<T>(List<T> i_bigList,int i_numberOfSmallerLists)
{
if (i_numberOfSmallerLists <= 0)
throw new ArgumentOutOfRangeException("Illegal value of numberOfSmallLists");


int normalizedSpreadRemainderCounter = 0;
int normalizedSpreadNumber = 0;
//e.g 7 /5 > 0 ==> output size is 5 , 2 /5 < 0 ==> output is 2
int minimumNumberOfPartsInEachSmallerList = i_bigList.Count / i_numberOfSmallerLists;
int remainder = i_bigList.Count % i_numberOfSmallerLists;
int outputSize = minimumNumberOfPartsInEachSmallerList > 0 ? i_numberOfSmallerLists : remainder;
//In case remainder > 0 we want to spread the remainder equally between the others
if (remainder > 0)
{
if (minimumNumberOfPartsInEachSmallerList > 0)
{
normalizedSpreadNumber = (int)Math.Floor((double)i_numberOfSmallerLists / remainder);
}
else
{
normalizedSpreadNumber = 1;
}
}
List<List<T>> retVal = new List<List<T>>(outputSize);
int inputIndex = 0;
for (int i = 0; i < outputSize; ++i)
{
retVal.Add(new List<T>());
if (minimumNumberOfPartsInEachSmallerList > 0)
{
retVal[i].AddRange(i_bigList.GetRange(inputIndex, minimumNumberOfPartsInEachSmallerList));
inputIndex += minimumNumberOfPartsInEachSmallerList;
}
//If we have remainder take one from it, if our counter is equal to normalizedSpreadNumber.
if (remainder > 0)
{
if (normalizedSpreadRemainderCounter == normalizedSpreadNumber-1)
{
retVal[i].Add(i_bigList[inputIndex]);
remainder--;
inputIndex++;
normalizedSpreadRemainderCounter=0;
}
else
{
normalizedSpreadRemainderCounter++;
}
}
}
return retVal;
}


}

below code returns both given number of chunks also with sorted data

    static IEnumerable<IEnumerable<T>> SplitSequentially<T>(int chunkParts, List<T> inputList)
{
List<int> Splits = split(inputList.Count, chunkParts);


var skipNumber = 0;
List<List<T>> list = new List<List<T>>();
foreach (var count in Splits)
{
var internalList = inputList.Skip(skipNumber).Take(count).ToList();
list.Add(internalList);
skipNumber += count;
}
return list;
}
static List<int> split(int x, int n)
{
List<int> list = new List<int>();


if (x % n == 0)
{
for (int i = 0; i < n; i++)
list.Add(x / n);
}
else
{


// upto n-(x % n) the values
// will be x / n
// after that the values
// will be x / n + 1
int zp = n - (x % n);
int pp = x / n;
for (int i = 0; i < n; i++)
{


if (i >= zp)
list.Add((pp + 1));
else
list.Add(pp);
}
}
return list;
}