def chunk(lst):
    """Split ``lst`` into the smallest number (>= 2) of equally sized chunks.

    The number of chunks is the smallest divisor ``x >= 2`` of ``len(lst)``,
    so a list of prime length is split into single-element chunks.

    :param lst: Sequence to split; it is sliced, not modified.
    :return: List of equally sized slices of ``lst``. An empty input gives
        ``[]`` and a single-element input gives one chunk.
    """
    size = len(lst)
    if size == 0:
        return []
    # Fallback for a one-element list, which has no divisor >= 2.
    factor = size
    for x in range(2, size + 1):
        if size % x == 0:
            # Floor division keeps the chunk length an int on Python 3.
            factor = size // x
            break
    # Slicing replaces repeated ``pop(0)``, which was quadratic and emptied
    # the caller's list.
    return [lst[i:i + factor] for i in range(0, size, factor)]
def chunks(iterable, n):
    """Yield lists of up to ``n`` consecutive items from any iterable.

    Works on one-shot iterators and generators as well as sequences. The
    final list may be shorter than ``n``; no empty list is ever yielded.

    :param iterable: Any iterable.
    :param n: Chunk length; expected to be an integer > 0. A value <= 0
        yields nothing.
    """
    from itertools import islice

    if n <= 0:
        return
    iterator = iter(iterable)
    while True:
        result = list(islice(iterator, n))
        if not result:
            return
        yield result
# Demo: chunk a generator of squares into groups of three.
g1 = (i * i for i in range(10))
g2 = chunks(g1, 3)
print(g2)  # e.g. <generator object chunks at 0x0337B9B8>
print(list(g2))  # [[0, 1, 4], [9, 16, 25], [36, 49, 64], [81]]
def get_in_chunks(itr, n):
    """Lazily group ``itr`` into runs of ``n`` consecutive items.

    Items are numbered with ``enumerate`` and grouped by ``index // n``.

    :param itr: Any iterable.
    :param n: Group length (non-zero integer).
    :return: A generator of generators. Each inner generator shares the
        underlying ``groupby`` iterator, so it must be consumed before the
        outer generator is advanced.
    """
    import itertools

    return (
        (value for _, value in group)
        for _, group in itertools.groupby(
            enumerate(itr), lambda pair: pair[0] // n
        )
    )


# Example:
#   >>> [list(x) for x in get_in_chunks(range(30), 7)]
#   [[0, 1, 2, 3, 4, 5, 6],
#    [7, 8, 9, 10, 11, 12, 13],
#    [14, 15, 16, 17, 18, 19, 20],
#    [21, 22, 23, 24, 25, 26, 27],
#    [28, 29]]
from typing import List, Any
def slice_baskets(items: List[Any], maxbaskets: int) -> List[List[Any]]:
    """Deal ``items`` round-robin into at most ``maxbaskets`` baskets.

    Basket ``i`` receives ``items[i::n_baskets]``, so basket sizes differ by
    at most one and the contents are interleaved, not contiguous.

    :param items: List to distribute.
    :param maxbaskets: Upper bound on the number of baskets.
    :return: List of baskets; empty when ``items`` is empty.
    """
    # Never create more baskets than there are items.
    n_baskets = min(maxbaskets, len(items))
    return [items[i::n_baskets] for i in range(n_baskets)]
from itertools import islicefrom typing import List, Any, Generator
def yield_islice_baskets(
    items: List[Any], maxbaskets: int
) -> Generator["islice[Any]", None, None]:
    """Lazily deal ``items`` round-robin into at most ``maxbaskets`` baskets.

    Each yielded basket is an ``itertools.islice`` iterator over
    ``items[i::n_baskets]``, not a list; wrap it in ``list()`` to
    materialize it.

    :param items: List to distribute.
    :param maxbaskets: Upper bound on the number of baskets.
    """
    # Never create more baskets than there are items.
    n_baskets = min(maxbaskets, len(items))
    for i in range(n_baskets):
        yield islice(items, i, None, n_baskets)
查看结果:
from pprint import pprint
# Demo: distribute 65 items over 10 baskets with each implementation.
items = list(range(10, 75))
pprint(cycle_baskets(items, 10))
pprint(slice_baskets(items, 10))
pprint([list(s) for s in yield_islice_baskets(items, 10)])
更新先前的解决方案
这是另一个平衡解决方案,改编自我过去在生产中使用的函数,它使用模运算符:
def baskets_from(items, maxbaskets=25):
    """Deal ``items`` round-robin into at most ``maxbaskets`` baskets.

    Item ``i`` goes to basket ``i % maxbaskets``; baskets that stay empty
    are dropped from the result.

    :param items: Any iterable.
    :param maxbaskets: Number of baskets to deal into (must be > 0 when
        ``items`` is non-empty).
    :return: List of non-empty baskets (lists).
    """
    baskets = [[] for _ in range(maxbaskets)]
    for i, item in enumerate(items):
        baskets[i % maxbaskets].append(item)
    # ``filter(None, baskets)`` is a lazy iterator on Python 3; build a real
    # list so callers can index, len() and print the result.
    return [basket for basket in baskets if basket]
我创建了一个生成器,如果你把它放在一个列表中,它也会做同样的事情:
def iter_baskets_from(items, maxbaskets=3):
    """Generate evenly balanced baskets from an indexable, sized collection.

    Basket ``x`` holds the items at indexes ``x, x + baskets, ...``, so the
    contents are interleaved, not contiguous.

    :param items: Object supporting ``len()`` and integer indexing.
    :param maxbaskets: Upper bound on the number of baskets.
    """
    item_count = len(items)
    # Never create more baskets than there are items.
    baskets = min(item_count, maxbaskets)
    for x_i in range(baskets):
        yield [items[y_i] for y_i in range(x_i, item_count, baskets)]
最后,因为我看到上面所有的函数都以连续的顺序返回元素(就像给出的那样):
def iter_baskets_contiguous(items, maxbaskets=3, item_count=None):
    """Generate balanced baskets whose contents keep the input order.

    The first ``item_count % baskets`` baskets get one extra item, so basket
    sizes differ by at most one.

    :param items: Iterable to split.
    :param maxbaskets: Upper bound on the number of baskets.
    :param item_count: Number of items; provide it when ``items`` is an
        iterator that does not support ``len()``.
    """
    from itertools import islice

    item_count = item_count or len(items)
    baskets = min(item_count, maxbaskets)
    if baskets <= 0:
        # Nothing to distribute; also avoids dividing by zero below.
        return
    items = iter(items)
    floor, stepdown = divmod(item_count, baskets)
    for x_i in range(baskets):
        length = floor + 1 if x_i < stepdown else floor
        # islice instead of the Python 2-only ``items.next()``; it also
        # avoids leaking StopIteration out of this generator (PEP 479) when
        # ``item_count`` overstates the real length.
        yield list(islice(items, length))
def nChunks(l, n):
    """Yield ``n`` successive chunks from ``l``.

    Works for anything that supports ``len()`` and slicing (lists, pandas
    dataframes, etc.). The first ``n - 1`` chunks have
    ``int(len(l) / n + 0.5)`` items; the last chunk takes whatever remains,
    so it may be longer, shorter or empty.

    :param l: Sliceable, sized collection.
    :param n: Number of chunks (non-zero integer).
    """
    newn = int(1.0 * len(l) / n + 0.5)
    for i in range(0, n - 1):
        yield l[i * newn:i * newn + newn]
    # The last chunk absorbs the rounding remainder.
    yield l[n * newn - newn:]
def chunkList(initialList, chunkSize):
    """Split a list into sub-lists of length ``chunkSize``.

    The last sub-list is shorter when ``len(initialList)`` is not a multiple
    of ``chunkSize``.

    Example::

        lst = [3, 4, 9, 7, 1, 1, 2, 3]
        print(chunkList(lst, 3))
        # [[3, 4, 9], [7, 1, 1], [2, 3]]

    :param initialList: Sliceable, sized collection to split.
    :param chunkSize: Length of each sub-list (non-zero integer).
    :return: List of slices of ``initialList``.
    """
    return [
        initialList[i:i + chunkSize]
        for i in range(0, len(initialList), chunkSize)
    ]
def chunked(iterable, size):
    """Yield lazy chunks of up to ``size`` items from any iterable.

    Each chunk is an iterator that pulls from the shared source, so a chunk
    must be fully consumed before the next one is requested. Each chunk is
    started by pulling its first item, so no empty chunk is ever yielded,
    not even when the input length is an exact multiple of ``size``.

    :param iterable: Any iterable.
    :param size: Maximum chunk length; must be >= 1.
    :raises ValueError: If ``size`` is less than 1.
    """
    from itertools import chain, islice

    if size < 1:
        raise ValueError("size must be at least 1")
    it = iter(iterable)
    for first in it:
        yield chain((first,), islice(it, size - 1))
# Demo: each chunk is lazy, so materialize it before printing.
for it in chunked(range(16), 4):
    print(list(it))
def chunks(li, n):
    """Recursively yield successive ``n``-sized slices of ``li``.

    Each level yields ``li[:n]`` and recurses on ``li[n:]``. Recursion depth
    grows with the number of chunks, so very long inputs can hit the
    interpreter's recursion limit.

    :param li: Sliceable, sized sequence (list, tuple, str, ...).
    :param n: Chunk length; must be >= 1.
    :raises ValueError: If ``n`` is less than 1.
    """
    if n < 1:
        # n == 0 would never shrink the input and recurse forever.
        raise ValueError("n must be at least 1")
    # A length test, unlike ``li == []``, also terminates for tuples and
    # strings.
    if len(li) == 0:
        return
    yield li[:n]
    for e in chunks(li[n:], n):
        yield e
在 Python 3 中:
def chunks(li, n):
    """Recursively yield successive ``n``-sized slices of ``li`` (Python 3).

    Uses ``yield from`` to delegate to the recursive call. Recursion depth
    grows with the number of chunks, so very long inputs can hit the
    interpreter's recursion limit.

    :param li: Sliceable, sized sequence (list, tuple, str, ...).
    :param n: Chunk length; must be >= 1.
    :raises ValueError: If ``n`` is less than 1.
    """
    if n < 1:
        # n == 0 would never shrink the input and recurse forever.
        raise ValueError("n must be at least 1")
    # A length test, unlike ``li == []``, also terminates for tuples and
    # strings.
    if len(li) == 0:
        return
    yield li[:n]
    yield from chunks(li[n:], n)
此外,在大规模外星人入侵的情况下,装饰递归生成器可能会变得方便:
def dec(gen):
    """Decorate a chunk generator so it stops at the first empty chunk.

    :param gen: Generator function taking ``(li, n)`` and yielding sized
        chunks.
    :return: Generator function with the same signature that re-yields
        chunks from ``gen`` until an empty one appears.
    """
    from functools import wraps

    @wraps(gen)
    def new_gen(li, n):
        for e in gen(li, n):
            # A length test, unlike ``e == []``, also recognizes empty
            # tuples and strings as the end of the data.
            if len(e) == 0:
                return
            yield e

    return new_gen


@dec
def chunks(li, n):
    """Recursively yield successive ``n``-sized slices of ``li``.

    There is no base case here: the recursive call goes through the
    decorated ``chunks``, and ``dec`` ends the recursion at the first empty
    slice.
    """
    yield li[:n]
    for e in chunks(li[n:], n):
        yield e
# True division is the default on Python 3. The former
# ``from __future__ import division`` was removed because a __future__
# import is only legal at the very top of a file.
def n_even_chunks(l, n):
    """Yield ``n`` chunks from ``l`` that are as even as possible.

    Chunk ``i`` ends at ``round(i * len(l) / n)``, so chunk sizes differ by
    at most one.

    :param l: Sliceable, sized collection.
    :param n: Number of chunks.
    """
    last = 0
    for i in range(1, n + 1):
        cur = int(round(i * (len(l) / n)))
        yield l[last:cur]
        last = cur
def paged_iter(iterat, n):
    """Yield pages of up to ``n`` items from any iterable.

    Each page is a generator over an already-buffered tuple, so pages can be
    consumed in any order. No empty page is yielded: an empty input or an
    input whose length is a multiple of ``n`` simply ends.

    :param iterat: Any iterable.
    :param n: Page length; ``0`` yields nothing, negative values raise
        ``ValueError``.
    """
    from itertools import islice

    itr = iter(iterat)
    while True:
        page = tuple(islice(itr, n))
        if not page:
            return
        yield (i for i in page)
import itertools


def split_groups(iter_in, group_size):
    """Lazily split ``iter_in`` into groups of ``group_size`` items.

    Items are numbered with ``enumerate`` and grouped by
    ``index // group_size``.

    :param iter_in: Any iterable.
    :param group_size: Group length (non-zero integer).
    :return: A generator of generators. Each inner generator shares the
        underlying ``groupby`` iterator, so it must be consumed before the
        outer generator is advanced.
    """
    return (
        (x for _, x in item)
        for _, item in itertools.groupby(
            enumerate(iter_in), key=lambda x: x[0] // group_size
        )
    )
def chunks(iterable, n):
    """Yield successive n-sized chunks from iterable.

    Works on any iterable, including one-shot iterators. The last chunk may
    be shorter than ``n``; no empty chunk is yielded.

    :param iterable: Any iterable.
    :param n: Chunk length (non-zero integer).
    """
    values = []
    # Counting from 1 makes ``i % n == 0`` true exactly when a chunk is full.
    for i, item in enumerate(iterable, 1):
        values.append(item)
        if i % n == 0:
            yield values
            values = []
    if values:
        yield values
import time

batch_size = 7
arr_len = 298937

# ---------slice-------------
# Repeatedly cut the head off the list; every pass copies the remainder.
print("\r\nslice")
start = time.time()
arr = list(range(arr_len))
while arr:
    tmp = arr[0:batch_size]
    # Keep the whole tail: the former ``arr[batch_size:-1]`` silently
    # dropped the last element on every pass.
    arr = arr[batch_size:]
print(time.time() - start)

# -----------index-----------
# Slice by computed offsets without rebinding the list.
print("\r\nindex")
arr = list(range(arr_len))
start = time.time()
# Ceiling division gives the exact batch count; ``round(len / size + 1)``
# produced an extra empty batch whenever the length was an exact multiple.
for i in range(-(-len(arr) // batch_size)):
    tmp = arr[batch_size * i: batch_size * (i + 1)]
print(time.time() - start)
#----------batches 1------------
def batch(iterable, n=1):
    """Yield successive slices of up to ``n`` items from a sized sequence.

    :param iterable: Object supporting ``len()`` and slicing.
    :param n: Batch length (non-zero integer).
    """
    total = len(iterable)
    for ndx in range(0, total, n):
        yield iterable[ndx:min(ndx + n, total)]
print("\r\nbatches 1")
arr = list(range(arr_len))
start = time.time()
for x in batch(arr, batch_size):
    tmp = x
print(time.time() - start)
#----------batches 2------------
from itertools import islice, chain
def batch(iterable, size):
    """Yield lazy batches of up to ``size`` items from any iterable.

    Each batch is an iterator over the shared source, so it must be fully
    consumed before the next batch is requested.

    :param iterable: Any iterable.
    :param size: Maximum batch length.
    """
    sourceiter = iter(iterable)
    while True:
        batchiter = islice(sourceiter, size)
        try:
            # Pull one item eagerly to find out whether anything is left.
            first = next(batchiter)
        except StopIteration:
            # End cleanly: a StopIteration escaping a generator becomes a
            # RuntimeError on Python 3.7+ (PEP 479).
            return
        yield chain([first], batchiter)
print("\r\nbatches 2")
arr = list(range(arr_len))
start = time.time()
for x in batch(arr, batch_size):
    # Materialize the lazy batch. If it is left unconsumed, each following
    # batch starts one element later and the loop times single-element
    # batches instead of ``batch_size``-element ones.
    tmp = list(x)
print(time.time() - start)
# ---------chunks-------------
def chunks(l, n):
    """Yield successive n-sized chunks from l.

    :param l: Object supporting ``len()`` and slicing.
    :param n: Chunk length (non-zero integer).
    """
    for i in range(0, len(l), n):
        yield l[i:i + n]


print("\r\nchunks")
arr = list(range(arr_len))
start = time.time()
for x in chunks(arr, batch_size):
    tmp = x
print(time.time() - start)
#-----------grouper-----------
from itertools import zip_longest # for Python 3.x#from six.moves import zip_longest # for both (uses the six compat library)
def grouper(iterable, n, padvalue=None):
    """Collect items into fixed-length tuples, padding the last one.

    grouper('abcdefg', 3, 'x') --> ('a','b','c'), ('d','e','f'), ('g','x','x')

    :param iterable: Any iterable.
    :param n: Tuple length.
    :param padvalue: Value used to fill the final, incomplete tuple.
    :return: Iterator of ``n``-tuples.
    """
    # The same iterator repeated n times: zip_longest pulls n consecutive
    # items for every output tuple.
    return zip_longest(*[iter(iterable)] * n, fillvalue=padvalue)
arr = list(range(arr_len))
print("\r\ngrouper")
start = time.time()
for x in grouper(arr, batch_size):
    tmp = x
print(time.time() - start)
def proportional_dividing(N, n):
    """Return the sizes of ``n`` chunks that together hold ``N`` items.

    Sizes differ by at most one. When more than one item is left over, the
    surplus goes to the first chunks; a single leftover item stays on the
    last chunk.

    Examples: ``(29, 12) -> [3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2]``,
    ``(13, 3) -> [4, 4, 5]``.

    :param N: Length of the array (the bigger number).
    :param n: Number of chunks (the smaller number).
    :return: List of ``n`` chunk sizes summing to ``N``; ``[]`` when ``N``
        is 0 and ``[N]`` when ``n`` is 0.
    """
    if N == 0:
        return []
    if n == 0:
        return [N]
    base, extra = divmod(N, n)
    if extra <= 1:
        # Evenly divisible, or one leftover item kept on the last chunk.
        return [base] * (n - 1) + [base + extra]
    # Several leftover items: hand one each to the first ``extra`` chunks.
    return [base + 1] * extra + [base] * (n - extra)
def split_items(items, chunks):
    """Split ``items`` into ``chunks`` contiguous, balanced slices.

    The slice sizes come from ``proportional_dividing``. The result is
    printed as well as returned.

    :param items: Sliceable, sized collection.
    :param chunks: Number of slices to produce.
    :return: List of slices of ``items``.
    """
    arr = proportional_dividing(len(items), chunks)
    splitted = []
    # Walk an offset instead of re-slicing the remainder on every pass.
    start = 0
    for chunk_size in arr:
        splitted.append(items[start:start + chunk_size])
        start += chunk_size
    print(splitted)
    return splitted
# Demo runs; every call prints its result.
items = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
chunks = 3
split_items(items, chunks)
split_items(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'g', 'k', 'l', 'm'], 3)
split_items(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'g', 'k', 'l', 'm', 'n'], 3)
split_items(range(100), 4)
split_items(range(99), 4)
split_items(range(101), 4)
def chunks(iterable, size):
    """
    Yield successive chunks from iterable, being `size` long.

    Despite the parameter name, the object must support slicing and
    ``len()`` on its slices (list, tuple, str, range, ...).

    https://stackoverflow.com/a/55776536/3423324

    :param iterable: The object you want to split into pieces.
    :param size: The size each of the resulting pieces should have.
    """
    i = 0
    while True:
        sliced = iterable[i:i + size]
        if len(sliced) == 0:
            # to suppress stuff like `range(max, max)`.
            break
        yield sliced
        if len(sliced) < size:
            # our slice is not the full length, so we must have passed the
            # end of the sequence
            break
        i += size  # so we start the next chunk at the right place.
> from itertools import groupby
> batch_no = 3
> data = 'abcdefgh'
> [
[x[1] for x in x[1]]
for x in
groupby(
sorted(
(x[0] % batch_no, x[1])
for x in
enumerate(data)
),
key=lambda x: x[0]
)
]
[['a', 'd', 'g'], ['b', 'e', 'h'], ['c', 'f']]
def comb(s, n):
    """Return a generator over successive ``n``-sized slices of ``s``.

    :param s: Object supporting ``len()`` and slicing.
    :param n: Slice length (non-zero integer).
    """
    return (s[start:start + n] for start in range(0, len(s), n))
然后你可以这样称呼它:
some_list = list(range(0, 20))  # creates a list of 20 elements
generator = comb(some_list, 4)  # creates a generator that will generate lists of 4 elements
for sublist in generator:
    print(sublist)  # prints a sublist of four elements, as it's generated
当然,您不必将生成器分配给变量; 您可以像下面这样直接遍历它:
for sublist in comb(some_list, 4):
    print(sublist)  # prints a sublist of four elements, as it's generated
def chunks(g, n):
    """Divide a generator (or any iterable) ``g`` into small chunks.

    :param g: Any iterable.
    :param n: Chunk length; values below 1 are treated as 1.

    Yields:
        a list that has ``n`` or fewer items
    """
    n = max(1, n)
    buff = []
    for item in g:
        buff.append(item)
        if len(buff) == n:
            yield buff
            buff = []
    # Flush the final, partial chunk.
    if buff:
        yield buff
from enum import Enum
class PartialChunkOptions(Enum):
    """What ``chunker`` does with a final chunk shorter than ``n``."""

    INCLUDE = 0  # yield the short chunk as is
    EXCLUDE = 1  # drop the short chunk
    PAD = 2  # fill the short chunk up to n elements
    ERROR = 3  # raise PartialChunkException


class PartialChunkException(Exception):
    """Raised by ``chunker`` when a partial chunk meets ``ERROR`` mode."""


def chunker(iterable, n, on_partial=PartialChunkOptions.INCLUDE, pad=None):
    """
    A chunker yielding n-element lists from an iterable, with various options
    about what to do about a partial chunk at the end.

    on_partial=PartialChunkOptions.INCLUDE (the default):
        include the partial chunk as a short (<n) element list

    on_partial=PartialChunkOptions.EXCLUDE
        do not include the partial chunk

    on_partial=PartialChunkOptions.PAD
        pad to an n-element list
        (also pass pad=<pad_value>, default None)

    on_partial=PartialChunkOptions.ERROR
        raise a PartialChunkException if a partial chunk is encountered

    :param iterable: Any iterable.
    :param n: Chunk length; must be >= 1.
    :param on_partial: A ``PartialChunkOptions`` member or its integer value.
    :param pad: Fill value used in ``PAD`` mode.
    :raises ValueError: If ``n`` is less than 1 or ``on_partial`` is not a
        valid option.
    :raises PartialChunkException: In ``ERROR`` mode when the last chunk is
        short; all full chunks are yielded first.
    """
    from itertools import islice

    on_partial = PartialChunkOptions(on_partial)
    if n < 1:
        # n <= 0 used to yield empty lists forever.
        raise ValueError("n must be at least 1")
    iterator = iter(iterable)
    while True:
        vals = list(islice(iterator, n))
        if len(vals) == n:
            yield vals
            continue
        # The source is exhausted; ``vals`` holds the partial chunk, if any.
        if vals:
            if on_partial == PartialChunkOptions.INCLUDE:
                yield vals
            elif on_partial == PartialChunkOptions.PAD:
                yield vals + [pad] * (n - len(vals))
            elif on_partial == PartialChunkOptions.ERROR:
                raise PartialChunkException
            # EXCLUDE: silently drop the partial chunk.
        return
test.py
import chunker

# Exercise every partial-chunk option on one input that leaves a partial
# chunk (7 items) and one that does not (9 items).
chunk_size = 3
for it in (range(100, 107),
           range(100, 109)):
    print("\nITERABLE TO CHUNK: {}".format(it))
    print("CHUNK SIZE: {}".format(chunk_size))
    for option in chunker.PartialChunkOptions.__members__.values():
        print("\noption {} used".format(option))
        try:
            for chunk in chunker.chunker(it, chunk_size, on_partial=option):
                print(chunk)
        except chunker.PartialChunkException:
            print("PartialChunkException was raised")
    print("")
test.py输出
ITERABLE TO CHUNK: range(100, 107)
CHUNK SIZE: 3
option PartialChunkOptions.INCLUDE used
[100, 101, 102]
[103, 104, 105]
[106]
option PartialChunkOptions.EXCLUDE used
[100, 101, 102]
[103, 104, 105]
option PartialChunkOptions.PAD used
[100, 101, 102]
[103, 104, 105]
[106, None, None]
option PartialChunkOptions.ERROR used
[100, 101, 102]
[103, 104, 105]
PartialChunkException was raised
ITERABLE TO CHUNK: range(100, 109)
CHUNK SIZE: 3
option PartialChunkOptions.INCLUDE used
[100, 101, 102]
[103, 104, 105]
[106, 107, 108]
option PartialChunkOptions.EXCLUDE used
[100, 101, 102]
[103, 104, 105]
[106, 107, 108]
option PartialChunkOptions.PAD used
[100, 101, 102]
[103, 104, 105]
[106, 107, 108]
option PartialChunkOptions.ERROR used
[100, 101, 102]
[103, 104, 105]
[106, 107, 108]
# chunk_lists(it, n): lazily yield lists of up to n consecutive items of `it`
# (full chunks, then a final shorter one if items are left over), written as
# a single generator expression.
#
# How it works:
#   * `l` starts as [] and is rebound by the innermost `for l in (...,)`.
#   * Pass i == 0 walks `it`: each item `e` is appended to `l`, first
#     resetting `l` to [] if it was already full (`l[:len(l) % n]` is []
#     when len(l) is a multiple of n and all of `l` otherwise). `l` is
#     emitted when its length reaches a multiple of n.
#   * Pass i == 1 walks the one-element sentinel `((),)`: `[e][:1 - i]` is
#     [] so nothing is appended, and the filter is inverted, so `l` is
#     emitted only if it is a non-empty partial chunk.
#   * `len(l) % n` raises ZeroDivisionError when n == 0.
chunk_lists = lambda it, n: (l for l in ([],) for i, g in enumerate((it, ((),))) for e in g for l in (l[:len(l) % n] + [e][:1 - i],) if (len(l) % n == 0) != i)
import math
# length of the list len(lst) is ln
# size of a chunk is size
for num in range(math.ceil(ln / size)):
    # Clamp the end so the last chunk stops at the end of the list.
    start, end = num * size, min((num + 1) * size, ln)
    print(lst[start:end])
from itertools import islice, izip_longest
def batched(iterable, n):
    """Batch data into lists of length n. The last batch may be shorter.

    :param iterable: Any iterable.
    :param n: Batch length; must be >= 1.
    :raises ValueError: If ``n`` is less than 1.
    """
    # batched('ABCDEFG', 3) --> ABC DEF G
    if n < 1:
        raise ValueError('n must be at least one')
    it = iter(iterable)
    while True:
        batch = list(islice(it, n))
        if not batch:
            return
        yield batch
def grouper(iterable, n, *, incomplete='fill', fillvalue=None):
    """Collect data into non-overlapping fixed-length chunks or blocks.

    :param iterable: Any iterable.
    :param n: Chunk length.
    :param incomplete: How to treat a final short chunk: ``'fill'`` pads it
        with ``fillvalue``, ``'strict'`` raises ``ValueError``, ``'ignore'``
        drops it.
    :param fillvalue: Padding value used when ``incomplete='fill'``.
    :return: Iterator of ``n``-tuples.
    :raises ValueError: If ``incomplete`` is not one of the three modes.
    """
    # grouper('ABCDEFG', 3, fillvalue='x') --> ABC DEF Gxx
    # grouper('ABCDEFG', 3, incomplete='strict') --> ABC DEF ValueError
    # grouper('ABCDEFG', 3, incomplete='ignore') --> ABC DEF
    # Imported here so the function does not depend on the Python 2-only
    # ``izip_longest`` import nearby.
    from itertools import zip_longest

    # The same iterator repeated n times: each output tuple pulls n
    # consecutive items.
    args = [iter(iterable)] * n
    if incomplete == 'fill':
        return zip_longest(*args, fillvalue=fillvalue)
    if incomplete == 'strict':
        return zip(*args, strict=True)
    if incomplete == 'ignore':
        return zip(*args)
    raise ValueError('Expected fill, strict, or ignore')