>>> def diff(first, second):
second = set(second)
return [item for item in first if item not in second]
>>> diff(A, B)
[1, 3, 4]
>>> diff(B, A)
[5]
>>>
def diff(a, b):
b = set(b)
return [aa for aa in a if aa not in b]
def set_diff(a, b):
return list(set(a) - set(b))
diff_lamb_hension = lambda l1,l2: [x for x in l1 if x not in l2]
diff_lamb_filter = lambda l1,l2: filter(lambda x: x not in l2, l1)
from difflib import SequenceMatcher
def squeezer(a, b):
squeeze = SequenceMatcher(None, a, b)
return reduce(lambda p,q: p+q, map(
lambda t: squeeze.a[t[1]:t[2]],
filter(lambda x:x[0]!='equal',
squeeze.get_opcodes())))
结果:
# Small
a = range(10)
b = range(10/2)
timeit[diff(a, b)]
100000 loops, best of 3: 1.97 µs per loop
timeit[set_diff(a, b)]
100000 loops, best of 3: 2.71 µs per loop
timeit[diff_lamb_hension(a, b)]
100000 loops, best of 3: 2.1 µs per loop
timeit[diff_lamb_filter(a, b)]
100000 loops, best of 3: 3.58 µs per loop
timeit[squeezer(a, b)]
10000 loops, best of 3: 36 µs per loop
# Medium
a = range(10**4)
b = range(10**4/2)
timeit[diff(a, b)]
1000 loops, best of 3: 1.17 ms per loop
timeit[set_diff(a, b)]
1000 loops, best of 3: 1.27 ms per loop
timeit[diff_lamb_hension(a, b)]
1 loops, best of 3: 736 ms per loop
timeit[diff_lamb_filter(a, b)]
1 loops, best of 3: 732 ms per loop
timeit[squeezer(a, b)]
100 loops, best of 3: 12.8 ms per loop
# Big
a = xrange(10**7)
b = xrange(10**7/2)
timeit[diff(a, b)]
1 loops, best of 3: 1.74 s per loop
timeit[set_diff(a, b)]
1 loops, best of 3: 2.57 s per loop
timeit[diff_lamb_filter(a, b)]
# too long to wait for
timeit[diff_lamb_filter(a, b)]
# too long to wait for
timeit[diff_lamb_filter(a, b)]
# TypeError: sequence index must be integer, not 'slice'
# Compares the difference of list a and b
# uses a callback function to compare items
def diff(a, b, callback):
a_missing_in_b = []
ai = 0
bi = 0
a = sorted(a, callback)
b = sorted(b, callback)
while (ai < len(a)) and (bi < len(b)):
cmp = callback(a[ai], b[bi])
if cmp < 0:
a_missing_in_b.append(a[ai])
ai += 1
elif cmp > 0:
# Item b is missing in a
bi += 1
else:
# a and b intersecting on this item
ai += 1
bi += 1
# if a and b are not of same length, we need to add the remaining items
for ai in xrange(ai, len(a)):
a_missing_in_b.append(a[ai])
return a_missing_in_b
from collections import Counter
def diff(first, second):
secondCntr = Counter(second)
second = set(second)
res = []
for i in first:
if i not in second:
res.append(i)
elif i in secondCntr:
if secondCntr[i] > 0:
secondCntr[i] -= 1
else:
res.append(i)
return res
import time
def fun1(l1, l2):
# Order and duplications in l1 are both lost, O(m) + O(n)
return set(l1) - set(l2)
def fun2(l1, l2):
# Order and duplications in l1 are both preserved, O(m) + O(n)
l2_set = set(l2)
return [item for item in l1 if item not in l2_set]
def fun3(l1, l2):
# Order and duplications in l1 are both preserved, O(m * n)
# Don't do
return [item for item in l1 if item not in l2]
A = list(range(7500))
B = list(range(5000, 10000))
loops = 100
start = time.time()
for _ in range(loops):
fun1(A, B)
print(f"fun1 time: {time.time() - start}")
start = time.time()
for _ in range(loops):
fun2(A, B)
print(f"fun2 time: {time.time() - start}")
start = time.time()
for _ in range(loops):
fun3(A, B)
print(f"fun3 time: {time.time() - start}")