s = set(temp2)temp3 = [x for x in temp1 if x not in s]
性能测试
import timeitinit = 'temp1 = list(range(100)); temp2 = [i * 2 for i in range(50)]'print timeit.timeit('list(set(temp1) - set(temp2))', init, number = 100000)print timeit.timeit('s = set(temp2);[x for x in temp1 if x not in s]', init, number = 100000)print timeit.timeit('[item for item in temp1 if item not in temp2]', init, number = 100000)
结果:
4.34620224079 # ars' answer4.2770634955 # This answer30.7715615392 # matt b's answer
from collections import Counter
def diff(a, b):""" more verbose than needs to be, for clarity """ca, cb = Counter(a), Counter(b)to_add = cb - cato_remove = ca - cbchanges = Counter(to_add)changes.subtract(to_remove)return changes
lista = ['one', 'three', 'four', 'four', 'one']listb = ['one', 'two', 'three']
In [127]: diff(lista, listb)Out[127]: Counter({'two': 1, 'one': -1, 'four': -2})# in order to go from lista to list b, you need to add a "two", remove a "one", and remove two "four"s
In [128]: diff(listb, lista)Out[128]: Counter({'four': 2, 'one': 1, 'two': -1})# in order to go from listb to lista, you must add two "four"s, add a "one", and remove a "two"
a = 'A quick fox jumps the lazy dog'.split()b = 'A quick brown mouse jumps over the dog'.split()
from difflib import SequenceMatcher
for tag, i, j, k, l in SequenceMatcher(None, a, b).get_opcodes():if tag == 'equal': print('both have', a[i:j])if tag in ('delete', 'replace'): print(' 1st has', a[i:j])if tag in ('insert', 'replace'): print(' 2nd has', b[k:l])
此输出:
both have ['A', 'quick']1st has ['fox']2nd has ['brown', 'mouse']both have ['jumps']2nd has ['over']both have ['the']1st has ['lazy']both have ['dog']
def difference(list1, list2):new_list = []for i in list1:if i not in list2:new_list.append(i)
for j in list2:if j not in list1:new_list.append(j)return new_list
t1 = timeit.Timer("difference(list1, list2)", "from __main__ import difference,list1, list2")t2 = timeit.Timer("sym_diff(list1, list2)", "from __main__ import sym_diff,list1, list2")
print('Using two for loops', t1.timeit(number=100000), 'Milliseconds')print('Using two for loops', t2.timeit(number=100000), 'Milliseconds')
返回
[7, 9, 2, 4]Using two for loops 0.11572412995155901 MillisecondsUsing symmetric_difference 0.11285737506113946 Milliseconds
Process finished with exit code 0
from difflib import SequenceMatcher
class ListDiffer:
def __init__(self, left, right, strict:bool=False):assert isinstance(left, (list, tuple, set)), "left must be list, tuple or set"assert isinstance(right, (list, tuple, set)), "right must be list, tuple or set"self.l = list(left) if isinstance(left, (tuple, set)) else leftself.r = list(right) if isinstance(left, (tuple, set)) else right
if strict:assert isinstance(left, right.__class__), \f'left type ({left.__class__.__name__}) must equal right type ({right.__class__.__name__})'
self.diffs = []self.equal = []
for tag, i, j, k, l in SequenceMatcher(None, self.l, self.r).get_opcodes():if tag in ['delete', 'replace', 'insert']:self.diffs.append((tag, i, j, k, l))elif tag == 'equal':[self.equal.append(v) for v in left[i:j]]
def has_diffs(self):return len(self.diffs) > 0
def only_left(self):a = self.l[:][a.remove(v) for v in self.equal]return a
def only_right(self):a = self.r[:][a.remove(v) for v in self.equal]return a
def __str__(self, verbose:bool=False):iD = 0sb = []if verbose:sb.append(f"left: {self.l}\n")sb.append(f"right: {self.r}\n")sb.append(f"diffs: ")for tag, i, j, k, l in self.diffs:s = f"({iD})"if iD > 0: sb.append(' | ')if tag in ('delete', 'replace'): s = f'{s} l:{self.l[i:j]}'if tag in ('insert', 'replace'): s = f'{s} r:{self.r[k:l]}'sb.append(s)iD = iD + 1
if verbose:sb.append(f"\nequal: {self.equal}")return ''.join(sb)
def __repr__(self) -> str:return "<ListDiffer> {}".format(self.__str__())
用法:
left = ['a','b','c']right = ['aa','b','c','d']# right = ('aa','b','c','d')ld = ListDiffer(left, right, strict=True)print(f'ld.has_diffs(): {ld.has_diffs()}')print(f'ld: {ld}')print(f'ld.only_left(): {ld.only_left()}')print(f'ld.only_right(): {ld.only_right()}')