def merge(d1, d2, merge_fn=lambda x, y: y):
    """Merge two dictionaries non-destructively.

    Values on duplicate keys are combined as defined by the optional merge
    function. The default behavior replaces the values in d1 with the
    corresponding values in d2. (There is no other generally applicable
    merge strategy, but often you'll have homogeneous types in your dicts,
    so specifying a merge technique can be valuable.)

    Args:
        d1: Base dictionary; it is copied, never modified.
        d2: Dictionary whose entries are merged on top of ``d1``.
        merge_fn: Called as ``merge_fn(d1_value, d2_value)`` for every key
            present in both inputs; its return value is stored.

    Returns:
        A new dict containing the merged entries.

    Examples:

    >>> d1 = {'a': 1, 'b': 2, 'c': 3}
    >>> merge(d1, d1)
    {'a': 1, 'b': 2, 'c': 3}
    >>> merge(d1, d1, lambda x, y: x + y)
    {'a': 2, 'b': 4, 'c': 6}
    """
    result = dict(d1)
    # items() rather than the Python 2-only iteritems(), so the function
    # runs on both Python 2 and Python 3.
    for k, v in d2.items():
        if k in result:
            result[k] = merge_fn(result[k], v)
        else:
            result[k] = v
    return result
% python -m timeit -s 'x=y=dict((i,i) for i in range(20))' 'z1=dict(x.items() + y.items())'
100000 loops, best of 3: 5.67 usec per loop
% python -m timeit -s 'x=y=dict((i,i) for i in range(20))' 'z2=dict(x, **y)'
100000 loops, best of 3: 1.53 usec per loop
% python -m timeit -s 'from htmlentitydefs import codepoint2name as x, name2codepoint as y' 'z1=dict(x.items() + y.items())'
1000 loops, best of 3: 260 usec per loop
% python -m timeit -s 'from htmlentitydefs import codepoint2name as x, name2codepoint as y' 'z2=dict(x, **y)'
10000 loops, best of 3: 26.9 usec per loop
from itertools import chain

# Python 2 form: chain the two item iterators and let dict() consume the
# pairs in order (x's pairs first, then y's).
z3 = dict(chain(x.iteritems(), y.iteritems()))
一些快速测试,例如。
% python -m timeit -s 'from itertools import chain; from htmlentitydefs import codepoint2name as x, name2codepoint as y' 'z3=dict(chain(x.iteritems(), y.iteritems()))'
10000 loops, best of 3: 66 usec per loop
% python -m timeit -s 'from htmlentitydefs import codepoint2name as x, name2codepoint as y' 'z0=dict(x); z0.update(y)'
10000 loops, best of 3: 26.9 usec per loop
def deepupdate(original, update):
    """Recursively fill ``update`` with the entries of ``original``.

    Keys missing from ``update`` are taken from ``original``. Keys present
    in both keep the value from ``update``, except when both values are
    dicts: those sub-dicts are merged recursively instead of overwritten.

    Args:
        original: Dict supplying the fallback entries; not modified here.
        update: Dict that takes precedence; it is modified in place.

    Returns:
        The same ``update`` object, after merging.
    """
    # items() rather than the Python 2-only iteritems().
    for key, value in original.items():
        if key not in update:
            # Stored by reference: nested values are shared with original.
            update[key] = value
        elif isinstance(value, dict) and isinstance(update[key], dict):
            # Only descend when BOTH sides are dicts; descending into a
            # non-dict update value would fail on the membership test or
            # the item assignment.
            deepupdate(value, update[key])
    return update
>>> from collections import ChainMap
>>> x = {'a':1, 'b': 2}
>>> y = {'b':10, 'c': 11}
>>> z = dict(ChainMap({}, y, x))
>>> for k, v in z.items():
...     print(k, '-->', v)
...
a --> 1
b --> 10
c --> 11
In [1]: from collections import ChainMap
In [2]: from string import ascii_uppercase as up, ascii_lowercase as lo; x = dict(zip(lo, up)); y = dict(zip(up, lo))
In [3]: chainmap_dict = ChainMap(y, x)
In [4]: union_dict = dict(x.items() | y.items())
In [5]: timeit for k in union_dict: union_dict[k]
100000 loops, best of 3: 2.15 µs per loop
In [6]: timeit for k in chainmap_dict: chainmap_dict[k]
10000 loops, best of 3: 27.1 µs per loop
import timeit
# Number of executions of each timed statement.
n = 100000

# Setup source handed to timeit. The two assignments must be on separate
# lines: fused onto a single line the setup code is a syntax error.
su = """
x = {'a':1, 'b': 2}
y = {'b':10, 'c': 11}
"""
def timeMerge(f, su, niter):
    """Time the statement ``f`` and print the elapsed seconds.

    Args:
        f: Source code of the statement to time.
        su: Setup source passed to ``timeit.Timer``.
        niter: Number of times ``f`` is executed.
    """
    # Honor the niter argument; previously the module-level ``n`` was read
    # and this parameter was silently ignored.
    elapsed = timeit.Timer(f, setup=su).timeit(niter)
    # A print call with one pre-formatted argument produces the same output
    # on Python 2 and Python 3.
    print("{:4f} sec for: {:30s}".format(elapsed, f))
# Time every merge strategy with the shared setup code and iteration count.
for statement in (
    "dict(x, **y)",
    "x.update(y)",
    "dict(x.items() + y.items())",
    "for k in y.keys(): x[k] = k in x and x[k]+y[k] or y[k] ",
):
    timeMerge(statement, su, n)
# Confirm the for loop adds the 'b' entries together.
x = {'a': 1, 'b': 2}
y = {'b': 10, 'c': 11}
for k in y:
    # Conditional expression instead of ``cond and a or b``: the old idiom
    # falls through to y[k] whenever the sum itself is falsy (e.g. 0).
    x[k] = x[k] + y[k] if k in x else y[k]
# One pre-formatted argument keeps the output identical on Python 2 and 3.
print("confirm b elements are added: {}".format(x))
结果:
0.049465 sec for: dict(x, **y)
0.033729 sec for: x.update(y)
0.150380 sec for: dict(x.items() + y.items())
0.083120 sec for: for k in y.keys(): x[k] = k in x and x[k]+y[k] or y[k]
confirm b elements are added: {'a': 1, 'c': 11, 'b': 12}
x = {'a': 1, 'b': 2}
y = {'b': 10, 'c': 11}

# Collect, for every key, the list of values it has across both dicts
# (x's value first, then y's). One loop over both inputs replaces the two
# copy-pasted loops; setdefault creates the list on first sight of a key.
z = {}
for d in (x, y):
    for k, v in d.items():
        z.setdefault(k, []).append(v)
{'a': [1], 'b': [2, 10], 'c': [11]}
def merge_two_dicts(x, y):
    """Return a new dict holding x's entries overlaid with y's.

    Neither argument is modified. The copy is shallow, so the values are
    shared with the inputs. On duplicate keys the value from ``y`` wins.
    """
    merged = x.copy()
    merged.update(y)
    return merged
然后你有一个表达式:
z = merge_two_dicts(x, y)
您还可以创建一个函数来合并任意数量的字典,从零到一个非常大的数字:
def merge_dicts(*dict_args):
    """Shallow-merge any number of dictionaries into a new dict.

    The arguments are applied from left to right, so on duplicate keys the
    value from the later dictionary takes precedence. With no arguments an
    empty dict is returned.
    """
    merged = {}
    for mapping in dict_args:
        merged.update(mapping)
    return merged
>>> c = dict(a.items() + b.items())
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
TypeError: unsupported operand type(s) for +: 'dict_items' and 'dict_items'
{k: v for d in dicts for k, v in d.items()} # iteritems in Python 2.7
或者在Python 2.6中(也许早在2.4中引入生成器表达式时):
dict((k, v) for d in dicts for k, v in d.items()) # iteritems in Python 2
itertools.chain将以正确的顺序将迭代器链接到键值对上:
from itertools import chain

# Pairs from x come first, then y's, so y wins on duplicate keys.
z = dict(chain(x.items(), y.items()))  # iteritems in Python 2
性能分析
我只对已知行为正确的用法进行性能分析。(自包含,因此您可以自己复制和粘贴。)
from timeit import repeat
from itertools import chain

# Two small dicts that share the keys 'e', 'f' and 'g'; every value is None.
x = dict.fromkeys('abcdefg')
y = dict.fromkeys('efghijk')
def merge_two_dicts(x, y):
    """Return a shallow copy of ``x`` updated with the entries of ``y``."""
    combined = x.copy()
    combined.update(y)
    return combined
# Fastest timing reported by timeit.repeat for each merge strategy.
min(repeat(lambda: {**x, **y}))
min(repeat(lambda: merge_two_dicts(x, y)))
min(repeat(lambda: {k: v for d in (x, y) for k, v in d.items()}))
min(repeat(lambda: dict(chain(x.items(), y.items()))))
min(repeat(lambda: dict(item for d in (x, y) for item in d.items())))
在Python 3.8.1中,NixOS:
>>> min(repeat(lambda: {**x, **y}))
1.0804965235292912
>>> min(repeat(lambda: merge_two_dicts(x, y)))
1.636518670246005
>>> min(repeat(lambda: {k: v for d in (x, y) for k, v in d.items()}))
3.1779992282390594
>>> min(repeat(lambda: dict(chain(x.items(), y.items()))))
2.740647904574871
>>> min(repeat(lambda: dict(item for d in (x, y) for item in d.items())))
4.266070580109954
$ uname -a
Linux nixos 4.19.113 #1-NixOS SMP Wed Mar 25 07:06:15 UTC 2020 x86_64 GNU/Linux
a = {'x': 3, 'y': 4}
b = MergeDict(a)  # we merge just one dict
b['x'] = 5
# print(...) with a single argument works on both Python 2 and Python 3.
print(b)  # will print {'x': 5, 'y': 4}
print(a)  # will print {'y': 4, 'x': 3}
以下是MergeDict的简单代码:
class MergeDict(object):
    """Merged view over several dicts that never modifies them.

    Lookups consult a private overlay dict first and then the given
    dictionaries from last to first, so later arguments take precedence
    over earlier ones. Writes go to the overlay only.
    """

    def __init__(self, *originals):
        # Empty overlay first (it receives every write), followed by the
        # originals in reverse order so that later arguments win.
        self.originals = ({},) + originals[::-1]  # reversed

    def __getitem__(self, key):
        """Return the value for ``key`` from the first dict that has it.

        Raises:
            KeyError: If no underlying dict contains ``key``.
        """
        for original in self.originals:
            try:
                return original[key]
            except KeyError:
                pass
        raise KeyError(key)

    def __setitem__(self, key, value):
        """Store ``value`` in the overlay; the originals stay untouched."""
        self.originals[0][key] = value

    def __iter__(self):
        """Iterate over the merged keys."""
        return iter(self.keys())

    def __repr__(self):
        # Shown in constructor-argument order, with the overlay last.
        return '%s(%s)' % (
            self.__class__.__name__,
            ', '.join(repr(original)
                      for original in reversed(self.originals)))

    def __str__(self):
        return '{%s}' % ', '.join('%r: %r' % i for i in self.iteritems())

    def iteritems(self):
        """Yield each merged ``(key, value)`` pair exactly once."""
        found = set()
        for original in self.originals:
            # items() rather than iteritems(): Python 3 dicts have no
            # iteritems(), while items() exists on both versions.
            for k, v in original.items():
                if k not in found:
                    yield k, v
                    found.add(k)

    def items(self):
        """Return the merged ``(key, value)`` pairs as a list."""
        return list(self.iteritems())

    def keys(self):
        """Return the merged keys as a list."""
        return list(k for k, _ in self.iteritems())

    def values(self):
        """Return the merged values as a list."""
        return list(v for _, v in self.iteritems())
$ python2
Python 2.7.13 (default, Jan 19 2017, 14:48:08)
[GCC 6.3.0 20170118] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> x = {'a':1, 'b': 2}
>>> y = {'b':10, 'c': 11}
>>> [z.update(d) for z in [{}] for d in (x, y)]
[None, None]
>>> z
{'a': 1, 'c': 11, 'b': 10}
>>> ...
def merge_dict_recursive(new: dict, existing: dict):
    """Deep-merge two dicts, with ``existing`` taking precedence.

    Keys found in only one input are carried over. For keys found in both,
    the value from ``existing`` wins, except that when both values are
    dicts they are merged recursively by the same rule.

    Args:
        new: Dict supplying defaults and newly added keys.
        existing: Dict whose values override those in ``new``.

    Returns:
        A new merged dict. Neither input is modified, and every dict inside
        the result is a freshly built object.
    """
    merged = new | existing

    # Only existing keys are reassigned below, so the dict's size does not
    # change while it is being iterated.
    for key, value in merged.items():
        if isinstance(value, dict):
            new_sub = new.get(key)
            existing_sub = existing.get(key)
            # A side that is missing or not a dict contributes nothing to
            # the nested merge. This avoids the KeyError/TypeError of
            # indexing or ``|``-ing that side directly, and avoids writing
            # the missing key back into ``existing``.
            merged[key] = merge_dict_recursive(
                new_sub if isinstance(new_sub, dict) else {},
                existing_sub if isinstance(existing_sub, dict) else {},
            )
    return merged
示例测试数据:
new
{'dashboard': True,
 'depth': {'a': 1, 'b': 22222, 'c': {'d': {'e': 69}}},
 'intro': 'this is the dashboard',
 'newkey': False,
 'show_closed_sessions': False,
 'version': None,
 'visible_sessions_limit': 9999}
existing
{'dashboard': True,
 'depth': {'a': 5},
 'intro': 'this is the dashboard',
 'newkey': True,
 'show_closed_sessions': False,
 'version': '2021-08-22 12:00:30.531038+00:00'}
merged
{'dashboard': True,
 'depth': {'a': 5, 'b': 22222, 'c': {'d': {'e': 69}}},
 'intro': 'this is the dashboard',
 'newkey': True,
 'show_closed_sessions': False,
 'version': '2021-08-22 12:00:30.531038+00:00',
 'visible_sessions_limit': 9999}
from typing import List, Dict
from copy import deepcopy


def merge_dicts(*from_dicts: Dict, no_copy: bool = False) -> Dict:
    """Deep-merge any number of dicts without recursion.

    Dicts are applied from left to right. When a key maps to a dict on both
    sides the two dicts are merged; in every other case the later value
    replaces the earlier one.

    By default a fresh dict is created and everything is deep-copied into
    it, so the inputs are left untouched.

    With ``no_copy=True`` all dicts are merged into the first one, in
    place and without copying. Nested dicts from later inputs are inserted
    by reference and can be modified by subsequent layers. Why? Sometimes
    one dictionary has to be combined from "layers" that are not used
    anywhere else and are dropped immediately after merging.

    Args:
        *from_dicts: The dicts to merge, lowest priority first.
        no_copy: Merge into the first dict in place instead of copying.

    Returns:
        The merged dict: a new object by default, the first argument when
        ``no_copy`` is true, or an empty dict when called with no dicts.
    """
    if not from_dicts:
        # Nothing to merge; previously this raised IndexError.
        return {}

    xerox = (lambda x: x) if no_copy else deepcopy

    result = xerox(from_dicts[0])

    for layer in from_dicts[1:]:
        # Work queue of (destination, source) pairs. Colliding nested dicts
        # are appended while the queue is being iterated, which is what
        # replaces recursion here.
        merge_queue = [(result, layer)]
        for _to, _from in merge_queue:
            for k, v in _from.items():
                if k in _to and isinstance(_to[k], dict) and isinstance(v, dict):
                    # Key collision and both values are dicts:
                    # add the pair to the merging queue.
                    merge_queue.append((_to[k], v))
                    continue
                _to[k] = xerox(v)

    return result
用法:
# Run the same three-layer merge twice: first in place (no_copy=True), then
# with the default copying behavior, printing object ids so it is visible
# whether the result is the first input or a fresh dict.
for banner, options in (
    ("merge all dicts to first one without copy.", {"no_copy": True}),
    ("create fresh copy of all", {}),
):
    print("=============================")
    print(banner)
    a0 = {"a":{"b":1}}
    a1 = {"a":{"c":{"d":4}}}
    a2 = {"a":{"c":{"f":5}, "d": 6}}
    print(f"a0 id[{id(a0)}] value:{a0}")
    print(f"a1 id[{id(a1)}] value:{a1}")
    print(f"a2 id[{id(a2)}] value:{a2}")
    r = merge_dicts(a0, a1, a2, **options)
    print(f"r id[{id(r)}] value:{r}")