d = {'NORTH':'N','SOUTH':'S','EAST':'E','WEST':'W'}
address = "123 north anywhere street"
for k,v in d.iteritems():
address = address.upper().replace(k, v)
address is now '123 N ANYWHERE STREET'
Well, if you want to preserve case, whitespace and nested words (e.g. Southstreet should not converted to Sstreet), consider using this simple list comprehension:
import re
l = {'NORTH':'N','SOUTH':'S','EAST':'E','WEST':'W'}
address = "North 123 East Anywhere Southstreet West"
new_address = ''.join(l[p.upper()] if p.upper() in l else p for p in re.split(r'(\W+)', address))
import re
l = {'NORTH':'N','SOUTH':'S','EAST':'E','WEST':'W'}
address = "123 north anywhere street"
for k, v in l.iteritems():
t = re.compile(re.escape(k), re.IGNORECASE)
address = t.sub(v, address)
print(address)
address = "123 north anywhere street"
for word, initial in {"NORTH":"N", "SOUTH":"S" }.items():
address = address.replace(word.lower(), initial)
print address
One option I don't think anyone has yet suggested is to build a regular expression containing all of the keys and then simply do one replace on the string:
>>> import re
>>> l = {'NORTH':'N','SOUTH':'S','EAST':'E','WEST':'W'}
>>> pattern = '|'.join(sorted(re.escape(k) for k in l))
>>> address = "123 north anywhere street"
>>> re.sub(pattern, lambda m: l.get(m.group(0).upper()), address, flags=re.IGNORECASE)
'123 N anywhere street'
>>>
This has the advantage that the regular expression can ignore the case of the input string without modifying it.
If you want to operate only on complete words then you can do that too with a simple modification of the pattern:
>>> pattern = r'\b({})\b'.format('|'.join(sorted(re.escape(k) for k in l)))
>>> address2 = "123 north anywhere southstreet"
>>> re.sub(pattern, lambda m: l.get(m.group(0).upper()), address2, flags=re.IGNORECASE)
'123 N anywhere southstreet'
"Translating" a string with a dictionary is a very common requirement. I propose a function that you might want to keep in your toolkit:
def translate(text, conversion_dict, before=None):
"""
Translate words from a text using a conversion dictionary
Arguments:
text: the text to be translated
conversion_dict: the conversion dictionary
before: a function to transform the input
(by default it will to a lowercase)
"""
# if empty:
if not text: return text
# preliminary transformation:
before = before or str.lower
t = before(text)
for key, value in conversion_dict.items():
t = t.replace(key, value)
return t
Then you can write:
>>> a = {'hello':'bonjour', 'world':'tout-le-monde'}
>>> translate('hello world', a)
'bonjour tout-le-monde'
All of these answers are good, but you are missing python string substitution - it's simple and quick, but requires your string to be formatted correctly.
Both using replace() and format() are not so precise:
data = '{content} {address}'
for k,v in {"{content}":"some {address}", "{address}":"New York" }.items():
data = data.replace(k,v)
# results: some New York New York
'{ {content} {address}'.format(**{'content':'str1', 'address':'str2'})
# results: ValueError: unexpected '{' in field name
It is better to translate with re.sub() if you need precise place:
import re
def translate(text, kw, ignore_case=False):
search_keys = map(lambda x:re.escape(x), kw.keys())
if ignore_case:
kw = {k.lower():kw[k] for k in kw}
regex = re.compile('|'.join(search_keys), re.IGNORECASE)
res = regex.sub( lambda m:kw[m.group().lower()], text)
else:
regex = re.compile('|'.join(search_keys))
res = regex.sub( lambda m:kw[m.group()], text)
return res
#'score: 99.5% name:%(name)s' %{'name':'foo'}
res = translate( 'score: 99.5% name:{name}', {'{name}':'foo'})
print(res)
res = translate( 'score: 99.5% name:{NAME}', {'{name}':'foo'}, ignore_case=True)
print(res)
I would suggest to use a regular expression instead of a simple replace. With a replace you have the risk that subparts of words are replaced which is maybe not what you want.
import json
import re
with open('filePath.txt') as f:
data = f.read()
with open('filePath.json') as f:
glossar = json.load(f)
for word, initial in glossar.items():
data = re.sub(r'\b' + word + r'\b', initial, data)
print(data)
The advantage of Duncan's approach is that it is careful not to overwrite previous answers. For example if you have {"Shirt": "Tank Top", "Top": "Sweater"}, the other approaches replace "Shirt" with "Tank Sweater".
The following code extends that approach, but sorts the keys such that the longest one is always found first and it uses named groups to search case insensitively.
import re
root_synonyms = {'NORTH':'N','SOUTH':'S','EAST':'E','WEST':'W'}
# put the longest search term first. This menas the system does not replace "top" before "tank top"
synonym_keys = sorted(root_synonyms.keys(),key=len,reverse=True)
# the groups will be named w1, w2, ... . Determine what each of them should become
number_mapping = {f'w{i}':root_synonyms[key] for i,key in enumerate(synonym_keys) }
# make a regex for each word where "tank top" or "tank top" are the same
search_terms = [re.sub(r'\s+',r'\s+',re.escape(k)) for k in synonym_keys]
# give each search term a name w1 etc where
search_terms = [f'(?P<w{i}>\\b{key}\\b)' for i,key in enumerate(search_terms)]
# make one huge regex
search_terms = '|'.join(search_terms)
# compile it for speed
search_re = re.compile(search_terms,re.IGNORECASE)
query = "123 north anywhere street"
result = re.sub(search_re,lambda x: number_mapping[x.lastgroup],query)
print(result)