# Split `x` on semicolons that are not inside a single- or double-quoted span.
# Quote characters are kept in the output fields.
x = """part 1;"this is ; part 2;";'this is ; part 3';part 4;this "is ; part" 5"""
# Each field is accumulated as a list of characters and joined at the end.
results = [[]]
# The quote character that opened the current quoted span, or None when the
# scanner is outside of any quotes.
quote = None
for c in x:
    if c == "'" or c == '"':
        if c == quote:
            # Matching closing quote: leave the quoted span.
            quote = None
        elif quote is None:
            # Opening quote: remember which character must close the span.
            quote = c
        # Otherwise it is the "other" quote type inside a span: plain text.
    elif c == ';':
        if quote is None:
            # Unquoted separator: start a new field and drop the ';' itself.
            results.append([])
            continue
    results[-1].append(c)
# A distinct loop variable is used so that the input string `x` is not
# overwritten (list-comprehension variables leak on Python 2).
results = [''.join(chars) for chars in results]
# results = ['part 1', '"this is ; part 2;"', "'this is ; part 3'",
# 'part 4', 'this "is ; part" 5']
>>> x = '''part 1;"this is ; part 2;";'this is ; part 3';part 4;this "is ; part" 5'''
>>> import re
>>> re.findall(r'''(?:[^;'"]+|'(?:[^']|\\.)*'|"(?:[^']|\\.)*")+''', x)
['part 1', '"this is ; part 2;"', "'this is ; part 3'", 'part 4', 'this "is ; part" 5']
import re
# Matches a quoted span: an opening ' or ", the shortest run of characters
# (no newlines), and the same quote character again.
reg = re.compile('(\'|").*?\\1')
# Kept for backward compatibility with code that may reference it;
# splitter() no longer needs it.
pp = re.compile('.*?;')


def splitter(string):
    """Split *string* on semicolons that are outside of quoted spans.

    A trailing semicolon is appended to the input first, and every returned
    piece keeps its terminating semicolon, e.g. ``'a;"b;c"'`` gives
    ``['a;', '"b;c";']``. Quoted spans are the non-overlapping matches of
    the module-level ``reg`` pattern.

    The pieces are cut directly from the input in one pass, so literal text
    such as ``**1**`` and quoted text repeated inside another quote cannot
    be corrupted by placeholder substitution.
    """
    # add a last semicolon
    string += ';'
    # Quoted spans, followed by an empty sentinel span at the end so the
    # unquoted tail is scanned by the same loop.
    spans = [(m.start(), m.end()) for m in reg.finditer(string)]
    spans.append((len(string), len(string)))

    cuts = []    # index just past each unquoted ';'
    cursor = 0   # start of the unquoted gap currently being scanned
    for span_start, span_end in spans:
        idx = string.find(';', cursor, span_start)
        while idx != -1:
            cuts.append(idx + 1)
            idx = string.find(';', idx + 1, span_start)
        # Skip over the quoted span: semicolons inside it never split.
        cursor = span_end

    # The appended ';' is never inside a quoted span (a span always ends on
    # a quote character), so the last cut is len(string) and the pieces
    # cover the whole input.
    return [string[begin:end] for begin, end in zip([0] + cuts, cuts)]
# Split `s` on semicolons outside of quotes. Unlike a plain split, the quote
# characters that delimit a quoted span are removed from the output.
s = """part 1;"this is ; part 2;";'this is ; part 3';part 4;this "is ; part" 5"""
inQuotes = False
current = ""
results = []
# The quote character that opened the current span ("" when outside quotes).
currentQuote = ""
for c in s:
    if not inQuotes and c == ";":
        # Unquoted separator: finish the current field.
        results.append(current)
        current = ""
    elif not inQuotes and c in ('"', "'"):
        # Opening quote: remember it and drop it from the output.
        currentQuote = c
        inQuotes = True
    elif inQuotes and c == currentQuote:
        # Matching closing quote: also dropped from the output.
        currentQuote = ""
        inQuotes = False
    else:
        current += c
# The last field has no trailing ';', so flush it explicitly.
results.append(current)
# Call form of print works on both Python 2 and Python 3.
print(results)
# ['part 1', 'this is ; part 2;', 'this is ; part 3', 'part 4', 'this is ; part 5']
import csv
try:
    from StringIO import StringIO  # Python 2
except ImportError:
    from io import StringIO  # Python 3
line = '''part 1;"this is ; part 2;";'this is ; part 3';part 4;this "is ; part" 5'''
data = StringIO(line)
# NOTE: csv recognises a single quote character (the default is '"') and
# only when it starts a field. Semicolons inside the single-quoted span and
# inside the mid-field "is ; part" span are therefore still treated as
# delimiters; the row printed for this input is
# ['part 1', 'this is ; part 2;', "'this is ", " part 3'", 'part 4',
#  'this "is ', ' part" 5'].
reader = csv.reader(data, delimiter=';')
for row in reader:
    # Call form of print works on both Python 2 and Python 3.
    print(row)
这应该会得到(注意:csv 默认只把双引号视为引用符,而且只在字段开头才生效,所以单引号内以及字段中间的双引号内的分号仍然会被拆分): ['part 1', 'this is ; part 2;', "'this is ", " part 3'", 'part 4', 'this "is ', ' part" 5']
import re
data = """part 1;"this is ; part 2;";'this is ; part 3';part 4;this "is ; part" 5"""
# A field is a run of: any character other than ';' or a quote, a complete
# double-quoted span, or a complete single-quoted span. Because the whole
# field is a capturing group, re.split() returns the fields at the odd
# indices and the separating ';' characters at the even ones.
# NOTE: a field must contain at least one character, so an empty field
# (two adjacent semicolons) does not show up as '' in the result.
PATTERN = re.compile(r'''((?:[^;"']|"[^"]*"|'[^']*')+)''')
# Call form of print works on both Python 2 and Python 3.
print(PATTERN.split(data)[1::2])
输出是:
['part 1', '"this is ; part 2;"', "'this is ; part 3'", 'part 4', 'this "is ; part" 5']
>>> marker = ";!$%^&;"
>>> [r.replace(marker[1:-1],'') for r in PATTERN.split("aaa;;aaa;'b;;b'".replace(';;', marker))[1::2]]
['aaa', '', 'aaa', "'b;;b'"]
from pyparsing import (printables, originalTextFor, OneOrMore,
quotedString, Word, delimitedList)
# unquoted words can contain anything but a semicolon
printables_less_semicolon = printables.replace(';', '')
# capture content between ';'s, and preserve original text
# (quotedString is tried first so that a ';' inside quotes stays in the field)
content = originalTextFor(
    OneOrMore(quotedString | Word(printables_less_semicolon)))
# process the string; `test` is the input string and is not defined in this
# snippet. The call form of print works on both Python 2 and Python 3.
print(delimitedList(content, ';').parseString(test))
输出为:
['part 1', '"this is ; part 2;"', "'this is ; part 3'", 'part 4',
'this "is ; part" 5']
>>> new_s = ''
>>> is_open = False
>>> for c in s:
... if c == ';' and not is_open:
... c = '\n'
... elif c in ('"',"'"):
... is_open = not is_open
... new_s += c
>>> result = new_s.split('\n')
>>> result
['part 1', '"this is ; part 2;"', "'this is ; part 3'", 'part 4', 'this "is ; part" 5']
def split_with_commas_outside_of_quotes(string):
    """Split *string* on commas that are not inside double quotes.

    Double-quote characters toggle the "inside quotes" state and are kept in
    the returned fields. Like ``str.split``, an empty input yields ``['']``
    and a trailing comma yields a trailing empty field.
    """
    arr = []
    start, in_quotes = 0, False
    for pos, x in enumerate(string):
        if x == '"':
            in_quotes = not in_quotes
        elif x == ',' and not in_quotes:
            arr.append(string[start:pos])
            start = pos + 1
    # Take everything up to the end of the string. Slicing to the last loop
    # index would drop the final character and fail on empty input, where
    # the loop variable is never bound.
    arr.append(string[start:])
    return arr
def splitstring(l, splitchar, ignorechar):
    """Split *l* on *splitchar*, except between pairs of *ignorechar*.

    l          -- the string to parse
    splitchar  -- the separator character
    ignorechar -- the character between which no splitting takes place

    Each occurrence of *ignorechar* toggles the "do not split" state and is
    itself left out of the returned fields. The field after the last
    separator is included, so the result mirrors ``str.split``: an input
    without any separator (including the empty string) yields one field.
    """
    result = []
    current = []   # characters of the field being built
    ignore = False
    for c in l:
        if c == ignorechar:
            ignore = not ignore
        elif c == splitchar and not ignore:
            result.append(''.join(current))
            current = []
        else:
            current.append(c)
    # Flush the final field; without this the text after the last separator
    # is silently lost.
    result.append(''.join(current))
    return result
所以你可以这样运行:
line= """part 1;"this is ; part 2;";'this is ; part 3';part 4;this "is ; part" 5"""
splitted_data = splitstring(line, ';', '"')
结果:
['part 1', '"this is ; part 2;"', "'this is ; part 3'", 'part 4', 'this "is ; part" 5']
>>> import re
>>> data = '''part 1;"this is ; part 2;";'this is ; part 3';part 4;this "is ; part" 5'''
>>> re.findall(r';([\'"][^\'"]+[\'"]|[^;]+)', ';' + data)
['part 1', '"this is ; part 2;"', "'this is ; part 3'", 'part 4', 'this "is ', ' part" 5']
import shlex
shlex.split("""part 1;"this is ; part 2;";'this is ; part 3';part 4;this "is ; part" 5 """ )
['part',
'1;this is ; part 2;;this is ; part 3;part',
'4;this',
'is ; part',
'5']