#!/usr/bin/env python
import re
import fileinput
def this_line_is_useless(line):
useless_es = [
'BEGIN TRANSACTION',
'COMMIT',
'sqlite_sequence',
'CREATE UNIQUE INDEX',
'PRAGMA foreign_keys=OFF',
]
for useless in useless_es:
if re.search(useless, line):
return True
def has_primary_key(line):
return bool(re.search(r'PRIMARY KEY', line))
searching_for_end = False
for line in fileinput.input():
if this_line_is_useless(line):
continue
# this line was necessary because '');
# would be converted to \'); which isn't appropriate
if re.match(r".*, ''\);", line):
line = re.sub(r"''\);", r'``);', line)
if re.match(r'^CREATE TABLE.*', line):
searching_for_end = True
m = re.search('CREATE TABLE "?(\w*)"?(.*)', line)
if m:
name, sub = m.groups()
line = "DROP TABLE IF EXISTS %(name)s;\nCREATE TABLE IF NOT EXISTS `%(name)s`%(sub)s\n"
line = line % dict(name=name, sub=sub)
else:
m = re.search('INSERT INTO "(\w*)"(.*)', line)
if m:
line = 'INSERT INTO %s%s\n' % m.groups()
line = line.replace('"', r'\"')
line = line.replace('"', "'")
line = re.sub(r"([^'])'t'(.)", "\1THIS_IS_TRUE\2", line)
line = line.replace('THIS_IS_TRUE', '1')
line = re.sub(r"([^'])'f'(.)", "\1THIS_IS_FALSE\2", line)
line = line.replace('THIS_IS_FALSE', '0')
# Add auto_increment if it is not there since sqlite auto_increments ALL
# primary keys
if searching_for_end:
if re.search(r"integer(?:\s+\w+)*\s*PRIMARY KEY(?:\s+\w+)*\s*,", line):
line = line.replace("PRIMARY KEY", "PRIMARY KEY AUTO_INCREMENT")
# replace " and ' with ` because mysql doesn't like quotes in CREATE commands
if line.find('DEFAULT') == -1:
line = line.replace(r'"', r'`').replace(r"'", r'`')
else:
parts = line.split('DEFAULT')
parts[0] = parts[0].replace(r'"', r'`').replace(r"'", r'`')
line = 'DEFAULT'.join(parts)
# And now we convert it back (see above)
if re.match(r".*, ``\);", line):
line = re.sub(r'``\);', r"'');", line)
if searching_for_end and re.match(r'.*\);', line):
searching_for_end = False
if re.match(r"CREATE INDEX", line):
line = re.sub('"', '`', line)
if re.match(r"AUTOINCREMENT", line):
line = re.sub("AUTOINCREMENT", "AUTO_INCREMENT", line)
print line,
自从做了这个转换,我注意到了一个以前没有注意到的问题。在我的Rails应用程序中,我的文本字段被定义为'string',这一直延续到数据库模式中。这里概述的过程导致这些在MySQL数据库中被定义为VARCHAR(255)。这就对这些字段的大小设置了255个字符的限制——超过这个值的任何内容都会在导入过程中被无声地截断。我认为,要支持大于255的文本长度,MySQL模式需要使用' text '而不是VARCHAR(255)。这里定义的流程不包括这种转换。
以下是对我的数据进行合并和修改后的Python脚本:
#!/usr/bin/env python
import re
import fileinput
def this_line_is_useless(line):
useless_es = [
'BEGIN TRANSACTION',
'COMMIT',
'sqlite_sequence',
'CREATE UNIQUE INDEX',
'PRAGMA foreign_keys=OFF'
]
for useless in useless_es:
if re.search(useless, line):
return True
def has_primary_key(line):
return bool(re.search(r'PRIMARY KEY', line))
searching_for_end = False
for line in fileinput.input():
if this_line_is_useless(line): continue
# this line was necessary because ''); was getting
# converted (inappropriately) to \');
if re.match(r".*, ''\);", line):
line = re.sub(r"''\);", r'``);', line)
if re.match(r'^CREATE TABLE.*', line):
searching_for_end = True
m = re.search('CREATE TABLE "?([A-Za-z_]*)"?(.*)', line)
if m:
name, sub = m.groups()
line = "DROP TABLE IF EXISTS %(name)s;\nCREATE TABLE IF NOT EXISTS `%(name)s`%(sub)s\n"
line = line % dict(name=name, sub=sub)
line = line.replace('AUTOINCREMENT','AUTO_INCREMENT')
line = line.replace('UNIQUE','')
line = line.replace('"','')
else:
m = re.search('INSERT INTO "([A-Za-z_]*)"(.*)', line)
if m:
line = 'INSERT INTO %s%s\n' % m.groups()
line = line.replace('"', r'\"')
line = line.replace('"', "'")
line = re.sub(r"(?<!')'t'(?=.)", r"1", line)
line = re.sub(r"(?<!')'f'(?=.)", r"0", line)
# Add auto_increment if it's not there since sqlite auto_increments ALL
# primary keys
if searching_for_end:
if re.search(r"integer(?:\s+\w+)*\s*PRIMARY KEY(?:\s+\w+)*\s*,", line):
line = line.replace("PRIMARY KEY", "PRIMARY KEY AUTO_INCREMENT")
# replace " and ' with ` because mysql doesn't like quotes in CREATE commands
# And now we convert it back (see above)
if re.match(r".*, ``\);", line):
line = re.sub(r'``\);', r"'');", line)
if searching_for_end and re.match(r'.*\);', line):
searching_for_end = False
if re.match(r"CREATE INDEX", line):
line = re.sub('"', '`', line)
print line,