How to open and convert an sqlite database to a pandas dataframe

I have downloaded some data as an sqlite database (data.db) and I want to open this database in Python and then convert it into a pandas dataframe.

So far I have done this:

import sqlite3
import pandas
dat = sqlite3.connect('data.db') #connected to database with out error
pandas.DataFrame.from_records(dat, index=None, exclude=None, columns=None, coerce_float=False, nrows=None)

But it throws this error:

Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/usr/local/lib/python2.7/dist-packages/pandas/core/frame.py", line 980, in from_records
    coerce_float=coerce_float)
  File "/usr/local/lib/python2.7/dist-packages/pandas/core/frame.py", line 5353, in _to_arrays
    if not len(data):
TypeError: object of type 'sqlite3.Connection' has no len()

How can I convert the sqlite database to a pandas dataframe?


The line

dat = sqlite3.connect('data.db')

only opens a connection to the database; no records have been queried at this point. So you have to execute a query afterwards and pass its result to the pandas DataFrame constructor.

It should look similar to this:

import sqlite3
import pandas as pd


dat = sqlite3.connect('data.db')
query = dat.execute("SELECT * From <TABLENAME>")
cols = [column[0] for column in query.description]
results = pd.DataFrame.from_records(data=query.fetchall(), columns=cols)

I am not really firm with SQL commands, so you should check the correctness of the query. <TABLENAME> should be the name of the table in your database.
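If you are not sure what the table is called, a small sketch (reusing the dat connection from above) is to list the table names stored in sqlite_master first:

# list the tables so you know what to put in place of <TABLENAME>
tables = dat.execute("SELECT name FROM sqlite_master WHERE type='table';").fetchall()
print(tables)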

Although sqlite3 is part of the Python Standard Library and provides a nice, easy interface to SQLite databases, the pandas tutorial states:

Note In order to use read_sql_table(), you must have the SQLAlchemy optional dependency installed.

But Pandas still supports sqlite3 access if you want to avoid installing SQLAlchemy:

import sqlite3
import pandas as pd
# Create your connection.
cnx = sqlite3.connect('file.db')


df = pd.read_sql_query("SELECT * FROM table_name", cnx)

As stated here, though, you need to know the name of the table you want to read in advance.

Search Google for sqlalchemy, engine, and your database's name (sqlite in this case):

import pandas as pd
import sqlalchemy


db_name = "data.db"
table_name = "LITTLE_BOBBY_TABLES"


engine = sqlalchemy.create_engine("sqlite:///%s" % db_name, execution_options={"sqlite_raw_colnames": True})
df = pd.read_sql_table(table_name, engine)
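If you don't know the table name in advance, SQLAlchemy can list the tables for you; a minimal sketch, assuming the same engine object created above:

insp = sqlalchemy.inspect(engine)
print(insp.get_table_names())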

I have stored my data in database.sqlite; the table name is Reviews.

import sqlite3
import pandas as pd

con = sqlite3.connect("database.sqlite")

data = pd.read_sql_query("SELECT * FROM Reviews", con)
print(data)

I wrote a piece of code that saves the tables in a database file such as .sqlite or .db and either creates an Excel file with each table as a sheet, or writes each table to its own CSV.

Note: You don't need to know the table names in advance!

import os, fnmatch
import sqlite3
import pandas as pd


# creates a directory without throwing an error
def create_dir(dir):
    if not os.path.exists(dir):
        os.makedirs(dir)
        print("Created Directory : ", dir)
    else:
        print("Directory already existed : ", dir)
    return dir


# finds files in a directory matching a wildcard pattern
def find(pattern, path):
    result = []
    for root, dirs, files in os.walk(path):
        for name in files:
            if fnmatch.fnmatch(name, pattern):
                result.append(os.path.join(root, name))
    return result


# convert sqlite databases (.db, .sqlite) to pandas dataframes and save them
# (an Excel file with each table as a different sheet, or individual csv files)
def save_db(dbpath=None, excel_path=None, csv_path=None, extension="*.sqlite", csvs=True, excels=True):
    if excels == False and csvs == False:
        print("At least one of the parameters needs to be true: csvs or excels")
        return -1

    # little code to find files by extension
    if dbpath == None:
        files = find(extension, os.getcwd())
        if len(files) > 1:
            print("Multiple files found! Selecting the first one found!")
            print("To locate your file, set dbpath=<yourpath>")
        dbpath = files[0]
    print("Reading database file from location :", dbpath)

    # path handling
    external_folder, base_name = os.path.split(os.path.abspath(dbpath))
    file_name = os.path.splitext(base_name)[0]  # file name without extension
    exten = os.path.splitext(base_name)[-1]     # .file_extension

    internal_folder = "Saved_Dataframes_" + file_name
    main_path = os.path.join(external_folder, internal_folder)
    create_dir(main_path)

    excel_path = os.path.join(main_path, "Excel_Multiple_Sheets.xlsx") if excel_path == None else excel_path
    csv_path = main_path if csv_path == None else csv_path

    db = sqlite3.connect(dbpath)
    cursor = db.cursor()
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    tables = cursor.fetchall()
    print(len(tables), "Tables found :")

    if excels == True:
        # for writing to excel (xlsx) we will be needing this!
        try:
            import xlsxwriter
        except ModuleNotFoundError:
            raise ModuleNotFoundError("Please install it first: pip install XlsxWriter")

    if excels == True and csvs == True:
        writer = pd.ExcelWriter(excel_path, engine='xlsxwriter')
        i = 0
        for table_name in tables:
            table_name = table_name[0]
            table = pd.read_sql_query("SELECT * from %s" % table_name, db)
            i += 1
            print("Parsing Excel Sheet ", i, " : ", table_name)
            table.to_excel(writer, sheet_name=table_name, index=False)
            print("Parsing CSV File ", i, " : ", table_name)
            table.to_csv(os.path.join(csv_path, table_name + '.csv'), index_label='index')

        writer.save()

    elif excels == True:
        writer = pd.ExcelWriter(excel_path, engine='xlsxwriter')
        i = 0
        for table_name in tables:
            table_name = table_name[0]
            table = pd.read_sql_query("SELECT * from %s" % table_name, db)
            i += 1
            print("Parsing Excel Sheet ", i, " : ", table_name)
            table.to_excel(writer, sheet_name=table_name, index=False)

        writer.save()

    elif csvs == True:
        i = 0
        for table_name in tables:
            table_name = table_name[0]
            table = pd.read_sql_query("SELECT * from %s" % table_name, db)
            i += 1
            print("Parsing CSV File ", i, " : ", table_name)
            table.to_csv(os.path.join(csv_path, table_name + '.csv'), index_label='index')

    cursor.close()
    db.close()
    return 0


save_db()
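If the defaults don't fit, you can point the function at a specific database and pick the output format explicitly; for example (the file name here is just a placeholder):

save_db(dbpath="database.sqlite", csvs=True, excels=False)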

Parsing a sqlite .db into a dictionary of dataframes without knowing the table names:

def read_sqlite(dbfile):
    import sqlite3
    from pandas import read_sql_query, read_sql_table

    with sqlite3.connect(dbfile) as dbcon:
        tables = list(read_sql_query("SELECT name FROM sqlite_master WHERE type='table';", dbcon)['name'])
        out = {tbl: read_sql_query(f"SELECT * from {tbl}", dbcon) for tbl in tables}

    return out
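For example, a quick sketch using the data.db file and the Reviews table mentioned in the other answers (adjust both names to your own database):

dfs = read_sqlite('data.db')
print(dfs.keys())              # the table names found in the file
print(dfs['Reviews'].head())   # 'Reviews' is a hypothetical table name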

If data.db is your SQLite database and table_name is one of its tables, then you can do:

import pandas as pd
df = pd.read_sql_table('table_name', 'sqlite:///data.db')

No other imports are needed, though SQLAlchemy must be installed for pandas to understand the connection string.