import numpy as np
import pandas as pd

# We know in advance that there will be 5 rows of data.
numberOfRows = 5

# Create the DataFrame with its index pre-allocated, so the loop below
# assigns into rows that already exist.
df = pd.DataFrame(index=np.arange(0, numberOfRows), columns=('lib', 'qty1', 'qty2'))

# Now fill it up row by row.
for x in np.arange(0, numberOfRows):
    # loc or iloc both work here since the index is natural numbers.
    df.loc[x] = [np.random.randint(-1, 1) for _ in range(3)]

# Example session output (the values are random, so they differ per run):
# In[23]: df
# Out[23]:
#    lib  qty1  qty2
# 0   -1    -1    -1
# 1    0     0     0
# 2   -1     0    -1
# 3    0    -1     0
# 4   -1     0     0
速度比较
In[30]: %timeit tryThis()  # function wrapper for this answer
In[31]: %timeit tryOther()  # function wrapper without index (see, for example, @fred)
1000 loops, best of 3: 1.23 ms per loop
100 loops, best of 3: 2.31 ms per loop
# add a row
def add_row(df, row):
    """Return a new DataFrame consisting of ``df`` followed by ``row``.

    ``df`` itself is not modified. The new row is built as a one-row
    DataFrame with a default index, so it keeps the label ``0`` in the
    result (the index is not renumbered).

    Args:
        df: The DataFrame to extend.
        row: A sequence holding one value per column of ``df``, in column
            order.

    Returns:
        A new DataFrame with ``row`` as its last row.

    Raises:
        AssertionError: If ``len(row)`` differs from the number of columns.
    """
    colnames = list(df.columns)
    ncol = len(colnames)
    # Wrap ``row`` in a 1-tuple so a tuple-valued row is formatted as a single
    # value instead of being unpacked as several %-format arguments.
    assert ncol == len(row), "Length of row must be the same as width of DataFrame: %s" % (row,)
    # DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
    return pd.concat([df, pd.DataFrame([row], columns=colnames)])
您还可以像这样增强DataFrame类:
import pandas as pd


def add_row(self, row):
    """Add ``row`` to this DataFrame in place.

    The row is assigned under the label ``len(self.index)``. With a default
    0..n-1 index that label is the next free one, so the row is appended.
    NOTE(review): with any other index, a row already stored under that
    label would be overwritten instead.

    Args:
        row: The values for the new row, one per column.
    """
    self.loc[len(self.index)] = row


# Attach the function to the class so it can be called as ``df.add_row(row)``.
pd.DataFrame.add_row = add_row
num = 10


# Generator function to generate generator object
def numgen_func(num):
    """Yield one ``(name, square, cube)`` tuple for each i in ``range(num)``.

    Args:
        num: How many tuples to produce.

    Yields:
        ``('name_<i>', i*i, i*i*i)`` for i = 0 .. num-1.
    """
    for i in range(num):
        yield ('name_{}'.format(i), (i * i), (i * i * i))
# Generator expression to generate generator object
# (single use: once its data has been consumed it cannot be reused)
numgen_expression = (('name_{}'.format(i), (i * i), (i * i * i)) for i in range(num))

# Build the DataFrame directly from the rows produced by the generator function.
df = pd.DataFrame(data=numgen_func(num), columns=('lib', 'qty1', 'qty2'))
# Current data
data = {"Animal": ["cow", "horse"], "Color": ["blue", "red"]}

# Adding a new row (be careful to ensure every column gets another value)
data["Animal"].append("mouse")
data["Color"].append("black")

# At the end, construct our DataFrame
df = pd.DataFrame(data)
#   Animal  Color
# 0    cow   blue
# 1  horse    red
# 2  mouse  black
# Convert the frame to a nested dict: {column: {index label: value}}.
df2 = df.to_dict()

# This is the total row that we are going to add
values = ["s_101", "hyderabad", 10, 20, 16, 13, 15, 12, 12, 13, 25, 26, 25, 27, "good", "bad"]

# Here df.columns gives us the main dictionary key. values[i] is the entry for
# the i-th column, so an IndexError is raised if there are more columns than
# values.
for i, x in enumerate(df.columns):
    # Here the 101 is our index number. It is also the key of the sub dictionary
    df2[x][101] = values[i]
# Anti-pattern (shown as an example of what NOT to do):
# creates empty DataFrame and appends one row per iteration.
df = pd.DataFrame(columns=['A', 'B', 'C'])
for a, b, c in some_function_that_yields_data():
    # Each pd.concat call returns a brand-new frame holding all previous rows
    # plus the new one, so the work per iteration grows with the frame.
    df = pd.concat(
        [df, pd.DataFrame([{'A': a, 'B': b, 'C': c}])],
        ignore_index=True,
    )
    # The older spelling was equally bad (DataFrame.append was removed in
    # pandas 2.0):
    # df = df.append({'A': a, 'B': b, 'C': c}, ignore_index=True)
NaN的空数据帧
永远不要创建全为NaN的DataFrame,因为这样创建的列会被初始化为object类型(一种缓慢、无法矢量化的dtype)。
# Anti-pattern (shown as an example of what NOT to do):
# creates DataFrame of NaNs and overwrites values.
df = pd.DataFrame(columns=['A', 'B', 'C'], index=range(5))
for i, (a, b, c) in enumerate(some_function_that_yields_data()):
    # Write into the pre-allocated row i. Using len(df) as the label would
    # instead add a new row after the NaN placeholders and never overwrite them.
    df.loc[i] = [a, b, c]