基于 @jjurach 的回答,我编写了一个辅助(helper)函数,作为 R 中 str 或 glimpse 函数的替代,用于快速查看 pandas DataFrame 的概览。下面是代码及示例:
import pandas as pd
import random
# Demo frame for trying out the helper below: 20 rows with a string column
# ("foo"/"bar"), an integer column (0 or 1) and a float column (random()).
example_df = pd.DataFrame(
    dict(
        var_a=[random.choice(["foo", "bar"]) for _ in range(20)],
        var_b=[random.randint(0, 1) for _ in range(20)],
        var_c=[random.random() for _ in range(20)],
    )
)
# helper function for viewing pandas dataframes
def glimpse_pd(df, max_width=76):
    """Print a compact overview of a DataFrame, one line per column.

    The first printed line reports the object's type and its row and column
    counts. Each following line shows a column's label, its dtype and its
    leading values; the label and dtype cells are padded so the values line
    up, and a line longer than ``max_width`` is cut and suffixed with " ...".

    Args:
        df: The pandas DataFrame to summarise.
        max_width: Maximum length of each per-column line. It also bounds
            how many leading rows are fetched per column before trimming.

    Returns:
        None. The overview is written to standard output.
    """
    # Column labels are not necessarily strings (ints, tuples, ...), so the
    # alignment widths are measured on their str() form.
    labels = [str(label) for label in df.columns]
    dtype_names = [str(dtype) for dtype in df.dtypes]
    # default=0 keeps a frame without columns from raising inside max().
    label_width = max(map(len, labels), default=0)
    dtype_width = max(map(len, dtype_names), default=0)

    # print the dimensions of the dataframe
    print(f"{type(df)}: {df.shape[0]} rows of {df.shape[1]} columns")

    # Clamp so a tiny or negative max_width cannot turn into a negative
    # head() count or a negative slice bound (which would cut from the end).
    preview_rows = max(max_width, 0)
    keep_chars = max(max_width - 4, 0)

    # print the name, dtype and first few values of each column
    for position, (label, dtype_name) in enumerate(zip(labels, dtype_names)):
        # Positional access yields a Series even when labels are duplicated,
        # where df[label] would return a DataFrame instead.
        values = df.iloc[:, position].head(preview_rows).to_list()
        name_cell = f"{label}:".ljust(label_width + 1, " ")
        dtype_cell = f" {dtype_name}".ljust(dtype_width + 3, " ")
        line = f"{name_cell} {dtype_cell} {values}"
        # trim the output if too long
        if len(line) > max_width:
            line = line[:keep_chars] + " ..."
        print(line)