# Simple example where dataframes index are the name on which to perform
# the join operations
import pandas as pd
import numpy as np
name = ['Sophia' ,'Emma' ,'Isabella' ,'Olivia' ,'Ava' ,'Emily' ,'Abigail' ,'Mia']
df1 = pd.DataFrame(np.random.randn(8, 3), columns=['A','B','C'], index=name)
df2 = pd.DataFrame(np.random.randn(8, 1), columns=['D'], index=name)
df3 = pd.DataFrame(np.random.randn(8, 2), columns=['E','F'], index=name)
df = df1.join(df2)
df = df.join(df3)
# If you have a 'Name' column that is not the index of your dataframe,
# one can set this column to be the index
# 1) Create a column 'Name' based on the previous index
df1['Name'] = df1.index
# 1) Select the index from column 'Name'
df1 = df1.set_index('Name')
# If indexes are different, one may have to play with parameter how
gf1 = pd.DataFrame(np.random.randn(8, 3), columns=['A','B','C'], index=range(8))
gf2 = pd.DataFrame(np.random.randn(8, 1), columns=['D'], index=range(2,10))
gf3 = pd.DataFrame(np.random.randn(8, 2), columns=['E','F'], index=range(4,12))
gf = gf1.join(gf2, how='outer')
gf = gf.join(gf3, how='outer')
dfs = [df1, df2, df3]
dfs = [df.set_index('name') for df in dfs]
# cant not run if index not unique
dfs = pd.concat(dfs, join='outer', axis = 1)
functools.reduce
dfs = [df1, df2, df3, df4]
# still run with index not unique
import functools as ft
df_final = ft.reduce(lambda left, right: pd.merge(left, right, on='name', how = 'outer'), dfs)
join
# cant not run if index not unique
dfs = [df1, df2, df3]
dfs = [df.set_index('name') for df in dfs]
dfs[0].join(dfs[1:], how = 'outer')