In [116]: frame = DataFrame(np.random.randn(4, 3), columns=list('bde'), index=['Utah', 'Ohio', 'Texas', 'Oregon'])
In [117]: frame
Out[117]:
b d e
Utah -0.029638 1.081563 1.280300
Ohio 0.647747 0.831136 -1.549481
Texas 0.513416 -0.884417 0.195343
Oregon -0.485454 -0.477388 -0.309548
In [118]: f = lambda x: x.max() - x.min()
In [119]: frame.apply(f)
Out[119]:
b 1.133201
d 1.965980
e 2.829781
dtype: float64
In [120]: format = lambda x: '%.2f' % x
In [121]: frame.applymap(format)
Out[121]:
b d e
Utah -0.03 1.08 1.28
Ohio 0.65 0.83 -1.55
Texas 0.51 -0.88 0.20
Oregon -0.49 -0.48 -0.31
命名为applymap的原因是Series有一个map方法用于应用一个按元素划分的函数:
In [122]: frame['e'].map(format)
Out[122]:
Utah 1.28
Ohio -1.55
Texas 0.20
Oregon -0.31
Name: e, dtype: object
frame.apply(np.sqrt)
Out[102]:
b d e
Utah NaN 1.435159 NaN
Ohio 1.098164 0.510594 0.729748
Texas NaN 0.456436 0.697337
Oregon 0.359079 NaN NaN
frame.applymap(np.sqrt)
Out[103]:
b d e
Utah NaN 1.435159 NaN
Ohio 1.098164 0.510594 0.729748
Texas NaN 0.456436 0.697337
Oregon 0.359079 NaN NaN
majority = lambda x : x > 17
df2['legal_drinker'] = df2['age'].apply(majority)
def times10(x):
if type(x) is int:
x *= 10
return x
df2.applymap(times10)
df = DataFrame(1, columns=list('abc'),
index=list('1234'))
print(df)
f = lambda x: np.log(x)
print(df.applymap(f)) # apply to the whole dataframe
print(np.log(df)) # applied to the whole dataframe
print(df.applymap(np.sum)) # reducing can be applied for rows only
# apply can take different options (vs. applymap cannot)
print(df.apply(f)) # same as applymap
print(df.apply(sum, axis=1)) # reducing example
print(df.apply(np.log, axis=1)) # cannot reduce
print(df.apply(lambda x: [1, 2, 3], axis=1, result_type='expand')) # expand result