# 一、缺失值处理
import numpy as np
import pandas as pd
# 1、判断是否存在缺失值
# Load the sample CSV from the current working directory.
data = pd.read_csv('./1.csv')
# Per column: True if that column contains at least one missing value.
data.isnull().any()
# Per column: True only if that column has no missing values at all.
data.notnull().all()
# 2、缺失值是 NaN:删除或填充(二选一)
# Two alternative strategies for NaN-encoded missing values; pick one.
# Option A: drop every row that contains at least one NaN (mutates `data`).
data.dropna(inplace=True)
# Option B: fill column 'r' with its mean. The result is assigned back rather
# than calling fillna(inplace=True) on data['r']: that in-place call operates
# on an intermediate object, which pandas warns about and which leaves `data`
# unchanged when Copy-on-Write is enabled.
# NOTE: once Option A has run there are no NaNs left, so this is a no-op.
data['r'] = data['r'].fillna(data['r'].mean())
# 3、缺失值不是 NaN(例如用 '?' 标记):先替换为 NaN 再处理
# Missing values are marked with the placeholder '?' instead of NaN.
# Convert the placeholder to NaN first (requires `import numpy as np`),
# producing a new frame and leaving `data` itself untouched.
data_new = data.replace(to_replace='?', value=np.nan)
# Then drop every row that contains at least one NaN.
data_new.dropna(inplace=True)
# 二、数据离散化与 one-hot 编码
# Small labelled sample series used to demonstrate binning + one-hot encoding.
sr = pd.Series(
    [1, 2, 3, 4, 5, 9, 0],
    index=['x', 'y', 'c', 'v', 'b', 'n', 'm'],
)
# Discretize into 3 quantile-based bins. The result is kept so that it can be
# encoded below (previously it was computed and thrown away).
sr_binned = pd.qcut(sr, 3)
# One-hot encode the bins (one indicator column per bin), not the raw values;
# column names are built as '<prefix>_<bin>'.
dummies = pd.get_dummies(sr_binned, prefix='这是前缀')
# 三、合并
# Concatenate data1 and data2 along the row axis and rebind data1 to the result.
data1 = pd.concat(
    [data1, data2],
    axis=0,
)
# Inner join of `left` and `right` on the composite key (key1, key2);
# the joined frame is bound to data2.
data2 = pd.merge(
    left,
    right,
    on=['key1', 'key2'],
    how='inner',
)
# 四、交叉表
# Cross-tabulate values1 (rows) against values2 (columns).
# NOTE: this rebinds the name `data`.
data = pd.crosstab(index=values1, columns=values2)
# Divide every row by its own row total; the result is not stored.
data.div(data.sum(axis='columns'), axis='index')
# 五、分组与聚合
# Group rows by 'color' and take the maximum 'price1' within each group.
df.groupby('color')['price1'].max()