Pandas 缺失值处理

杨朝坤 发布于

Pandas笔记

缺失值处理

  • 中间空值填充(只填充每列或每行中第一个与最后一个有效值之间的缺失值,开头和结尾的缺失值保持不变):
import numpy as np
import pandas as pd

def _fill_segment(segment, value, method, limit, downcast, extra):
    """Return ``segment`` with its missing entries filled.

    ``method`` selects forward/backward propagation; when it is ``None`` the
    constant ``value`` is used instead.
    """
    options = dict(extra)
    # Forward ``downcast`` only when the caller set it: the keyword is
    # deprecated in recent pandas releases, so passing the ``None`` default
    # through would warn (or fail) for no benefit.
    if downcast is not None:
        options["downcast"] = downcast
    if method is None:
        return segment.fillna(value=value, limit=limit, **options)
    if method in ("ffill", "pad"):
        return segment.ffill(limit=limit, **options)
    if method in ("bfill", "backfill"):
        return segment.bfill(limit=limit, **options)
    raise ValueError("unsupported fill method: {!r}".format(method))


def fillna_between(df, value=0, method=None, axis=0, inplace=False, limit=None, downcast=None, **kwargs):
    """Fill only the missing values lying between valid values.

    For every column (``axis == 0``) or every row (any other ``axis``), the
    span from the first to the last non-null entry is located and the missing
    entries inside that span are filled. Leading and trailing missing values
    are left untouched, and lines with fewer than two non-null entries are
    skipped.

    Args:
        df: DataFrame to process.
        value: Constant used for filling when ``method`` is ``None``.
        method: ``"ffill"``/``"pad"`` or ``"bfill"``/``"backfill"`` to
            propagate neighbouring values instead of using ``value``.
            When given, it takes precedence over ``value``.
        axis: ``0`` to work column by column; anything else works row by row.
        inplace: If true, modify ``df`` itself and return ``None``.
        limit: Forwarded to the underlying pandas fill call.
        downcast: Forwarded to the underlying pandas fill call when not
            ``None``.
        **kwargs: Extra keyword arguments for the underlying pandas fill call.

    Returns:
        A filled copy of ``df`` when ``inplace`` is false, otherwise ``None``.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    target = df if inplace else df.copy()
    by_column = axis == 0
    line_count = target.shape[1] if by_column else target.shape[0]

    for k in range(line_count):
        line = target.iloc[:, k] if by_column else target.iloc[k]
        # Positions (not labels) of the valid entries, so duplicate or
        # unsorted index labels cannot confuse the slicing below.
        present = np.flatnonzero(line.notna().to_numpy())
        if present.size < 2:
            continue
        lo, hi = int(present[0]), int(present[-1])
        if present.size == hi - lo + 1:
            continue  # no gap between the first and last valid entry

        segment = line.iloc[lo:hi + 1]
        filled = _fill_segment(segment, value, method, limit, downcast, kwargs)

        # Write back only the cells that were missing, directly on the frame;
        # filling a temporary slice would not reliably reach ``target``.
        gaps = segment.isna().to_numpy()
        cells = lo + np.flatnonzero(gaps)
        new_values = filled.to_numpy()[gaps]
        if by_column:
            target.iloc[cells, k] = new_values
        else:
            target.iloc[k, cells] = new_values

    return None if inplace else target

# Sample frame: column A has NaNs before, between and after its values,
# column C is entirely NaN, and column D has no missing values at all.
df = pd.DataFrame(
    [
        [np.nan, 2, np.nan, 0],
        [3, 4, np.nan, 1],
        [np.nan, np.nan, np.nan, 5],
        [1, 3, np.nan, 4],
        [np.nan, 3, np.nan, 4],
    ],
    columns=["A", "B", "C", "D"],
)
# Request an in-place fill using the function's default arguments.
fillna_between(df, inplace=True)