import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Create dataframe
df = pd.DataFrame()
n = 100

# Make individuals: n random integers drawn from [20, 60)
df['Individual'] = np.random.randint(low=20, high=60, size=n)

# Delete some individual values: each cell is replaced by NaN with
# probability 0.1
df = df.mask(np.random.random(df.shape) < .1)

# Make groups: every row gets a random group label in {1, 2, 3}.
# This is added after masking, so the group labels are never missing.
df['Group'] = np.random.randint(low=1, high=4, size=n)

# Visualize missing data with heatmap (hat tip to Jose Portilla)
sns.heatmap(df.isnull(), yticklabels=False, cbar=False, cmap='plasma')


# Provide average based on group of missing numbers
def get_feature_mean(row, feature, index):
    """Return the row's ``feature`` value, imputing it when missing.

    Intended for ``df.apply(..., axis=1)``. A missing value is replaced
    by the mean of ``feature`` over all rows of the module-level ``df``
    that share the row's ``Group``, rounded to zero decimals.

    Args:
        row: A row of ``df`` as a Series; must contain ``feature`` and
            ``'Group'`` labels.
        feature: Label of the column to read and, if needed, impute.
        index: Positional location of ``feature`` in the row. Kept only
            for backward compatibility with existing callers; the value
            is looked up by label, so a mismatched position no longer
            makes the function return ``None``.

    Returns:
        The original value when present, otherwise the rounded group
        mean (NaN if the group has no non-missing values).

    Raises:
        KeyError: If ``feature`` or ``'Group'`` is absent from ``row``.
    """
    value = row[feature]
    # pd.isna also copes with None/object cells, where np.isnan raises.
    if not pd.isna(value):
        return value
    # Series.mean skips NaN by default, so only observed values of the
    # row's group contribute to the replacement.
    same_group = df['Group'] == row['Group']
    return df.loc[same_group, feature].mean().round(0)


feature = 'Individual'
df[feature] = df.apply(
    get_feature_mean,
    args=(feature, df.columns.get_loc(feature)),
    axis=1,
)