pandas 函数-处理空值
生活随笔
收集整理的這篇文章主要介紹了
pandas 函数-处理空值
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
今天這裡談的函數,以後進行數據分析的時候會經常用到。
import numpy as np
import pandas as pd
from numpy import nan as NA
from pandas import DataFrame, Series
# --- Unique values, value counts and membership tests on a Series ---------
obj = Series(['c', 'a', 'd', 'a', 'a', 'b', 'b', 'c', 'c'])
uniques = obj.unique()
print("obj is \n", obj)
print("obj.unique is \n ", obj.unique())
# An in-place .sort() returns None, so printing its return value only ever
# shows "None". sorted() returns the ordered values so they can be printed.
sorted_uniques = sorted(uniques)
print("sorted(uniques) is \n", sorted_uniques)
print("obj.value_counts() is \n", obj.value_counts())
# The top-level pd.value_counts() helper is deprecated; the Series method
# accepts the same sort=False flag.
print(
    "Series(obj.values).value_counts(sort=False) \n",
    Series(obj.values).value_counts(sort=False),
)
# Boolean mask: True where the element is 'b' or 'c'.
mask = obj.isin(['b', 'c'])
print("obj.isin(['b','c']) \n", mask)
print("mask = obj.isin(['b','c'])")
print("obj[mask] is \n", obj[mask])
# --- Per-column value frequencies of a DataFrame ---------------------------
data = DataFrame(
    {
        'Qu1': [1, 3, 4, 3, 4],
        'Qu2': [2, 3, 1, 2, 3],
        'Qu3': [1, 5, 2, 4, 4],
    }
)
print("data is \n", data)
# Apply Series.value_counts to every column (the top-level pd.value_counts
# helper is deprecated). Values missing from a column come back as NaN and
# are replaced with a count of 0.
result = data.apply(pd.Series.value_counts).fillna(0)
print("data.apply(pd.Series.value_counts).fillna(0)\n ", result)
print("計算一個series各值出現的頻率")
# --- Detecting missing values in a Series of strings -----------------------
print("handling the missing data \n")
string_data = Series(['aardvark', 'artichoke', np.nan, 'avocado'])
print("string_data is \n", string_data)
print("string_data.isnull() \n", string_data.isnull())
print("The built-in python None value is also treated as NA in object Arrays \n")
print("string_data[0]=None\n")
string_data[0] = None
# isnull must be *called*; without the parentheses print() shows the bound
# method object instead of the boolean mask.
print("string_data.isnull() \n ", string_data.isnull())
# --- Dropping missing values from a Series ---------------------------------
print(" NA handling methods in P143 Table 5-12")
data = Series([1, NA, 3.5, NA, 7])
# dropna() returns a new Series and leaves `data` untouched, so a bare
# `data.dropna()` statement has no effect; the call is only made where its
# result is actually printed.
print("data is \n", data)
print("data.dropna() is \n", data.dropna())
# Boolean indexing with notnull() selects the same non-missing entries.
print("data[data.notnull()],\n", data[data.notnull()])
# --- Dropping rows / columns with missing values from a DataFrame ----------
data = DataFrame(
    [
        [1., 6.5, 3.],
        [1., NA, NA],
        [NA, NA, NA],
        [NA, 6.5, 3.],
    ]
)
# Default dropna() removes every row that contains at least one NA.
cleaned = data.dropna()
print("data is \n", data)
print("data.dropna() is \n", cleaned)
print("data.dropna(how='all') is \n", data.dropna(how='all'))
print("passing how=all will only drop rows that are all NA")
# Add a column (label 4) that is entirely NA to demonstrate column-wise drops.
data[4] = NA
print("New data is \n", data)
print("data.dropna(axis=1,how='all') \n", data.dropna(axis=1, how='all'))
print("按照columns drop")
# --- dropna(thresh=...) and fillna on a random DataFrame -------------------
df = DataFrame(np.random.randn(7, 3))
print("df is \n", df)
# The removed .ix indexer was label-based on this integer index, so .loc is
# the equivalent: label slices are inclusive (rows 0..4 and rows 0..2).
df.loc[:4, 1] = NA
df.loc[:2, 2] = NA
print("New df is \n", df)
# thresh=3 keeps only the rows that have at least 3 non-NA values.
print("df.dropna(thresh=3)\n", df.dropna(thresh=3))
print("filling in the missing data")
print("df.fillna(0) \n", df.fillna(0))
# NOTE(review): df only has columns 0, 1 and 2, so the key 3 matches no
# column and column 2 keeps its NaN values here - confirm whether 2 was meant.
print("df.fillna({1:0.5,3:-1}) \n", df.fillna({1: 0.5, 3: -1}))
print("calling fillna with a dict you can use a different fill value for each columns")
# inplace=True modifies df itself instead of returning a filled copy.
_ = df.fillna(0, inplace=True)
print("_=df.fillna(0,inplace=True) \n", df)
# --- Forward-filling missing values ----------------------------------------
df = DataFrame(np.random.randn(6, 3))
print("DataFrame(np.random.randn(6,3)) \n", df)
# .loc replaces the removed .ix indexer; label slices on this integer index
# are inclusive, so rows 2..5 of column 1 and rows 4..5 of column 2 become NA.
df.loc[2:, 1] = NA
df.loc[4:, 2] = NA
print("df.loc[2:,1] = NA; df.loc[4:,2] = NA \n", df)
# fillna(method='ffill') is deprecated; DataFrame.ffill() is the direct
# replacement and accepts the same `limit` argument.
print("df.ffill() \n", df.ffill())
# limit=2 forward-fills at most two consecutive NA values per gap.
print("df.ffill(limit=2) \n", df.ffill(limit=2))
# --- Filling missing values with the mean of the Series --------------------
data = Series([1., NA, 3.5, NA, 7])
print("data is \n", data)
# mean() is computed over the non-NA values only, then used as the fill value.
print("data.fillna(data.mean()) \n", data.fillna(data.mean()))
print("fillna function arguments on P146 Table 5-13")
# --- Hierarchical (multi-level) indexing on a Series -----------------------
print("Hierarchical indexing")
data = Series(
    np.random.randn(10),
    index=[
        ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'd', 'd'],
        [1, 2, 3, 1, 2, 3, 1, 2, 2, 3],
    ],
)
print("data is \n", data)
print("a Series with multi-index")
print("data.index", data.index)
# Partial indexing: selecting by an outer-level label.
print("data['b'] \n", data['b'])
print("data.loc['b':'c'] \n", data.loc['b':'c'])
# .loc replaces the removed .ix indexer for label-based selection.
print("data.loc[['b','d']] \n", data.loc[['b', 'd']])
# Select inner-level label 2 across every outer-level label.
print("data.loc[:,2] \n", data.loc[:, 2])
# unstack() pivots the inner index level into columns; stack() reverses it.
print("data.unstack() \n", data.unstack())
print("data.unstack().stack() \n ", data.unstack().stack())
# --- Hierarchical indexing on both axes of a DataFrame ---------------------
print("data frame")
frame = DataFrame(
    np.arange(12).reshape((4, 3)),
    index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
    columns=[['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']],
)
print("frame is \n", frame)
# Name the index / column levels so they can be referenced by name below.
frame.index.names = ["key1", "key2"]
frame.columns.names = ["state", "color"]
print("New frame is \n", frame)
print("frame['Ohio'] \n", frame['Ohio'])
print("frame.swaplevel('key1','key2') \n", frame.swaplevel('key1', 'key2'))
# sortlevel() was removed from pandas; sort_index(level=...) replaces it.
print("frame.sort_index(level=1) \n", frame.sort_index(level=1))
print(
    "frame.swaplevel(0,1).sort_index(level=0)\n",
    frame.swaplevel(0, 1).sort_index(level=0),
)
print("summary statistics by level")
# sum(level=...) was removed from pandas; group on the level, then sum.
print("frame.groupby(level='key2').sum() \n", frame.groupby(level='key2').sum())
# Column-level aggregation: transpose so the column levels become the index,
# group and sum there, then transpose back.
print(
    "frame.T.groupby(level='color').sum().T \n",
    frame.T.groupby(level='color').sum().T,
)
# --- Moving columns into the index and back (set_index / reset_index) ------
print("Using a DataFrame's columns")
frame = DataFrame(
    {
        'a': range(7),
        'b': range(7, 0, -1),
        'c': ['one', 'one', 'one', 'two', 'two', 'two', 'two'],
        'd': [0, 1, 2, 0, 1, 2, 3],
    }
)
print("frame is \n", frame)
# set_index returns a new DataFrame; columns 'c' and 'd' become index levels
# and are removed from the columns by default.
frame2 = frame.set_index(['c', 'd'])
print("creating a new Dataframe using one or more its columns as the index")
print("frame.set_index(['c','d']) \n", frame2)
# drop=False keeps 'c' and 'd' as regular columns as well. The call is only
# made where its result is printed, since set_index does not modify `frame`.
print("frame.set_index(['c','d'],drop =False) \n", frame.set_index(['c', 'd'], drop=False))
print("reset_index does the opposite of set_index,the hierarchical index levels are moved into the columns")
print("frame2.reset_index() \n", frame2.reset_index())
http://www.xuebuyuan.com/2180572.html
轉載于:https://www.cnblogs.com/wutongyuhou/p/6888148.html
總結
以上是生活随笔為你收集整理的pandas 函数-处理空值的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 访问Storm ui界面,出现Nimbu
- 下一篇: 堆区和栈区