不要在panda中使用循环,它们与矢量化的解决方案相比很慢-通过
astype
True, False
转换为
1, 0
:
dataframe = pd.DataFrame({'A':list('abcdef'),
'B':[4,5,4,5,5,4],
'C':[7,8,9,4,2,3],
'D':[1,3,5,7,1,0],
'E':list('aaabbb'),
'F':[5,3,6,9,2,4],
'G':[5,3,6,9,2,4]
})
a = 5
dataframe['new'] = (dataframe.iloc[:,5] >= a).astype(int)
print (dataframe)
A B C D E F G new
0 a 4 7 1 a 5 5 1
1 b 5 8 3 a 3 3 0
2 c 4 9 5 a 6 6 1
3 d 5 4 7 b 9 9 1
4 e 5 2 1 b 2 2 0
5 f 4 3 0 b 4 4 0
如果要覆盖第7列:
a = 5
dataframe.iloc[:,6] = (dataframe.iloc[:,5] >= a).astype(int)
print (dataframe)
A B C D E F G
0 a 4 7 1 a 5 1
1 b 5 8 3 a 3 0
2 c 4 9 5 a 6 1
3 d 5 4 7 b 9 1
4 e 5 2 1 b 2 0
5 f 4 3 0 b 4 0