我试着从 `df` 得到 `df2`。
我是按 `review_meta_id, age_bin` 分组，然后用 `sum(click_count) / sum(impression_count)` 计算每组的 `ctr`。
In [69]: df
Out[69]:
review_meta_id age_month impression_count click_count age_bin
0 3 4 10 3 1
1 3 10 5 2 2
2 3 20 5 3 3
3 3 8 9 2 2
4 4 9 9 5 2
In [70]: df2
Out[70]:
review_meta_id ctr age_bin
0 3 0.300000 1
1 3 0.285714 2
2 3 0.600000 3
3 4 0.555556 2
import pandas as pd
# Bucket each review by age (in months) and compute the click-through rate
# per (review_meta_id, age_bin) as sum(click_count) / sum(impression_count).

# pd.cut uses right-closed intervals: (0, 5] -> 1, (5, 15] -> 2, (15, 30] -> 3.
bins = [0, 5, 15, 30]
labels = [1, 2, 3]

l = [
    dict(review_meta_id=3, age_month=4, impression_count=10, click_count=3),
    dict(review_meta_id=3, age_month=10, impression_count=5, click_count=2),
    dict(review_meta_id=3, age_month=20, impression_count=5, click_count=3),
    dict(review_meta_id=3, age_month=8, impression_count=9, click_count=2),
    dict(review_meta_id=4, age_month=9, impression_count=9, click_count=5),
]
df = pd.DataFrame(l)
df['age_bin'] = pd.cut(df['age_month'], bins=bins, labels=labels)

# `age_bin` is categorical; observed=True keeps only the (id, bin) pairs that
# actually occur, so no empty groups (0 / 0 -> NaN) appear in the result.
grouped = df.groupby(['review_meta_id', 'age_bin'], observed=True)

# Sum both counters per group in one vectorised pass, then divide the totals.
# The ratio of sums is intentionally not the mean of per-row ratios.
totals = grouped[['click_count', 'impression_count']].sum()
df2 = (
    (totals['click_count'] / totals['impression_count'])
    .rename('ctr')
    .reset_index()
    # Return the bin labels as plain integers rather than a categorical.
    .astype({'age_bin': 'int64'})
    [['review_meta_id', 'ctr', 'age_bin']]
)