用户流失特征
WITH ps AS (SELECT DATE(timestamp )AS dt, army_num, mid, army_id, left_num, kingdom_id,timestamp, player_id,platform_idFROM `heidao-market.mafia1_ods.game_log_army`where DATE(timestamp) >= '{}' and DATE(timestamp) <= '{}'and player_id in {} )
--大R提取最近七天的數據SELECT--兵源數量變化 SUM(army_num) AS sum_change, --剩余兵源平均值AVG(left_num ) AS m_left,--兵源數量變化除以剩余兵源平均值 可以理解為戰斗力的波動率SUM(army_num)/AVG(left_num +1.123467 ) AS ratio,--計算兵源降低的次數,就是在這七天他被打了幾次COUNTIF( army_num<=0) AS ct_m,--計算兵源增加的次數,就是在這七天造兵的次數COUNTIF(army_num>0) AS ct_p,--計算兵源降低的次數除以(計算兵源降低的次數+計算兵源增加的次數)這個數值一定程度上反映了被打后--繼續玩游戲的欲望COUNTIF( army_num<=0)/( COUNTIF( army_num<=0)+COUNTIF(army_num>0)) as m_p,--兵源數量變化的最大值max( army_num) as max_change,--兵源數量變化的最小值min( army_num) as min_change,--兵源變化的振幅max( army_num)/( min( army_num)+0.2345) as mm_ratio,player_id,dtFROM psGROUP BY player_id,dt分別計算登錄時間長的移動平均線
ma30= talib.MA(df2.values, timeperiod=30, matype=0)
ma20= talib.MA(df2.values, timeperiod=20, matype=0)
ma10= talib.MA(df2.values, timeperiod=10, matype=0)
ma5= talib.MA(df2.values, timeperiod=5, matype=0)
temp1=temp[['sum_change', 'm_left', 'ratio', 'ct_m', 'ct_p', 'm_p', 'max_change','min_change', 'mm_ratio', ]]
mmax=temp1.max()
mmin=temp1.min()
mean=temp1.mean()one=(mmax-mean)/(mmax-mmin+1.012345)ct_p_min 0.139468 ct_p_min 是源增加的次數最小值
m_p_max 0.109516 繼續玩游戲的欲望 的最大值
max_change_min 0.109302 兵源數量變化的最大值
ratio_mean 0.103400 兵源數量變化除以剩余兵源平均值
ct_p_mean 0.041459
sum_change_mean 0.020636
ratio_min 0.018449
mm_ratio_min 0.016809
m_p_mean 0.009128
ma610_88 0.008878
max_change_mean 0.008340
ma610_90 0.008145
max_change_max 0.007979
ma610_20 0.007847
m_p_mean 0.007339
ma610_46 0.006822
ma610_92 0.006645
ratio_max 0.006498
ma605_58 0.006170
總結