【算法竞赛学习】资金流入流出预测-挑战Baseline_时间序列规则
生活随笔
收集整理的這篇文章主要介紹了
【算法竞赛学习】资金流入流出预测-挑战Baseline_时间序列规则
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
賽題簡介
螞蟻金服擁有上億會員,並且業務場景中每天都涉及大量的資金流入和流出,面對如此龐大的用戶群,資金管理壓力會非常大。在既保證資金流動性風險最小,又滿足日常業務運轉的情況下,精準地預測資金的流入流出情況變得尤為重要。此屆大賽以《資金流入流出預測》為題,期望參賽者能夠通過對例如餘額寶用戶的申購贖回數據的把握,精準預測未來每日的資金流入流出情況。對貨幣基金而言,資金流入意味著申購行為,資金流出則意味著贖回行為。
賽題與數據
競賽中使用的數據主要包含四個部分,分別為用戶基本信息數據、用戶申購贖回數據、收益率表和銀行間拆借利率表。詳見:https://tianchi.aliyun.com/competition/entrance/231573/information
時間序列規則
import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn as skr
from dateutil.relativedelta import relativedelta

# The two daily aggregates the whole pipeline works on.
AMOUNT_COLS = ['total_purchase_amt', 'total_redeem_amt']


def load_data(path: str = 'user_balance_table.csv') -> pd.DataFrame:
    """Read the user balance table and attach calendar columns.

    Args:
        path: CSV file to read; it must contain a ``report_date`` column
            formatted as ``YYYYMMDD`` (the default column parsed by
            ``add_timestamp``).

    Returns:
        The table with the extra columns produced by ``add_timestamp``.
    """
    return add_timestamp(pd.read_csv(path))


def add_timestamp(data: pd.DataFrame, time_index: str = 'report_date') -> pd.DataFrame:
    """Return a copy of *data* with ``date`` and derived calendar columns.

    Args:
        data: Input frame; it is not modified.
        time_index: Name of the column holding the date, either as
            ``YYYYMMDD`` values or as datetimes.

    Returns:
        A copy with ``date``, ``day``, ``month``, ``year``, ``week`` (ISO
        week number) and ``weekday`` (Monday == 0) columns and a fresh
        0..n-1 index.
    """
    data_balance = data.copy()
    data_balance['date'] = pd.to_datetime(data_balance[time_index], format="%Y%m%d")
    date_parts = data_balance['date'].dt
    data_balance['day'] = date_parts.day
    data_balance['month'] = date_parts.month
    data_balance['year'] = date_parts.year
    # ``Series.dt.week`` was removed in pandas 2.0; ``isocalendar().week``
    # is the supported replacement (it yields a nullable UInt32 column).
    data_balance['week'] = date_parts.isocalendar().week
    data_balance['weekday'] = date_parts.weekday
    return data_balance.reset_index(drop=True)


def get_total_balance(data: pd.DataFrame, date: str = '2014-03-31') -> pd.DataFrame:
    """Sum purchase and redeem amounts per day, keeping days on/after *date*.

    Args:
        data: Frame with ``date``, ``total_purchase_amt`` and
            ``total_redeem_amt`` columns.
        date: Earliest day (inclusive) to keep.

    Returns:
        One row per day with columns ``date``, ``total_purchase_amt`` and
        ``total_redeem_amt``.
    """
    # A list (not a bare tuple) is required to select several columns from
    # a groupby in current pandas.
    daily = data.groupby('date')[AMOUNT_COLS].sum().reset_index()
    return daily[daily['date'] >= pd.Timestamp(date)].reset_index(drop=True)


def generate_test_data(data: pd.DataFrame,
                       start: str = '2014-09-01',
                       end: str = '2014-10-14') -> pd.DataFrame:
    """Append empty placeholder rows for the days that will be predicted.

    Args:
        data: Daily totals whose first column holds the date.
        start: First placeholder day (inclusive).
        end: Last placeholder day (inclusive).

    Returns:
        *data* followed by one row per day in ``[start, end]``; in those
        rows the first column holds the day and every other column is NaN.
    """
    date_col = data.columns[0]
    placeholder = pd.DataFrame({date_col: pd.date_range(start, end, freq='D')})
    # reindex() adds the remaining columns filled with NaN.
    placeholder = placeholder.reindex(columns=data.columns)
    return pd.concat([data, placeholder], axis=0).reset_index(drop=True)


def load_user_information(path: str = 'user_profile_table.csv') -> pd.DataFrame:
    """Read the user profile table from *path*."""
    return pd.read_csv(path)


def generate_base(df: pd.DataFrame, month_index: int, year: int = 2014,
                  train_start: str = '2014-03-01') -> pd.DataFrame:
    """Predict one month of daily totals with the weekday/day-of-month rule.

    The rows from *train_start* up to (excluding) the first day of the
    target month are the training window.  From them the function derives

    1. a weekday factor: mean amount per weekday divided by the overall mean;
    2. a day-of-month factor: for each day 1..31, the weekday factors of the
       weekdays that day fell on, weighted by how often that happened;
    3. a base: the mean amount per day of month divided by its day factor.

    The prediction for a day in the target month is its base multiplied by
    the weekday factor of the weekday it falls on.

    Args:
        df: Frame with a datetime ``date`` column plus
            ``total_purchase_amt`` and ``total_redeem_amt``.
        month_index: Month (1-12) to predict.
        year: Year of the target month.
        train_start: First day (inclusive) of the training window.

    Returns:
        Frame sorted by ``date`` with columns ``date``,
        ``total_purchase_amt`` and ``total_redeem_amt`` for the target month.

    Raises:
        ValueError: If no rows fall inside the training window.
    """
    factor_cols = [col + '_weekdaymean' for col in AMOUNT_COLS]
    month_start = pd.Timestamp(year=year, month=month_index, day=1)

    # Select the training window; copy so that adding columns below does not
    # write into a view of the caller's frame.
    in_window = (df['date'] >= pd.Timestamp(train_start)) & (df['date'] < month_start)
    train = df.loc[in_window, ['date'] + AMOUNT_COLS].copy()
    if train.empty:
        raise ValueError(
            f'no training rows between {train_start} and {month_start.date()}'
        )
    train['weekday'] = train['date'].dt.weekday
    train['day'] = train['date'].dt.day
    train['month'] = train['date'].dt.month

    # Weekday factor: weekday mean relative to the overall mean.
    weekday_factor = train.groupby('weekday', as_index=False)[AMOUNT_COLS].mean()
    weekday_factor = weekday_factor.rename(columns=dict(zip(AMOUNT_COLS, factor_cols)))
    for amount, factor in zip(AMOUNT_COLS, factor_cols):
        weekday_factor[factor] /= train[amount].mean()

    # Count how often each weekday landed on each day of month, then weight
    # the weekday factors by that frequency to obtain the day-of-month factor.
    month_count = train['month'].nunique()
    weekday_count = train.groupby(['day', 'weekday'], as_index=False)['date'].count()
    weekday_count = weekday_count.merge(weekday_factor, on='weekday')
    weight = weekday_count['date'] / month_count
    for factor in factor_cols:
        weekday_count[factor] *= weight
    day_rate = (
        weekday_count.drop(columns=['weekday', 'date'])
        .groupby('day', as_index=False)
        .sum()
    )

    # Base: per-day-of-month mean with the day-of-month factor divided out.
    day_mean = train.groupby('day', as_index=False)[AMOUNT_COLS].mean()
    day_pre = day_mean.merge(day_rate, on='day', how='left')
    for amount, factor in zip(AMOUNT_COLS, factor_cols):
        day_pre[amount] /= day_pre[factor]

    # Build the target month's dates, keeping only days that exist in it
    # (handles 28/29/30-day months without a hand-written month list).
    day_pre = day_pre[day_pre['day'] <= month_start.days_in_month].copy()
    day_pre['date'] = month_start + pd.to_timedelta(day_pre['day'] - 1, unit='D')

    # Final prediction: base times the factor of the weekday each date hits.
    day_pre['weekday'] = day_pre['date'].dt.weekday
    day_pre = day_pre[['date', 'weekday'] + AMOUNT_COLS].merge(weekday_factor, on='weekday')
    for amount, factor in zip(AMOUNT_COLS, factor_cols):
        day_pre[amount] *= day_pre[factor]
    return day_pre.sort_values('date')[['date'] + AMOUNT_COLS]


# Load the data.  load_data() already adds the calendar columns, so no second
# add_timestamp() pass over the raw table is needed.
balance_data = load_data('Data/user_balance_table.csv')
total_balance = get_total_balance(balance_data, date='2014-03-01')
total_balance = generate_test_data(total_balance)
total_balance = add_timestamp(total_balance, 'date')

# Work on a deep copy of the daily totals.
data = total_balance.copy()

# Produce the rule-based prediction for April..September and the residuals
# (actual / predicted) of the observed days.
base_list = [generate_base(data, month).reset_index(drop=True) for month in range(4, 10)]
base = pd.concat(base_list).reset_index(drop=True)
base = base.rename(columns={col: col + '_base' for col in AMOUNT_COLS})
data = data.merge(base, on='date', how='left').reset_index(drop=True)
data['purchase_residual'] = data['total_purchase_amt'] / data['total_purchase_amt_base']
data['redeem_residual'] = data['total_redeem_amt'] / data['total_redeem_amt_base']

# Rename the result table: the residuals are stored under the amount column
# names and the rule-based predictions under the *_predicted_by_cycle names.
data = data[['date', 'purchase_residual', 'redeem_residual',
             'total_purchase_amt_base', 'total_redeem_amt_base']].copy()
data['date'] = data['date'].dt.strftime('%Y%m%d')
# A flat list keeps plain column labels (a nested list builds a MultiIndex).
data.columns = ['date',
                'total_purchase_amt', 'total_redeem_amt',
                'total_purchase_predicted_by_cycle', 'total_redeem_predicted_by_cycle']

# Save the prediction locally.
data.to_csv('Data/base.csv', index=False)

# 總結 (Summary)
以上是生活随笔為你收集整理的【算法竞赛学习】资金流入流出预测-挑战Baseline_时间序列规则的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: win7下如何更新win10系统
- 下一篇: 基本概念(2)——make、ninja、