码农技术炒股之路——抓取日线数据、计算均线和除权数据
日线数据是股票每日收盘后的信息。这块数据不用实时抓取，所以并不占用宝贵的交易时间的资源。于是我们抓取完数据后直接往切片后的数据库中保存。(转载请指明出于breaksoftware的csdn博客)
抓取日线数据
我们先要获取今天有交易信息的股票代码。因为存在股票停牌的情况，所以不需要这类股票信息。
def _get_all_share_ids(self):
    """Return the distinct ``share_id`` rows from today's trade-info table.

    The table name is ``trade_info_<YYYY_MM_DD>``, built from the current
    local date, and is queried through ``fetch_data`` using the
    "daily_temp" source name.
    """
    today = time.strftime('%Y_%m_%d')
    query = fetch_data.select_db(
        "daily_temp",
        "trade_info_%s" % (today),
        ["share_id"],
        {},
        pre="distinct",
    )
    return fetch_data.get_data(query)
然后通过股票ID从股票基本信息表中获取股票代码和市场类型等数据
def _get_all_share_ids_market_type(self):
    """Return ``(share_id, market_type)`` rows for every share traded today.

    The ids come from ``self._get_all_share_ids()`` (first column of each
    row) and are then matched against the "share_base_info" table with an
    "in" condition on ``share_id``.
    """
    ids = [row[0] for row in self._get_all_share_ids()]
    query = fetch_data.select_db(
        "stock_db",
        "share_base_info",
        ["share_id", "market_type"],
        {"share_id": [ids, "in"]},
    )
    return fetch_data.get_data(query)
然后就是抓取和保存数据
class update_stock_daily_info(job_base):
    """Job that fetches and stores daily quote data for each traded share."""

    def __init__(self):
        pass

    def run(self):
        """Query and save daily data for every (share_id, market_type) row."""
        for row in self._get_all_share_ids_market_type():
            # Column 0 is the share id, column 1 the market type.
            self._query_save_data(row[0], row[1])
        LOG_INFO("run update_stock_daily_info")
这次我们的数据从网易抓取。这儿要非常感谢网易，它提供一个通过指定起始和截止日期的接口拉取历史日线数据。如果起始和截止选择今天，则拉取的是今天的数据。
def _get_data(self, market_type, id, start_time, end_time):
    """Download daily history for one share from the NetEase quotes service.

    The ``code`` query parameter is ``market_type`` formatted as an integer
    immediately followed by ``id``; ``start_time`` and ``end_time`` are
    substituted verbatim into the ``start`` and ``end`` parameters.
    Returns whatever ``fetch_data.get_data`` yields for the HTTP query.
    """
    url = (
        "http://quotes.money.163.com/service/chddata.html"
        "?code=%d%s&start=%s&end=%s"
        "&fields=TCLOSE;HIGH;LOW;TOPEN;LCLOSE;PCHG;TURNOVER;VOTURNOVER;VATURNOVER"
    ) % (market_type, id, start_time, end_time)
    return fetch_data.get_data(fetch_data.query_http(url))
最开始时，我们是一条数据都没有的，于是我们选择从1990年1月1日开始。之后我们有数据了，则从已有数据的最后一天之后的下一个工作日(周一至周五)开始算起。
def _get_start_time(self, share_id, table_name):
    """Return the timestamp of the first day that still needs fetching.

    With no stored rows the start is 1990-01-01. Otherwise it is the first
    Monday-to-Friday date after the newest stored ``time`` value, evaluated
    in the Asia/Shanghai timezone. The result is the ``time.mktime`` value
    of that date at midnight.
    """
    conn_name = stock_conn_manager().get_conn_name(share_id)
    newest = fetch_data.get_data(
        fetch_data.select_db(
            conn_name,
            table_name,
            ["time"],
            {},
            extend="order by time desc limit 1",
        )
    )
    if len(newest) == 0:
        day_text = "19900101"
    else:
        shanghai = pytz.timezone('Asia/Shanghai')
        one_day = datetime.timedelta(days=1)
        candidate = datetime.datetime.fromtimestamp(newest[0][0], shanghai) + one_day
        # weekday() is 5 or 6 on Saturday/Sunday; step forward past them.
        while candidate.weekday() >= 5:
            candidate = candidate + one_day
        day_text = candidate.strftime("%Y%m%d")
    return time.mktime(time.strptime(day_text, '%Y%m%d'))
获取一个区间的数据后，我们通过正则表达式对结果进行拆分
def _filter_data(self, data):
    """Split the raw response into rows and keep only the usable ones.

    "None" is replaced by "0" before splitting with the
    "quotes_money_163" rule. The first split row is discarded
    (presumably the header line - TODO confirm). Rows whose
    second-to-last field is integer 0 are skipped. Each kept row is
    modified in place: the timestamp parsed from its first field
    (``%Y-%m-%d``) is inserted at the front, then the two elements at
    index 2 are removed.
    """
    rows = fetch_data.get_data(
        fetch_data.regular_split("quotes_money_163", data.replace("None", "0"))
    )
    if len(rows) > 0:
        rows.pop(0)
    kept = []
    for row in rows:
        if int(row[-2]) == 0:
            continue
        stamp = time.mktime(time.strptime(row[0], '%Y-%m-%d'))
        row.insert(0, stamp)
        # Two consecutive removals at index 2, as in the original.
        row.pop(2)
        row.pop(2)
        kept.append(row)
    return kept
最后将数据保存到对应的表中
def _save_data(self, share_id, table_name, data):
    """Insert daily rows for ``share_id`` into ``table_name``.

    Rows whose length does not match the column list are logged and left
    out of the insert. Previously such rows were only logged: ``del item``
    merely unbound the loop variable, so malformed rows were still passed
    to ``insert_data``.

    Args:
        share_id: id used to pick the database connection.
        table_name: destination table.
        data: iterable of rows, each expected to have one value per column.

    Returns:
        None. Nothing is written when no valid row remains.
    """
    into_db_columns = [
        "time",
        "time_str",
        "today_close",
        "today_high",
        "today_low",
        "today_open",
        "yesterday_close",
        "pchg",
        "turnover_rate",
        "volume",
        "turnover",
    ]
    columns_count = len(into_db_columns)
    valid_rows = []
    for item in data:
        if len(item) != columns_count:
            LOG_INFO("%s length is not match for column length %d" %(str(item), columns_count))
            continue
        valid_rows.append(item)
    if not valid_rows:
        return
    conn = stock_conn_manager().get_conn(share_id)
    conn.insert_data(table_name, into_db_columns, valid_rows)
计算均线数据
均线数据按类型分可以分为成交量均线和价格均线。按时间分可以分为5日、10日、20日、30日、60日、90日、120日、180日和360日均线。
为了方便计算，我引入了talib库
# Install the TA-Lib package with pip, using the Douban mirror as the package index.
pip install TA-Lib -i http://pypi.douban.com/simple
首先获取所有股票代码以便之后枚举
class update_stock_daily_average_info(job_base):
    """Job that refreshes the moving-average columns of each traded share."""

    def __init__(self):
        pass

    def run(self):
        """Update the averages of every share listed in today's trade table."""
        for row in self._get_all_share_ids():
            # Column 0 of each row is the share id.
            self._update_average(row[0])
        LOG_INFO("run update_stock_daily_average_info")

    def _get_all_share_ids(self):
        """Return the distinct ``share_id`` rows from today's trade-info table.

        The table name is ``trade_info_<YYYY_MM_DD>`` built from the
        current local date.
        """
        today = time.strftime('%Y_%m_%d')
        query = fetch_data.select_db(
            "daily_temp",
            "trade_info_%s" % (today),
            ["share_id"],
            {},
            pre="distinct",
        )
        return fetch_data.get_data(query)
然后查询每支股票最后一次计算均线的日期。判断规则就是查看价格5日均线值是否为0。因为均线计算量非常大，所以我们不能野蛮地全部重算。每次都要基于上次计算成果进行增量计算。
def _get_ma_empty_start_time(self, share_id, table_name):
    """Return the earliest day whose ``close_ma5`` is still 0.

    The earliest matching ``time`` value is converted to a date in the
    Asia/Shanghai timezone and returned as the ``time.mktime`` value of
    that date at midnight. Returns 0 when no row matches.
    """
    conn_name = stock_conn_manager().get_conn_name(share_id)
    pending = fetch_data.get_data(
        fetch_data.select_db(
            conn_name,
            table_name,
            ["time"],
            {"close_ma5": [0, "="]},
            extend="order by time asc limit 1",
        )
    )
    if len(pending) == 0:
        return 0
    shanghai = pytz.timezone('Asia/Shanghai')
    day_text = datetime.datetime.fromtimestamp(pending[0][0], shanghai).strftime("%Y%m%d")
    return time.mktime(time.strptime(day_text, '%Y%m%d'))
因为我们代码中最多分析180日均线数据，所以日期要从上面函数得到的日期前推180日；如果之前没有180日数据，则返回最早的那天。如果是新股，则返回当日。
def _get_start_time(self, share_id, table_name, ma_empty_start_time):
    """Return the day from which closing data must be loaded.

    Up to 180 rows with ``time <= ma_empty_start_time`` are read
    newest-first; the oldest of them is converted to a date in the
    Asia/Shanghai timezone and returned as the ``time.mktime`` value of
    that date at midnight. When no row qualifies, ``ma_empty_start_time``
    itself is returned.
    """
    conn_name = stock_conn_manager().get_conn_name(share_id)
    window = fetch_data.get_data(
        fetch_data.select_db(
            conn_name,
            table_name,
            ["time"],
            {"time": [ma_empty_start_time, "<="]},
            extend="order by time desc limit 180",
        )
    )
    if len(window) == 0:
        return ma_empty_start_time
    shanghai = pytz.timezone('Asia/Shanghai')
    # Rows arrive newest-first, so the last one is the oldest in the window.
    day_text = datetime.datetime.fromtimestamp(window[-1][0], shanghai).strftime("%Y%m%d")
    return time.mktime(time.strptime(day_text, '%Y%m%d'))
下一步就是计算各个日期的均值
def _get_ma_data(self, ori_data, periods):
    """Compute one moving-average series per period.

    Args:
        ori_data: iterable of values convertible with ``float``.
        periods: iterable of integer window sizes.

    Returns:
        dict mapping ``"%d" % period`` to the list returned by
        ``self._filter_data`` for that period's ``talib.MA`` output.
    """
    # The input array does not depend on the period, so build it once
    # instead of once per loop iteration.
    series = numpy.array([float(x) for x in ori_data])
    ret_data = {}
    for period in periods:
        averages = talib.MA(series, timeperiod=period).tolist()
        ret_data["%d" % period] = self._filter_data(averages)
    return ret_data
然后将计算结果整理到数组中并保存
def _calc_average_data(self, share_id, table_name):
    """Build the moving-average update rows for one share.

    Returns:
        list of dicts, one per day at or after the first day whose
        ``close_ma5`` is 0. Each dict holds ``time`` plus
        ``close_ma<N>`` / ``volume_ma<N>`` for every period. An empty list
        is returned when nothing is pending or when the series lengths
        disagree (the mismatch path used to return ``None``, unlike every
        other path).
    """
    ma_empty_start_time_int = self._get_ma_empty_start_time(share_id, table_name)
    if ma_empty_start_time_int == 0:
        return []

    start_time_int = self._get_start_time(share_id, table_name, ma_empty_start_time_int)
    stock_info = self._get_close_volume(share_id, table_name, start_time_int)

    periods = [5, 10, 20, 30, 60, 90, 120, 150, 180]
    close_data = self._get_ma_data(stock_info["close"], periods)
    volume_data = self._get_ma_data(stock_info["volume"], periods)

    times = stock_info["time"]
    if not (len(times) == len(close_data["180"]) == len(volume_data["180"])):
        LOG_WARNING("calc %s daily average error" % share_id)
        return []

    infos = []
    for index, time_int in enumerate(times):
        # Earlier days are loaded only to seed the averages; they are not rewritten.
        if time_int < ma_empty_start_time_int:
            continue
        info = {"time": time_int}
        for period in periods:
            key = "%s" % period
            info["close_ma%s" % period] = close_data[key][index]
            info["volume_ma%s" % period] = volume_data[key][index]
        infos.append(info)
    return infos


def _filter_data(self, data):
    """Replace the leading run of NaN values in ``data`` with 0.01, in place.

    Scanning stops at the first non-NaN value; later NaNs are left
    untouched. Returns the same list object.
    """
    for index, value in enumerate(data):
        if not math.isnan(value):
            break
        data[index] = 0.01
    return data


def _save_data(self, share_id, table_name, data):
    """Write the computed average rows back, keyed on ``time``.

    ``None`` is accepted and treated like an empty result.
    """
    # NOTE(review): fewer than two rows are skipped, so a single pending
    # day is never written - confirm this is intended.
    if data is None or len(data) < 2:
        return
    conn = stock_conn_manager().get_conn(share_id)
    conn.update(table_name, data, ["time"])
计算除权后均线数据
之前算的那些均值理论上来说是没什么用的！因为没有除权！这是我在对比我的数据和同花顺的数据之后得出的。于是只能再改改。
基本思路是要计算一个因子，因子=今日数据中昨日收盘价/前一日数据中收盘价。然后把除权日之前的价格都“乘以”该因子得出向后复权的价格，相应的把除权日之前的成交量都“除以”该因子得出向后复权的成交量。这样就会导致整个表进行一次更新(从后向前)。
有意思的是同花顺将成交量也“乘以”该因子，其实这个算法是错误的。举个例子，比如昨日股票收盘10元，成交量100股，则成交金额是1000元。今天除权，于是拉取数据中昨日的收盘价是5元。这样相当于单股价值缩水一半。那么因子是5/10=0.5。那么向后复权计算，昨日的股票收盘价是10*0.5=5元。成交量应该是100/0.5=200股。这样昨日的成交金额是5*200=1000。但是同花顺的算法昨日成交量100*0.5=50股，这明显是错误的。
最后贴上向后复权的算法
# NOTE(review): the statement nesting below was reconstructed from a
# flattened one-line listing; in particular the placement of the final
# ``close_ma5`` reset (inside the ``else`` branch) should be confirmed
# against the original source.
def _dividend_ori_data(self, share_id, from_table, to_table, start_time, compare = ">", yesterday_close = 0):
    """Write price/volume rows adjusted by an ex-dividend factor.

    Rows come from ``self._get_daily_info``. For each row, the factor is
    the index-6 value of the previously emitted row divided by this row's
    index-2 value. Indices 2-6 are multiplied by the factor, index 9 is
    divided by it, and the other columns are copied unchanged.
    (Presumably index 2 is today's close, 6 is yesterday's close and 9 is
    volume, and rows are ordered newest-first - TODO confirm against
    ``_table_keys`` and ``_get_daily_info``.)

    Returns 0 when there are no rows or when the first row's index-6
    value already equals ``yesterday_close``; otherwise returns the
    index-6 value of the last emitted row.
    """
    ori_data = self._get_daily_info(share_id, from_table, start_time, compare)
    if 0 == len(ori_data):
        return 0
    # Nothing to adjust when the first row already lines up with the caller's value.
    if ori_data[0][6] == yesterday_close:
        return 0
    ex_dividend_ori = []
    pre_div_value = 1
    for item in ori_data:
        # No reference close yet: emit the row untouched and use it as the reference.
        if 0 == yesterday_close:
            ex_dividend_ori.append(item)
            yesterday_close = item[6]
            continue
        # The reference is always taken from the row emitted just before this one.
        if len(ex_dividend_ori) > 0:
            yesterday_close = ex_dividend_ori[-1][6]
        ori_close = item[2]
        if ori_close == 0 or yesterday_close == 0:
            # Avoid dividing by zero: reuse the last factor that was computed.
            div_value = pre_div_value
        else:
            # Closes agree, so this row needs no adjustment.
            if yesterday_close == ori_close:
                ex_dividend_ori.append(item)
                continue
            div_value = yesterday_close/ori_close
        pre_div_value = div_value
        ex_dividend_ori.append([item[0], item[1], item[2] * div_value, item[3] * div_value,
                                item[4] * div_value,item[5] * div_value,item[6] * div_value,
                                item[7],item[8],item[9] / div_value,item[10]])
    stock_conn_manager_obj = stock_conn_manager()
    conn = stock_conn_manager_obj.get_conn(share_id)
    if from_table != to_table:
        conn.insert_data(to_table, self._table_keys, ex_dividend_ori)
    else:
        # Same table: write each row back keyed on "time".
        for info_value in ex_dividend_ori:
            infos = {}
            for index in range(len(self._table_keys)):
                infos[self._table_keys[index]] = info_value[index]
            conn.insert_onduplicate(to_table, infos, ["time"])
        # Set close_ma5 to 0 on the last emitted row (presumably so the
        # average job recomputes from there - confirm).
        conn.insert_onduplicate(to_table, {"close_ma5":0, "time":ex_dividend_ori[-1][0]}, ["time"])
    last_yesterday_close = ex_dividend_ori[-1][6]
    return last_yesterday_close
总结
以上是生活随笔为你收集整理的码农技术炒股之路——抓取日线数据、计算均线和除权数据的全部内容，希望文章能够帮你解决所遇到的问题。
- 上一篇: 码农技术炒股之路——实时交易信息、主力动
- 下一篇: 同步、异步、堵塞、非堵塞和函数调用及I/