A股数据day级前复权数据下载与存储
存儲為3個文件夾:大盤數據、股票數據與概括文件
數據獲取來源:股票數據來源于R中的WindR;大盤數據來源于python中的tushare(wind中指數似乎只有中證板塊)
設置數據文件夾,代碼中只需修改更新截止日期與工作路徑,后面創建文件都無需手動操作
文件夾中文件不能隨意變動,標紅代碼部分需要自己修改
股票數據中變量分別為股票代碼、股票簡稱、日期、時間、開高低收(乘10000)、交易量、交易額、換手率(基于自由流通股本)、自由流通股本
初次獲取數據
1、大盤指數數據(399001, 399005, 399006, 399300,999999)
## Initial setup
# Downloads daily history for five market indices through tushare and writes
# one CSV per index plus a stock_info.csv summary. Only the working directory
# and end_date need to be edited by hand.
import os

import pandas as pd
import tushare as ts

os.chdir("C:/Users/Administrator.USER-20161208UW/Desktop/數據")  # edit: working directory
start_date = "2009-01-01"
end_date = "2017-12-20"  # edit: last date to download
new_date = end_date.replace("-", "")

## Create the output folders
# makedirs creates the parent folder as well; the isdir guard lets the script
# be re-run without failing because the folder already exists.
index_dir = "ts大盤數據/大盤數據" + new_date
if not os.path.isdir(index_dir):
    os.makedirs(index_dir)

## Summary information
stock_info = pd.DataFrame({
    "stock.code": ["399001", "399005", "399006", "399300", "999999"],
    "stock.name": ["深證成指", "中小板指", "創業板指", "滬深300", "上證指數"],
    "type": ["SZ", "SZ", "SZ", "SZ", "SH"],
})
stock_info.to_csv(index_dir + "/stock_info.csv", index=False)

# Fetch the index data
# dapanzhishu1: codes passed to tushare; dapanzhishu2: codes used for the
# output file names (the fifth index is requested as 000001 but stored as 999999).
dapanzhishu1 = ["399001", "399005", "399006", "399300", "000001"]
dapanzhishu2 = ["399001", "399005", "399006", "399300", "999999"]
# Every code uses the "<code>.<exchange>" form; the original list dropped the
# dot on all entries but the first.
wind_code = ["399001.SZ", "399005.SZ", "399006.SZ", "399300.SZ", "999999.SH"]
name = ["深證成指", "中小板指", "創業板指", "滬深300", "上證指數"]

price_cols = ["open", "high", "close", "low"]
out_cols = ["wind_code", "name", "date", "time", "open", "high", "low", "close",
            "volumw", "turover", "turn", "free_turn"]

for fetch_code, file_code, code, label in zip(dapanzhishu1, dapanzhishu2, wind_code, name):
    df = ts.get_h_data(fetch_code, start=start_date, end=end_date, index=True, pause=4)
    df.sort_index(inplace=True)  # oldest row first
    df[price_cols] = df[price_cols] * 10000  # prices are stored multiplied by 10000
    # Turn the date index into a yyyymmdd string column.
    df["date"] = df.index
    df["date"] = df["date"].astype(str).apply(lambda x: x.replace('-', ''))
    # Positional rename: assumes get_h_data returned exactly six columns in the
    # order open, high, close, low, volume, amount -- TODO confirm against tushare.
    # The spellings 'volumw' / 'turover' match the headers used in the stock CSVs.
    df.columns = ['open', 'high', 'close', 'low', 'volumw', 'turover', 'date']
    df["name"] = label
    df["wind_code"] = code
    df["time"] = 151500000
    # These two columns are written as constant 0 for the index files.
    df["turn"] = 0
    df["free_turn"] = 0
    df = df[out_cols]
    df.to_csv(index_dir + "/" + file_code + ".csv", index=False)
2、A股股票數據與概括文件
## R
## Downloads forward-adjusted (PriceAdj=F) daily data for every A share listed
## on the end date through WindR, writes one CSV per stock, and writes a summary
## workbook when every stock passed the checks.
library(WindR)
library(xlsx)
library(data.table)
library(magrittr)
library(tcltk2)

setwd("C:/Users/~~~/Desktop/數據")  # edit: working directory
new.date <- "20171220"  # edit: last date to download (yyyymmdd)
start.date <- "20090101"
w.start()

# Reformat yyyymmdd as yyyy-mm-dd for the Wind calls below.
end.date <- paste(
  substr(new.date, 1, 4), substr(new.date, 5, 6), substr(new.date, 7, 8),
  sep = "-"
)
start.date <- paste(
  substr(start.date, 1, 4), substr(start.date, 5, 6), substr(start.date, 7, 8),
  sep = "-"
)

## Codes of the stocks listed on the end date
stock.code.df <- w.wset(
  "sectorconstituent",
  paste0("date=", end.date, ";sectorid=a001010100000000")
)
# Without the constituent list nothing below can run, so fail here with the
# Wind error code instead of crashing later on an undefined variable.
if (stock.code.df$ErrorCode != 0) {
  stop("獲取數據出錯,錯誤代碼", stock.code.df$ErrorCode, call. = FALSE)
}
stock.code.sh.sz <- stock.code.df$Data$wind_code
new.stock.code <- substr(stock.code.sh.sz, 1, 6)
# str(stock.code.sh.sz)

## Create the output folders
dir.create("概括文件")
dir.create("股票數據")
dir.create(paste0("股票數據/股票數據", new.date))

## Summary table: one row per stock with its code, name, exchange suffix and
## the first and last dates that were downloaded
n.stocks <- length(stock.code.sh.sz)
general.information <- data.frame(array(dim = c(n.stocks, 5)))
colnames(general.information) <- c(
  "stock.code", "stock.name", "type", "starttime", "endtime"
)

## Progress bar
pb <- tkProgressBar("進度", "已完成 %", 0, 100)

## Fetch the data from WindR
## NOTE: dates are not checked against the index trading calendar (dates that
## do not appear in the index data are not detected).
w.start()
wind.fields <- "trade_code, sec_name, open, high, low, close, volume, amt, free_turn,free_float_shares"
wind.columns <- c(
  "wind_code", "name", "date", "time", "open", "high", "low", "close",
  "volumw", "turover", "free_turn", "free_float_shares"
)

for (i in seq_along(stock.code.sh.sz)) {
  wind.data <- w.wsd(
    stock.code.sh.sz[i], wind.fields, start.date, end.date,
    "unit=1;PriceAdj=F"
  )
  # A Wind error aborts the whole run; the summary is then left incomplete.
  if (wind.data$ErrorCode != 0) {
    print(paste0(
      stock.code.sh.sz[i], "股票獲取數據出錯,錯誤代碼:", wind.data$ErrorCode
    ))
    break
  }

  wind.df <- data.frame(array(dim = c(nrow(wind.data$Data), 12)))
  colnames(wind.df) <- wind.columns
  wind.df[, 1] <- wind.data$Code
  wind.df[, 2] <- wind.data$Data$SEC_NAME
  wind.df[, 3] <- gsub("-", "", wind.data$Data$DATETIME)
  wind.df[, 4] <- 151500000
  # Columns 4:7 of the Wind result are taken as open/high/low/close and stored
  # multiplied by 10000; columns 8:11 fill the remaining four output columns.
  wind.df[, 5:8] <- wind.data$Data[4:7] * 10000
  wind.df[, 9:12] <- wind.data$Data[8:11]
  # Drop rows without an open price.
  wind.df <- wind.df[!(is.na(wind.df$open)), ]

  # Validation chain: the first failing check is reported and the stock is
  # skipped, which leaves its summary row as NA.
  if (nrow(wind.df) == 0) {
    # The original printed the undefined `stock.code[i]` here.
    print(c(i, stock.code.sh.sz[i]))
  } else if (any(is.na(wind.df))) {
    print(paste(stock.code.sh.sz[i], "數據出錯(數據中仍有NA)"))
  } else if (any(wind.df[, 5:8] == 0)) {
    print(paste(stock.code.sh.sz[i], "數據出錯(數據中開高低收存在0)"))
  } else if (any(table(wind.df$date) > 1)) {
    print(paste(stock.code.sh.sz[i], "數據出錯(數據中存在日期相同)"))
  } else if (any(wind.df$date != sort(wind.df$date))) {
    print(paste(stock.code.sh.sz[i], "數據出錯(數據中日期順序不對)"))
  } else {
    general.information[i, 1] <- substr(stock.code.sh.sz[i], 1, 6)
    general.information[i, 2] <- wind.df$name[1]
    general.information[i, 3] <- substr(stock.code.sh.sz[i], 8, 9)
    general.information[i, 4] <- wind.df[1, 3]
    general.information[i, 5] <- wind.df[nrow(wind.df), 3]
    write.csv(
      wind.df,
      paste0("股票數據/股票數據", new.date, "/", new.stock.code[i], ".csv"),
      row.names = FALSE
    )
  }

  info <- sprintf("已完成 %d%%", round(i * 100 / n.stocks))
  setTkProgressBar(pb, i * 100 / n.stocks, sprintf("進度 (%s)", info), info)
}

## Close the progress bar
close(pb)

## Write the summary workbook only when it contains no NA
if (all(!(is.na(general.information)))) {
  write.xlsx(
    general.information,
    paste0("概括文件/概括文件", new.date, ".xlsx"),
    row.names = FALSE
  )
} else {
  print("總概況文件中存在NA,需查驗")
}
3、文件格式
(1)一級
(2)二級
?
(3)三級
?
?
轉載于:https://www.cnblogs.com/lj0019/p/8093943.html
總結
以上是生活随笔為你收集整理的A股数据day级前复权数据下载与存储的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 修复exe文件关联
- 下一篇: 中国环境检测市场运营现状分析与投资规划研