大数据索引
體會代碼,有二種方式。第一種是初始化索引,第二種是在第一種建立好索引的基礎上使用的移動文件指針的方式,減少讀入內存數據的方式移動指針。詳見下面代碼:
#define _CRT_SECURE_NO_WARNINGS
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef _WIN32
#include <Windows.h>
#endif

/*
 * Line-offset index for a large text file.
 *
 * Plan of the program:
 *   1. build the index (start offset of every line) in a heap array of int,
 *   2. write that array to an index file,
 *   3. either load the index file back into memory (main1), or
 *   4. seek inside the index file itself to do random reads with almost no
 *      memory (main).
 *
 * Index file format: a flat array of native `int`, entry k being the byte
 * offset of line k (0-based) in the data file.
 */

/*
 * Line count of the big data file noted by the original author (the count is
 * needed before the index array can be sized). The index array is now sized
 * from the measured line count instead; the macro is kept so that existing
 * references to N still compile.
 */
#define N 84357484

enum {
    LINE_BUF_SIZE = 256,  /* bytes of a line shown per lookup (incl. NUL) */
    SCAN_BLOCK_SIZE = 4096 /* read granularity while scanning for newlines */
};

char *path = "E:\\qq.txt";       /* data file */
char *index = "E:\\qqindex.txt"; /* index file */

/* In-memory index. */
struct index
{
    int *pindex; /* start offset of each line */
    int length;  /* number of lines */
} allindex;

/* Frees the in-memory index and resets it to the empty state. */
static void release_index(void)
{
    free(allindex.pindex);
    allindex.pindex = NULL;
    allindex.length = 0;
}

/*
 * Replaces the in-memory index with a zeroed table of `length` entries.
 * Returns 0 on success, -1 (after printing a message) if allocation fails.
 */
static int alloc_index(int length)
{
    /* calloc(0, ...) may legally return NULL, so always ask for one slot. */
    size_t slots = length > 0 ? (size_t)length : 1;

    release_index(); /* do not leak a table left by an earlier call */
    allindex.pindex = calloc(slots, sizeof *allindex.pindex);
    if (allindex.pindex == NULL) {
        printf("內存分配失敗!\n");
        return -1;
    }
    allindex.length = length;
    return 0;
}

/*
 * Scans fp from its current position to end of file and counts lines.
 * A line starts at the first byte of the stream and after every '\n'; a
 * final line without a trailing newline is counted, an empty file has 0
 * lines. Working on raw blocks rather than fgets() chunks means lines
 * longer than the display buffer still produce exactly one entry.
 *
 * If offsets is non-NULL, the start offset of line k is stored in
 * offsets[k]; capacity is the number of entries offsets can hold.
 *
 * Returns the number of lines, or -1 on a read error, when the line count
 * or (while recording) an offset does not fit in int, or when more than
 * capacity lines are found.
 */
static int scan_lines(FILE *fp, int *offsets, int capacity)
{
    unsigned char block[SCAN_BLOCK_SIZE];
    long long pos = 0;     /* offset of the byte being examined */
    int count = 0;
    int at_line_start = 1;
    size_t got;

    while ((got = fread(block, 1, sizeof block, fp)) > 0) {
        for (size_t i = 0; i < got; i++, pos++) {
            if (at_line_start) {
                if (count == INT_MAX) {
                    return -1;
                }
                if (offsets != NULL) {
                    if (count >= capacity || pos > INT_MAX) {
                        return -1;
                    }
                    offsets[count] = (int)pos;
                }
                count++;
                at_line_start = 0;
            }
            if (block[i] == '\n') {
                at_line_start = 1;
            }
        }
    }
    return ferror(fp) ? -1 : count;
}

/*
 * Seeks data to offset and prints the line found there (at most
 * LINE_BUF_SIZE - 1 bytes of it) followed by a newline.
 */
static void print_line_at(FILE *data, int offset)
{
    char str[LINE_BUF_SIZE] = { 0 };

    if (offset < 0 || fseek(data, offset, SEEK_SET) != 0) {
        printf("行號超出範圍!\n");
        return;
    }
    if (fgets(str, sizeof str, data) == NULL) {
        str[0] = '\0'; /* nothing to read at that offset: print an empty line */
    }
    printf("%s\n", str);
}

/*
 * Counts the lines of the file at path.
 * Returns the line count, or -1 if the file cannot be opened (a message is
 * printed) or cannot be scanned.
 */
int getLine(char *path)
{
    FILE *pfr = fopen(path, "rb");
    int line;

    if (pfr == NULL) {
        printf("文件打開失敗!\n");
        return -1;
    }
    line = scan_lines(pfr, NULL, 0);
    fclose(pfr);
    return line;
}

/*
 * Builds the index of the file at path in allindex and writes it to the
 * index file. On success allindex stays populated so that the caller can do
 * lookups; on failure to build it, allindex is left empty (pindex == NULL).
 * If only writing the index file fails, the in-memory index remains usable.
 */
void initindex(char *path)
{
    FILE *pfr;
    FILE *pfw;
    int lines;
    int filled;
    size_t written;
    int close_failed;

    printf("索引數組開始分配...\n");
    lines = getLine(path);
    if (lines < 0) {
        release_index();
        return;
    }
    if (alloc_index(lines) != 0) {
        return;
    }
    printf("索引數組完成分配。\n");

    printf("開始讀取...\n");
    pfr = fopen(path, "rb");
    if (pfr == NULL) {
        printf("文件打開失敗!\n");
        release_index();
        return;
    }
    /* Record where every line starts so lookups can seek straight to it. */
    filled = scan_lines(pfr, allindex.pindex, allindex.length);
    fclose(pfr);
    if (filled < 0) {
        printf("索引建立失敗!\n");
        release_index();
        return;
    }
    allindex.length = filled;
    printf("結束讀取...\n");

    /* Write the index to the index file. */
    printf("索引寫入...\n");
    pfw = fopen(index, "wb");
    if (pfw == NULL) {
        printf("文件打開失敗!\n");
        return;
    }
    written = fwrite(allindex.pindex, sizeof *allindex.pindex,
                     (size_t)allindex.length, pfw);
    close_failed = fclose(pfw) != 0; /* fclose flushes: its result matters */
    if (written != (size_t)allindex.length || close_failed) {
        printf("索引寫入失敗!\n");
        return;
    }
    printf("索引寫入結束。\n");
}

/*
 * Fast start: once the index file exists, load it into allindex instead of
 * rebuilding it. The table is sized from the line count of the file at path.
 * If the index file holds fewer entries than that, allindex.length is
 * reduced to the number actually read. On failure allindex is left empty.
 */
void qucik(char *path)
{
    FILE *pfr1;
    int lines;
    size_t got;

    printf("索引數組開始分配...\n");
    lines = getLine(path);
    if (lines < 0) {
        release_index();
        return;
    }
    if (alloc_index(lines) != 0) {
        return;
    }
    printf("索引數組完成分配。\n");

    printf("開始讀取...\n");
    pfr1 = fopen(index, "rb");
    if (pfr1 == NULL) {
        printf("文件打開失敗!\n");
        release_index();
        return;
    }
    got = fread(allindex.pindex, sizeof *allindex.pindex,
                (size_t)allindex.length, pfr1);
    fclose(pfr1);
    allindex.length = (int)got; /* got <= length, so the cast cannot overflow */
    printf("結束讀取...\n");
}

/*
 * Variant 1: build the index in memory, then answer line lookups from it.
 * Line numbers are 0-based. Input that is not a number (or end of input)
 * ends the session.
 */
void main1(void)
{
    FILE *pfr;

    initindex(path); /* build the index */
    /* Once the index file exists, qucik(path) can be used here instead. */
    if (allindex.pindex == NULL) {
        return;
    }

    pfr = fopen(path, "rb");
    if (pfr == NULL) {
        printf("文件打開失敗!\n");
        release_index();
        return;
    }

    for (;;) {
        int num = 0;

        printf("\n請輸入要讀取的行數:");
        if (scanf("%d", &num) != 1) {
            break;
        }
        if (num < 0 || num >= allindex.length) {
            printf("行號超出範圍!\n");
            continue;
        }
        print_line_at(pfr, allindex.pindex[num]);
    }

    fclose(pfr);
    release_index();
    system("pause");
}

/*
 * Variant 2: low-memory lookups.
 *
 * The index file is an array of int, so every entry has the same size and
 * entry k sits at byte k * sizeof(int). For each query the program seeks to
 * that entry, reads the line's start offset, then seeks the data file to the
 * offset and reads the line. Only one int and one line buffer are held in
 * memory, so very large files can be queried, provided the index file was
 * built beforehand.
 *
 * Line numbers are 0-based. Input that is not a number (or end of input)
 * ends the session.
 */
int main(void)
{
    FILE *pf1 = fopen(path, "rb");  /* data file */
    FILE *pf2 = fopen(index, "rb"); /* index file */

    if (pf1 == NULL || pf2 == NULL) {
        printf("文件打開失敗!\n");
        if (pf1 != NULL) {
            fclose(pf1);
        }
        if (pf2 != NULL) {
            fclose(pf2);
        }
        return EXIT_FAILURE;
    }

    for (;;) {
        int num = 0;
        int offset = 0;

        printf("\n請輸入要讀取的行數:");
        if (scanf("%d", &num) != 1) {
            break;
        }
        /* Reject values whose entry position would not fit fseek's long. */
        if (num < 0 || num > LONG_MAX / (long)sizeof offset ||
            fseek(pf2, (long)num * (long)sizeof offset, SEEK_SET) != 0 ||
            fread(&offset, sizeof offset, 1, pf2) != 1) {
            printf("行號超出範圍!\n");
            continue;
        }
        print_line_at(pf1, offset);
    }

    fclose(pf1);
    fclose(pf2);
    return 0;
}

/* "總結" (Summary): trailing text from the original page; nothing follows it there. */
- 上一篇: package.json字段详解
- 下一篇: 任正非首谈接班人制度