MPEG原理分析及MPEG音频编码器的调试
一、程序設計的整體框架
主函數及注釋:
/*
 * Entry point of the encoder.
 *
 * As visible in this function: allocates the working buffers, parses the
 * command line into `frame`/`header`, then loops over get_audio() and for
 * every frame runs the subband filter, scale-factor calculation, the selected
 * psychoacoustic model, bit allocation and bitstream writing.  After the loop
 * it closes the bitstream, prints statistics and terminates via exit().
 *
 * argc/argv: command-line arguments; with no arguments short_usage() is called.
 *
 * NOTE(review): the buffers obtained from mem_alloc() are not released in
 * this function; the process ends through exit().
 */
int main (int argc, char **argv) {
  typedef double SBS[2][3][SCALE_BLOCK][SBLIMIT];
  SBS *sb_sample;
  typedef double JSBS[3][SCALE_BLOCK][SBLIMIT];
  JSBS *j_sample;
  typedef double IN[2][HAN_SIZE];
  IN *win_que;
  typedef unsigned int SUB[2][3][SCALE_BLOCK][SBLIMIT];
  SUB *subband;
  frame_info frame; // header info, bit allocation table, channel count, subband count, etc.
  frame_header header; // contents of the frame header
  char original_file_name[MAX_NAME_SIZE]; // input file name
  char encoded_file_name[MAX_NAME_SIZE]; // output file name
  short **win_buf;
  static short buffer[2][1152];
  static unsigned int bit_alloc[2][SBLIMIT], scfsi[2][SBLIMIT];
  static unsigned int scalar[2][3][SBLIMIT], j_scale[3][SBLIMIT];
  static double smr[2][SBLIMIT], lgmin[2][SBLIMIT], max_sc[2][SBLIMIT];
  // FLOAT snr32[32];
  short sam[2][1344]; /* was [1056]; */
  int model, nch, error_protection;
  static unsigned int crc;
  int sb, ch, adb;
  unsigned long frameBits, sentBits = 0;
  unsigned long num_samples;
  int lg_frame;
  int i;
  /* Used to keep the SNR values for the fast/quick psy models */
  static FLOAT smrdef[2][32]; // one value per subband
  static int psycount = 0;
  extern int minimum;
  time_t start_time, end_time;
  int total_time;

  sb_sample = (SBS *) mem_alloc (sizeof (SBS), "sb_sample");
  j_sample = (JSBS *) mem_alloc (sizeof (JSBS), "j_sample");
  win_que = (IN *) mem_alloc (sizeof (IN), "Win_que");
  subband = (SUB *) mem_alloc (sizeof (SUB), "subband");
  win_buf = (short **) mem_alloc (sizeof (short *) * 2, "win_buf");

  /* clear buffers */
  memset ((char *) buffer, 0, sizeof (buffer));
  memset ((char *) bit_alloc, 0, sizeof (bit_alloc));
  memset ((char *) scalar, 0, sizeof (scalar));
  memset ((char *) j_scale, 0, sizeof (j_scale));
  memset ((char *) scfsi, 0, sizeof (scfsi));
  memset ((char *) smr, 0, sizeof (smr));
  memset ((char *) lgmin, 0, sizeof (lgmin));
  memset ((char *) max_sc, 0, sizeof (max_sc));
  //memset ((char *) snr32, 0, sizeof (snr32));
  memset ((char *) sam, 0, sizeof (sam));

  global_init (); // initialisation
  header.extension = 0;
  frame.header = &header;
  frame.tab_num = -1; /* no table loaded */
  frame.alloc = NULL;
  header.version = MPEG_AUDIO_ID; /* Default: MPEG-1 */

  total_time = 0;
  time(&start_time);

  programName = argv[0];
  if (argc == 1) /* no command-line args */
    short_usage ();
  else
    parse_args (argc, argv, &frame, &model, &num_samples, original_file_name,
                encoded_file_name);
  print_config (&frame, &model, original_file_name, encoded_file_name); // print the configuration to the console

  /* this will load the alloc tables and do some other stuff */
  hdr_to_frps (&frame); // derive the remaining settings from the header
  nch = frame.nch;
  error_protection = header.error_protection;

  while (get_audio (musicin, buffer, num_samples, nch, &header) > 0) { // fetch the next block of audio
    /* NOTE(review): frameNum is only incremented inside this verbosity check,
       so with verbosity <= 1 it is never advanced by this function. */
    if (glopts.verbosity > 1)
      if (++frameNum % 10 == 0)
        fprintf (stderr, "[%4u]\r", frameNum);
    fflush (stderr);
    win_buf[0] = &buffer[0][0];
    win_buf[1] = &buffer[1][0];

    adb = available_bits (&header, &glopts); // compute the number of available bits
    lg_frame = adb / 8;
    if (header.dab_extension) {
      /* in 24 kHz we always have 4 bytes */
      if (header.sampling_frequency == 1)
        header.dab_extension = 4;
      /* You must have one frame in memory if you are in DAB mode */
      /* in conformity of the norme ETS 300 401 http://www.etsi.org */
      /* see bitstream.c */
      if (frameNum == 1)
        minimum = lg_frame + MINIMUM;
      adb -= header.dab_extension * 8 + header.dab_length * 8 + 16;
    }

    {
      int gr, bl, ch;
      /* New polyphase filter
         Combines windowing and filtering. Ricardo Feb'03 */
      for( gr = 0; gr < 3; gr++ ) // three groups per frame
        for ( bl = 0; bl < 12; bl++ ) // 12 blocks per group
          for ( ch = 0; ch < nch; ch++ ) // once per channel
            WindowFilterSubband( &buffer[ch][gr * 12 * 32 + 32 * bl], ch,
                                 &(*sb_sample)[ch][gr][bl][0] ); // polyphase filter bank
    }

#ifdef REFERENCECODE
    {
      /* Old code. left here for reference */
      int gr, bl, ch;
      for (gr = 0; gr < 3; gr++)
        for (bl = 0; bl < SCALE_BLOCK; bl++)
          for (ch = 0; ch < nch; ch++) {
            window_subband (&win_buf[ch], &(*win_que)[ch][0], ch);
            filter_subband (&(*win_que)[ch][0], &(*sb_sample)[ch][gr][bl][0]);
          }
    }
#endif

#ifdef NEWENCODE
    scalefactor_calc_new(*sb_sample, scalar, nch, frame.sblimit);
    find_sf_max (scalar, &frame, max_sc);
    if (frame.actual_mode == MPG_MD_JOINT_STEREO) {
      /* this way we calculate more mono than we need */
      /* but it is cheap */
      combine_LR_new (*sb_sample, *j_sample, frame.sblimit);
      scalefactor_calc_new (j_sample, &j_scale, 1, frame.sblimit);
    }
#else
    scale_factor_calc (*sb_sample, scalar, nch, frame.sblimit);
    pick_scale (scalar, &frame, max_sc);
    if (frame.actual_mode == MPG_MD_JOINT_STEREO) {
      /* this way we calculate more mono than we need */
      /* but it is cheap */
      combine_LR (*sb_sample, *j_sample, frame.sblimit);
      scale_factor_calc (j_sample, &j_scale, 1, frame.sblimit);
    }
#endif

    // select the psychoacoustic model
    if ((glopts.quickmode == TRUE) && (++psycount % glopts.quickcount != 0)) {
      /* We're using quick mode, so we're only calculating the model every
         'quickcount' frames. Otherwise, just copy the old ones across */
      for (ch = 0; ch < nch; ch++) {
        for (sb = 0; sb < SBLIMIT; sb++)
          smr[ch][sb] = smrdef[ch][sb];
      }
    } else {
      /* calculate the psymodel */
      switch (model) {
      case -1:
        psycho_n1 (smr, nch);
        break;
      case 0: /* Psy Model A */
        psycho_0 (smr, nch, scalar, (FLOAT) s_freq[header.version][header.sampling_frequency] * 1000);
        break;
      case 1:
        psycho_1 (buffer, max_sc, smr, &frame);
        break;
      case 2:
        for (ch = 0; ch < nch; ch++) {
          psycho_2 (&buffer[ch][0], &sam[ch][0], ch, &smr[ch][0], //snr32,
                    (FLOAT) s_freq[header.version][header.sampling_frequency] *
                    1000, &glopts);
        }
        break;
      case 3:
        /* Modified psy model 1 */
        psycho_3 (buffer, max_sc, smr, &frame, &glopts);
        break;
      case 4:
        /* Modified Psycho Model 2 */
        for (ch = 0; ch < nch; ch++) {
          psycho_4 (&buffer[ch][0], &sam[ch][0], ch, &smr[ch][0], // snr32,
                    (FLOAT) s_freq[header.version][header.sampling_frequency] *
                    1000, &glopts);
        }
        break;
      case 5:
        /* Model 5 compares model 1 and 3 */
        psycho_1 (buffer, max_sc, smr, &frame);
        fprintf(stdout,"1 ");
        smr_dump(smr,nch);
        psycho_3 (buffer, max_sc, smr, &frame, &glopts);
        fprintf(stdout,"3 ");
        smr_dump(smr,nch);
        break;
      case 6:
        /* Model 6 compares model 2 and 4 */
        for (ch = 0; ch < nch; ch++)
          psycho_2 (&buffer[ch][0], &sam[ch][0], ch, &smr[ch][0], //snr32,
                    (FLOAT) s_freq[header.version][header.sampling_frequency] *
                    1000, &glopts);
        fprintf(stdout,"2 ");
        smr_dump(smr,nch);
        for (ch = 0; ch < nch; ch++)
          psycho_4 (&buffer[ch][0], &sam[ch][0], ch, &smr[ch][0], // snr32,
                    (FLOAT) s_freq[header.version][header.sampling_frequency] *
                    1000, &glopts);
        fprintf(stdout,"4 ");
        smr_dump(smr,nch);
        break;
      case 7:
        fprintf(stdout,"Frame: %i\n",frameNum);
        /* Dump the SMRs for all models */
        psycho_1 (buffer, max_sc, smr, &frame);
        fprintf(stdout,"1");
        smr_dump(smr, nch);
        psycho_3 (buffer, max_sc, smr, &frame, &glopts);
        fprintf(stdout,"3");
        smr_dump(smr,nch);
        for (ch = 0; ch < nch; ch++)
          psycho_2 (&buffer[ch][0], &sam[ch][0], ch, &smr[ch][0], //snr32,
                    (FLOAT) s_freq[header.version][header.sampling_frequency] *
                    1000, &glopts);
        fprintf(stdout,"2");
        smr_dump(smr,nch);
        for (ch = 0; ch < nch; ch++)
          psycho_4 (&buffer[ch][0], &sam[ch][0], ch, &smr[ch][0], // snr32,
                    (FLOAT) s_freq[header.version][header.sampling_frequency] *
                    1000, &glopts);
        fprintf(stdout,"4");
        smr_dump(smr,nch);
        break;
      case 8:
        /* Compare 0 and 4 */
        psycho_n1 (smr, nch);
        fprintf(stdout,"0");
        smr_dump(smr,nch);
        for (ch = 0; ch < nch; ch++)
          psycho_4 (&buffer[ch][0], &sam[ch][0], ch, &smr[ch][0], // snr32,
                    (FLOAT) s_freq[header.version][header.sampling_frequency] *
                    1000, &glopts);
        fprintf(stdout,"4");
        smr_dump(smr,nch);
        break;
      default:
        fprintf (stderr, "Invalid psy model specification: %i\n", model);
        /* NOTE(review): this error path exits with status 0. */
        exit (0);
      }

      if (glopts.quickmode == TRUE)
        /* copy the smr values and reuse them later */
        for (ch = 0; ch < nch; ch++) {
          for (sb = 0; sb < SBLIMIT; sb++)
            smrdef[ch][sb] = smr[ch][sb];
        }

      if (glopts.verbosity > 4)
        smr_dump(smr, nch);
    }

#ifdef NEWENCODE
    sf_transmission_pattern (scalar, scfsi, &frame);
    main_bit_allocation_new (smr, scfsi, bit_alloc, &adb, &frame, &glopts);
    //main_bit_allocation (smr, scfsi, bit_alloc, &adb, &frame, &glopts);

    if (error_protection)
      CRC_calc (&frame, bit_alloc, scfsi, &crc);

    write_header (&frame, &bs);
    //encode_info (&frame, &bs);
    if (error_protection)
      putbits (&bs, crc, 16);
    write_bit_alloc (bit_alloc, &frame, &bs);
    //encode_bit_alloc (bit_alloc, &frame, &bs);
    write_scalefactors(bit_alloc, scfsi, scalar, &frame, &bs);
    //encode_scale (bit_alloc, scfsi, scalar, &frame, &bs);
    subband_quantization_new (scalar, *sb_sample, j_scale, *j_sample, bit_alloc,
                              *subband, &frame);
    //subband_quantization (scalar, *sb_sample, j_scale, *j_sample, bit_alloc,
    // *subband, &frame);
    write_samples_new(*subband, bit_alloc, &frame, &bs);
    //sample_encoding (*subband, bit_alloc, &frame, &bs);
#else
    transmission_pattern (scalar, scfsi, &frame);
    main_bit_allocation (smr, scfsi, bit_alloc, &adb, &frame, &glopts);

    if (error_protection)
      CRC_calc (&frame, bit_alloc, scfsi, &crc);

    encode_info (&frame, &bs);
    if (error_protection)
      encode_CRC (crc, &bs);
    encode_bit_alloc (bit_alloc, &frame, &bs);
    encode_scale (bit_alloc, scfsi, scalar, &frame, &bs);
    subband_quantization (scalar, *sb_sample, j_scale, *j_sample, bit_alloc,
                          *subband, &frame);
    sample_encoding (*subband, bit_alloc, &frame, &bs);
#endif

    /* If not all the bits were used, write out a stack of zeros */
    for (i = 0; i < adb; i++)
      put1bit (&bs, 0);
    if (header.dab_extension) {
      /* Reserve some bytes for X-PAD in DAB mode */
      putbits (&bs, 0, header.dab_length * 8);

      for (i = header.dab_extension - 1; i >= 0; i--) {
        CRC_calcDAB (&frame, bit_alloc, scfsi, scalar, &crc, i);
        /* this crc is for the previous frame in DAB mode */
        if (bs.buf_byte_idx + lg_frame < bs.buf_size)
          bs.buf[bs.buf_byte_idx + lg_frame] = crc;
        /* reserved 2 bytes for F-PAD in DAB mode */
        putbits (&bs, crc, 8);
      }
      putbits (&bs, 0, 16);
    }

    /* Bits written for this frame = current stream position minus the total
       sent before it. */
    frameBits = sstell (&bs) - sentBits;

    if (frameBits % 8) { /* a program failure */
      fprintf (stderr, "Sent %ld bits = %ld slots plus %ld\n", frameBits,
               frameBits / 8, frameBits % 8);
      fprintf (stderr, "If you are reading this, the program is broken\n");
      fprintf (stderr, "email [mfc at NOTplanckenerg.com] without the NOT\n");
      fprintf (stderr, "with the command line arguments and other info\n");
      /* NOTE(review): this error path exits with status 0. */
      exit (0);
    }

    sentBits += frameBits;
  }

  close_bit_stream_w (&bs);

  if ((glopts.verbosity > 1) && (glopts.vbr == TRUE)) {
    int i;
#ifdef NEWENCODE
    extern int vbrstats_new[15];
#else
    extern int vbrstats[15];
#endif
    fprintf (stdout, "VBR stats:\n");
    for (i = 1; i < 15; i++)
      fprintf (stdout, "%4i ", bitrate[header.version][i]);
    fprintf (stdout, "\n");
    for (i = 1; i < 15; i++)
#ifdef NEWENCODE
      fprintf (stdout,"%4i ",vbrstats_new[i]);
#else
      fprintf (stdout, "%4i ", vbrstats[i]);
#endif
    fprintf (stdout, "\n");
  }

  /* NOTE(review): the averages below divide by frameNum; presumably it is
     non-zero here — confirm for empty input or verbosity <= 1. */
  fprintf (stderr,
           "Avg slots/frame = %.3f; b/smp = %.2f; bitrate = %.3f kbps\n",
           (FLOAT) sentBits / (frameNum * 8),
           (FLOAT) sentBits / (frameNum * 1152),
           (FLOAT) sentBits / (frameNum * 1152) *
           s_freq[header.version][header.sampling_frequency]);

  if (fclose (musicin) != 0) {
    fprintf (stderr, "Could not close \"%s\".\n", original_file_name);
    exit (2);
  }

  fprintf (stderr, "\nDone\n");

  /* Elapsed wall-clock time between the two time() calls. */
  time(&end_time);
  total_time = end_time - start_time;
  printf("total time is %d\n", total_time);
  exit (0);
}
二、感知音頻編碼的設計思想
基本思想:分析信號,去掉不能被感知的部分。
聽覺閾值:
聽覺掩蔽特性:
MPEG-1 Audio LayerII編碼器:
該編碼器的兩條線: 1.輸入的PCM音頻信號經過多相濾波器組,變為32個子帶信號,進行子帶編碼。
2.對輸入的PCM音頻信號做1024點FFT變換,由心理聲學模型計算各子帶的信掩比(SMR),據此為各子帶分配比特數,再進行量化編碼。
時-頻分析的矛盾: 時域分析窗越短,時間分辨率越高,但頻率分辨率越低(頻譜展寬);反之,窗越長,頻率分辨率越高而時間分辨率越低,兩者無法同時兼得。
三、心理聲學模型的實現過程
臨界頻帶:
掩蔽值計算的思路:
四、碼率分配的實現思路
對每個子帶計算噪掩比NMR=SMR-SNR(dB)
對NMR最高的子帶進行比特分配:使獲益最大的子帶的量化級別增加一級,然後重新計算該子帶的NMR。此時分配了更多比特的子帶的信噪比(SNR)會提升,所以其NMR會下降。不斷循環,直到沒有比特可分配或者所有NMR都減到0。
五、輸出音頻的采樣率和目標碼率
查閱得原代碼中帶有輸出采樣頻率和目標碼率到屏幕的功能:
/* Excerpt of the original source: prints the input file with its sampling
   frequency, the output file, and the target bitrate to stderr.  A path equal
   to "-" is shown as stdin/stdout. */
fprintf (stderr, "Input File : '%s' %.1f kHz\n",
         (strcmp (inPath, "-") ? inPath : "stdin"),
         s_freq[header->version][header->sampling_frequency]); // print the sampling frequency
fprintf (stderr, "Output File: '%s'\n",
         (strcmp (outPath, "-") ? outPath : "stdout"));
fprintf (stderr, "%d kbps ", bitrate[header->version][header->bitrate_index]); // print the target bitrate
六、輸出某一數據幀的信息
為輸出該幀所分配的比特數,該幀的比例因子,該幀的比特分配結果,在主函數的while循環中添加如下代碼:
FILE* frame_info_file; frame_info_file = fopen("C:\\Users\\tonym\\Desktop\\study\\shujuyasuo\\mpeg\\實驗6_MPG音頻編碼\\m2aenc\\frame_info_file.txt", "w"); if (frameNum == 50)//此處設定想第幾幀 {fprintf(frame_info_file, "采樣率:%f khz\n", s_freq[frame.header->version][frame.header->sampling_frequency]);fprintf(frame_info_file, "目標碼率:%d kbps\n", bitrate[frame.header->version][frame.header->bitrate_index]);fprintf(frame_info_file, "第%d幀\n", frameNum);fprintf(frame_info_file, "所分配比特數:%d\n", adb);fprintf(frame_info_file, "比例因子:\n");for (ch = 0; ch < nch; ch++){fprintf(frame_info_file, "聲道%d:\n", ch);for (sb = 0; sb < frame.sblimit; sb++){fprintf(frame_info_file, " 子帶%2d:", sb);for (int gr = 0; gr < 3; gr++){fprintf(frame_info_file, "%4d", scalar[ch][gr][sb]);}fprintf(frame_info_file, "\n");}}fprintf(frame_info_file, "\n");fprintf(frame_info_file, "比特分配表:\n");for (ch = 0; ch < nch; ch++){fprintf(frame_info_file, "聲道%d:\n", ch);for (sb = 0; sb < frame.sblimit; sb++){fprintf(frame_info_file, " 子帶%2d:\t%d\n", sb, bit_alloc[ch][sb]);}fprintf(frame_info_file, "\n");} }輸出樣例:
輸入文件為音樂(.wav)時的輸出文件:
采樣率:44.100000 khz
目標碼率:192 kbps
第50幀
所分配比特數:5008
比例因子:
聲道0:
子帶 0: 10 11 11
子帶 1: 18 19 23
子帶 2: 17 17 21
子帶 3: 22 25 27
子帶 4: 30 31 33
子帶 5: 24 25 29
子帶 6: 22 27 30
子帶 7: 19 23 26
子帶 8: 42 43 45
子帶 9: 29 31 35
子帶10: 29 30 31
子帶11: 29 29 29
子帶12: 21 24 28
子帶13: 24 23 27
子帶14: 24 26 28
子帶15: 23 23 31
子帶16: 30 32 33
子帶17: 28 29 32
子帶18: 26 26 30
子帶19: 29 31 35
子帶20: 31 32 35
子帶21: 29 31 36
子帶22: 40 42 44
子帶23: 52 54 49
子帶24: 52 55 53
子帶25: 52 53 55
子帶26: 53 52 51
子帶27: 56 52 55
子帶28: 53 55 53
子帶29: 54 55 51
比特分配表:
聲道0:
子帶 0: 9
子帶 1: 8
子帶 2: 8
子帶 3: 8
子帶 4: 6
子帶 5: 7
子帶 6: 7
子帶 7: 7
子帶 8: 3
子帶 9: 6
子帶10: 6
子帶11: 6
子帶12: 7
子帶13: 6
子帶14: 6
子帶15: 6
子帶16: 5
子帶17: 5
子帶18: 6
子帶19: 4
子帶20: 4
子帶21: 4
子帶22: 0
子帶23: 0
子帶24: 0
子帶25: 0
子帶26: 0
子帶27: 0
子帶28: 0
子帶29: 0
輸入文件為噪聲(.wav)時的輸出文件:
采樣率:48.000000 khz
目標碼率:192 kbps
第50幀
所分配比特數:4608
比例因子:
聲道0:
子帶 0: 24 23 24
子帶 1: 26 26 27
子帶 2: 28 25 27
子帶 3: 28 28 26
子帶 4: 25 24 26
子帶 5: 25 26 25
子帶 6: 29 27 28
子帶 7: 32 31 31
子帶 8: 31 31 33
子帶 9: 32 32 33
子帶10: 33 32 35
子帶11: 37 35 38
子帶12: 35 34 36
子帶13: 35 34 34
子帶14: 35 36 37
子帶15: 39 36 37
子帶16: 37 37 40
子帶17: 40 40 40
子帶18: 43 43 43
子帶19: 45 44 43
子帶20: 46 45 45
子帶21: 50 48 47
子帶22: 57 59 59
子帶23: 60 57 60
子帶24: 56 57 58
子帶25: 58 57 57
子帶26: 57 57 58
比特分配表:
聲道0:
子帶 0: 8
子帶 1: 7
子帶 2: 7
子帶 3: 9
子帶 4: 9
子帶 5: 8
子帶 6: 8
子帶 7: 8
子帶 8: 8
子帶 9: 7
子帶10: 7
子帶11: 6
子帶12: 6
子帶13: 6
子帶14: 6
子帶15: 6
子帶16: 6
子帶17: 5
子帶18: 5
子帶19: 0
子帶20: 0
子帶21: 0
子帶22: 0
子帶23: 0
子帶24: 0
子帶25: 0
子帶26: 0
輸入為音樂與噪聲混合(.wav)時的輸出文件:
采樣率:44.100000 khz
目標碼率:192 kbps
第50幀
所分配比特數:5008
比例因子:
聲道0:
子帶 0: 12 11 11
子帶 1: 15 16 16
子帶 2: 17 15 17
子帶 3: 20 22 21
子帶 4: 19 20 18
子帶 5: 18 18 19
子帶 6: 19 20 20
子帶 7: 20 21 21
子帶 8: 25 26 27
子帶 9: 25 26 25
子帶10: 28 26 28
子帶11: 26 28 29
子帶12: 27 25 24
子帶13: 24 26 21
子帶14: 25 25 23
子帶15: 26 24 24
子帶16: 29 28 28
子帶17: 30 29 27
子帶18: 29 28 24
子帶19: 29 32 32
子帶20: 30 32 31
子帶21: 29 30 32
子帶22: 38 38 41
子帶23: 58 55 57
子帶24: 57 56 58
子帶25: 59 60 58
子帶26: 58 58 59
子帶27: 57 58 57
子帶28: 57 58 56
子帶29: 58 54 59
聲道1:
子帶 0: 12 11 11
子帶 1: 15 16 16
子帶 2: 17 16 16
子帶 3: 19 22 21
子帶 4: 20 19 19
子帶 5: 17 18 19
子帶 6: 18 21 19
子帶 7: 20 21 23
子帶 8: 24 27 27
子帶 9: 26 27 26
子帶10: 27 27 28
子帶11: 26 28 31
子帶12: 27 27 24
子帶13: 25 27 22
子帶14: 24 24 22
子帶15: 26 24 23
子帶16: 29 28 29
子帶17: 29 30 29
子帶18: 27 28 23
子帶19: 30 31 32
子帶20: 30 31 32
子帶21: 29 30 32
子帶22: 37 39 43
子帶23: 57 58 58
子帶24: 59 57 57
子帶25: 58 58 59
子帶26: 59 58 58
子帶27: 56 58 58
子帶28: 57 58 56
子帶29: 59 54 58
比特分配表:
聲道0:
子帶 0: 5
子帶 1: 4
子帶 2: 3
子帶 3: 4
子帶 4: 4
子帶 5: 4
子帶 6: 4
子帶 7: 3
子帶 8: 1
子帶 9: 3
子帶10: 2
子帶11: 3
子帶12: 3
子帶13: 3
子帶14: 2
子帶15: 1
子帶16: 1
子帶17: 2
子帶18: 3
子帶19: 1
子帶20: 1
子帶21: 0
子帶22: 0
子帶23: 0
子帶24: 0
子帶25: 0
子帶26: 0
子帶27: 0
子帶28: 0
子帶29: 0
聲道1:
子帶 0: 5
子帶 1: 4
子帶 2: 3
子帶 3: 4
子帶 4: 5
子帶 5: 3
子帶 6: 4
子帶 7: 3
子帶 8: 1
子帶 9: 3
子帶10: 3
子帶11: 2
子帶12: 3
子帶13: 2
子帶14: 2
子帶15: 1
子帶16: 1
子帶17: 1
子帶18: 3
子帶19: 1
子帶20: 1
子帶21: 0
子帶22: 0
子帶23: 0
子帶24: 0
子帶25: 0
子帶26: 0
子帶27: 0
子帶28: 0
子帶29: 0
總結
以上是生活随笔為你收集整理的MPEG原理分析及MPEG音频编码器的调试的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: JPEG原理分析及JPEG解码器的调试
- 下一篇: H.264文件分析