FFmpeg音频编解码处理
新版的ffmpeg對音頻編碼處理已經有了很大的變化,記錄在此,做個備忘。
早期ffmpeg編碼音頻,輸入數據一般都是S16格式,解碼輸出一般也是S16,也就是說PCM數據是存儲在連續的buffer中,對一個雙聲道(左右)音頻來說,存儲格式可能就為
LRLRLR.........(左聲道在前還是右聲道在前沒有認真研究過)。所以以往編碼部分的代碼基本形如:
????int?sample_bytes?=?av_get_bytes_per_sample(pCodecCtx->sample_fmt);
????int?frame_bytes?=?pCodecCtx->frame_size?*?sample_bytes?*?pCodecCtx->channels;
???//?AVFifoBuffer*?fifo;????存放pcm數據
????while(av_fifo_size(fifo)?>=?frame_bytes)?{
????????av_fifo_generic_read(fifo,?inputBuf,?frame_bytes,?NULL);
????????AVPacket?pkt?=?{0};
????????av_init_packet(&pkt);
????????pkt.data?=?encodeBuf;
????????pkt.size?=?AVCODEC_MAX_AUDIO_FRAME_SIZE;
????????int?got_packet?=?0;
????????audioframe->nb_samples?=?pCodecCtx->frame_size;
????????int?samples_size?=?av_samples_get_buffer_size(NULL,?pCodecCtx->channels,
??????????????????????????????????????????????????audioframe->nb_samples,
??????????????????????????????????????????????????pCodecCtx->sample_fmt,?0);
????????avcodec_fill_audio_frame(audioframe,?pCodecCtx->channels,?pCodecCtx->sample_fmt,
????????????????inputBuf,?samples_size,?0);
????????audioframe->pts?=?audio_sync_opts;
????????audio_sync_opts?=?audioframe->pts?+?audioframe->nb_samples;
????????avcodec_encode_audio2(pCodecCtx,?&pkt,?audioframe,?&got_packet);
????????if?(got_packet?)?{
????????????//處理pkt,封裝存儲、流輸出或交由上層應用
????????}
????}
項目中需要對音視頻流進行轉碼輸出,音頻處理部分一般是先解碼(得到PCM S16數據),再交由編碼(MP3、AAC)
ffmpeg升級到2.1后(具體哪個版本開始的沒去查,可能早幾個版本就已經這樣做了),音頻格式增加了plane概念(呃,不是灰機,是平面)
/* FFmpeg sample formats: the first group is packed/interleaved, the second
 * group (…P) is planar — one data plane per channel. */
enum AVSampleFormat {
    AV_SAMPLE_FMT_NONE = -1,
    AV_SAMPLE_FMT_U8,          ///< unsigned 8 bits
    AV_SAMPLE_FMT_S16,         ///< signed 16 bits
    AV_SAMPLE_FMT_S32,         ///< signed 32 bits
    AV_SAMPLE_FMT_FLT,         ///< float
    AV_SAMPLE_FMT_DBL,         ///< double
// the formats below are all planar
    AV_SAMPLE_FMT_U8P,         ///< unsigned 8 bits, planar
    AV_SAMPLE_FMT_S16P,        ///< signed 16 bits, planar
    AV_SAMPLE_FMT_S32P,        ///< signed 32 bits, planar
    AV_SAMPLE_FMT_FLTP,        ///< float, planar
    AV_SAMPLE_FMT_DBLP,        ///< double, planar
    AV_SAMPLE_FMT_NB           ///< Number of sample formats. DO NOT USE if linking dynamically
};
這就有點像視頻部分的YUV數據,有的帶P,有的是不帶P的,同樣對雙聲道音頻PCM數據,以S16P為例,存儲就可能是
plane 0: LLLLLLLLLLLLLLLLLLLLLLLLLL...
plane 1: RRRRRRRRRRRRRRRRRRRRRRRRRR...
而不再是以前的連續buffer。
如mp3編碼就明確規定了只使用平面格式的數據
AVCodec?ff_libmp3lame_encoder?=?{
.....
????.capabilities??????????=?CODEC_CAP_DELAY?|?CODEC_CAP_SMALL_LAST_FRAME,
????.sample_fmts???????????=?(const?enum?AVSampleFormat[])?{?AV_SAMPLE_FMT_S32P,
?????????????????????????????????????????????????????????????AV_SAMPLE_FMT_FLTP,
?????????????????????????????????????????????????????????????AV_SAMPLE_FMT_S16P,
?????????????????????????????????????????????????????????????AV_SAMPLE_FMT_NONE?},
....
};
而AAC編碼依舊使用 AV_SAMPLE_FMT_S16 格式
也就說,音頻編碼不能再像以前那樣簡單的處理,統一輸入S16數據,而要根據具體的codec轉化為其支持的格式,否則無論是編碼還是解碼輸出的聲音會莫名其妙,幸好,轉換工作不用自己做,ffmpeg提供了相應的API,swr_convert(類似以前的audio_resample,只是audio_resample目前已不再推薦使用,因為swr_convert更強大)
基于此,對音頻編碼部分做了相應修改,主要用的數據結構為 struct SwrContext* m_SwrCtx;
step 1:判斷是否需要進行convert,初始化階段
/* step 1 (init time): resample/convert only when the encoder's channel count,
 * rate or sample format differs from the decoder output. */
if (pCodecCtx->channels != pInputCtx->channels
    || pCodecCtx->sample_rate != pInputCtx->sample_rate
    || pCodecCtx->sample_fmt != pInputCtx->sample_fmt)
{
    u::Log::write(get_log_file(), "Audio need resample!");
    if (NULL == m_SwrCtx) {
        m_SwrCtx = swr_alloc();
    }
#if LIBSWRESAMPLE_VERSION_MINOR >= 17 // pick the right API for this libswresample version
    av_opt_set_int(m_SwrCtx, "ich", pInputCtx->channels, 0);
    av_opt_set_int(m_SwrCtx, "och", pCodecCtx->channels, 0);
    av_opt_set_int(m_SwrCtx, "in_sample_rate", pInputCtx->sample_rate, 0);
    av_opt_set_int(m_SwrCtx, "out_sample_rate", pCodecCtx->sample_rate, 0);
    av_opt_set_sample_fmt(m_SwrCtx, "in_sample_fmt", pInputCtx->sample_fmt, 0);
    av_opt_set_sample_fmt(m_SwrCtx, "out_sample_fmt", pCodecCtx->sample_fmt, 0);
#else
    /* BUGFIX: swr_alloc_set_opts() takes the OUTPUT triple first. The original
     * passed the input layout/rate and hard-coded AV_SAMPLE_FMT_S16 as output,
     * which contradicts the av_opt_set branch above (and breaks planar-only
     * encoders such as libmp3lame). Output must describe the encoder. */
    m_SwrCtx = swr_alloc_set_opts(m_SwrCtx,
        pCodecCtx->channel_layout, pCodecCtx->sample_fmt, pCodecCtx->sample_rate,
        pInputCtx->channel_layout, pInputCtx->sample_fmt, pInputCtx->sample_rate,
        0, NULL);
#endif
    swr_init(m_SwrCtx);
    if (av_sample_fmt_is_planar(pCodecCtx->sample_fmt)) {
        // planar output: one FIFO per channel so each plane is buffered separately
        for (int i = 0; i < pCodecCtx->channels; i++) {
            m_fifo[i] = av_fifo_alloc(BUF_SIZE_20K);
        }
    } else {
        // interleaved output: a single FIFO is enough (the FIFO itself is just
        // a convenience — plain buffering would work too)
        fifo = av_fifo_alloc(BUF_SIZE_20K);
    }
}
step 2:進行轉換
//以下代碼部分抄自ffmpeg自帶的例子
if?(m_SwrCtx?!=?NULL)?{
if?(?!m_audioOut?)?{
ret?=?av_samples_alloc_array_and_samples(&m_audioOut,
&dst_samples_linesize,?pCodecCtx->channels,?max_dst_nb_samples,?pCodecCtx->sample_fmt,?0);
if?(ret?<?0){
av_log(NULL,?AV_LOG_WARNING,?"[%s.%d?%s()?Could?not?allocate?destination?samples\n",?__FILE__,?__LINE__,?__FUNCTION__);
return?-1;
}
}
dst_nb_samples?=?av_rescale_rnd(swr_get_delay(m_SwrCtx,?pCodecCtx->sample_rate)?+?src_nb_samples,
pCodecCtx->sample_rate,?pCodecCtx->sample_rate,?AV_ROUND_UP);
if?(dst_nb_samples?>?max_dst_nb_samples)?{
av_free(m_audioOut[0]);
ret?=?av_samples_alloc(m_audioOut,?&dst_samples_linesize,?pCodecCtx->channels,?dst_nb_samples,?pCodecCtx->sample_fmt,?0);
if?(ret?<?0){
av_log(NULL,?AV_LOG_WARNING,?"[%s.%d?%s()?Could?not?allocate?samples?Buffer\n",?__FILE__,?__LINE__,?__FUNCTION__);
return?-1;
}
max_dst_nb_samples?=?dst_nb_samples;
}
//輸入也可能是分平面的,所以要做如下處理
uint8_t*?m_ain[SWR_CH_MAX];
setup_array(m_ain,?(uint8_t*)input_buf,?data->ctx.sample_fmt,?src_nb_samples);
len?=?swr_convert(m_SwrCtx,?m_audioOut,?dst_nb_samples,?(const?uint8_t**)m_ain,?src_nb_samples);
if?(len?<?0)?{
char?errmsg[BUF_SIZE_1K];
av_strerror(len,?errmsg,?sizeof(errmsg));
av_log(NULL,?AV_LOG_WARNING,?"[%s:%d]?swr_convert!(%d)(%s)",?__FILE__,?__LINE__,?len,?errmsg);
return?-1;
}
paudiobuf?=?m_audioOut[0];
decode_size?=?len?*?pCodecCtx->channels?*?av_get_bytes_per_sample(pCodecCtx->sample_fmt);
}?else?{
paudiobuf?=?(uint8_t*)input_buf;
decode_size?=?input_size;
}
//存儲PCM數據,注意:m_SwrCtx即使進行了轉換,也要判斷轉換后的數據是否分平面
if?(m_SwrCtx?&&?av_sample_fmt_is_planar(pCodecCtx->sample_fmt)?)?{
for?(int?i?=?0;?i?<?pCodecCtx->channels;?i++){
if?(av_fifo_realloc2(m_fifo[i],?av_fifo_size(m_fifo[i])?+?len*av_get_bytes_per_sample(pCodecCtx->sample_fmt))?<?0){
av_log(NULL,?AV_LOG_FATAL,?"av_fifo_realloc2()?failed\n");
return?-1;
}
av_fifo_generic_write(m_fifo[i],?m_audioOut[0]+i*dst_samples_linesize,?len*av_get_bytes_per_sample(pCodecCtx->sample_fmt),?NULL);
}
}?else?{
if?(av_fifo_realloc2(fifo,?av_fifo_size(fifo)?+?decode_size)?<?0)?{
av_log(NULL,?AV_LOG_FATAL,?"av_fifo_realloc2()?failed\n");
return?-1;
}
av_fifo_generic_write(fifo,?paudiobuf,?decode_size,?NULL);
}
setup_array函數摘自ffmpeg例程
static?void?setup_array(uint8_t*?out[SWR_CH_MAX],?uint8_t*?in,?int?format,?int?samples){
if?(av_sample_fmt_is_planar((AVSampleFormat)format))?{
int?i;
int?plane_size?=?av_get_bytes_per_sample((AVSampleFormat)(format?&?0xFF))?*?samples;
format?&=?0xFF;
for?(i?=?0;?i?<?SWR_CH_MAX;?i++)?{
out[i]?=?in?+?i*plane_size;
}
}?else?{
out[0]?=?in;
}
}
step 3:進行編碼
//編碼格式要求是分平面數據
if?(m_SwrCtx?&&?(?av_sample_fmt_is_planar(pCodecCtx->sample_fmt)?))?{
??//這里為簡單示例,只判斷第一個聲道(因為左右聲道數據大小是一致的),實際應用中應考慮每個聲道具體情況
while(av_fifo_size(m_fifo[0])?>=?pCodecCtx->frame_size?*?sample_bytes){
for?(int?i?=?0;?i?<?pCodecCtx->channels;?i++)?{
??//inputBuf是一塊連續內存
av_fifo_generic_read(m_fifo[i],?inputBuf+i*pCodecCtx->frame_size?*?sample_bytes,?pCodecCtx->frame_size?*?sample_bytes,?NULL);
}
AVPacket?pkt?=?{0};
av_init_packet(&pkt);
pkt.data?=?encodeBuf;
pkt.size?=?AVCODEC_MAX_AUDIO_FRAME_SIZE;
int?got_packet?=?0;
audioframe->nb_samples?=?pCodecCtx->frame_size;
int?samples_size?=?av_samples_get_buffer_size(NULL,?pCodecCtx->channels,
??????????????????????????????????????????????????audioframe->nb_samples,
??????????????????????????????????????????????????pCodecCtx->sample_fmt,?0);
avcodec_fill_audio_frame(audioframe,?pCodecCtx->channels,?pCodecCtx->sample_fmt,
inputBuf,?samples_size,?0);
int?ret?=?avcodec_encode_audio2(pCodecCtx,?&pkt,?audioframe,?&got_packet);
if?(got_packet?)?{
//處理pkt
}
}
}?else?{
//不分平面
while(av_fifo_size(fifo)?>=?frame_bytes)?{
av_fifo_generic_read(fifo,?inputBuf,?frame_bytes,?NULL);
AVPacket?pkt?=?{0};
av_init_packet(&pkt);
pkt.data?=?encodeBuf;
pkt.size?=?AVCODEC_MAX_AUDIO_FRAME_SIZE;
int?got_packet?=?0;
audioframe->nb_samples?=?pCodecCtx->frame_size;
int?samples_size?=?av_samples_get_buffer_size(NULL,?pCodecCtx->channels,
??????????????????????????????????????????????????audioframe->nb_samples,
??????????????????????????????????????????????????pCodecCtx->sample_fmt,?0);
avcodec_fill_audio_frame(audioframe,?pCodecCtx->channels,?pCodecCtx->sample_fmt,
inputBuf,?samples_size,?0);
int?ret?=?avcodec_encode_audio2(pCodecCtx,?&pkt,?audioframe,?&got_packet);
if?(got_packet?)?{
//處理pkt
}
}
}
另:
對于解碼也可能需要做swr_convert,比如做播放器,很多時候我們是將S16格式數據丟給聲卡,而新版ffmpeg解碼音頻輸出的格式可能不滿足S16,如AAC解碼后得到的是FLT(浮點型),AC3解碼是FLTP(帶平面)等,需要根據具體的情況決定是否需要convert,轉換過程與上類似。
總結
以上是生活随笔為你收集整理的FFmpeg音频编解码处理的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: vi格式化代码,撤销,重做,回退操作
- 下一篇: win10+vs2017+caffe(c