目录：流程概述 / 用到的API / tips / demo样例 / 附录 - SwrContext结构体字段。 流程概述
音频重采样的基本流程为：申请重采样器上下文 → 设置重采样器上下文的参数 → 初始化重采样器 → 申请数据存放的缓冲区空间 → 进行重采样。注意，要先设置参数，再对重采样器初始化。
用到的API… 目录：流程概述 / 用到的API / tips / demo样例 / 附录 - SwrContext结构体字段。 流程概述
音频重采样的基本流程为：（1）申请重采样器上下文；（2）设置重采样器上下文的参数；（3）初始化重采样器；（4）申请数据存放的缓冲区空间；（5）进行重采样。注意：要先设置参数，再对重采样器初始化。
用到的API
- SwrContext：重采样器上下文的结构体。此结构是不透明的，这意味着如果要设置选项，必须使用诸如 av_opt_set 等函数来设置。
- struct SwrContext *swr_alloc(); 申请重采样器上下文。
- int av_opt_set(void *obj, const char *name, const char *val, int search_flags); int av_opt_set_int(void *obj, const char *name, int64_t val, int search_flags); int av_opt_set_chlayout(void *obj, const char *name, const AVChannelLayout *layout, int search_flags); av_opt_set* 函数簇，这里仅列举几个。以 av_opt_set 为例，用于将给定 name 的 obj 字段设置为指定的 val。第一个 void* 的 obj 参数表示要设置的对象；第二个 name 参数表示要设置的字段名称，以字符串形式传入。例如 obj 为 SwrContext* 对象，name 为 "in_sample_rate"，就对应着 SwrContext 中的同名字段。中间的部分就为要设置的参数值；最后的 search_flags 表示搜索标志，一般设为 0 即可。
- int swr_alloc_set_opts2(struct SwrContext **ps, const AVChannelLayout *out_ch_layout, enum AVSampleFormat out_sample_fmt, int out_sample_rate, const AVChannelLayout *in_ch_layout, enum AVSampleFormat in_sample_fmt, int in_sample_rate, int log_offset, void *log_ctx); 如果还未分配则分配 SwrContext，并设置/重置公共参数。就相当于 alloc + set。
- int swr_init(struct SwrContext *s); 重采样器初始化。必须在设置过 SwrContext 参数之后初始化。
- int64_t av_rescale_rnd(int64_t a, int64_t b, int64_t c, enum AVRounding rnd) 和 int64_t av_rescale(int64_t a, int64_t b, int64_t c) 都是用于计算 a * b / c 的，唯一的区别在于 rnd 可以设置向上取整、向下取整等舍入方式。
- int av_samples_alloc_array_and_samples(uint8_t ***audio_data, int *linesize, int nb_channels, int nb_samples, enum AVSampleFormat sample_fmt, int align); 申请一个 data[nb_channels][ch_data] 的二维数组，所以 audio_data 要作为一个三级指针传进去。
- void av_freep(void *ptr); 释放 av_samples_alloc_array_and_samples 申请的 data。av_freep 即使传入 null 也是安全的。用法示例： uint8_t *buf = av_malloc(16); av_freep(&buf);
- int64_t swr_get_delay(struct SwrContext *s, int64_t base); 获取下一个输入样本相对于下一个输出样本所经历的延迟帧数。
- int swr_convert(struct SwrContext *s, uint8_t * const *out, int out_count, const uint8_t * const *in, int in_count); 音频重采样。in 和 out 是由 av_samples_alloc_array_and_samples 生成的 data 缓冲区。in_count 和 out_count 则是对应缓冲区的大小，以每个通道的样本数为单位。
- int av_samples_get_buffer_size(int *linesize, int nb_channels, int nb_samples, enum AVSampleFormat sample_fmt, int align); 获取给定音频参数所需的缓冲区大小。
tips
- swr 是 software resample 的缩写。
- nb_samples（样本数）表示每帧的每个通道中的采样点数。
- 重采样的三个关键参数：采样率、采样格式、声道布局。
- 音频的 planar（平面）格式的数据是分在多个数组中的，例如左右声道时，data[0] 中存放 L 声道的数据，data[1] 中存放 R 声道的数据。而交错模式的数据则是按照 LRLR… 的顺序统一放到 data[0] 中的。
- av_freep 要取地址的原因是要将指针置空，仅此而已。
- 老版本的 FFmpeg（例如 ffmpeg-4.2）下，音频声道数只是一个单一的 int 型字段。而新版本的 FFmpeg（以 ffmpeg-7.0 为例）则是将音频声道信息封装为一个 AVChannelLayout 结构体了。所以在设置 'layout' 字段时，不能再用 av_opt_set_int 接口，而是要用 av_opt_set_chlayout，name 参数也要使用 in_chlayout（输出侧为 out_chlayout）才行。
demo样例
重采样样例参考Examples - resample_audio.c
#include iostream
#include fstream
#include string
#include cmath
using namespace std;extern C
{
#include libavutil/opt.h
#include libavutil/channel_layout.h
#include libavutil/samplefmt.h
#include libswresample/swresample.h
}/* format转字符串 */
string string_sample_fmt(enum AVSampleFormat sample_fmt)
{// 定义sample_fmt_entry结构体同时定义了一个数组struct sample_fmt_entry{enum AVSampleFormat sample_fmt; const char *fmt_be, *fmt_le;} sample_fmt_entries[] {{ AV_SAMPLE_FMT_U8, u8, u8 },{ AV_SAMPLE_FMT_S16, s16be, s16le },{ AV_SAMPLE_FMT_S32, s32be, s32le },{ AV_SAMPLE_FMT_FLT, f32be, f32le },{ AV_SAMPLE_FMT_DBL, f64be, f64le },};// 返回字符串const char* str_fmt nullptr;int arr_len FF_ARRAY_ELEMS(sample_fmt_entries);for (int i 0; i arr_len; i){auto entry sample_fmt_entries[i];if (sample_fmt entry.sample_fmt){return AV_NE(entry.fmt_be, entry.fmt_le);}}
}/*** Fill dst buffer with nb_samples, generated starting from t.* 交错模式函数摘自https://ffmpeg.org/doxygen/7.0/resample_audio_8c-example.html* sin曲线t表示当前所在的相位周期为一帧所持续的时间*/
void fill_samples(double *dst, int nb_samples, int nb_channels, int sample_rate, double *t)
{int i, j;double tincr 1.0 / sample_rate, *dstp dst;const double c 2 * M_PI * 440.0;/* generate sin tone with 440Hz frequency and duplicated channels */for (i 0; i nb_samples; i) {*dstp sin(c * *t);for (j 1; j nb_channels; j)dstp[j] dstp[0];dstp nb_channels;*t tincr;}
}int main()
{/* 采样参数定义 */// 输入参数int src_sample_rate 48000;enum AVSampleFormat src_sample_fmt AV_SAMPLE_FMT_DBL;AVChannelLayout src_ch_layout AV_CHANNEL_LAYOUT_STEREO; // 立体声// 输出参数int dst_sample_rate 44100;enum AVSampleFormat dst_sample_fmt AV_SAMPLE_FMT_S16;AVChannelLayout dst_ch_layout AV_CHANNEL_LAYOUT_STEREO; // 立体声// 创建重采样器上下文暂且认为不会失败SwrContext *swr_ctx swr_alloc();/* 参数设置SwrContext字段设置 */// 输入参数check_optset(av_opt_set_int(swr_ctx, in_sample_rate, src_sample_rate, 0), __LINE__);check_optset(av_opt_set_sample_fmt(swr_ctx, in_sample_fmt, src_sample_fmt, 0), __LINE__);check_optset(av_opt_set_chlayout(swr_ctx, in_chlayout, src_ch_layout, 0), __LINE__);// 输出参数check_optset(av_opt_set_int(swr_ctx, out_sample_rate, dst_sample_rate, 0), __LINE__);check_optset(av_opt_set_sample_fmt(swr_ctx, out_sample_fmt, dst_sample_fmt, 0), __LINE__);check_optset(av_opt_set_chlayout(swr_ctx, out_chlayout, dst_ch_layout, 0), __LINE__);// 参数设置完成后初始化上下文swr_init(swr_ctx);// 给输入源分配内存空间uint8_t **src_data nullptr;int src_linesize;int src_nb_samples 1024; // 每个通道的样本数av_samples_alloc_array_and_samples(src_data, src_linesize, src_ch_layout.nb_channels,src_nb_samples, src_sample_fmt, 0);// 给输出源分配内存空间uint8_t **dst_data;int dst_linesize;// 计算输出的信道样本数a * b / cAV_ROUND_UP表示向上取整int dst_nb_samples av_rescale_rnd(src_nb_samples, dst_sample_rate, src_sample_rate, AV_ROUND_UP);// 分配空间av_samples_alloc_array_and_samples(dst_data, dst_linesize, dst_ch_layout.nb_channels,dst_nb_samples, dst_sample_fmt, 0);// 采样转换double t 0; // 时间以输入源的时间为基准int max_nb_samples dst_nb_samples;string dst_file_name out.pcm;ofstream dst_file(dst_file_name, ios_base::out | ios_base::binary);while(t 10){// 生成输入源模拟fill_samples((double*)src_data[0], src_nb_samples, src_ch_layout.nb_channels, src_sample_rate, t);// 获取延迟dst音频相对src音频延迟的帧数int64_t delay swr_get_delay(swr_ctx, src_sample_rate);// 输出的信道样本数a * b / cdst_nb_samples av_rescale(delay src_nb_samples, dst_sample_rate, src_sample_rate);// 如果输出缓冲区大小不够重新申请空间if(dst_nb_samples max_nb_samples){// 
重新申请空间av_freep(dst_data[0]);av_samples_alloc(dst_data, dst_linesize, dst_ch_layout.nb_channels,dst_nb_samples, dst_sample_fmt, 1);max_nb_samples dst_nb_samples;}// 音频重采样int ret swr_convert(swr_ctx, dst_data, dst_nb_samples,(const uint8_t **)src_data, src_nb_samples);// 获取给定音频参数所需的缓冲区大小。int dst_buf_size av_samples_get_buffer_size(dst_linesize, dst_ch_layout.nb_channels,ret, dst_sample_fmt, 1);// writedst_file.write((char*)dst_data[0], dst_buf_size);}// clear and exit// TODO
} 附录 - SwrContext结构体字段
版本ffmpeg-7.0
struct SwrContext {const AVClass *av_class; /// AVClass used for AVOption and av_log()int log_level_offset; /// logging level offsetvoid *log_ctx; /// parent logging contextenum AVSampleFormat in_sample_fmt; /// input sample formatenum AVSampleFormat int_sample_fmt; /// internal sample format (AV_SAMPLE_FMT_FLTP or AV_SAMPLE_FMT_S16P)enum AVSampleFormat out_sample_fmt; /// output sample formatAVChannelLayout used_ch_layout; /// number of used input channels (mapped channel count if channel_map, otherwise in.ch_count)AVChannelLayout in_ch_layout; /// input channel layoutAVChannelLayout out_ch_layout; /// output channel layoutint in_sample_rate; /// input sample rateint out_sample_rate; /// output sample rateint flags; /// miscellaneous flags such as SWR_FLAG_RESAMPLEfloat slev; /// surround mixing levelfloat clev; /// center mixing levelfloat lfe_mix_level; /// LFE mixing levelfloat rematrix_volume; /// rematrixing volume coefficientfloat rematrix_maxval; /// maximum value for rematrixing outputint matrix_encoding; /** matrixed stereo encoding */const int *channel_map; /// channel index (or -1 if muted channel) mapint engine;AVChannelLayout user_used_chlayout; /// User set used channel layoutAVChannelLayout user_in_chlayout; /// User set input channel layoutAVChannelLayout user_out_chlayout; /// User set output channel layoutenum AVSampleFormat user_int_sample_fmt; /// User set internal sample formatint user_dither_method; /// User set dither methodstruct DitherContext dither;int filter_size; /** length of each FIR filter in the resampling filterbank relative to the cutoff frequency */int phase_shift; /** log2 of the number of entries in the resampling polyphase filterbank */int linear_interp; /** if 1 then the resampling FIR filter will be linearly interpolated */int exact_rational; /** if 1 then enable non power of 2 phase_count */double cutoff; /** resampling cutoff frequency (swr: 6dB point; soxr: 0dB point). 
1.0 corresponds to half the output sample rate */int filter_type; /** swr resampling filter type */double kaiser_beta; /** swr beta value for Kaiser window (only applicable if filter_type AV_FILTER_TYPE_KAISER) */double precision; /** soxr resampling precision (in bits) */int cheby; /** soxr: if 1 then passband rolloff will be none (Chebyshev) irrational ratio approximation precision will be higher */float min_compensation; /// swr minimum below which no compensation will happenfloat min_hard_compensation; /// swr minimum below which no silence inject / sample drop will happenfloat soft_compensation_duration; /// swr duration over which soft compensation is appliedfloat max_soft_compensation; /// swr maximum soft compensation in seconds over soft_compensation_durationfloat async; /// swr simple 1 parameter async, similar to ffmpegs -asyncint64_t firstpts_in_samples; /// swr first pts in samplesint resample_first; /// 1 if resampling must come first, 0 if rematrixingint rematrix; /// flag to indicate if rematrixing is needed (basically if input and output layouts mismatch)int rematrix_custom; /// flag to indicate that a custom matrix has been definedAudioData in; /// input audio dataAudioData postin; /// post-input audio data: used for rematrix/resampleAudioData midbuf; /// intermediate audio data (postin/preout)AudioData preout; /// pre-output audio data: used for rematrix/resampleAudioData out; /// converted output audio dataAudioData in_buffer; /// cached audio data (convert and resample purpose)AudioData silence; /// temporary with silenceAudioData drop_temp; /// temporary used to discard outputint in_buffer_index; /// cached buffer positionint in_buffer_count; /// cached buffer lengthint resample_in_constraint; /// 1 if the input end was reach before the output end, 0 otherwiseint flushed; /// 1 if data is to be flushed and no further input is expectedint64_t outpts; /// output PTSint64_t firstpts; /// first PTSint drop_output; /// number of output samples to 
dropdouble delayed_samples_fixup; /// soxr 0.1.1: needed to fixup delayed_samples after flush has been called.struct AudioConvert *in_convert; /// input conversion contextstruct AudioConvert *out_convert; /// output conversion contextstruct AudioConvert *full_convert; /// full conversion context (single conversion for input and output)struct ResampleContext *resample; /// resampling contextstruct Resampler const *resampler; /// resampler virtual function tabledouble matrix[SWR_CH_MAX][SWR_CH_MAX]; /// floating point rematrixing coefficientsfloat matrix_flt[SWR_CH_MAX][SWR_CH_MAX]; /// single precision floating point rematrixing coefficientsuint8_t *native_matrix;uint8_t *native_one;uint8_t *native_simd_one;uint8_t *native_simd_matrix;int32_t matrix32[SWR_CH_MAX][SWR_CH_MAX]; /// 17.15 fixed point rematrixing coefficientsuint8_t matrix_ch[SWR_CH_MAX][SWR_CH_MAX1]; /// Lists of input channels per output channel that have non zero rematrixing coefficientsmix_1_1_func_type *mix_1_1_f;mix_1_1_func_type *mix_1_1_simd;mix_2_1_func_type *mix_2_1_f;mix_2_1_func_type *mix_2_1_simd;mix_any_func_type *mix_any_f;/* TODO: callbacks for ASM optimizations */
};