After hours browsing the Internet, I couldn’t get my hands on a working tutorial to simply extract audio samples from a file using libavcodec. So here’s to you, a full working example!
Warning
FFmpeg and the libraries it depends on are updated often, and their API can change drastically between versions. This code works with the libraries available under Ubuntu 16.04, but it might become outdated in the future, or cause trouble with earlier versions.
Only one way to know: try it!
Installation
First, you need to set up the required libraries. Let’s use apt-get
to perform this easily:
apt-get install libavcodec-dev libavformat-dev libavutil-dev libswresample-dev
The code
The function decode_audio_file
takes 4 parameters:
- path: the path of the file to decode
- sample_rate: the desired sample rate for the output data
- data: a pointer to a pointer to double-precision values, where the extracted data will be stored
- size: a pointer to the length of the final extracted values array (number of samples)
It returns 0 upon success and -1 in case of failure, along with an error message written to the stderr stream.
#include <stdio.h> #include <stdlib.h> #include <libavutil/opt.h> #include <libavcodec/avcodec.h> #include <libavformat/avformat.h> #include <libswresample/swresample.h> int decode_audio_file(const char* path, const int sample_rate, double** data, int* size) { // initialize all muxers, demuxers and protocols for libavformat // (does nothing if called twice during the course of one program execution) av_register_all(); // get format from audio file AVFormatContext* format = avformat_alloc_context(); if (avformat_open_input(&format, path, NULL, NULL) != 0) { fprintf(stderr, "Could not open file '%s'\n", path); return -1; } if (avformat_find_stream_info(format, NULL) < 0) { fprintf(stderr, "Could not retrieve stream info from file '%s'\n", path); return -1; } // Find the index of the first audio stream int stream_index =- 1; for (int i=0; i<format->nb_streams; i++) { if (format->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO) { stream_index = i; break; } } if (stream_index == -1) { fprintf(stderr, "Could not retrieve audio stream from file '%s'\n", path); return -1; } AVStream* stream = format->streams[stream_index]; // find & open codec AVCodecContext* codec = stream->codec; if (avcodec_open2(codec, avcodec_find_decoder(codec->codec_id), NULL) < 0) { fprintf(stderr, "Failed to open decoder for stream #%u in file '%s'\n", stream_index, path); return -1; } // prepare resampler struct SwrContext* swr = swr_alloc(); av_opt_set_int(swr, "in_channel_count", codec->channels, 0); av_opt_set_int(swr, "out_channel_count", 1, 0); av_opt_set_int(swr, "in_channel_layout", codec->channel_layout, 0); av_opt_set_int(swr, "out_channel_layout", AV_CH_LAYOUT_MONO, 0); av_opt_set_int(swr, "in_sample_rate", codec->sample_rate, 0); av_opt_set_int(swr, "out_sample_rate", sample_rate, 0); av_opt_set_sample_fmt(swr, "in_sample_fmt", codec->sample_fmt, 0); av_opt_set_sample_fmt(swr, "out_sample_fmt", AV_SAMPLE_FMT_DBL, 0); swr_init(swr); if (!swr_is_initialized(swr)) { fprintf(stderr, 
"Resampler has not been properly initialized\n"); return -1; } // prepare to read data AVPacket packet; av_init_packet(&packet); AVFrame* frame = av_frame_alloc(); if (!frame) { fprintf(stderr, "Error allocating the frame\n"); return -1; } // iterate through frames *data = NULL; *size = 0; while (av_read_frame(format, &packet) >= 0) { // decode one frame int gotFrame; if (avcodec_decode_audio4(codec, frame, &gotFrame, &packet) < 0) { break; } if (!gotFrame) { continue; } // resample frames double* buffer; av_samples_alloc((uint8_t**) &buffer, NULL, 1, frame->nb_samples, AV_SAMPLE_FMT_DBL, 0); int frame_count = swr_convert(swr, (uint8_t**) &buffer, frame->nb_samples, (const uint8_t**) frame->data, frame->nb_samples); // append resampled frames to data *data = (double*) realloc(*data, (*size + frame->nb_samples) * sizeof(double)); memcpy(*data + *size, buffer, frame_count * sizeof(double)); *size += frame_count; } // clean up av_frame_free(&frame); swr_free(&swr); avcodec_close(codec); avformat_free_context(format); // success return 0; } |
Compilation
You will need the following flags to compile a program that uses this function: -lavcodec-ffmpeg -lavformat-ffmpeg -lavutil -lswresample.
Depending on your system and installation, it could also be: -lavcodec -lavformat -lavutil -lswresample.
Usage
This program takes its first command-line argument as the path to an audio file, decodes it, and outputs the sum of all the decoded sample values:
/*
 * Decode the audio file named by the first command-line argument at 44100 Hz
 * and print the sum of all decoded sample values.
 * Exits with EXIT_FAILURE when no path is given or decoding fails.
 */
int main(int argc, char const *argv[])
{
    // check parameters
    if (argc < 2) {
        fprintf(stderr, "Please provide the path to an audio file as first command-line argument.\n");
        return EXIT_FAILURE;
    }

    // decode data; the out-parameters start in a known state so that a
    // failed decode can never leave them holding garbage
    int sample_rate = 44100;
    double *data = NULL;
    int size = 0;
    if (decode_audio_file(argv[1], sample_rate, &data, &size) != 0) {
        free(data);
        return EXIT_FAILURE;
    }

    // sum data
    double sum = 0.0;
    for (int i = 0; i < size; ++i) {
        sum += data[i];
    }

    // display result and exit cleanly
    printf("sum is %f\n", sum);
    free(data);
    return 0;
}