Libavcodec tutorial: decode virtually any audio file in C/C++

After hours of browsing the Internet, I couldn’t find a working tutorial that simply extracts audio samples from a file using libavcodec. So here it is: a full working example!

Warning

FFMPEG and the libraries it depends on are often updated, and their API can drastically change between versions. This code works with the libraries available under Ubuntu 16.04, but might become outdated in the future, or cause trouble with earlier versions.

Only one way to know: try it!

Installation

First, you need to set up the required libraries. Let’s use apt-get to perform this easily:

apt-get install libavcodec-dev libavformat-dev libavutil-dev libswresample-dev

The code

The function decode_audio_file takes 4 parameters:

  • path: the path of the file to decode
  • sample_rate: the desired sample rate for the output data
  • data: a pointer to a pointer to double precision values, where the extracted data will be stored (the array is allocated by the function; the caller must release it with free())
  • size: a pointer to the length of the final extracted values array (number of samples)

It returns 0 upon success and -1 in case of failure, along with an error message written to the stderr stream.

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
 
#include <libavutil/opt.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libswresample/swresample.h>
 
 
int decode_audio_file(const char* path, const int sample_rate, double** data, int* size) {
 
    // initialize all muxers, demuxers and protocols for libavformat
    // (does nothing if called twice during the course of one program execution)
    av_register_all();
 
    // get format from audio file
    AVFormatContext* format = avformat_alloc_context();
    if (avformat_open_input(&format, path, NULL, NULL) != 0) {
        fprintf(stderr, "Could not open file '%s'\n", path);
        return -1;
    }
    if (avformat_find_stream_info(format, NULL) < 0) {
        fprintf(stderr, "Could not retrieve stream info from file '%s'\n", path);
        return -1;
    }
 
    // Find the index of the first audio stream
    int stream_index =- 1;
    for (int i=0; i<format->nb_streams; i++) {
        if (format->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
            stream_index = i;
            break;
        }
    }
    if (stream_index == -1) {
        fprintf(stderr, "Could not retrieve audio stream from file '%s'\n", path);
        return -1;
    }
    AVStream* stream = format->streams[stream_index];
 
    // find & open codec
    AVCodecContext* codec = stream->codec;
    if (avcodec_open2(codec, avcodec_find_decoder(codec->codec_id), NULL) < 0) {
        fprintf(stderr, "Failed to open decoder for stream #%u in file '%s'\n", stream_index, path);
        return -1;
    }
 
    // prepare resampler
    struct SwrContext* swr = swr_alloc();
    av_opt_set_int(swr, "in_channel_count",  codec->channels, 0);
    av_opt_set_int(swr, "out_channel_count", 1, 0);
    av_opt_set_int(swr, "in_channel_layout",  codec->channel_layout, 0);
    av_opt_set_int(swr, "out_channel_layout", AV_CH_LAYOUT_MONO, 0);
    av_opt_set_int(swr, "in_sample_rate", codec->sample_rate, 0);
    av_opt_set_int(swr, "out_sample_rate", sample_rate, 0);
    av_opt_set_sample_fmt(swr, "in_sample_fmt",  codec->sample_fmt, 0);
    av_opt_set_sample_fmt(swr, "out_sample_fmt", AV_SAMPLE_FMT_DBL,  0);
    swr_init(swr);
    if (!swr_is_initialized(swr)) {
        fprintf(stderr, "Resampler has not been properly initialized\n");
        return -1;
    }
 
    // prepare to read data
    AVPacket packet;
    av_init_packet(&packet);
    AVFrame* frame = av_frame_alloc();
	if (!frame) {
		fprintf(stderr, "Error allocating the frame\n");
		return -1;
	}
 
    // iterate through frames
    *data = NULL;
    *size = 0;
    while (av_read_frame(format, &packet) >= 0) {
        // decode one frame
        int gotFrame;
        if (avcodec_decode_audio4(codec, frame, &gotFrame, &packet) < 0) {
            break;
        }
        if (!gotFrame) {
            continue;
        }
        // resample frames
        double* buffer;
        av_samples_alloc((uint8_t**) &buffer, NULL, 1, frame->nb_samples, AV_SAMPLE_FMT_DBL, 0);
        int frame_count = swr_convert(swr, (uint8_t**) &buffer, frame->nb_samples, (const uint8_t**) frame->data, frame->nb_samples);
        // append resampled frames to data
        *data = (double*) realloc(*data, (*size + frame->nb_samples) * sizeof(double));
        memcpy(*data + *size, buffer, frame_count * sizeof(double));
        *size += frame_count;
    }
 
    // clean up
    av_frame_free(&frame);
    swr_free(&swr);
    avcodec_close(codec);
    avformat_free_context(format);
 
    // success
    return 0;
 
}

Compilation

You will need the following flags to compile a program that uses this function: -lavcodec-ffmpeg -lavformat-ffmpeg -lavutil -lswresample

Depending on your system and installation, the flags could instead be: -lavcodec -lavformat -lavutil -lswresample

Usage

This program takes the first command-line argument it is given as the path to an audio file, decodes it, and prints the sum of all the decoded sample values:

/*
 * Command-line driver: decodes the audio file named by the first argument at
 * 44100 Hz and prints the sum of all decoded samples.
 * Exits with 0 on success, -1 when the argument is missing or decoding fails.
 */
int main(int argc, char const *argv[]) {

    // a file path is mandatory
    if (argc <= 1) {
        fprintf(stderr, "Please provide the path to an audio file as first command-line argument.\n");
        return -1;
    }

    // extract the samples
    const int rate = 44100;
    double* samples;
    int count;
    const int status = decode_audio_file(argv[1], rate, &samples, &count);
    if (status != 0) {
        return -1;
    }

    // accumulate every sample, in order
    double total = 0.0;
    int k = 0;
    while (k < count) {
        total += samples[k];
        ++k;
    }

    // report, release the samples, done
    printf("sum is %f", total);
    free(samples);
    return 0;
}

Leave a Comment


Warning: Unknown: open(/var/lib/php5/sessions/sess_ue40j1d9nja7ibuokmh63iddb5, O_RDWR) failed: Permission denied (13) in Unknown on line 0

Warning: Unknown: Failed to write session data (files). Please verify that the current setting of session.save_path is correct (/var/lib/php5/sessions) in Unknown on line 0