/* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- * * This tool used OpenAL and fftw to compute the spectrogram for an audio * stream and print it to the standard output. To preprocess the data, Hamming * window function is used. For stereo, the tool only uses the first channel. * * For identical values of parameters the program is known to provide output * which matches the output of MATLAB R2009b's spectrogram() function. * * The program outputs the raw complex values for the spectrogram in CSV * format. See consume_fft() function if you want to do something else with the * spectrogram. * * For available options, either consult "./alsgram --help" or the source code. * * Author: Vlad Sukhoy * * The author disclaims copyright to this source code. * * To use this program you will need: * 1. An OpenAL implementation. * 2. An ALUT implementation. * 4. fftw with major version 3. * 3. GNU argp (typically included in libc). * * To build from shell, invoke * $ gcc alsgram.c -lopenal -lalut -lfftw3 -o alsgram * * This program is known to compile and work on a GNU/Linux system with: * openal-soft 1.11.753 (implementation of OpenAL) and freealut 1.1.0 * (implementation of ALUT) and fftw 3.1.2. * * You might need to modify the code to run on either non-Linux OS or on a * processor with an instruction set that is not derived from x86 or to compile * it with non-gcc compiler. * * For documentation on OpenAL and ALUT, see * http://connect.creativelabs.com/openal/ * For documentation on fftw, see * http://www.fftw.org/ */ #include #include /* for argp_parse() */ #include #include #include #include /* for intN_t types */ #include /* for memmove() */ #include /* data structure to hold global settings of the program */ static struct { const char *input; const char *output; int printversion; int verbose; int nfft; int window; int noverlap; } settings = { /* input = */ "/dev/stdin", /* output = */ "/dev/stdout", /* printversion = */ 0, /* verbose = */ 0, /* nfft = */ 256, /* window = */ 256, /* noverlap = */ -1}; enum { key_nfft = 0x1000, key_window, key_noverlap }; const char *version = "0.71.70"; /* the program is a part of rc suite of software for the robot, the version is the version of the whole suite. */ static char *about = "alsgram -- the program to compute and output the \ spectrogram for audio data"; static struct argp_option opts[] = { {"input", 'i', "PATH", 0, "Path to the input audio file. The default is to \ read audio data from the standard input."}, {"output", 'o', "PATH", 0, "Path to the output. The default is to write \ the FFT to the standard output."}, {"version", 'V', 0, 0, "Print version and exit."}, {"verbose", 'v', 0, 0, "Be verbose."}, {"nfft", key_nfft, "NUMBER", 0, "Number of frequency bins in FFT. \ The default is 256."}, {"window", key_window, "NUMBER", 0, "FFT window size. The default is 256."}, {"noverlap", key_noverlap, "NUMBER", 0, "Amount of overlap between FFT \ windows. The default is one half of the window."}, {}}; /* callback to GNU argp to process the command line options */ static error_t parse_opt(int key, char *arg, struct argp_state *state) { switch (key) { case 'i': settings.input = arg; break; case 'o': settings.output = arg; break; case 'V': settings.printversion = 1; break; case 'v': settings.verbose = 1; break; case key_nfft: settings.nfft = atoi(arg); break; case key_window: settings.window = atoi(arg); break; case key_noverlap: settings.noverlap = atoi(arg); break; default: return ARGP_ERR_UNKNOWN; }; return 0; } static void report_alut_error() { fprintf(stderr, "ALUT error: %s\n", alutGetErrorString(alutGetError())); } /* convert OpenAL audio format to a human-readable string */ static const char* alfmt2str(ALenum fmt) { switch (fmt) { case AL_FORMAT_MONO8: return "8-bit mono"; case AL_FORMAT_MONO16: return "16-bit mono"; case AL_FORMAT_STEREO8: return "8-bit stereo"; case AL_FORMAT_STEREO16: return "16-bit stereo"; default: assert(0); /* unknown OpenAL format */ return "UNKNOWN"; }; } /* given the number of bytes in the audio and its format, compute the number of samples */ static ALsizei alnsamps(ALsizei nbytes, ALenum fmt) { assert(nbytes >= 0); switch (fmt) { case AL_FORMAT_MONO8: return nbytes; case AL_FORMAT_MONO16: case AL_FORMAT_STEREO8: assert(!(nbytes % 2)); /* no dangling bytes */ return nbytes / 2; case AL_FORMAT_STEREO16: assert(!(nbytes % 4)); /* no dangling bytes */ return nbytes / 4; default: assert(0); /* unknown OpenAL format */ return 0; } } /* data structure for one sample that's more uniform than native formats */ struct sample { double ch[2]; /* the data is normalized into range [-1, 1] */ }; /* get the next sample from a pointer into the stream. The pointer is advanced. */ static inline struct sample next_sample(ALvoid **p, ALenum fmt) { struct sample rv; static const union { unsigned char c[8]; double d; } nan = {{0, 0, 0, 0, 0, 0, 0xf8, 0x7f}}; switch (fmt) { case AL_FORMAT_MONO8: { uint8_t *samp = *p; rv.ch[0] = ((*samp++) - 128.0) / 128.0; rv.ch[1] = nan.d; *p = samp; } break; case AL_FORMAT_MONO16: { int16_t *samp = *p; rv.ch[0] = (*samp++) / 32768.0; rv.ch[1] = nan.d; *p = samp; } break; case AL_FORMAT_STEREO8: { uint8_t *samp = *p; rv.ch[0] = ((*samp++) - 128.0) / 128.0; rv.ch[1] = ((*samp++) - 128.0) / 128.0; *p = samp; } break; case AL_FORMAT_STEREO16: { int16_t *samp = *p; rv.ch[0] = (*samp++) / 32768.0; rv.ch[1] = (*samp++) / 32768.0; *p = samp; } break; default: rv.ch[0] = rv.ch[1] = nan.d; }; return rv; } /* Computes the Hamming window function for a window of size N */ static inline double hamming(unsigned int n, const unsigned int N) { return 0.54 - 0.46*cos(2*M_PI*n/(N - 1)); } /* Consumes the output of FFT. Replace with your own code to work with it */ static void consume_fft(fftw_complex *fft_out, int n) { int i; for (i=0; i 0) { process leftovers for (i %= settings.nfft; i < settings.nfft; ++i) fft_in[i] = 0.0; fftw_execute(*plan); consume_fft(fft_out, settings.nfft/2 + 1); }*/ } int main(int argc, char **argv) { struct argp ap = {opts, parse_opt, 0, about}; argp_parse(&ap, argc, argv, 0, 0, &settings); if (settings.printversion) { printf("Version: %s\n", version); return 0; } alutInit(&argc, argv); ALenum fmt = 0; ALsizei nbytes = 0, nsamps; ALfloat freq = 0; ALvoid *data = alutLoadMemoryFromFile(settings.input, &fmt, &nbytes, &freq); if (!data) { report_alut_error(); return 1; } nsamps = alnsamps(nbytes, fmt); if (settings.verbose) { printf("Format:\t\t%s\n", alfmt2str(fmt)); printf("Frequency:\t%.3f kHz\n", freq/1000.0); printf("Total samples:\t%d\n", nsamps); } ALsizei sidx; ALvoid *p = data; /* compute proper value for the window overlap */ int noverlap = (settings.noverlap>=0) ? settings.noverlap:(settings.window/2); if (noverlap > settings.window) { fprintf(stderr, "Invalid value for overlap: %d\n", noverlap); return 1; } double win[settings.window]; /* use gcc's volatile size var on stack */ /* fftw_malloc() produces nice aligned pointers useful for SIMD in FFT */ double *fft_in = fftw_malloc(sizeof(double)*settings.nfft); fftw_complex *fft_out = fftw_malloc(sizeof(fftw_complex)*(settings.nfft/2+1)); /* if you are here, you definitely need a plan */ fftw_plan plan = fftw_plan_dft_r2c_1d(settings.nfft, fft_in, fft_out, FFTW_ESTIMATE); int winsamps = 0; /* number of samples in the window */ for (sidx=0; sidx 0) process_window(win, winsamps, fft_in, fft_out, &plan); /* cleanup */ fftw_free(fft_in); fftw_free(fft_out); free(data); alutExit(); return 0; }