I have run this FFT algorithm on a 440Hz audio file. But I get an unexpected sound frequency: 510Hz.
- Is the
byteArraycontaining .wav correctly converted into 2 double arrays (Re & Im parts)? The imaginary array contains only 0. - I assume that the highest sound frequency is the maximum of
xRearray: please look at the very end of therun()function? Maybe that is my mistake: is it average or something like that?
What is the problem then?
UPDATED: The biggest sum Re+Im is at index = 0 so I get frequency = 0;
Whole project: contains .wav -> just open and run.
using System;
using System.Net;
using System.IO;
namespace FFT {
/**
* Performs an in-place complex FFT.
*
* Released under the MIT License
*
* Copyright (c) 2010 Gerald T. Beauregard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
public class FFT2 {
// Element for linked list in which we store the
// input/output data. We use a linked list because
// for sequential access it's faster than array index.
class FFTElement {
public double re = 0.0; // Real component
public double im = 0.0; // Imaginary component
public FFTElement next; // Next element in linked list
public uint revTgt; // Target position post bit-reversal
}
private static int sampleRate;
private uint m_logN = 0; // log2 of FFT size
private uint m_N = 0; // FFT size
private FFTElement[] m_X; // Vector of linked list elements
/**
*
*/
public FFT2() {
}
/**
* Initialize class to perform FFT of specified size.
*
* @param logN Log2 of FFT length. e.g. for 512 pt FFT, logN = 9.
*/
public void init(uint logN) {
m_logN = logN;
m_N = (uint)(1 << (int)m_logN);
// Allocate elements for linked list of complex numbers.
m_X = new FFTElement[m_N];
for (uint k = 0; k < m_N; k++)
m_X[k] = new FFTElement();
// Set up "next" pointers.
for (uint k = 0; k < m_N - 1; k++)
m_X[k].next = m_X[k + 1];
// Specify target for bit reversal re-ordering.
for (uint k = 0; k < m_N; k++)
m_X[k].revTgt = BitReverse(k, logN);
}
/**
* Performs in-place complex FFT.
*
* @param xRe Real part of input/output
* @param xIm Imaginary part of input/output
* @param inverse If true, do an inverse FFT
*/
public void run(double[] xRe, double[] xIm, bool inverse = false) {
uint numFlies = m_N >> 1; // Number of butterflies per sub-FFT
uint span = m_N >> 1; // Width of the butterfly
uint spacing = m_N; // Distance between start of sub-FFTs
uint wIndexStep = 1; // Increment for twiddle table index
// Copy data into linked complex number objects
// If it's an IFFT, we divide by N while we're at it
FFTElement x = m_X[0];
uint k = 0;
double scale = inverse ? 1.0 / m_N : 1.0;
while (x != null) {
x.re = scale * xRe[k];
x.im = scale * xIm[k];
x = x.next;
k++;
}
// For each stage of the FFT
for (uint stage = 0; stage < m_logN; stage++) {
// Compute a multiplier factor for the "twiddle factors".
// The twiddle factors are complex unit vectors spaced at
// regular angular intervals. The angle by which the twiddle
// factor advances depends on the FFT stage. In many FFT
// implementations the twiddle factors are cached, but because
// array lookup is relatively slow in C#, it's just
// as fast to compute them on the fly.
double wAngleInc = wIndexStep * 2.0 * Math.PI / m_N;
if (inverse == false)
wAngleInc *= -1;
double wMulRe = Math.Cos(wAngleInc);
double wMulIm = Math.Sin(wAngleInc);
for (uint start = 0; start < m_N; start += spacing) {
FFTElement xTop = m_X[start];
FFTElement xBot = m_X[start + span];
double wRe = 1.0;
double wIm = 0.0;
// For each butterfly in this stage
for (uint flyCount = 0; flyCount < numFlies; ++flyCount) {
// Get the top & bottom values
double xTopRe = xTop.re;
double xTopIm = xTop.im;
double xBotRe = xBot.re;
double xBotIm = xBot.im;
// Top branch of butterfly has addition
xTop.re = xTopRe + xBotRe;
xTop.im = xTopIm + xBotIm;
// Bottom branch of butterly has subtraction,
// followed by multiplication by twiddle factor
xBotRe = xTopRe - xBotRe;
xBotIm = xTopIm - xBotIm;
xBot.re = xBotRe * wRe - xBotIm * wIm;
xBot.im = xBotRe * wIm + xBotIm * wRe;
// Advance butterfly to next top & bottom positions
xTop = xTop.next;
xBot = xBot.next;
// Update the twiddle factor, via complex multiply
// by unit vector with the appropriate angle
// (wRe + j wIm) = (wRe + j wIm) x (wMulRe + j wMulIm)
double tRe = wRe;
wRe = wRe * wMulRe - wIm * wMulIm;
wIm = tRe * wMulIm + wIm * wMulRe;
}
}
numFlies >>= 1; // Divide by 2 by right shift
span >>= 1;
spacing >>= 1;
wIndexStep <<= 1; // Multiply by 2 by left shift
}
// The algorithm leaves the result in a scrambled order.
// Unscramble while copying values from the complex
// linked list elements back to the input/output vectors.
x = m_X[0];
while (x != null) {
uint target = x.revTgt;
xRe[target] = x.re;
xIm[target] = x.im;
x = x.next;
}
//looking for max IS THIS IS FREQUENCY
double max = 0, index = 0;
for (int i = 0; i < xRe.Length; i++) {
if (xRe[i] + xIm[i] > max) {
max = xRe[i]*xRe[i] + xIm[i]*xIm[i];
index = i;
}
}
max = Math.Sqrt(max);
/* if the peak is at bin index i then the corresponding
frequency will be i * Fs / N whe Fs is the sample rate in Hz and N is the FFT size.*/
//DONT KNOW WHY THE BIGGEST VALUE IS IN THE BEGINNING
Console.WriteLine("max "+ max+" index " + index + " m_logN" + m_logN + " " + xRe[0]);
max = index * sampleRate / m_logN;
Console.WriteLine("max " + max);
}
/**
* Do bit reversal of specified number of places of an int
* For example, 1101 bit-reversed is 1011
*
* @param x Number to be bit-reverse.
* @param numBits Number of bits in the number.
*/
private uint BitReverse(
uint x,
uint numBits) {
uint y = 0;
for (uint i = 0; i < numBits; i++) {
y <<= 1;
y |= x & 0x0001;
x >>= 1;
}
return y;
}
public static void Main(String[] args) {
// BinaryReader reader = new BinaryReader(System.IO.File.OpenRead(@"C:\Users\Duke\Desktop\e.wav"));
BinaryReader reader = new BinaryReader(File.Open(@"440.wav", FileMode.Open));
//Read the wave file header from the buffer.
int chunkID = reader.ReadInt32();
int fileSize = reader.ReadInt32();
int riffType = reader.ReadInt32();
int fmtID = reader.ReadInt32();
int fmtSize = reader.ReadInt32();
int fmtCode = reader.ReadInt16();
int channels = reader.ReadInt16();
sampleRate = reader.ReadInt32();
int fmtAvgBPS = reader.ReadInt32();
int fmtBlockAlign = reader.ReadInt16();
int bitDepth = reader.ReadInt16();
if (fmtSize == 18) {
// Read any extra values
int fmtExtraSize = reader.ReadInt16();
reader.ReadBytes(fmtExtraSize);
}
int dataID = reader.ReadInt32();
int dataSize = reader.ReadInt32();
// Store the audio data of the wave file to a byte array.
byte[] byteArray = reader.ReadBytes(dataSize);
/* for (int i = 0; i < byteArray.Length; i++) {
Console.Write(byteArray[i] + " ");
}*/
byte[] data = byteArray;
double[] arrRe = new double[data.Length];
for (int i = 0; i < arrRe.Length; i++) {
arrRe[i] = data[i] / 32768.0;
}
double[] arrI = new double[data.Length];
for (int i = 0; i < arrRe.Length; i++) {
arrI[i] = 0;
}
/**
* Initialize class to perform FFT of specified size.
*
* @param logN Log2 of FFT length. e.g. for 512 pt FFT, logN = 9.
*/
Console.WriteLine();
FFT2 fft2 = new FFT2();
uint logN = (uint)Math.Log(data.Length, 2);
fft2.init(logN);
fft2.run(arrRe, arrI);
// After this you have to split that byte array for each channel (Left,Right)
// Wav supports many channels, so you have to read channel from header
Console.ReadLine();
}
}
}
There are a few things that you need to address:
you’re not applying a window function prior to the FFT – this will result in spectral leakage in the general case and you may get misleading results, particularly when looking for peaks, as there will be “smearing” of the spectrum.
when looking for peaks you should be looking at the magnitude of FFT output bins, not the individual real and imaginary parts –
magnitude = sqrt(re^2 +im^2)(although you don’t need to worry about thesqrtif you’re just looking for peaks).having identified a peak you need to convert the bin index into a frequency – if the peak is at bin index i then the corresponding frequency will be
i * Fs / NwhereFsis the sample rate in Hz andNis the FFT size.for a real-to-complex FFT you can ignore the second N / 2 output bins as they are just the complex conjugate mirror image of the first N / 2 bins
(See also this answer for fuller explanations of the above.)