I want to find the MFCCs (mel-frequency cepstral coefficients) of each frame of a song. Can you tell me how to compute them? I found this code, but I am unable to understand what it is doing. It prints an array, a duration and a period, but I have heard that MFCC gives you a set of coefficients for each frame. Can you help me understand how to use this code?
Here is the code
This is the main program (timeseries.py):

import numpy as N

# Public API of this module: only `timeseries` is exported by a star-import;
# `InfoArray` stays importable by name but is not listed here.
__all__ = [
'timeseries'
]

class InfoArray(N.ndarray):
    """ndarray subclass that carries named metadata attributes.

    Subclasses list the attribute names in ``_fields_``. Each name may be
    passed as a keyword argument at construction time and is then available
    as an attribute on the array and on arrays derived from it (slices,
    views, copies).
    """

    # Names of the metadata attributes; subclasses override this.
    _fields_ = ()

    def __new__(subtype, data, dtype=None, copy=True, **kwargs):
        """Create the array.

        Parameters:

        - `data`: array-like input, or an existing `InfoArray`.
        - `dtype`: requested element type.
        - `copy`: only consulted when `data` is already an `InfoArray`.
        - `kwargs`: values for the names in ``_fields_``; missing names
          default to ``None``.

        Raises `TypeError` for a keyword that is not listed in ``_fields_``.
        """
        if isinstance(data, InfoArray):
            # Existing instance: metadata travels via __array_finalize__;
            # keyword arguments are ignored on this path.
            # NOTE(review): dtype=None is forwarded to astype() unchanged,
            # which presumably selects NumPy's default dtype rather than
            # keeping data's dtype -- confirm before relying on it.
            if not copy and dtype == data.dtype:
                return data.view(subtype)
            return data.astype(dtype).view(subtype)

        # Validate against the declared field list. Checking with hasattr()
        # on the class would let any ndarray attribute name (``shape``,
        # ``dtype``, ...) slip through unnoticed.
        for kw in kwargs:
            if kw not in subtype._fields_:
                raise TypeError('invalid keyword argument \'%s\'' % (kw,))

        arr = N.array(data, dtype=dtype).view(subtype)
        # Store the metadata on the instance, not on the class, so that one
        # construction cannot change what other arrays later inherit.
        for f in subtype._fields_:
            setattr(arr, f, kwargs.get(f))
        return arr

    def __array_finalize__(self, obj):
        """Copy metadata from the array this one was derived from.

        `obj` is the source array (or ``None``); fields it does not carry
        are initialised to ``None``.
        """
        for f in self._fields_:
            setattr(self, f, getattr(obj, f, None))

class timeseries(N.ndarray):
    """ndarray subclass that remembers a sampling frequency and free-form info.

    Both attributes are carried over to arrays derived from an instance
    (slices, views, copies).
    """

    # Attribute names with the value used when the source array has none.
    _defaults = (('sampling_frequency', 0.0), ('info', None))

    def __new__(subtype, data, sampling_frequency=0.0, info=None, dtype=None, copy=True):
        """Create the time series.

        Parameters:

        - `data`: array-like samples, or an existing `timeseries`.
        - `sampling_frequency`: stored as the `sampling_frequency` attribute.
        - `info`: stored as the `info` attribute.
        - `dtype`: requested element type.
        - `copy`: only consulted when `data` is already a `timeseries`.
        """
        # when data is a timeseries
        if isinstance(data, timeseries):
            # Attributes are inherited from `data` via __array_finalize__;
            # the sampling_frequency/info arguments are ignored here.
            # NOTE(review): dtype=None is forwarded to astype() unchanged,
            # which presumably selects NumPy's default dtype rather than
            # keeping data's dtype -- confirm before relying on it.
            if not copy and dtype == data.dtype:
                return data.view(subtype)
            return data.astype(dtype).view(subtype)

        series = N.array(data, dtype=dtype).view(subtype)
        # Keep the metadata on the instance; writing it to the class would
        # change what every later view of a plain array inherits.
        series.sampling_frequency = sampling_frequency
        series.info = info
        return series

    def __array_finalize__(self, obj):
        """Inherit metadata from `obj`, falling back to the defaults."""
        for attr, default in self._defaults:
            setattr(self, attr, getattr(obj, attr, default))

    def __repr__(self):
        """Return the data followed by the sampling frequency."""
        desc = """array(data=\n  %(data)s,\nsampling_frequency=%(fs)f)"""
        return desc % {'data' :  str(self),
                       'fs' : self.sampling_frequency}

if __name__ == '__main__':
    # Small demonstration of InfoArray: build a subclass with two metadata
    # fields, then print the array and the metadata. This only shows how the
    # metadata is attached; it does not compute any MFCCs.
    class MFCC(InfoArray):
        """Demo array type with `duration` and `period` metadata fields."""
        _fields_ = ['duration', 'period']

        def bar(self):
            """Print the array contents."""
            print(self)

    # print(...) with a single argument behaves the same on Python 2 and 3.
    x = MFCC([1.,2.,3.], duration=20e-3, period=10e-3)
    print(x)
    print(x.duration)
    print(x.period)
    x.bar()

and this is the code of MFCC class (mfcc.py)

import numpy as N

from timeseries import timeseries
from util import exactly_2d

# Names exported by a star-import of this module: the triangular window,
# the two Mel/linear frequency converters, and the two classes defined below.
__all__ = [
'triang',
'linear2mel',
'mel2linear',
'MelFilterbank',
'MFCC'
]

# triangular window from SciPy
def triang(M, sym=1):
    """The M-point triangular window.

    Parameters:

    - `M`: number of points in the window.
    - `sym`: when false and `M` is even, the window is computed for
      ``M + 1`` points and its last sample is dropped.

    Returns an empty array for ``M < 1`` and ``[1.0]`` for ``M == 1``.
    """
    if M < 1:
        return N.array([])
    if M == 1:
        return N.ones(1, 'd')

    # Only an even, non-symmetric request is extended by one point.
    extend = (not sym) and (M % 2 == 0)
    if extend:
        M = M + 1

    # Rising half of the window; floor division keeps the count an integer
    # whichever division semantics are in effect.
    n = N.arange(1, int((M + 1) // 2) + 1)
    if M % 2 == 0:
        rising = (2 * n - 1.0) / M
        w = N.r_[rising, rising[::-1]]
    else:
        rising = 2 * n / (M + 1.0)
        # Odd length: the peak sample is not repeated in the falling half.
        w = N.r_[rising, rising[-2::-1]]

    if extend:
        w = w[:-1]

    return w

def linear2mel(linfreq):
    """Convert linear frequency to Mel frequency.

    Computes ``1127 * log(1 + linfreq / 700)`` element-wise.

    Parameters:

    - `linfreq`: scalar or array-like of linear frequencies. The input is
      not modified.

    Returns an array with length-one axes squeezed out (a 0-d array for
    scalar input).
    """
    freq = N.atleast_1d(linfreq)
    # Integer input must be promoted first: in-place arithmetic on an
    # integer array either fails or truncates the result.
    if not N.issubdtype(freq.dtype, N.inexact):
        freq = freq.astype(float)
    melfreq = N.log(freq / 700.0 + 1.0) * 1127.0
    return melfreq.squeeze()

def mel2linear(melfreq):
    """Convert Mel frequency to linear frequency.

    Computes ``700 * (exp(melfreq / 1127) - 1)`` element-wise.

    Parameters:

    - `melfreq`: scalar or array-like of Mel frequencies. The input is not
      modified.

    Returns an array with length-one axes squeezed out (a 0-d array for
    scalar input).
    """
    mel = N.atleast_1d(melfreq)
    # Integer input must be promoted first: in-place arithmetic on an
    # integer array either fails or truncates the result.
    if not N.issubdtype(mel.dtype, N.inexact):
        mel = mel.astype(float)
    linfreq = (N.exp(mel / 1127.0) - 1.0) * 700.0
    return linfreq.squeeze()

class MelFilterbank:
    """Bank of overlapping band filters spaced evenly on the Mel scale."""

    def __init__(self, nfilters, startfreq, stopfreq, filter_window):
        """
        Parameters:

        - `nfilters`: number of filters.
        - `startfreq`: linear frequency where first filter begins.
        - `stopfreq`: linear frequency where last filter ends.
        - `filter_window`: window to use for each filter; called with the
          number of points and expected to return that many weights.
        """
        self.nfilters = nfilters
        self.filter_window = filter_window

        # make sure we're working with floating point values
        startfreq = float(startfreq)
        stopfreq = float(stopfreq)

        # convert start and stop frequencies
        melstartfreq = linear2mel(startfreq)
        melstopfreq = linear2mel(stopfreq)

        # step between start of filters; each filter spans two steps, so
        # neighbouring filters overlap by half their width
        melstepfreq = (melstopfreq - melstartfreq) / (nfilters + 1.0)

        # start Mel frequencies of filters
        starts = N.arange(0., nfilters) * melstepfreq + melstartfreq

        # stop Mel frequencies of filters
        stops = (N.arange(0., nfilters) + 2.0) * melstepfreq + melstartfreq

        # filter bands in Mel frequency; materialised as a list so that the
        # bands survive more than one call to apply()
        self.filter_bands = list(zip(starts, stops))

    def apply(self, frames, sampling_frequency):
        """Return the windowed band averages of each frame's spectrum.

        Parameters:

        - `frames`: time-domain frames, passed through `exactly_2d`
          (presumably yielding one frame per row -- confirm against util).
        - `sampling_frequency`: sampling frequency of the frames, in the
          same unit as the filter start/stop frequencies.

        Returns an array of shape ``(number of frames, nfilters)``. Filters
        that cover no spectrum bin leave their column at zero.
        """
        # XXX figure out if we want power spectrum or energy spectrum here
        # (the magnitude of the FFT is used as-is for now)

        frames = exactly_2d(frames)

        # magnitude spectrum of each frame, zero-padded to a power of two
        nfft = int(2**N.ceil(N.log2(frames.shape[-1])))
        spectrum = N.abs(N.fft.rfft(frames, nfft))
        nbins = spectrum.shape[-1]

        # spectrum bins per unit of frequency: bin k sits at k * fs / nfft,
        # so the padded length (not the frame length) sets the spacing
        bins_per_hz = nfft / float(sampling_frequency)

        energies = N.zeros((frames.shape[0], self.nfilters))
        for i, (melstart, melstop) in enumerate(self.filter_bands):
            # the band edges are stored in Mel; go back to linear frequency
            # before turning them into bin indices
            start_bin = int(N.ceil(float(mel2linear(melstart)) * bins_per_hz))
            stop_bin = int(N.ceil(float(mel2linear(melstop)) * bins_per_hz))
            # NOTE(review): the window length counts both edge bins while the
            # slice below excludes the upper one -- kept as in the original.
            band_nsamples = stop_bin - start_bin + 1
            last = min(stop_bin, nbins)
            if start_bin >= last:
                # nothing to measure: the band is narrower than one bin or
                # lies above the highest frequency of the spectrum; later
                # bands may still be valid, so keep going
                continue
            # apply filter window function to spectrum samples; the window
            # is indexed from its own start, not by absolute bin number
            window = self.filter_window(band_nsamples)
            band = spectrum[:, start_bin:last] * window[:last - start_bin]
            energies[:, i] = band.sum(axis=-1) / band_nsamples

        return energies

class MFCC:
    """Compute cepstral coefficients for each frame of a signal.

    The signal is cut into overlapping frames, each frame is windowed and
    passed through the filterbank, and the scaled logarithm of the
    filterbank output is decorrelated with a discrete cosine transform.
    """

    def __init__(self, period, duration, frame_window, filterbank):
        """
        Parameters:

        - `period`: frame period in seconds.
        - `duration`: frame duration in seconds.
        - `frame_window`: window to apply to each frame; called with the
          frame length in samples.
        - `filterbank`: object whose ``apply(frames, sampling_frequency)``
          returns one row of band energies per frame.
        """
        self.period = period
        self.duration = duration
        self.frame_window = frame_window
        self.filterbank = filterbank

    def apply(self, signal, dtype=None):
        """Return the cepstral coefficients of `signal`, one row per frame.

        Parameters:

        - `signal`: 1-D array with a `sampling_frequency` attribute.
        - `dtype`: currently unused; it belongs to the normalisation step,
          which is disabled.

        Returns an array of shape ``(number of frames, number of filters)``.
        """
        # NOTE: normalisation and pre-emphasis were disabled in the original
        # code and are left disabled here.
        #signal = self._normalize(signal, dtype)
        #self._preemphasis(signal, alpha=0.98)
        frames = self._split_into_frames(signal)
        energies = self.filterbank.apply(frames,
                                         signal.sampling_frequency)
        # Floor the energies so that an empty band gives a large negative
        # value instead of -inf, which would turn every coefficient of the
        # frame into NaN in the transform below.
        energies = N.maximum(energies, N.finfo(float).tiny)
        spectra = N.log(energies)
        spectra *= 20
        # XXX lifter
        return self._dct(spectra)

    def _dct(self, spectra):
        """Apply an unnormalised DCT-II along the last axis.

        Coefficient k of a row s of length B is
        ``sum_n s[n] * cos(pi * k * (n + 0.5) / B)``.
        """
        nbands = spectra.shape[-1]
        n = N.arange(nbands) + 0.5
        k = N.arange(nbands)[:, N.newaxis]
        basis = N.cos(N.pi * k * n / nbands)
        return N.dot(spectra, basis.T)

    def _normalize(self, signal, dtype):
        """Return a zero-mean copy of `signal` scaled to a peak of one."""
        signal = timeseries(signal, dtype=dtype)
        signal -= signal.mean()
        peak = N.max(N.abs(signal))
        # An all-zero signal stays zero instead of becoming NaN.
        if peak > 0:
            signal /= peak
        return signal

    def _preemphasis(self, signal, alpha):
        """Apply preemphasis to signal inplace."""
        # The right-hand side is evaluated into a temporary first, so every
        # sample is reduced by alpha times the *original* previous sample.
        signal[1:] -= alpha * signal[:-1]

    def _split_into_frames(self, signal):
        """Cut `signal` into windowed frames.

        Returns an array of shape ``(number of frames, samples per frame)``;
        the number of frames is zero when the signal is shorter than one
        frame. Raises `ValueError` when the frame period is shorter than one
        sample.
        """
        # split signal into frames
        fs = signal.sampling_frequency
        period_samples = int(fs * self.period)
        duration_samples = int(fs * self.duration)
        if period_samples < 1:
            raise ValueError('frame period is shorter than one sample')
        starts = range(0, len(signal) - duration_samples + 1, period_samples)
        if not starts:
            return N.empty((0, duration_samples))
        frames = N.array([signal[s:s + duration_samples] for s in starts])

        # apply frame window to each frame; a new array is built so that
        # integer samples are promoted instead of failing in place
        window = self.frame_window(duration_samples)
        return frames * window

You have to save both files (timeseries.py and mfcc.py) in the same folder and then run timeseries.py.

This article has been dead for over six months. Start a new discussion instead.