I want to find the MFCCs of each frame of a song. Can you tell me how to compute them? I found this code, but I am unable to understand what it is doing. It prints an array, a duration and a period, but I have heard that MFCC gives you a set of coefficients for each frame. Can you help me understand how to use this code?
Here is the code
This is the main program (timeseries.py):

import numpy as N

# Public names of this module. The original list literal was truncated in
# the paste; it is reconstructed here from the classes defined below.
__all__ = ['InfoArray', 'timeseries']

class InfoArray(N.ndarray):
    """ndarray subclass that carries a fixed set of named metadata fields.

    A subclass lists the permitted field names in its ``_fields_`` class
    attribute.  Each field may be given as a keyword argument when the array
    is constructed and is then readable as an attribute of the array; fields
    that are not given are set to ``None``.
    """

    # No metadata fields on the base class; subclasses override this.
    _fields_ = ()

    def __new__(subtype, data, dtype=None, copy=True, **kwargs):
        """Build the array from `data` and attach the metadata in `kwargs`.

        - `data`: array-like input, or an existing InfoArray.
        - `dtype`: element type of the result.
        - `copy`: only consulted when `data` is an InfoArray; a view is
          returned instead of a copy when `copy` is false and `dtype`
          equals the dtype of `data`.
        - `kwargs`: values for the names listed in ``_fields_``.

        Raises TypeError for a keyword that is not an attribute of the class.
        """
        if isinstance(data, InfoArray):
            # Metadata travels with the existing array via
            # __array_finalize__, so the keyword arguments are not used here.
            if not copy and dtype == data.dtype:
                return data.view(subtype)
            else:
                return data.astype(dtype).view(subtype)

        # The values are stashed on the class because __array_finalize__ (run
        # by NumPy during .view() below) receives no constructor arguments;
        # it reads them back from the '_'-prefixed names.  As a consequence
        # the class attributes always reflect the most recent construction.
        for f in subtype._fields_:
            value = kwargs.get(f)
            setattr(subtype, '_' + f, value)
            setattr(subtype, f, value)

        for kw in kwargs:
            if not hasattr(subtype, kw):
                raise TypeError('invalid keyword argument \'%s\'' % (kw,))

        return N.array(data, dtype=dtype).view(subtype)

    def __array_finalize__(self, obj):
        """Copy each field from `obj`, or from the class-level stash."""
        for f in self._fields_:
            if hasattr(obj, f):
                # View/slice/copy of an InfoArray: inherit its value.
                setattr(self, f, getattr(obj, f))
            else:
                # Fresh construction: take the value stored by __new__
                # (None if __new__ has not run for this class yet).
                setattr(self, f, getattr(self, '_' + f, None))

class timeseries(N.ndarray):
    """ndarray subclass that remembers a sampling frequency and an info
    object alongside the sample data.
    """

    def __new__(subtype, data, sampling_frequency=0.0, info=None,
                dtype=None, copy=True):
        """Build a timeseries from `data`.

        - `data`: array-like samples, or an existing timeseries.
        - `sampling_frequency`: stored on the result as
          ``sampling_frequency``; ignored when `data` is a timeseries.
        - `info`: stored on the result as ``info``; ignored when `data` is
          a timeseries.
        - `dtype`: element type of the result.
        - `copy`: only consulted when `data` is a timeseries; a view is
          returned instead of a copy when `copy` is false and `dtype`
          equals the dtype of `data`.
        """
        # when data is a timeseries
        if isinstance(data, timeseries):
            # The attributes are carried over by __array_finalize__.
            if not copy and dtype == data.dtype:
                return data.view(subtype)
            else:
                return data.astype(dtype).view(subtype)

        # Stash the attributes on the class: __array_finalize__ (run by NumPy
        # during .view() below) receives no constructor arguments and reads
        # them back from the '_'-prefixed names.
        subtype._sampling_frequency = sampling_frequency
        subtype.sampling_frequency = subtype._sampling_frequency
        subtype._info = info
        subtype.info = subtype._info
        return N.array(data, dtype=dtype).view(subtype)

    def __array_finalize__(self, obj):
        """Copy the attributes from `obj`, or from the class-level stash."""
        attrs = ['sampling_frequency', 'info']
        for attr in attrs:
            if hasattr(obj, attr):
                # View/slice/copy of a timeseries: inherit its value.
                setattr(self, attr, getattr(obj, attr))
            else:
                # Fresh construction: take the value stored by __new__.
                setattr(self, attr, getattr(self, '_' + attr))

    def __repr__(self):
        """Return the data followed by the sampling frequency."""
        desc = """array(data=\n  %(data)s,\nsampling_frequency=%(fs)f)"""
        return desc % {'data': str(self),
                       'fs': self.sampling_frequency}

if __name__ == '__main__':
    # Demonstration only: build an InfoArray subclass with two metadata
    # fields and print the data and the fields.  This does NOT compute any
    # MFCCs; the local class merely happens to be called MFCC.
    class MFCC(InfoArray):
        _fields_ = ['duration', 'period']

        def bar(self):
            print(self)

    x = MFCC([1., 2., 3.], duration=20e-3, period=10e-3)
    print(x)
    print(x.duration)
    print(x.period)

And this is the code of the MFCC class (mfcc.py):

import numpy as N

from timeseries import timeseries
from util import exactly_2d

# Public names of this module. The original list literal was truncated in
# the paste; it is reconstructed here from the definitions below.
__all__ = ['triang', 'linear2mel', 'mel2linear', 'MelFilterbank', 'MFCC']

# triangular window from SciPy
def triang(M, sym=1):
    """The M-point triangular window.

    - `M`: number of points in the window.
    - `sym`: when true (the default) the window is symmetric; when false an
      even-length window is built as the first `M` points of the symmetric
      window of length ``M + 1``.

    Returns an empty array for ``M < 1`` and ``[1.0]`` for ``M == 1``.
    """
    if M < 1:
        return N.array([])
    if M == 1:
        return N.ones(1, 'd')

    odd = M % 2
    # The non-symmetric even-length case is derived from the next odd length
    # and trimmed again at the end.
    extended = not sym and not odd
    if extended:
        M = M + 1

    # Rising half of the window, 1-based sample indices.
    n = N.arange(1, int((M + 1) // 2) + 1)
    if M % 2 == 0:
        # Even length: no sample sits on the peak, mirror the whole half.
        w = (2 * n - 1.0) / M
        w = N.r_[w, w[::-1]]
    else:
        # Odd length: the peak sample (value 1) must not be duplicated.
        w = 2 * n / (M + 1.0)
        w = N.r_[w, w[-2::-1]]

    if extended:
        w = w[:-1]

    return w

def linear2mel(linfreq):
    """Convert linear frequency to Mel frequency.

    Computes ``1127 * ln(1 + linfreq / 700)`` element-wise.

    - `linfreq`: scalar or array-like of linear frequencies.

    Returns a float array with length-1 axes squeezed out (a 0-d array for
    scalar input).  The input is never modified.
    """
    # Work on a float copy: the in-place steps below would raise a casting
    # error on integer input.
    melfreq = N.atleast_1d(linfreq).astype(float)
    melfreq /= 700.0
    melfreq += 1.0
    N.log(melfreq, out=melfreq)
    melfreq *= 1127.0
    return melfreq.squeeze()

def mel2linear(melfreq):
    """Convert Mel frequency to linear frequency.

    Computes ``700 * (exp(melfreq / 1127) - 1)`` element-wise.

    - `melfreq`: scalar or array-like of Mel frequencies.

    Returns a float array with length-1 axes squeezed out (a 0-d array for
    scalar input).  The input is never modified.
    """
    # Work on a float copy: the in-place steps below would raise a casting
    # error on integer input.
    linfreq = N.atleast_1d(melfreq).astype(float)
    linfreq /= 1127.0
    N.exp(linfreq, out=linfreq)
    linfreq -= 1.0
    linfreq *= 700.0
    return linfreq.squeeze()

class MelFilterbank:
    """Bank of `nfilters` overlapping band filters spaced evenly on the Mel
    scale between two linear frequencies.
    """

    def __init__(self, nfilters, startfreq, stopfreq, filter_window):
        """
        - `nfilters`: number of filters.
        - `startfreq`: linear frequency where first filter begins.
        - `stopfreq`: linear frequency where last filter ends.
        - `filter_window`: window to use for each filter; called with the
          number of spectrum samples in the band and expected to return
          that many weights.
        """
        self.nfilters = nfilters
        self.filter_window = filter_window

        # make sure we're working with floating point values
        startfreq, stopfreq = float(startfreq), float(stopfreq)

        # convert start and stop frequencies
        melstartfreq = linear2mel(startfreq)
        melstopfreq = linear2mel(stopfreq)

        # step between start of filters
        melstepfreq = (melstopfreq - melstartfreq) / (nfilters + 1.0)

        # Filter i starts i steps above the start frequency and is two steps
        # wide, so neighbouring filters overlap by one step.
        index = N.arange(0., nfilters)
        starts = melstartfreq + index * melstepfreq
        stops = melstartfreq + (index + 2.0) * melstepfreq

        # filter bands in Mel frequency; a real list so that apply() can be
        # called more than once (zip() is a one-shot iterator on Python 3)
        self.filter_bands = list(zip(starts, stops))

    def apply(self, frames, sampling_frequency):
        """Return the windowed, averaged spectrum of each frame per filter.

        - `frames`: time-domain frames, one per row after `exactly_2d`
          (presumably that helper promotes a single frame to one row;
          confirm against util.py).
        - `sampling_frequency`: sampling frequency of the frames, in the same
          unit as the start/stop frequencies given to the constructor.

        Returns an array of shape ``(number of frames, nfilters)``.  Filters
        that start above the highest frequency of the spectrum are left at
        zero.
        """
        # XXX figure out if we want power spectrum or energy spectrum here
        # XXX rename variables accordingly

        frames = exactly_2d(frames)

        # Magnitude spectrum of each frame (not squared), zero-padded to the
        # next power of two.
        n = int(2**N.ceil(N.log2(frames.shape[-1])))
        spectrum = N.abs(N.fft.rfft(frames, n))
        nbins = spectrum.shape[-1]

        # Spectrum samples per unit of frequency.  This uses the padded FFT
        # length, since that is what fixes the bin spacing, and float()
        # avoids integer division for an integer sampling frequency.
        sample_step = n / float(sampling_frequency)

        # Beginning and end of filters in spectrum samples.  The bands are
        # stored in Mel frequency, so convert back to linear frequency first.
        filter_samples = [(int(N.ceil(mel2linear(start) * sample_step)),
                           int(N.ceil(mel2linear(stop) * sample_step)))
                          for start, stop in self.filter_bands]

        energies = N.zeros((frames.shape[0], self.nfilters))
        for i, (start_sample, stop_sample) in enumerate(filter_samples):
            band_nsamples = stop_sample - start_sample + 1
            # NOTE(review): the end of this slice was truncated in the
            # source; clipping to the spectrum length is reconstructed from
            # the check that follows.
            band_slice = slice(start_sample, min(stop_sample + 1, nbins))
            if band_slice.start >= band_slice.stop:
                # break if signal's highest frequency is less than the
                # start frequency of this filter
                break
            # Slice along the frequency axis (last axis), for every frame.
            band_spectrum = spectrum[:, band_slice].copy()
            # Apply filter window function to spectrum samples; the window
            # is indexed relative to the band and trimmed if the band was
            # clipped at the top of the spectrum.
            window = self.filter_window(band_nsamples)
            band_spectrum *= window[:band_spectrum.shape[-1]]
            energies[:, i] = band_spectrum.sum(axis=-1) / band_nsamples

        return energies

class MFCC:
    """Frame-based front end: splits a signal into windowed frames, runs
    them through a filterbank and takes the log of the band energies.

    The discrete cosine transform and liftering steps are not implemented
    yet (see the XXX notes in `apply`), so the result is a log filterbank
    spectrum rather than cepstral coefficients.
    """

    def __init__(self, period, duration, frame_window, filterbank):
        """
        - `period`: frame period in seconds.
        - `duration`: frame duration in seconds.
        - `frame_window`: window to apply to each frame; called with the
          frame length in samples.
        - `filterbank`: object whose ``apply(frames, sampling_frequency)``
          returns the band energies of each frame.
        """
        self.period = period
        self.duration = duration
        self.frame_window = frame_window
        self.filterbank = filterbank

    def apply(self, signal, dtype=None):
        """Return ``20 * ln(energy)`` for every frame and filterbank band.

        - `signal`: samples with a ``sampling_frequency`` attribute.
        - `dtype`: currently unused (only needed by the disabled
          normalisation step).

        Bands whose energy is zero come out as ``-inf``.
        """
        # Normalisation and pre-emphasis are disabled in the original code.
        #signal = self._normalize(signal, dtype)
        #self._preemphasis(signal, alpha=0.98)
        frames = self._split_into_frames(signal)
        # NOTE(review): the second argument was truncated in the source; the
        # signal's sampling frequency is the only value that fits.
        energies = self.filterbank.apply(frames, signal.sampling_frequency)
        spectra = N.log(energies)
        spectra *= 20
        # XXX discrete cosine transform
        # XXX lifter
        return spectra

    def _normalize(self, signal, dtype):
        """Return a zero-mean copy of `signal` scaled to a peak of 1."""
        signal = timeseries(signal, dtype=dtype)
        signal -= signal.mean()
        peak = N.max(N.abs(signal))
        if peak > 0:
            # An all-constant signal is left at zero rather than divided
            # by zero.
            signal /= peak
        return signal

    def _preemphasis(self, signal, alpha):
        """Apply preemphasis to signal inplace."""
        signal[1:] -= alpha * signal[:-1]

    def _split_into_frames(self, signal):
        """Cut `signal` into overlapping frames and window each one.

        Returns a float array with one frame per row; the array has zero
        rows when the signal is shorter than one frame.  Raises ValueError
        when the frame period is shorter than one sample.
        """
        # split signal into frames
        fs = signal.sampling_frequency
        period_samples = int(fs * self.period)
        duration_samples = int(fs * self.duration)
        if period_samples < 1:
            raise ValueError('frame period is shorter than one sample')

        starts = range(0, len(signal) - duration_samples + 1, period_samples)
        if len(starts) == 0:
            return N.zeros((0, duration_samples))

        frames = N.array([signal[s:s + duration_samples] for s in starts])
        if frames.dtype.kind not in 'fc':
            # Integer samples cannot be multiplied in place by a float
            # window.
            frames = frames.astype(float)

        # apply frame window to each frame
        window = self.frame_window(duration_samples)
        frames *= window

        return frames

You have to save both files in the same folder and run the timeseries code.

Recommended Answers

The class lines are mis-indented, and so are the function lines from line 38 onward.

Correct code seems to be in http://www.koders.com/info.aspx?c=ProjectInfo&pid=SC78E2QBLRAQS1XVU32CHKWEEF.
in ...\trunk\pyspkrec\pyspkrec\

Jump to Post

All 2 Replies

I got this code from that link, but it's not working. :S

Be a part of the DaniWeb community

We're a friendly, industry-focused community of 1.21 million developers, IT pros, digital marketers, and technology enthusiasts learning and sharing knowledge.