The function coinT() tests whether two time series are stationary using the ADF test and the Hurst exponent. The time series are stored in CSV files (1511x6 each, 50 files in total), but for testing only a vector of the 5th column is returned by the function stock(). The program seems to use too much memory: it makes the PC crash after running for ~30 seconds. It works fine on 15 files, but crashes on larger sets (>50).
Can somebody please help me find where the memory leak is? I've tried splitting the computation into multiple functions and deleting objects, but it didn't help much.
import numpy as np import pandas as pd import statsmodels.tsa.stattools as ts import csv import timeit from numpy import log, polyfit, sqrt, std, subtract from pandas.stats.api import ols import os src = 'C:/Users/PC/Desktop/Magistr/Ibpython/testing/' filenames = next(os.walk(src)) #load all stock file names into array cointegratedPairs =  def hurst(ts): """Returns the Hurst Exponent of the time series vector ts H<0.5 - The time series is mean reverting H=0.5 - The time series is a Geometric Brownian Motion H>0.5 - The time series is trending""" # Create the range of lag values lags = range(2, 100) # Calculate the array of the variances of the lagged differences tau = [sqrt(std(subtract(ts[lag:], ts[:-lag]))) for lag in lags] # Use a linear fit to estimate the Hurst Exponent poly = polyfit(log(lags), log(tau), 1) del lags del tau # Return the Hurst exponent from the polyfit output return poly*2.0 #Convert file into an array def stock(filename): #read file into array and get it's length delimiter = "," with open(src + filename,'r') as dest_f: data_iter = csv.reader(dest_f, delimiter = delimiter, quotechar = '"') data = [data for data in data_iter] data_array = np.asarray(data)[:,5] return data_array del data del data_array #Check if two time series are cointegrated def coinTest(itemX, itemY): indVar = map(float, stock(itemX)[0:1000]) #2009.05.22 - 2013.05.14 depVar = map(float, stock(itemY)[0:1000]) #2009.05.22 - 2013.05.14 #Calculate optimal hedge ratio "beta" df = pd.DataFrame() df[itemX] = indVar df[itemY] = depVar res = ols(y=df[itemY], x=df[itemX]) beta_hr = res.beta.x alpha = res.beta.intercept df["res"] = df[itemY] - beta_hr*df[itemX] - alpha #Calculate the CADF test on the residuals cadf = ts.adfuller(df["res"]) #Reject the null hypothesis at 1% confidence level if cadf['1%'] > cadf: #Hurst exponent test if residuals are mean reverting if hurst(df["res"]) < 0.4: cointegratedPairs.append((itemY,itemX)) del indVar del depVar del df[itemX] del df[itemY] del 
df["res"] del cadf #Main function def coinT(): limit = 0 TotalPairs = 0 for itemX in filenames: for itemY in filenames[limit:]: TotalPairs +=1 if itemX == itemY: next else: coinTest(itemX, itemY) limit +=1