# -*- coding: utf-8 -*-
"""
Created on Thu Oct 11 21:15:34 2018
@author: HP
"""
# Importing the required python libraries
import pandas as pd
import matplotlib.pyplot as plt
from pylab import rcParams
import warnings
from statsmodels.tsa.stattools import coint
warnings.filterwarnings('ignore')
import csv
# Function to get the cointegration p-value for a pair. The significance argument defaults to 0.01
# and can be changed while calling the function, but it is not used in the computation: the
# function only returns the pvalue for the pair and the caller applies its own threshold.
def find_coint_significance(S1,S2,significance=0.01):
    """Return the p-value of the cointegration test between two series.

    Args:
        S1: First series handed to ``coint``.
        S2: Second series handed to ``coint``.
        significance: Not used by this function; it only remains in the
            signature for callers that pass it.

    Returns:
        Element 1 of the tuple returned by ``coint(S1, S2, autolag=None)``,
        which is the p-value of the test.
    """
    test_result = coint(S1, S2, autolag=None)
    return test_result[1]
# This function computes the z-score of the S1/S2 price ratio for the selected pair (the plotting
# code inside it is currently disabled). lb is the lookback period, i.e. how many of the most
# recent rows are considered. xres & yres only matter to the disabled plot: they divide the stock
# prices by the given value so that the price lines can be drawn next to the z-score line.
def plot_pairs(df,S1,S2,lb=20,xres=10,yres=10):
    """Compute the latest z-score of the S1/S2 ratio over a lookback window.

    Despite the name, nothing is drawn: the plotting code is disabled and
    only kept below as a reference.

    Args:
        df: DataFrame that has a column named after each of the two series;
            the last value of each of those columns is returned.
        S1: Series used as the numerator of the ratio.
        S2: Series used as the denominator of the ratio.
        lb: Lookback length; only the last ``lb`` entries of each series are
            used for the ratio, its mean and its standard deviation.
        xres: Only referenced by the disabled plotting code.
        yres: Only referenced by the disabled plotting code.

    Returns:
        Tuple ``(S1.name, S2.name, z, last_S1, last_S2)`` where ``z`` is the
        most recent z-score rounded to 3 decimals and ``last_S1``/``last_S2``
        are the last values of the matching ``df`` columns.
    """
    window_ratio = S1[-lb:] / S2[-lb:]
    window_ratio.name = 'ratio'
    # Standardise the ratio against its own mean and standard deviation
    # inside the lookback window.
    centred = window_ratio - window_ratio.mean()
    zscore = centred / window_ratio.std()
    zscore.name = 'zScore'

    # Disabled plotting code, kept for reference:
    #   plt.style.use('seaborn-white')
    #   plt.style.use('ggplot')
    #   rcParams['figure.figsize'] = 25,10
    #   plt.plot(df[S1.name][-lb:].index, df[S1.name][-lb:].values/xres)
    #   plt.plot(df[S2.name][-lb:].index, df[S2.name][-lb:].values/yres)
    #   plt.plot(zscore[-lb:].index, zscore[-lb:].values)
    #   plt.legend(["Y = " + S1.name, "X = " + S2.name, 'Price Spread Rolling z-Score'])
    #   plt.axhline(0, color='black')
    #   plt.axhline(1.0, color='red', linestyle='--')
    #   plt.axhline(-1.0, color='green', linestyle='--')
    #   plt.show()

    first_name, second_name = S1.name, S2.name
    latest_z = round(zscore.iloc[-1], 3)
    latest_first = df[first_name].iloc[-1]
    latest_second = df[second_name].iloc[-1]
    return first_name, second_name, latest_z, latest_first, latest_second
# This function loads the stock data file in to a pandas data frame for processing. I am considering
# the daily stock returns for cointegration testing which I feel is more accurate method.
def load_data(file):
    """Load the price file and derive the daily returns used for testing.

    Args:
        file: Path (or anything ``pd.read_csv`` accepts) of a CSV file whose
            first column is used as the index.

    Returns:
        Tuple ``(returns, df, returns1)``:

        * ``returns`` - percentage changes of ``df`` restricted to the last
          200 rows, with the first of those rows removed and every column
          that still contains a missing value dropped.
        * ``df`` - the file as read, indexed by its first column.
        * ``returns1`` - an empty frame built from the header line only,
          parsed with a whitespace separator (so a comma-separated header
          without spaces becomes a single column label).
    """
    df = pd.read_csv(file, index_col=[0])
    recent_changes = df.pct_change()[-200:]
    returns = recent_changes.iloc[1:, :].dropna(axis=1)
    # `delim_whitespace=True` is deprecated since pandas 2.2 and removed in
    # pandas 3.0; sep=r"\s+" is the documented equivalent.
    returns1 = pd.read_csv(file, nrows=0, sep=r"\s+")
    return returns, df, returns1
def update_eod(masterfile,eodfile,outfile='C://master/stockdata.csv'):
    """Append one end-of-day row to the master price table and save it.

    Args:
        masterfile: CSV file with the existing price table; its first column
            is used as the index.
        eodfile: Header-less CSV file of which only columns 0 and 5 are read.
            Column 0 becomes the column labels of the new row and column 5
            its values (presumably ticker symbol and closing price - confirm
            against the EOD feed format).
        outfile: Destination CSV path. Defaults to the location this function
            has always written to.

    Returns:
        None. The merged table is written to ``outfile``.
    """
    master = pd.read_csv(masterfile, index_col=[0])
    eod = pd.read_csv(eodfile, header=None, index_col=[0], usecols=[0, 5])
    # DataFrame.append was removed in pandas 2.0; pd.concat performs the same
    # row-wise stacking with column alignment.
    combined = pd.concat([master, eod.T])
    # Keep only the columns present in both inputs (any other column holds a
    # missing value after alignment) and renumber the rows from 0.
    df = combined.dropna(axis=1).reset_index(drop=True)
    df.to_csv(outfile)
    return
#update_eod('C://master/stockdata.csv','C://master/eod.txt')
# Driver: test every ordered pair of tickers for cointegration on returns and
# append the pairs that are both significant and stretched (|z-score| > 2) to
# the trade-book CSV.
returns,df,returns1 = load_data('C://stockdata.csv')
# returns1 carries the header parsed with a whitespace separator; its first
# label is split on commas to recover the column names (this assumes the
# header line contains no whitespace). The first name is the index column.
header_names = returns1.columns.str.split(',').tolist()[0]
# load_data() drops every column with a missing value from `returns`, so a
# ticker taken straight from the header may no longer exist there and would
# raise KeyError below. Keep only the tickers that survived.
lst1 = [name for name in header_names[1:] if name in returns.columns]
print("Test ", len(lst1))
num = len(lst1)
for i in range(num):
    for j in range(num):
        # Skip a ticker paired with itself: the test is degenerate and the
        # ratio is constant, which gives a 0/0 z-score. Every other ordered
        # pair is scanned, including those with the first ticker as X.
        if i == j:
            continue
        S1 = returns[lst1[i]]
        S2 = returns[lst1[j]]
        pValue = find_coint_significance(S1,S2,significance=0.05)
        # A p-value of exactly 0.0 is rejected as well as anything >= 0.01.
        if not (pValue < 0.01 and pValue != 0.0):
            continue
        SS1 = df[lst1[i]]
        SS2 = df[lst1[j]]
        str1,str2,zScore,price1,price2 = plot_pairs(df,SS1,SS2,lb=20,xres=100,yres=300)
        if abs(zScore) > 2.0:
            # Opened in append mode per hit so the file is only touched when
            # there is something to report; `with` closes it.
            with open('D://Trade Book//Pair trading//Pair_Analysis.csv', 'a', newline='') as csvFile:
                writer = csv.writer(csvFile)
                writer.writerows([["Pair is Significant, pValue= "+str(pValue)],
                                  ["zScore: "+str(zScore)],
                                  ["Y = "+ str1 +" Price :"+str(price1)],
                                  ["X = "+ str2 +" Price :"+str(price2)],[]])
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 11 21:15:34 2018
@author: HP
"""
# Importing the required python libraries
import pandas as pd
import matplotlib.pyplot as plt
from pylab import rcParams
import warnings
from statsmodels.tsa.stattools import coint
warnings.filterwarnings('ignore')
import csv
# Function to get the cointegration p-value for a pair. The significance argument defaults to 0.01
# and can be changed while calling the function, but it is not used in the computation: the
# function only returns the pvalue for the pair and the caller applies its own threshold.
def find_coint_significance(S1,S2,significance=0.01):
    """Return the p-value of the cointegration test between two series.

    Args:
        S1: First series handed to ``coint``.
        S2: Second series handed to ``coint``.
        significance: Not used by this function; it only remains in the
            signature for callers that pass it.

    Returns:
        Element 1 of the tuple returned by ``coint(S1, S2, autolag=None)``,
        which is the p-value of the test.
    """
    test_result = coint(S1, S2, autolag=None)
    return test_result[1]
# This function computes the z-score of the S1/S2 price ratio for the selected pair (the plotting
# code inside it is currently disabled). lb is the lookback period, i.e. how many of the most
# recent rows are considered. xres & yres only matter to the disabled plot: they divide the stock
# prices by the given value so that the price lines can be drawn next to the z-score line.
def plot_pairs(df,S1,S2,lb=20,xres=10,yres=10):
    """Compute the latest z-score of the S1/S2 ratio over a lookback window.

    Despite the name, nothing is drawn: the plotting code is disabled and
    only kept below as a reference.

    Args:
        df: DataFrame that has a column named after each of the two series;
            the last value of each of those columns is returned.
        S1: Series used as the numerator of the ratio.
        S2: Series used as the denominator of the ratio.
        lb: Lookback length; only the last ``lb`` entries of each series are
            used for the ratio, its mean and its standard deviation.
        xres: Only referenced by the disabled plotting code.
        yres: Only referenced by the disabled plotting code.

    Returns:
        Tuple ``(S1.name, S2.name, z, last_S1, last_S2)`` where ``z`` is the
        most recent z-score rounded to 3 decimals and ``last_S1``/``last_S2``
        are the last values of the matching ``df`` columns.
    """
    window_ratio = S1[-lb:] / S2[-lb:]
    window_ratio.name = 'ratio'
    # Standardise the ratio against its own mean and standard deviation
    # inside the lookback window.
    centred = window_ratio - window_ratio.mean()
    zscore = centred / window_ratio.std()
    zscore.name = 'zScore'

    # Disabled plotting code, kept for reference:
    #   plt.style.use('seaborn-white')
    #   plt.style.use('ggplot')
    #   rcParams['figure.figsize'] = 25,10
    #   plt.plot(df[S1.name][-lb:].index, df[S1.name][-lb:].values/xres)
    #   plt.plot(df[S2.name][-lb:].index, df[S2.name][-lb:].values/yres)
    #   plt.plot(zscore[-lb:].index, zscore[-lb:].values)
    #   plt.legend(["Y = " + S1.name, "X = " + S2.name, 'Price Spread Rolling z-Score'])
    #   plt.axhline(0, color='black')
    #   plt.axhline(1.0, color='red', linestyle='--')
    #   plt.axhline(-1.0, color='green', linestyle='--')
    #   plt.show()

    first_name, second_name = S1.name, S2.name
    latest_z = round(zscore.iloc[-1], 3)
    latest_first = df[first_name].iloc[-1]
    latest_second = df[second_name].iloc[-1]
    return first_name, second_name, latest_z, latest_first, latest_second
# This function loads the stock data file in to a pandas data frame for processing. I am considering
# the daily stock returns for cointegration testing which I feel is more accurate method.
def load_data(file):
    """Load the price file and derive the daily returns used for testing.

    Args:
        file: Path (or anything ``pd.read_csv`` accepts) of a CSV file whose
            first column is used as the index.

    Returns:
        Tuple ``(returns, df, returns1)``:

        * ``returns`` - percentage changes of ``df`` restricted to the last
          200 rows, with the first of those rows removed and every column
          that still contains a missing value dropped.
        * ``df`` - the file as read, indexed by its first column.
        * ``returns1`` - an empty frame built from the header line only,
          parsed with a whitespace separator (so a comma-separated header
          without spaces becomes a single column label).
    """
    df = pd.read_csv(file, index_col=[0])
    recent_changes = df.pct_change()[-200:]
    returns = recent_changes.iloc[1:, :].dropna(axis=1)
    # `delim_whitespace=True` is deprecated since pandas 2.2 and removed in
    # pandas 3.0; sep=r"\s+" is the documented equivalent.
    returns1 = pd.read_csv(file, nrows=0, sep=r"\s+")
    return returns, df, returns1
def update_eod(masterfile,eodfile,outfile='C://master/stockdata.csv'):
    """Append one end-of-day row to the master price table and save it.

    Args:
        masterfile: CSV file with the existing price table; its first column
            is used as the index.
        eodfile: Header-less CSV file of which only columns 0 and 5 are read.
            Column 0 becomes the column labels of the new row and column 5
            its values (presumably ticker symbol and closing price - confirm
            against the EOD feed format).
        outfile: Destination CSV path. Defaults to the location this function
            has always written to.

    Returns:
        None. The merged table is written to ``outfile``.
    """
    master = pd.read_csv(masterfile, index_col=[0])
    eod = pd.read_csv(eodfile, header=None, index_col=[0], usecols=[0, 5])
    # DataFrame.append was removed in pandas 2.0; pd.concat performs the same
    # row-wise stacking with column alignment.
    combined = pd.concat([master, eod.T])
    # Keep only the columns present in both inputs (any other column holds a
    # missing value after alignment) and renumber the rows from 0.
    df = combined.dropna(axis=1).reset_index(drop=True)
    df.to_csv(outfile)
    return
#update_eod('C://master/stockdata.csv','C://master/eod.txt')
# Driver: test every ordered pair of tickers for cointegration on returns and
# append the pairs that are both significant and stretched (|z-score| > 2) to
# the trade-book CSV.
returns,df,returns1 = load_data('C://stockdata.csv')
# returns1 carries the header parsed with a whitespace separator; its first
# label is split on commas to recover the column names (this assumes the
# header line contains no whitespace). The first name is the index column.
header_names = returns1.columns.str.split(',').tolist()[0]
# load_data() drops every column with a missing value from `returns`, so a
# ticker taken straight from the header may no longer exist there and would
# raise KeyError below. Keep only the tickers that survived.
lst1 = [name for name in header_names[1:] if name in returns.columns]
print("Test ", len(lst1))
num = len(lst1)
for i in range(num):
    for j in range(num):
        # Skip a ticker paired with itself: the test is degenerate and the
        # ratio is constant, which gives a 0/0 z-score. Every other ordered
        # pair is scanned, including those with the first ticker as X.
        if i == j:
            continue
        S1 = returns[lst1[i]]
        S2 = returns[lst1[j]]
        pValue = find_coint_significance(S1,S2,significance=0.05)
        # A p-value of exactly 0.0 is rejected as well as anything >= 0.01.
        if not (pValue < 0.01 and pValue != 0.0):
            continue
        SS1 = df[lst1[i]]
        SS2 = df[lst1[j]]
        str1,str2,zScore,price1,price2 = plot_pairs(df,SS1,SS2,lb=20,xres=100,yres=300)
        if abs(zScore) > 2.0:
            # Opened in append mode per hit so the file is only touched when
            # there is something to report; `with` closes it.
            with open('D://Trade Book//Pair trading//Pair_Analysis.csv', 'a', newline='') as csvFile:
                writer = csv.writer(csvFile)
                # The second series is the X leg; it was mislabelled "Y = ".
                writer.writerows([["Pair is Significant, pValue= "+str(pValue)],
                                  ["zScore: "+str(zScore)],
                                  ["Y = "+ str1 +" Price :"+str(price1)],
                                  ["X = "+ str2 +" Price :"+str(price2)],[]])