# -*- coding: utf-8 -*-
"""
Download historical quote files from the Finam export service
(export.finam.ru), one CSV per ticker listed in function_parameters.csv.

@author: optimusqp
"""
import os
import time
import codecs
import urllib
import pandas as pd
from datetime import datetime, date
from pandas.io.common import EmptyDataError

# Py2/Py3 compatibility: urllib.urlopen was removed in Python 3.
try:
    from urllib import urlopen          # Python 2
except ImportError:
    from urllib.request import urlopen  # Python 3

# --- Finam export request parameters -------------------------------------
e = '.csv'          # exported file extension / format code
p = '7'             # period code — presumably 1 hour; confirm against Finam docs
yf = '2017'         # from-year (4 digits)
yt = '2017'         # to-year (4 digits)
month_start = '01'
day_start = '01'
month_end = '07'
day_end = '13'
year_start = yf[2:]  # 2-digit year used in the generated file name
year_end = yt[2:]

# Finam months are 0-based.  NOTE: the original code used
# int(s.replace('0', '')) - 1, which is wrong for any value containing a
# non-leading zero (e.g. '10' -> 1 - 1 = 0 instead of 9); int() alone
# parses zero-padded strings correctly.
mf = int(month_start) - 1
mt = int(month_end) - 1
df = int(day_start)
dt = int(day_end)

dtf = '1'       # date format code
tmf = '1'       # time format code
MSOR = '1'      # candle timestamp: open/close
mstimever = '0'
sep = '1'       # column separator code
sep2 = '1'      # decimal separator code
datf = '5'      # record layout code
at = '1'        # include header row


def quotes_finam_optimusqp(data, year_start, month_start, day_start,
                           year_end, month_end, day_end, e, df, mf, yf,
                           dt, mt, yt, p, dtf, tmf, MSOR, mstimever,
                           sep, sep2, datf, at):
    """Fetch one quote CSV per row of ``data`` from export.finam.ru.

    ``data`` must provide columns ``code`` (ticker), ``id_exchange_2``
    (Finam market id) and ``em`` (Finam instrument id).  Each response is
    saved as ``<code>_0.csv`` and an index of downloaded tickers is
    written to ``name_file_data.csv``.  All other arguments are passed
    through as Finam query parameters (see the constant block above).
    """
    temp_name_file = 'id,company\n'
    incrim = 1
    for index, row in data.iterrows():
        # File-name stem, e.g. GAZP_170101_170713 — Finam expects it both
        # in the path and in the 'f' query parameter.
        stem = '%s_%s%s%s_%s%s%s' % (row['code'],
                                     year_start, month_start, day_start,
                                     year_end, month_end, day_end)
        params = [
            ('market', row['id_exchange_2']),
            ('em', row['em']),
            ('code', row['code']),
            ('apply', 0),
            ('df', df), ('mf', mf), ('yf', yf),
            ('from', '%s.%s.%s' % (day_start, month_start, yf)),
            ('dt', dt), ('mt', mt), ('yt', yt),
            ('to', '%s.%s.%s' % (day_end, month_end, yt)),
            ('p', p),
            ('f', stem),
            ('e', e),
            ('cn', row['code']),
            ('dtf', dtf), ('tmf', tmf),
            ('MSOR', MSOR), ('mstimever', mstimever),
            ('sep', sep), ('sep2', sep2),
            ('datf', datf), ('at', at),
        ]
        url = ('http://export.finam.ru/' + stem + str(e) + '?' +
               '&'.join('%s=%s' % (k, v) for k, v in params))
        page = urlopen(url)
        print(url)
        print('code: ' + str(row['code']))
        content = page.read()
        if isinstance(content, bytes):          # Py3 returns bytes
            content = content.decode('utf-8')
        # Raw download goes to <code>_0.csv; cleaned data replaces it later.
        with codecs.open(str(row['code']) + "_" + "0" + ".csv",
                         "w", "utf-8") as out:
            out.write(content)
        temp_name_file += str(incrim) + "," + str(row['code']) + "\n"
        incrim += 1
        time.sleep(2)  # be polite to the export service

    # Persist the ticker index for the downstream processing step.
    with open("name_file_data.csv", "w") as output:
        output.write(temp_name_file)


# Parameter table: http://optimusqp.ru/articles/articles_1/function_parameters.csv
data_all = pd.read_csv('function_parameters.csv', index_col='id')
# Keep only instruments from exchange id 1.
data = data_all[data_all['id_exchange_2'] == 1]
quotes_finam_optimusqp(data, year_start, month_start, day_start,
                       year_end, month_end, day_end, e, df, mf, yf,
                       dt, mt, yt, p, dtf, tmf, MSOR, mstimever,
                       sep, sep2, datf, at)
# Post-process every downloaded <code>_0.csv: keep only the CLOSE column,
# convert it to first differences, and save the result as <code>_1.csv.
# Tickers with too little history (or empty downloads) are discarded.
name_file_data = pd.read_csv('name_file_data.csv', index_col='id')

incrim = 1
# how_work_days: number of data rows per retained ticker, written to
# days_data.csv for later inspection.
temp_string_in_file = 'id,how_work_days\n'

for index, row1 in name_file_data.iterrows():
    name_file = row1['company'] + "_" + "0" + ".csv"
    if not os.path.exists(name_file):
        continue
    if os.path.getsize(name_file) == 0:
        # Zero-byte download — nothing to process.
        os.unlink(name_file)
        continue
    try:
        # NOTE: read_csv is the call that can raise EmptyDataError, so it
        # must live inside the try (the original had it outside).
        temp_quotes_data = pd.read_csv(name_file, delimiter=',')
        # Keep only DATE/TIME/CLOSE; drop the OHLV columns we don't use.
        quotes_data = temp_quotes_data.drop(
            ['<OPEN>', '<HIGH>', '<LOW>', '<VOL>'], axis=1)
        how_string_in_file = len(quotes_data.index)
        if how_string_in_file <= 1100:
            # Too few observations to be statistically useful.
            os.unlink(name_file)
            continue
        temp_string_in_file += (str(incrim) + "," +
                                str(how_string_in_file) + "\n")
        incrim += 1
        # Fuse <DATE> and <TIME> into a single DATETIME key.
        # (astype(str) replaces the broken Py2-only astype(basestring).)
        quotes_data['DATE_str'] = quotes_data['<DATE>'].astype(str)
        quotes_data['TIME_str'] = quotes_data['<TIME>'].astype(str)
        quotes_data['DATETIME'] = quotes_data.apply(
            lambda x: '%s%s' % (x['DATE_str'], x['TIME_str']), axis=1)
        quotes_data = quotes_data.drop(
            ['<DATE>', '<TIME>', 'DATE_str', 'TIME_str'], axis=1)
        # Validation pass only: raises ValueError on a malformed stamp;
        # the parsed result is intentionally discarded so DATETIME stays
        # a string key for the later merge.
        quotes_data['DATETIME'].apply(
            lambda d: datetime.strptime(d, '%Y%m%d%H%M%S'))
        # First differences of the close price, stored under the ticker name.
        quotes_data[row1['company']] = (quotes_data['<CLOSE>'] -
                                        quotes_data['<CLOSE>'].shift(1))
        quotes_data = quotes_data.drop(['<CLOSE>'], axis=1)
        quotes_data.to_csv(row1['company'] + "_" + "1" + ".csv",
                           sep=',', encoding='utf-8')
    except EmptyDataError:
        pass  # fall through to unlink below
    # The raw _0.csv is no longer needed in any branch.
    os.unlink(name_file)

with open("days_data.csv", "w") as output:
    output.write(temp_string_in_file)
import glob
# reduce is a builtin in Py2 but lives in functools in Py3 (functools.reduce
# also exists in Py2.6+, so this import is compatible with both).
from functools import reduce

# Merge every per-ticker diff file (<code>_1.csv) into one wide table
# keyed on DATETIME, and persist it as securities.csv.
allFiles = glob.glob("*_1.csv")
pieces = []
for file_ in allFiles:
    pieces.append(pd.read_csv(file_, index_col=None, header=0))

# Inner-join all tickers on the shared DATETIME key.
dfff = reduce(lambda left, right: pd.merge(left, right, on='DATETIME'),
              pieces)
# Drop the index columns leaked by to_csv/read_csv round-trips; only drop
# the ones actually present so <3 merged files don't raise KeyError.
leaked = [c for c in ('Unnamed: 0_x', 'Unnamed: 0_y', 'Unnamed: 0')
          if c in dfff.columns]
quotes_data = dfff.drop(leaked, axis=1)
quotes_data.to_csv("securities.csv", sep=',', encoding='utf-8')

quotes_data = quotes_data.drop(['DATETIME'], axis=1)
number_columns = len(quotes_data.columns)
columns_name_0 = quotes_data.columns
columns_name_1 = quotes_data.columns
# For each ticker write securities_<ticker>.csv where every OTHER column is
# lagged by one period and only <ticker> itself is contemporaneous.
incrim = 0
quotes_data_w = quotes_data.shift(1)
for column in columns_name_0:
    # Undo the lag for the target column only.
    quotes_data_w[column] = quotes_data_w[column].shift(-1)
    quotes_data_w.to_csv("securities_" + column + ".csv",
                         sep=',', encoding='utf-8')
    # BUG FIX: re-apply the lag so the next iteration starts from a clean
    # state.  This line was commented out in the original, so un-shifts
    # accumulated: file k had columns 1..k un-shifted instead of only k.
    quotes_data_w[column] = quotes_data_w[column].shift(1)
    incrim += 1
import pandas as pd
import matplotlib.pyplot as plt
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.tools import FigureFactory as FF
import numpy as np
from scipy import stats, optimize, interpolate


def Normality_Test(L):
    """Run a Shapiro-Wilk normality test on sample ``L`` and publish the
    resulting table to plotly (filename 'pareto_file')."""
    x = L
    # BUG FIX: only `stats` is bound by `from scipy import stats`; the
    # original `scipy.stats.shapiro(x)` raised NameError.
    shapiro_results = stats.shapiro(x)
    matrix_sw = [
        ['', 'DF', 'Test Statistic', 'p-value'],
        ['Sample Data', len(x) - 1, shapiro_results[0], shapiro_results[1]],
    ]
    shapiro_table = FF.create_table(matrix_sw, index=True)
    py.iplot(shapiro_table, filename='pareto_file')


# Sanity demo on a deliberately non-normal (Pareto) sample.
L = np.random.pareto(3, 50)
Normality_Test(L)
allFiles = glob.glob("*_1.csv")


def Shapiro(df, temp_header):
    """Shapiro-Wilk normality test on column ``temp_header`` of ``df``;
    publishes the result table to plotly."""
    # First row of the diff series is NaN (from .shift(1)) — drop it.
    df = df.drop(df.index[0])
    x = df[temp_header].tolist()
    # BUG FIX: `scipy` is not a bound name here — use the imported `stats`.
    shapiro_results = stats.shapiro(x)
    matrix_sw = [
        ['', 'DF', 'Test Statistic', 'p-value'],
        ['Sample Data', len(x) - 1, shapiro_results[0], shapiro_results[1]],
    ]
    shapiro_table = FF.create_table(matrix_sw, index=True)
    py.iplot(shapiro_table, filename='shapiro-table_' + temp_header)


def Kolmogorov_Smirnov(df, temp_header):
    """Kolmogorov-Smirnov test of column ``temp_header`` against a normal
    CDF; publishes the result table to plotly."""
    df = df.drop(df.index[0])
    x = df[temp_header].tolist()
    # BUG FIX: same NameError as in Shapiro — `stats.kstest`, not scipy.stats.
    ks_results = stats.kstest(x, cdf='norm')
    matrix_ks = [
        ['', 'DF', 'Test Statistic', 'p-value'],
        ['Sample Data', len(x) - 1, ks_results[0], ks_results[1]],
    ]
    ks_table = FF.create_table(matrix_ks, index=True)
    py.iplot(ks_table, filename='ks-table_' + temp_header)


# Run both tests for every per-ticker diff file.  Column 2 is the ticker
# column (0 = leaked index, 1 = DATETIME).
for file_ in allFiles:
    df = pd.read_csv(file_, index_col=None, header=0)
    print(file_)
    temp_header = df.columns[2]
    Shapiro(df, temp_header)
    time.sleep(3)  # throttle plotly API calls
    Kolmogorov_Smirnov(df, temp_header)
    time.sleep(3)
# For every ticker compute Spearman correlations between its
# contemporaneous series and the lagged series of all other tickers
# (from securities_<ticker>.csv), and save that ticker's correlation
# column sorted in descending order as corr_<ticker>.csv.
# (The dead `incrim` counter of the original was removed.)
for column0 in columns_name_1:
    df000 = pd.read_csv('securities_' + column0 + ".csv",
                        index_col=None, header=0)
    df000 = df000.drop(['Unnamed: 0'], axis=1)  # leaked index column
    corr_spr = df000.corr('spearman')
    # Strongest correlations with column0 first.
    corr_spr = corr_spr.sort_values([column0], ascending=False)
    corr_spr_temp = corr_spr[column0]
    corr_spr_temp.to_csv("corr_" + column0 + ".csv",
                         sep=',', encoding='utf-8')
# Collect the per-ticker correlation files into one table and plot it.
# (The dead `incrim` counter and never-filled `list_corr` were removed.)
all_corr_Files = glob.glob("corr_*.csv")
quotes_data_end = pd.DataFrame()
for file_corr in all_corr_Files:
    df_corr = pd.read_csv(file_corr, index_col=None, header=0)
    # Column 0 holds the ticker name; column 1 the correlation values.
    temp_header = df_corr.columns[0]
    quotes_data_end[str(temp_header)] = df_corr.iloc[:, 1]

quotes_data_end.to_csv("_quotes_data_end.csv", sep=',', encoding='utf-8')
plt.figure()
quotes_data_end.plot()
# Source: https://habr.com/ru/post/334288/
# All Articles