"The power of machine learning is all around us; its methods surround us and bind us. It is everywhere: between me, you, the decision tree, the lasso, the ridge and the support vector." That, probably, is what Yoda would have told me if he had taught me the ways of Data Science.

The task below is to classify luminaires by the shape of their luminous intensity curves. Those curves are stored in photometric files; here is an example of one in the IESNA LM-63-1995 format:
IESNA:LM-63-1995
[TEST] SL20695
[MANUFAC] PHILIPS
[LUMCAT]
[LUMINAIRE] NA
[LAMP] 3 Step Switch A60 220-240V9.5W-60W 806lm 150D 3000K-6500K Non Dim
[BALLAST] NA
[OTHER] B-Angle = 0.00 B-Tilt = 0.00 2015-12-07
TILT=NONE
1 806.00 1 181 1 1 2 -0.060 -0.060 0.120
1.0 1.0 9.50
0.00 1.00 2.00 3.00 4.00 5.00 6.00 7.00 8.00 9.00
10.00 11.00 12.00 13.00 14.00 15.00 16.00 17.00 18.00 19.00
20.00 21.00 22.00 23.00 24.00 25.00 26.00 27.00 28.00 29.00
30.00 31.00 32.00 33.00 34.00 35.00 36.00 37.00 38.00 39.00
40.00 41.00 42.00 43.00 44.00 45.00 46.00 47.00 48.00 49.00
50.00 51.00 52.00 53.00 54.00 55.00 56.00 57.00 58.00 59.00
60.00 61.00 62.00 63.00 64.00 65.00 66.00 67.00 68.00 69.00
70.00 71.00 72.00 73.00 74.00 75.00 76.00 77.00 78.00 79.00
80.00 81.00 82.00 83.00 84.00 85.00 86.00 87.00 88.00 89.00
90.00 91.00 92.00 93.00 94.00 95.00 96.00 97.00 98.00 99.00
100.00 101.00 102.00 103.00 104.00 105.00 106.00 107.00 108.00 109.00
110.00 111.00 112.00 113.00 114.00 115.00 116.00 117.00 118.00 119.00
120.00 121.00 122.00 123.00 124.00 125.00 126.00 127.00 128.00 129.00
130.00 131.00 132.00 133.00 134.00 135.00 136.00 137.00 138.00 139.00
140.00 141.00 142.00 143.00 144.00 145.00 146.00 147.00 148.00 149.00
150.00 151.00 152.00 153.00 154.00 155.00 156.00 157.00 158.00 159.00
160.00 161.00 162.00 163.00 164.00 165.00 166.00 167.00 168.00 169.00
170.00 171.00 172.00 173.00 174.00 175.00 176.00 177.00 178.00 179.00
180.00
0.00
137.49 137.43 137.41 137.32 137.23 137.10 136.97
136.77 136.54 136.27 136.01 135.70 135.37 135.01
134.64 134.27 133.85 133.37 132.93 132.42 131.93
131.41 130.87 130.27 129.68 129.08 128.44 127.78
127.11 126.40 125.69 124.92 124.18 123.43 122.63
121.78 120.89 120.03 119.20 118.26 117.34 116.40
115.46 114.49 113.53 112.56 111.52 110.46 109.42
108.40 107.29 106.23 105.13 104.03 102.91 101.78
100.64 99.49 98.35 97.15 95.98 94.80 93.65
92.43 91.23 89.99 88.79 87.61 86.42 85.17
83.96 82.76 81.49 80.31 79.13 77.91 76.66
75.46 74.29 73.07 71.87 70.67 69.49 68.33
67.22 66.00 64.89 63.76 62.61 61.46 60.36
59.33 58.19 57.11 56.04 54.98 53.92 52.90
51.84 50.83 49.82 48.81 47.89 46.88 45.92
44.99 44.03 43.11 42.18 41.28 40.39 39.51
38.62 37.74 36.93 36.09 35.25 34.39 33.58
32.79 32.03 31.25 30.46 29.70 28.95 28.23
27.48 26.79 26.11 25.36 24.71 24.06 23.40
22.73 22.08 21.43 20.84 20.26 19.65 19.04
18.45 17.90 17.34 16.83 16.32 15.78 15.28
14.73 14.25 13.71 13.12 12.56 11.94 11.72
11.19 10.51 9.77 8.66 6.96 4.13 0.91
0.20 0.17 0.16 0.19 0.19 0.20 0.16
0.20 0.18 0.20 0.20 0.21 0.20 0.16
0.20 0.17 0.19 0.20 0.19 0.18
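In the LM-63 layout above, the line after TILT=NONE gives the number of lamps, the lumens per lamp, a candela multiplier, and the counts of vertical and horizontal angles (here 181 and 1); then come the angle values and finally the candela readings. The train.csv/test.csv tables used below hold intensities sampled every 10 degrees, so it is worth sketching how a raw file could be reduced to such a row. This is a minimal sketch assuming one horizontal plane and a 10-degree step; the helper name ies_to_features is mine, not from the original post:

import numpy as np

def ies_to_features(path, step=10):
    # hypothetical helper: reduce an LM-63 photometric file to intensities
    # at polar angles 0, step, ..., 180 for the first horizontal plane
    with open(path) as f:
        lines = f.read().splitlines()
    # data starts right after the TILT line
    i = next(k for k, ln in enumerate(lines) if ln.startswith('TILT')) + 1
    head = lines[i].split()  # lamps, lumens, multiplier, n_vert, n_horiz, ...
    multiplier, n_vert, n_horiz = float(head[2]), int(head[3]), int(head[4])
    # after one more header line, the rest of the file is a flat number stream:
    # vertical angles, horizontal angles, then candela values plane by plane
    nums = [float(v) for v in ' '.join(lines[i + 2:]).split()]
    v_angles = np.array(nums[:n_vert])
    candelas = multiplier * np.array(nums[n_vert + n_horiz:
                                          n_vert + n_horiz + n_vert * n_horiz])
    first_plane = candelas[:n_vert]  # the example file has a single C-plane
    idx = [int(np.argmin(np.abs(v_angles - a))) for a in range(0, 181, step)]
    return first_plane[idx]

Applied to the file above, this returns 19 intensities at 0, 10, ..., 180 degrees, matching the x = np.arange(0, 190, 10) grid used in the plots below.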
# import libraries
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.manifold import TSNE
from scipy.stats import uniform as sp_rand
from IPython.display import display
%matplotlib inline

# read the data
train_df = pd.read_csv('lidc_data\\train.csv', sep='\t', index_col=None)
test_df = pd.read_csv('lidc_data\\test.csv', sep='\t', index_col=None)
print('train shape {0}, test shape {1}'.format(train_df.shape, test_df.shape))
display('train:', train_df.head(4), 'test:', test_df.head(4))

# separate the features from the labels
X_train = np.array(train_df.iloc[:, 1:-1])
X_test = np.array(test_df.iloc[:, 1:-1])
y_train = np.array(train_df['label'])
y_test = np.array(test_df['label'])
# draw the class distributions
test_n_max = test_df.label.value_counts().max()
train_n_max = train_df.label.value_counts().max()
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 5))
train_df.label.plot.hist(ax=axes[0], title='train data class distribution',
                         bins=7, yticks=np.arange(0, train_n_max + 2, 2),
                         xticks=np.unique(train_df.label.values))
test_df.label.plot.hist(ax=axes[1], title='test data class distribution',
                        bins=7, yticks=np.arange(0, test_n_max + 2, 2),
                        xticks=np.unique(test_df.label.values))
# scale all the data for the final prediction
scl = MinMaxScaler()
X_train_scl = scl.fit_transform(X_train.T).T
X_test_scl = scl.fit_transform(X_test.T).T

# scale a held-out part of the train data for a preliminary test
X_train_part, X_test_part, y_train_part, y_test_part = train_test_split(
    X_train, y_train, test_size=0.20, stratify=y_train, random_state=42)
scl = MinMaxScaler()
X_train_part_scl = scl.fit_transform(X_train_part.T).T
X_test_part_scl = scl.fit_transform(X_test_part.T).T
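Note the transpose trick above: MinMaxScaler scales each column, so fitting it on X.T and transposing back scales each row, i.e. each individual curve, into [0, 1] on its own. That throws away absolute intensity and keeps only the shape of the curve. A minimal numpy equivalent, just to make the intent explicit (scale_rows is my name for it, not the article's):

import numpy as np

def scale_rows(X):
    # per-curve min-max scaling: each row is mapped to [0, 1] independently,
    # so only the shape of the curve remains, not its absolute magnitude
    lo = X.min(axis=1, keepdims=True)
    hi = X.max(axis=1, keepdims=True)
    return (X - lo) / (hi - lo)

# should match MinMaxScaler().fit_transform(X_train.T).T up to float error:
# np.allclose(scale_rows(X_train), X_train_scl)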
# not scaled
x = np.arange(0, 190, 10)
plt.figure(figsize=(17, 10))
plt.plot(x, X_train[13])
plt.plot(x, X_train[109])
plt.plot(x, X_train[68])
plt.plot(x, X_train[127])
c1 = mpatches.Patch(color='blue', label='class 1')
c2 = mpatches.Patch(color='green', label='class 2')
c3 = mpatches.Patch(color='orange', label='class 3')
c4 = mpatches.Patch(color='red', label='class 4')
plt.legend(handles=[c1, c2, c3, c4])
plt.xlabel('polar angle')
plt.ylabel('luminous intensity')
plt.title('luminous intensity curves (not scaled)')
# scaled
x = np.arange(0, 190, 10)
plt.figure(figsize=(17, 10))
plt.plot(x, X_train_scl[13], color='blue')
plt.plot(x, X_train_scl[109], color='green')
plt.plot(x, X_train_scl[68], color='orange')
plt.plot(x, X_train_scl[127], color='red')
c1 = mpatches.Patch(color='blue', label='class 1')
c2 = mpatches.Patch(color='green', label='class 2')
c3 = mpatches.Patch(color='orange', label='class 3')
c4 = mpatches.Patch(color='red', label='class 4')
plt.legend(handles=[c1, c2, c3, c4])
plt.xlabel('polar angle')
plt.ylabel('luminous intensity')
plt.title('luminous intensity curves (scaled)')
# t-SNE on the raw features
colors = ["#190aff", "#0fff0f", "#ff641e", "#ff3232"]
tsne = TSNE(random_state=42)
d_tsne = tsne.fit_transform(X_train)
plt.figure(figsize=(10, 10))
plt.xlim(d_tsne[:, 0].min(), d_tsne[:, 0].max() + 10)
plt.ylim(d_tsne[:, 1].min(), d_tsne[:, 1].max() + 10)
for i in range(len(X_train)):
    # draw each sample's class label at its embedded position, colored by class
    plt.text(d_tsne[i, 0], d_tsne[i, 1], str(y_train[i]),
             color=colors[y_train[i] - 1],
             fontdict={'weight': 'bold', 'size': 10})
plt.xlabel("t-SNE feature 0")
plt.ylabel("t-SNE feature 1")
# t-SNE on the scaled features
d_tsne = tsne.fit_transform(X_train_scl)
plt.figure(figsize=(10, 10))
plt.xlim(d_tsne[:, 0].min(), d_tsne[:, 0].max() + 10)
plt.ylim(d_tsne[:, 1].min(), d_tsne[:, 1].max() + 10)
for i in range(len(X_train_scl)):
    # same annotation as above, now for the per-curve scaled data
    plt.text(d_tsne[i, 0], d_tsne[i, 1], str(y_train[i]),
             color=colors[y_train[i] - 1],
             fontdict={'weight': 'bold', 'size': 10})
plt.xlabel("t-SNE feature 0")
plt.ylabel("t-SNE feature 1")
# predict on the held-out part of the train data
# not scaled
svm = SVC(kernel='rbf', random_state=42, gamma=2, C=1.1)
svm.fit(X_train_part, y_train_part)
pred = svm.predict(X_test_part)
print("\n not scaled: \n results (pred, real): \n", list(zip(pred, y_test_part)))
print('not scaled: accuracy = {}, f1-score = {}'.format(
    accuracy_score(y_test_part, pred), f1_score(y_test_part, pred, average='macro')))

# scaled
svm = SVC(kernel='rbf', random_state=42, gamma=2, C=1.1)
svm.fit(X_train_part_scl, y_train_part)
pred = svm.predict(X_test_part_scl)
print("\n scaled: \n results (pred, real): \n", list(zip(pred, y_test_part)))
print('scaled: accuracy = {}, f1-score = {}'.format(
    accuracy_score(y_test_part, pred), f1_score(y_test_part, pred, average='macro')))
not scaled:
results (pred, real):
[(2, 3), (2, 3), (2, 2), (3, 3), (2, 1), (2, 3), (2, 3), (2, 2), (2, 1), (2, 2), (1, 1), (3, 3), (2, 2), (2, 1), (2, 4), (3, 3), (2, 2), (2, 4), (2, 1), (2, 2), (4, 4), (2, 2), (4, 4), (2, 4), (2, 3), (2, 1), (2, 1), (2, 1), (2, 2)]
not scaled: accuracy = 0.4827586206896552, f1-score= 0.46380859284085096
scaled:
results (pred, real):
[(3, 3), (3, 3), (2, 2), (3, 3), (1, 1), (3, 3), (3, 3), (2, 2), (1, 1), (2, 2), (1, 1), (3, 3), (2, 2), (1, 1), (4, 4), (3, 3), (2, 2), (4, 4), (1, 1), (2, 2), (4, 4), (2, 2), (4, 4), (4, 4), (3, 3), (1, 1), (1, 1), (1, 1), (2, 2)]
scaled: accuracy = 1.0, f1-score= 1.0
# final prediction on the full test data
svm.fit(X_train_scl, y_train)
pred = svm.predict(X_test_scl)
print("\n results (pred, real): \n", list(zip(pred, y_test)))
print('scaled: accuracy = {}, f1-score = {}'.format(
    accuracy_score(y_test, pred), f1_score(y_test, pred, average='macro')))
results (pred, real):
[(1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (3, 3), (3, 3), (3, 3), (3, 3), (3, 3), (3, 3), (3, 3), (3, 3), (3, 3), (3, 3), (3, 3), (3, 3), (3, 3), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4)]
scaled: accuracy = 1.0, f1-score= 1.0
# random forest, with and without scaling
rfc = RandomForestClassifier(random_state=42, n_jobs=-1, n_estimators=100)
rfc = rfc.fit(X_train, y_train)
rpred = rfc.predict(X_test)
print("\n not scaled: \n results (pred, real): \n", list(zip(rpred, y_test)))
print('not scaled: accuracy = {}, f1-score = {}'.format(
    accuracy_score(y_test, rpred), f1_score(y_test, rpred, average='macro')))

rfc = rfc.fit(X_train_scl, y_train)
rpred = rfc.predict(X_test_scl)
print("\n scaled: \n results (pred, real): \n", list(zip(rpred, y_test)))
print('scaled: accuracy = {}, f1-score = {}'.format(
    accuracy_score(y_test, rpred), f1_score(y_test, rpred, average='macro')))
not scaled:
results (pred, real):
[(1, 1), (1, 1), (2, 1), (1, 1), (1, 1), (2, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (2, 2), (2, 2), (2, 2), (1, 2), (2, 2), (2, 2), (2, 2), (2, 2), (3, 2), (2, 2), (3, 2), (2, 2), (4, 2), (3, 3), (3, 3), (3, 3), (3, 3), (3, 3), (3, 3), (3, 3), (3, 3), (3, 3), (3, 3), (4, 3), (3, 3), (3, 3), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4)]
not scaled: accuracy = 0.8541666666666666, f1-score= 0.8547222222222222
scaled:
results (pred, real):
[(1, 1), (1, 1), (2, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (3, 3), (3, 3), (3, 3), (3, 3), (3, 3), (3, 3), (3, 3), (3, 3), (3, 3), (3, 3), (3, 3), (3, 3), (3, 3), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4), (4, 4)]
scaled: accuracy = 0.9791666666666666, f1-score= 0.9807407407407408
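A single train/test split can flatter either model. Since StratifiedKFold is already imported above, a cross-validated comparison on the scaled training data is a natural sanity check. This snippet is my addition, not part of the original post, and the fold count is arbitrary:

from sklearn.model_selection import cross_val_score

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
for name, clf in [('SVC', SVC(kernel='rbf', random_state=42, gamma=2, C=1.1)),
                  ('RF', RandomForestClassifier(random_state=42, n_estimators=100))]:
    # macro-averaged F1 per fold on the per-curve scaled training data
    scores = cross_val_score(clf, X_train_scl, y_train, cv=cv, scoring='f1_macro')
    print('{}: mean f1_macro = {:.3f} +/- {:.3f}'.format(name, scores.mean(), scores.std()))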
Source: https://habr.com/ru/post/338124/