"""Fit a RidgeCV model on the training CSVs and write test-set predictions.

Reads ``x_train.csv``, ``y_train.csv`` and ``x_test.csv`` from the working
directory, cleans and integer-encodes the features, adds log-size and
aspect-ratio features derived from the ``m``, ``k`` and ``n`` columns, fits
ridge regression (5-fold CV) on the log of the target and writes the
back-transformed predictions to ``answer_habr_RidgeCV.txt``.
"""
import numpy as np
import pandas as pd
from sklearn import linear_model


def write_answer(data, str_add=''):
    """Write the items of *data* to ``answer<str_add>.txt``, one per line.

    Args:
        data: Iterable of values; each one is converted with ``str``.
        str_add: Suffix inserted between ``answer`` and ``.txt``.
    """
    with open("answer"+str(str_add)+".txt", "w") as fout:
        fout.write('\n'.join(map(str, data)))


def convert_cat(inf, inf_data):
    """Return the index label of the first entry of *inf_data* equal to *inf*.

    Args:
        inf: Category value to look up.
        inf_data: ``pandas.Series`` holding the known category values.

    Raises:
        IndexError: If *inf* does not occur in *inf_data*.
    """
    return inf_data[inf_data == inf].index[0]


def _add_size_features(frame):
    """Add log-product and max/min ratio features of m, k, n to *frame* in place."""
    frame['log_mn'] = np.log(frame['m'] * frame['n'])
    frame['log_mk'] = np.log(np.int64(frame['m'] * frame['k']))
    frame['log_kn'] = np.log(np.int64(frame['k'] * frame['n']))
    # Ratio of the larger to the smaller dimension within each pair.
    frame['min_max_a'] = (np.float64(frame.loc[:, ['m', 'k']].max(axis=1))
                          / frame.loc[:, ['m', 'k']].min(axis=1))
    frame['min_max_b'] = (np.float64(frame.loc[:, ['n', 'k']].max(axis=1))
                          / frame.loc[:, ['n', 'k']].min(axis=1))


X = pd.read_csv('x_train.csv')
y = pd.read_csv('y_train.csv')
X_check = pd.read_csv('x_test.csv')

# Coerce memFreq to numeric (unparseable entries become NaN), then fill the
# gaps with a fixed value.
# NOTE(review): 525.576 is hard-coded -- presumably the memFreq mean computed
# offline; confirm against the data.
mean_memFreq = 525.576
for frame in (X, X_check):
    frame['memFreq'] = pd.to_numeric(frame['memFreq'], errors='coerce')
    # NOTE: fillna is applied to the whole frame, so NaN in *any* column is
    # replaced by the same value, not only in memFreq.
    frame.fillna(value=mean_memFreq, inplace=True)

# Drop columns that take a single value in the test set. The list is built
# first so the frames are not mutated while their columns are being scanned.
constant_cols = [c for c in X.columns if len(np.unique(X_check[c])) == 1]
X.drop(constant_cols, axis=1, inplace=True)
X_check.drop(constant_cols, axis=1, inplace=True)

# Replace each categorical value by its position among the sorted unique
# values seen in the training set. A test value absent from the training set
# makes convert_cat raise IndexError.
for col in ('cpuArch', 'memType', 'memtRFC', 'os', 'cpuFull'):
    categories = pd.Series(np.unique(X[col]))
    X[col] = X[col].apply(lambda value: convert_cat(value, categories))
    X_check[col] = X_check[col].apply(lambda value: convert_cat(value, categories))

# Not used further in this script; kept from the original.
perf_features = X.columns[3:]

# Derived features. The original referenced undefined names Xm / Xn / Xk for
# the training frame; both frames now go through the same helper.
_add_size_features(X)
_add_size_features(X_check)

# Fit on the log of the target and undo the transform on the predictions.
model = linear_model.RidgeCV(cv=5)
model.fit(X, np.log(y))
y_answer = np.exp(model.predict(X_check))

# Flatten to 1-D regardless of the number of test rows (the original
# hard-coded reshape(4947)).
write_answer(np.ravel(y_answer), '_habr_RidgeCV')
# Source: https://habr.com/ru/post/305872/
# All Articles