import numpy as np
from urllib import request  # Python 3; the original Python 2 code called urllib.urlopen

# URL of the Pima Indians Diabetes dataset
url = "http://archive.ics.uci.edu/ml/machine-learning-databases/pima-indians-diabetes/pima-indians-diabetes.data"
# download the file
raw_data = request.urlopen(url)
# load the CSV file as a numpy matrix
dataset = np.loadtxt(raw_data, delimiter=",")
# separate the eight input attributes (columns 0-7) from the target (column 8);
# the original sliced 0:7, which silently dropped the last attribute
X = dataset[:, 0:8]
y = dataset[:, 8]
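All of the examples below fit and score models on the full dataset, as in the original article. For a more honest performance estimate you would normally hold out a test set first; a minimal sketch using scikit-learn's train_test_split (the split ratio and random_state are arbitrary choices, not from the original):

from sklearn.model_selection import train_test_split

# reserve 25% of the rows for evaluation; random_state fixes the shuffle
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42)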
from sklearn import preprocessing

# rescale each sample (row) to unit norm
normalized_X = preprocessing.normalize(X)
# standardize each feature (column) to zero mean and unit variance
standardized_X = preprocessing.scale(X)
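The function-style calls above transform the data in one pass. When the same transformation has to be reapplied to new data later, the estimator-style API is the usual choice; a sketch with StandardScaler, reusing the hypothetical X_train/X_test split introduced above:

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
# learn per-feature mean and variance on the training rows only
X_train_scaled = scaler.fit_transform(X_train)
# apply the same statistics to the test rows to avoid leakage
X_test_scaled = scaler.transform(X_test)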
from sklearn.ensemble import ExtraTreesClassifier

model = ExtraTreesClassifier()
model.fit(X, y)
# display the relative importance of each attribute
print(model.feature_importances_)
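The importances are only printed here; to actually drop the weaker attributes, the fitted forest can be handed to SelectFromModel. A sketch, keeping the default "mean importance" threshold (an arbitrary but standard choice):

from sklearn.feature_selection import SelectFromModel

# keep only the attributes whose importance exceeds the mean importance
selector = SelectFromModel(model, prefit=True)
X_reduced = selector.transform(X)
print(X_reduced.shape)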
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()
# create the RFE model and select 3 attributes
# (n_features_to_select is keyword-only in current scikit-learn)
rfe = RFE(model, n_features_to_select=3)
rfe = rfe.fit(X, y)
# summarize the selection of the attributes
print(rfe.support_)
print(rfe.ranking_)
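rfe.support_ is a boolean mask over the columns; if the column indices themselves are wanted, one line of NumPy recovers them (purely illustrative):

import numpy as np

# indices of the attributes RFE kept
print(np.where(rfe.support_)[0])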
from sklearn import metrics
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()
model.fit(X, y)
print(model)
# make predictions (here on the training data itself)
expected = y
predicted = model.predict(X)
# summarize the fit of the model
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))
from sklearn import metrics
from sklearn.naive_bayes import GaussianNB

model = GaussianNB()
model.fit(X, y)
print(model)
# make predictions (here on the training data itself)
expected = y
predicted = model.predict(X)
# summarize the fit of the model
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))
from sklearn import metrics
from sklearn.neighbors import KNeighborsClassifier

# fit a k-nearest neighbor model to the data
model = KNeighborsClassifier()
model.fit(X, y)
print(model)
# make predictions (here on the training data itself)
expected = y
predicted = model.predict(X)
# summarize the fit of the model
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier

# fit a CART model to the data
model = DecisionTreeClassifier()
model.fit(X, y)
print(model)
# make predictions (here on the training data itself)
expected = y
predicted = model.predict(X)
# summarize the fit of the model
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))
from sklearn import metrics
from sklearn.svm import SVC

# fit an SVM model to the data
model = SVC()
model.fit(X, y)
print(model)
# make predictions (here on the training data itself)
expected = y
predicted = model.predict(X)
# summarize the fit of the model
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))
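Each of the five classifiers above is scored on the very data it was trained on, which flatters every model. A hedged sketch comparing them with 5-fold cross-validation instead (the fold count and the max_iter value are arbitrary choices, the latter just to silence a convergence warning):

from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

models = [LogisticRegression(max_iter=1000), GaussianNB(),
          KNeighborsClassifier(), DecisionTreeClassifier(), SVC()]
for m in models:
    # mean accuracy across 5 train/validation splits
    scores = cross_val_score(m, X, y, cv=5)
    print(type(m).__name__, scores.mean())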
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV  # sklearn.grid_search was removed in 0.20

# prepare a range of alpha values to test
alphas = np.array([1, 0.1, 0.01, 0.001, 0.0001, 0])
# create and fit a ridge regression model, testing each alpha
model = Ridge()
grid = GridSearchCV(estimator=model, param_grid=dict(alpha=alphas))
grid.fit(X, y)
print(grid)
# summarize the results of the grid search
print(grid.best_score_)
print(grid.best_estimator_.alpha)
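The fitted grid object also records every combination it tried; both attributes below are standard GridSearchCV API:

# the winning parameter combination as a dict
print(grid.best_params_)
# mean cross-validated score for each alpha tried
print(grid.cv_results_['mean_test_score'])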
from scipy.stats import uniform as sp_rand
from sklearn.linear_model import Ridge
from sklearn.model_selection import RandomizedSearchCV  # moved here from sklearn.grid_search

# prepare a uniform distribution to sample for the alpha parameter
param_grid = {'alpha': sp_rand()}
# create and fit a ridge regression model, testing random alpha values
model = Ridge()
rsearch = RandomizedSearchCV(estimator=model, param_distributions=param_grid,
                             n_iter=100)
rsearch.fit(X, y)
print(rsearch)
# summarize the results of the random parameter search
print(rsearch.best_score_)
print(rsearch.best_estimator_.alpha)
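The uniform distribution samples alpha on a linear scale over [0, 1]. When sensible values span several orders of magnitude, a log-uniform distribution is a common alternative; a sketch assuming scipy >= 1.4, where loguniform is available (the bounds and random_state are arbitrary choices):

from scipy.stats import loguniform
from sklearn.linear_model import Ridge
from sklearn.model_selection import RandomizedSearchCV

# sample alpha between 1e-4 and 1 on a logarithmic scale;
# random_state makes the 100 draws reproducible
param_dist = {'alpha': loguniform(1e-4, 1.0)}
rsearch = RandomizedSearchCV(Ridge(), param_distributions=param_dist,
                             n_iter=100, random_state=42)
rsearch.fit(X, y)
print(rsearch.best_estimator_.alpha)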
Source: https://habr.com/ru/post/247751/