from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow as tf import numpy as np import pandas as pd from sklearn.model_selection import train_test_split import os # . # CSV- Tensor Flow . , (0 1) def prepare_data(data_file_name): header = ['class', 'cap_shape', 'cap_surface', # CSV- , 'agaricus-lepiota.name' 'cap_color', 'bruises', 'odor', 'gill_attachment', 'gill_spacing', 'gill_size', 'gill_color', 'stalk_shape', 'stalk_root', 'stalk_surface_above_ring', 'stalk_surface_below_ring', 'stalk_color_above_ring', 'stalk_color_below_ring', 'veil_type', 'veil_color', 'ring_number', 'ring_type', 'spore_print_color', 'population', 'habitat'] df = pd.read_csv(data_file_name, sep=',', names=header) # "?" # df.replace('?', np.nan, inplace=True) df.dropna(inplace=True) # # 'e' 'p' . # , 0 , 1 - df['class'].replace('p', 0, inplace=True) df['class'].replace('e', 1, inplace=True) # , # . Tensor Flow # . Pandas "get_dummies" # cols_to_transform = header[1:] df = pd.get_dummies(df, columns=cols_to_transform) # # - () # () df_train, df_test = train_test_split(df, test_size=0.1) # num_train_entries = df_train.shape[0] num_train_features = df_train.shape[1] - 1 num_test_entries = df_test.shape[0] num_test_features = df_test.shape[1] - 1 # csv-, .. 
# # csv, Tensor Flow df_train.to_csv('train_temp.csv', index=False) df_test.to_csv('test_temp.csv', index=False) # , open("mushroom_train.csv", "w").write(str(num_train_entries) + "," + str(num_train_features) + "," + open("train_temp.csv").read()) open("mushroom_test.csv", "w").write(str(num_test_entries) + "," + str(num_test_features) + "," + open("test_temp.csv").read()) # , os.remove("train_temp.csv") os.remove("test_temp.csv") # Tensor Flow def get_test_inputs(): x = tf.constant(test_set.data) y = tf.constant(test_set.target) return x, y # Tensor Flow def get_train_inputs(): x = tf.constant(training_set.data) y = tf.constant(training_set.target) return x, y # # ( : , ) # , def new_samples(): return np.array([[0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1]], dtype=np.int) if __name__ == "__main__": MUSHROOM_DATA_FILE = "agaricus-lepiota.data" # Tensor Flow, # CSV- ( ) prepare_data(MUSHROOM_DATA_FILE) # training_set = tf.contrib.learn.datasets.base.load_csv_with_header( filename='mushroom_train.csv', target_dtype=np.int, features_dtype=np.int, target_column=0) test_set = tf.contrib.learn.datasets.base.load_csv_with_header( filename='mushroom_test.csv', target_dtype=np.int, features_dtype=np.int, target_column=0) # , ( ) feature_columns = [tf.contrib.layers.real_valued_column("", dimension=98)] # DNN- 10, 20 10 classifier = tf.contrib.learn.DNNClassifier( feature_columns=feature_columns, hidden_units=[10, 20, 10], n_classes=2, 
model_dir="/tmp/mushroom_model") # classifier.fit(input_fn=get_train_inputs, steps=2000) # accuracy_score = classifier.evaluate(input_fn=get_test_inputs, steps=1)["accuracy"] print("\n : {0:f}\n".format(accuracy_score)) # predictions = list(classifier.predict_classes(input_fn=new_samples)) print(" : {}\n" .format(predictions))
from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow as tf import numpy as np import pandas as pd from sklearn.model_selection import train_test_split import os
# Column names for the header-less data file; 'class' is the label column.
header = [
    'class',
    'cap_shape', 'cap_surface', 'cap_color',
    'bruises', 'odor',
    'gill_attachment', 'gill_spacing', 'gill_size', 'gill_color',
    'stalk_shape', 'stalk_root',
    'stalk_surface_above_ring', 'stalk_surface_below_ring',
    'stalk_color_above_ring', 'stalk_color_below_ring',
    'veil_type', 'veil_color',
    'ring_number', 'ring_type',
    'spore_print_color', 'population', 'habitat',
]

# Load the comma-separated file, labelling its columns with the names above.
df = pd.read_csv(data_file_name, sep=',', names=header)
# Treat '?' cells as missing values and discard every row that contains one.
df = df.replace('?', np.nan).dropna()
# Encode the label column as 0 for 'p' and 1 for 'e'.
# The result is assigned back to the frame: an in-place replace on the
# df['class'] selection may act on a temporary copy and leave df unchanged.
# Any label other than 'p'/'e' becomes NaN.
df['class'] = df['class'].map({'p': 0, 'e': 1})
# One-hot encode every attribute column (everything except the 'class' label).
cols_to_transform = header[1:]
# The explicit integer dtype keeps 0/1 in the indicator columns; pandas >= 2.0
# defaults to bool, which would be written to CSV as True/False and could not
# be parsed back as integers.
df = pd.get_dummies(df, columns=cols_to_transform, dtype=int)
# Hold out 10% of the rows as the test set; the rest is used for training.
# No random_state is passed, so the split differs from run to run.
df_train, df_test = train_test_split(df, test_size=0.1)
# Row and feature counts for each split; the label column is not a feature.
num_train_entries = df_train.shape[0]
num_train_features = df_train.shape[1] - 1
num_test_entries = df_test.shape[0]
num_test_features = df_test.shape[1] - 1

# Dump both splits to plain CSV files first.
df_train.to_csv('train_temp.csv', index=False)
df_test.to_csv('test_temp.csv', index=False)

# Re-write each CSV with "<rows>,<features>," prepended to its first line.
# The with-blocks guarantee that every file handle is closed.
with open("train_temp.csv") as temp_file:
    train_csv_text = temp_file.read()
with open("mushroom_train.csv", "w") as out_file:
    out_file.write(str(num_train_entries) + "," +
                   str(num_train_features) + "," + train_csv_text)

with open("test_temp.csv") as temp_file:
    test_csv_text = temp_file.read()
with open("mushroom_test.csv", "w") as out_file:
    out_file.write(str(num_test_entries) + "," +
                   str(num_test_features) + "," + test_csv_text)
# Load both splits from the prepared CSV files; column 0 holds the label.
# The builtin ``int`` replaces ``np.int``, which was only an alias for it and
# was removed in NumPy 1.24.
training_set = tf.contrib.learn.datasets.base.load_csv_with_header(
    filename='mushroom_train.csv',
    target_dtype=int,
    features_dtype=int,
    target_column=0)

test_set = tf.contrib.learn.datasets.base.load_csv_with_header(
    filename='mushroom_test.csv',
    target_dtype=int,
    features_dtype=int,
    target_column=0)
# A single real-valued input column of width 98.
feature_columns = [
    tf.contrib.layers.real_valued_column("", dimension=98),
]

# DNN with three hidden layers (10, 20 and 10 units) and two output classes;
# the model is stored under /tmp/mushroom_model.
classifier = tf.contrib.learn.DNNClassifier(
    hidden_units=[10, 20, 10],
    n_classes=2,
    feature_columns=feature_columns,
    model_dir="/tmp/mushroom_model",
)
def get_test_inputs():
    """Return the test features and labels as a pair of constant tensors.

    Reads the module-level ``test_set``.
    """
    return tf.constant(test_set.data), tf.constant(test_set.target)


def get_train_inputs():
    """Return the training features and labels as a pair of constant tensors.

    Reads the module-level ``training_set``.
    """
    return tf.constant(training_set.data), tf.constant(training_set.target)
# Train the classifier for 2000 steps on the inputs from get_train_inputs.
classifier.fit(input_fn=get_train_inputs, steps=2000)
# Run a single evaluation step on the test inputs and report the accuracy.
evaluation = classifier.evaluate(input_fn=get_test_inputs, steps=1)
accuracy_score = evaluation["accuracy"]
print("\n : {0:f}\n".format(accuracy_score))
def new_samples():
    """Return two hand-written one-hot encoded samples as a (2, 98) int array."""
    # ``np.int`` was only an alias of the builtin ``int`` and was removed in
    # NumPy 1.24; the builtin yields the same dtype on every NumPy version.
    return np.array(
        [[0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
          0, 0, 0, 1, 0, 0, 0, 0, 1, 0,
          0, 0, 1, 0, 0, 0, 0, 0, 1, 1,
          0, 1, 0, 0, 0, 0, 0, 1, 0, 0,
          0, 0, 1, 0, 1, 0, 0, 0, 0, 1,
          0, 0, 0, 1, 0, 0, 1, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
          1, 1, 0, 0, 1, 0, 0, 1, 0, 0,
          1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 1, 0, 1, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
          0, 0, 0, 0, 1, 0, 0, 0, 1, 0,
          0, 0, 0, 0, 0, 1, 0, 0, 1, 1,
          0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
          0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
          1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
          0, 1, 0, 0, 0, 0, 0, 0, 1, 0,
          1, 1, 0, 0, 1, 0, 0, 0, 0, 1,
          0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
          1, 0, 0, 0, 0, 0, 0, 1]],
        dtype=int)
# Classify the hand-written samples. predict_classes is used, as in the full
# listing of this script, so the result is explicitly a list of class labels
# rather than relying on the default output of the generic predict() call.
predictions = list(classifier.predict_classes(input_fn=new_samples))
print(" : {}\n".format(predictions))
: [0, 1]
Source: https://habr.com/ru/post/419917/