import csv
from contextlib import closing

from requests import get
from requests.exceptions import RequestException
from bs4 import BeautifulSoup


def query_url(url):
    # Download the page at `url` and return its content, or None on failure.
    try:
        with closing(get(url, stream=True)) as resp:
            if is_good_response(resp):
                return resp.content
            else:
                return None
    except RequestException as e:
        log_error('Error during requests to {0} : {1}'.format(url, str(e)))
        return None


def is_good_response(resp):
    # Accept only successful responses that actually contain HTML.
    content_type = resp.headers['Content-Type'].lower()
    return (resp.status_code == 200
            and content_type is not None
            and content_type.find('html') > -1)


def log_error(e):
    print(e)


def parse_ranks(raw_html, year):
    # Extract (year, country, coefficient) tuples from the ranking table:
    # data rows have exactly 10 cells, with the country in the 3rd and the
    # coefficient in the 8th.
    html = BeautifulSoup(raw_html, 'html.parser')
    ranks = []
    for tr in html.select('tr'):
        tds = tr.select('td')
        if len(tds) == 10:
            rank = (year, tds[2].text, tds[7].text)
            ranks.append(rank)
    return ranks


def get_url(year):
    # The site splits the rankings by the calculation method used in each era.
    if year in range(1960, 1999):
        method = 1
    elif year in range(1999, 2004):
        method = 2
    elif year in range(2004, 2009):
        method = 3
    elif year in range(2009, 2018):
        method = 4
    elif year in range(2018, 2019):
        method = 5
    return f"https://kassiesa.home.xs4all.nl/bert/uefa/data/method{method}/crank{year}.html"


ranks = []
for year in range(1960, 2019):
    url = get_url(year)
    print(url)
    raw_html = query_url(url)
    if raw_html is not None:
        rank = parse_ranks(raw_html, year)
        ranks += rank

with open('team_ranks.csv', 'w') as f:
    writer = csv.writer(f, lineterminator='\n')
    writer.writerow(['year', 'country', 'rank'])
    for rank in ranks:
        writer.writerow(rank)
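The prediction step further down calls a get_team_rank(country, year) helper that is not shown in this excerpt. A minimal sketch of what it could look like, assuming it simply looks a country's coefficient up in the team_ranks.csv file written above and that the stored value parses as a number:

import csv

def get_team_rank(country, year):
    # Hypothetical helper (not part of the excerpt): return the scraped
    # UEFA coefficient for `country` in `year`, or None if it is missing.
    with open('team_ranks.csv') as f:
        for row in csv.DictReader(f):
            if row['country'] == country and int(row['year']) == year:
                return float(row['rank'])
    return None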
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

games = pd.read_csv('games.csv')

# Keep only Sweden's games
SwedenGames = games[(games.teamTitle == 'Sweden')]

# The match outcome is the target
y = SwedenGames['score']
y = y.astype('int')

# Drop the target and the text columns from the feature matrix
X = SwedenGames.drop(['score', 'teamTitle', 'againstTitle'], axis=1)

# Hold out a quarter of the games for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

mlp = MLPClassifier()
mlp.fit(X_train, y_train)

predictions = mlp.predict(X_test)
print('Accuracy: {:.2}'.format(accuracy_score(y_test, mlp.predict(X_test))))
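A plain MLPClassifier is sensitive to the scale of its inputs, so the score above can depend heavily on how the features are measured. A minimal sketch (not part of the original article) of the same fit with standardisation added in front of the network, reusing the splits produced above:

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# Standardise the numeric features before they reach the network
scaled_mlp = make_pipeline(StandardScaler(), MLPClassifier(max_iter=1000))
scaled_mlp.fit(X_train, y_train)
print('Accuracy (scaled): {:.2}'.format(accuracy_score(y_test, scaled_mlp.predict(X_test))))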
from catboost import CatBoostClassifier, Pool
from sklearn.metrics import accuracy_score

# Columns that hold categorical features
categorical_features_indices = [1, 2, 4]

train_pool = Pool(X_train, y_train, cat_features=categorical_features_indices)
validate_pool = Pool(X_test, y_test, cat_features=categorical_features_indices)

# Parameters selected with GridSearchCV
best_params = {
    'iterations': 500,
    'depth': 10,
    'learning_rate': 0.1,
    'l2_leaf_reg': 1,
    'eval_metric': 'Accuracy',
    'random_seed': 42,
    'logging_level': 'Silent',
    'use_best_model': True
}

cb_model = CatBoostClassifier(**best_params)
cb_model.fit(train_pool, eval_set=validate_pool)

print('Accuracy: {:.2}'.format(accuracy_score(y_test, cb_model.predict(X_test))))
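The comment above says the hyper-parameters came out of a GridSearchCV run; the article does not show that search, but a rough sketch of it could look like this (the grid values are illustrative, not the article's, and CatBoostClassifier is used through its scikit-learn-compatible interface):

from catboost import CatBoostClassifier
from sklearn.model_selection import GridSearchCV

param_grid = {
    'depth': [6, 8, 10],
    'learning_rate': [0.03, 0.1],
    'l2_leaf_reg': [1, 3, 5],
}

base_model = CatBoostClassifier(
    iterations=500,
    eval_metric='Accuracy',
    random_seed=42,
    logging_level='Silent',
    cat_features=categorical_features_indices,
)

# 3-fold search over the illustrative grid on the training split
search = GridSearchCV(base_model, param_grid, scoring='accuracy', cv=3)
search.fit(X_train, y_train)
print(search.best_params_)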
def get_prediction(country, against):
    y = SwedenGames['score']
    y = y.astype('int')
    X = SwedenGames.drop(['score', 'againstTitle'], axis=1)
    # Pool over the full data; the cb_model fitted above is reused as-is
    train_pool = Pool(X, y, cat_features=[1, 2, 4])

    # One query row in the same order as the training columns
    query = [get_team_rank(country, 2018),
             0,
             1 if country == 'Russia' else 0,
             get_team_rank(against, 2018),
             against]
    return cb_model.predict_proba([query])[0]


team_1 = 'Belgium'
team_2 = 'France'

result = get_prediction(team_1, team_2)

if result[0] > result[1]:
    print(f"{team_1} loses to {team_2} with probability {result[0]*100:.1f}%")
else:
    print(f"{team_1} beats {team_2} with probability {result[1]*100:.1f}%")
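The same helper can be applied to other pairings in one loop; a small usage sketch, assuming (as in the branch above) that the second probability corresponds to a win for the first team named:

# Hypothetical extra fixtures, not taken from the article
for home, away in [('Belgium', 'France'), ('Sweden', 'England'), ('Croatia', 'Russia')]:
    p = get_prediction(home, away)
    print(f"{home} vs {away}: estimated win probability {p[1]*100:.1f}%")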
Source: https://habr.com/ru/post/427273/