




"""Scrape historical UEFA country-coefficient tables (1960-2018) from
Bert Kassies' archive and save them as team_ranks.csv."""
import csv
from contextlib import closing

from requests import get
from requests.exceptions import RequestException
from bs4 import BeautifulSoup


def query_url(url):
    """Download *url* and return the raw HTML body, or None on any failure.

    Network errors are logged and swallowed so one bad year does not
    abort the whole scrape.
    """
    try:
        # closing() guarantees the streamed connection is released even
        # when we return early.
        with closing(get(url, stream=True)) as resp:
            if is_good_response(resp):
                return resp.content
            return None
    except RequestException as e:
        log_error('Error during requests to {0} : {1}'.format(url, str(e)))
        return None


def is_good_response(resp):
    """Return True when *resp* is an HTTP 200 with an HTML content type."""
    # .get() avoids a KeyError when the server omits the header.
    content_type = resp.headers.get('Content-Type', '').lower()
    return resp.status_code == 200 and content_type.find('html') > -1


def log_error(e):
    """Minimal error reporter; replace with the logging module if needed."""
    print(e)


def parse_ranks(raw_html, year):
    """Extract (year, country, rank) tuples from one ranking page.

    Returns an empty list when *raw_html* is None (failed download),
    so callers can concatenate results unconditionally.
    """
    if raw_html is None:
        return []
    html = BeautifulSoup(raw_html, 'html.parser')
    ranks = []
    for tr in html.select('tr'):
        tds = tr.select("td")
        # A data row on these pages has exactly 10 cells; cell 2 is the
        # country name and cell 7 the ranking value.
        if len(tds) == 10:
            ranks.append((year, tds[2].text, tds[7].text))
    return ranks


def get_url(year):
    """Build the archive URL for *year*.

    The site changed its rating method (and directory name) several
    times, so the method number depends on the season.
    """
    if year in range(1960, 1999):
        method = 1
    elif year in range(1999, 2004):
        method = 2
    elif year in range(2004, 2009):
        method = 3
    elif year in range(2009, 2018):
        method = 4
    else:  # 2018
        method = 5
    return f"https://kassiesa.home.xs4all.nl/bert/uefa/data/method{method}/crank{year}.html"


if __name__ == "__main__":
    ranks = []
    for year in range(1960, 2019):
        url = get_url(year)
        print(url)
        raw_html = query_url(url)
        ranks += parse_ranks(raw_html, year)

    with open('team_ranks.csv', 'w') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(['year', 'country', 'rank'])
        for rank in ranks:
            writer.writerow(rank)
 from sklearn.neural_network import MLPClassifier games = pd.read_csv('games.csv') #    SwedenGames = games[(games.teamTitle == 'Sweden')] #   y = SwedenGames['score'] y = y.astype('int') #   X = SwedenGames.drop(['score', 'teamTitle', 'againstTitle'], axis=1) #       X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25) mlp = MLPClassifier() mlp.fit(X_train, y_train); predictions = mlp.predict(X_test) print('Accuracy: {:.2}'.format(   accuracy_score(y_test, mlp.predict(X_test)) ))  #     categorical_features_indices = [1, 2, 4] train_pool = Pool(X_train,  y_train, cat_features=categorical_features_indices) validate_pool = Pool(X_test, y_test, cat_features=categorical_features_indices) #      ,      GridSearchCV.   best_params = {   'iterations': 500,   'depth': 10,   'learning_rate': 0.1,   'l2_leaf_reg': 1,   'eval_metric': 'Accuracy',   'random_seed': 42,   'logging_level': 'Silent',   'use_best_model': True } cb_model = CatBoostClassifier(**best_params) cb_model.fit(train_pool, eval_set=validate_pool) print('Accuracy: {:.2}'.format(   accuracy_score(y_test, cb_model.predict(X_test)) ))  def get_prediction(country, against):   y = SwdenGames['score']   y = y.astype('int')   X = SwdenGames.drop(['score', 'againstTitle'], axis=1)   train_pool = Pool(X, y, cat_features=[1, 2, 4])   query = [ get_team_rank(country, 2018), 0,   1 if country == 'Russia' else 0,   get_team_rank(against, 2018),   against]   return cb_model.predict_proba([query])[0] team_1 = 'Belgium' team_2 = 'France' result = get_prediction(team_1, team_2) if result[0] > result[1]:   print(f" {team_1}    {team_2}   {result[0]*100:.1f}%") else: print(f" {team_1}   {team_2}   {result[1]*100:.1f}%") 
Source: https://habr.com/ru/post/427273/
All Articles