import re  # regular expressions for link filtering and title clean-up
import urllib.parse  # percent-encoding of the user's search query
import urllib.request  # plain HTTP fetches of HTML pages

import requests  # HTTP download of the audio file
from bs4 import BeautifulSoup  # HTML parsing
# Module-level state shared by the script steps in this file.
page_count = []  # URLs of the search-result pages
perehod = []  # links collected from the result pages (parsing1 returns a list)
download = []  # [file URL, track title] as returned by parsing2; empty until then
if __name__ == '__main__':
    # Ask for a search query, then follow the results pages one by one,
    # recording every page URL in ``page_count``.
    #
    # NOTE(review): get_html and get_page_count are defined *after* this guard
    # in the file, so as laid out this block raises NameError when run; the
    # guard needs to sit below those definitions.
    # NOTE(review): the prompt and status strings below look truncated (their
    # text appears to have been stripped from this copy) - restore if known.
    u = input(' :\n')
    # Percent-encode the query: spaces or non-ASCII characters pasted raw into
    # the URL make urlopen reject the request.
    base_url = ('http://go.mail.ru/zaycev?sbmt=1486991736446&q='
                + urllib.parse.quote_plus(u))
    count = 0
    page_count = [base_url]
    print(' . ...')
    while True:
        try:
            # get_page_count raises TypeError when the page has no further
            # link; that is the loop's only exit.
            next_url = get_page_count(get_html(page_count[count]), page_count)
        except TypeError:
            break
        page_count.append(next_url)
        count += 1
    print(" - ", len(page_count))
def get_html(url):
    """Fetch *url* and return the raw response body.

    Args:
        url: Address to open; anything ``urllib.request.urlopen`` accepts.

    Returns:
        The complete, undecoded body as ``bytes``.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            cannot be completed.
    """
    # The context manager closes the connection as soon as the body is read,
    # instead of leaving it open until garbage collection.
    with urllib.request.urlopen(url) as response:
        return response.read()
def get_page_count(html, page_count):
    """Return the absolute URL of the next results page linked from *html*.

    Args:
        html: Markup of a results page, e.g. the bytes returned by
            ``get_html``.
        page_count: Kept for compatibility with existing callers; the value
            is not read.

    Returns:
        ``'http://go.mail.ru'`` followed by the ``href`` of the matched link.

    Raises:
        TypeError: If no matching link is found, or the link has no ``href``.
            The pagination loop in the ``__main__`` guard catches exactly this
            exception to stop.
    """
    soup = BeautifulSoup(html, "html.parser")
    # NOTE(review): the link-text filter is an empty string in this copy of
    # the file; the real label of the "next page" link appears to have been
    # lost. Restore it before relying on this lookup - TODO confirm.
    link = soup.find('a', text='')
    target = link.get('href') if link is not None else None
    if target is None:
        # Raised explicitly so the stop signal does not depend on the
        # incidental error from subscripting None, and so a link without an
        # href ends pagination instead of escaping as KeyError.
        raise TypeError('no next-page link found')
    base_url = 'http://go.mail.ru'
    return base_url + target
# Collect candidate links from every results page listed in ``page_count``.
#
# NOTE(review): parsing1 is defined after this point in the file; this step
# must run after that definition.
# NOTE(review): both printed strings look like stripped status messages -
# restore the original text if known.
print('')
# Start from an empty list and extend it page by page; assigning parsing1's
# result directly would keep only the links of the last page visited.
perehod = []
try:
    for i in page_count:
        # parsing1 ignores its second argument and returns a fresh list.
        perehod.extend(parsing1(get_html(i), []))
except TypeError:
    print(' ')
def parsing1(html, perehod):
    """Return the ``href`` of every matching ``<a>`` element in *html*.

    Args:
        html: Markup of a results page.
        perehod: Kept for compatibility with existing callers; the value is
            not read and a new list is always returned.

    Returns:
        A list of ``href`` values, in document order, for anchors whose
        serialized markup matches the filter pattern.
    """
    soup = BeautifulSoup(html, "html.parser")
    # NOTE(review): the filter pattern is empty in this copy of the file (its
    # text appears to have been stripped). An empty pattern matches every
    # string, so no anchor is filtered out until the real pattern is restored.
    pattern = re.compile(r'')
    # href=True skips anchors that have no href attribute, which would
    # otherwise raise KeyError on row['href'].
    return [
        row['href']
        for row in soup.find_all('a', href=True)
        if pattern.search(str(row))
    ]
# Resolve each collected link to a [file URL, title] pair.
#
# NOTE(review): parsing2 is defined after this point in the file; this loop
# must run after that definition.
# NOTE(review): ``download`` is overwritten on every pass, so the save step
# that follows this loop in the file only sees the last value - confirm
# whether saving was meant to happen once per link.
for y in perehod:
    # parsing2 hands back the value passed in when the page has no download
    # link, so a miss keeps the previous result.
    download = parsing2(get_html(y), download)
def parsing2(html, download):
    """Extract the download URL and track title from a track page.

    Args:
        html: Markup of a track page.
        download: Value to hand back unchanged when the page has no download
            link.

    Returns:
        ``[href, name]`` when an ``<a id="audiotrack-download-link">`` is
        present, otherwise *download* as passed in. ``name`` is the text of
        the track header with the newline-plus-six-tabs run removed, or an
        empty string when the header is missing.
    """
    soup = BeautifulSoup(html, "html.parser")
    table = soup.find('a', {'id': 'audiotrack-download-link'})
    if table is None:
        return download
    row = soup.find('h1', {'class': "block__header block__header_audiotrack"})
    # A page with a download link but no header would otherwise fail with
    # AttributeError on ``None.text``; fall back to an empty title.
    title = row.text if row is not None else ''
    name = re.sub(r'\n\t\t\t\t\t\t', '', title)
    return [table.get('href'), name]
# Save the resolved track, if any, as "<title>.mp3" in the working directory.
#
# NOTE(review): the title comes from a scraped page and is used as a file
# name unchecked; consider stripping path separators before opening.
if download:
    req = requests.get(download[0], stream=True)
    try:
        if req.status_code == requests.codes.ok:
            with open(download[1] + '.mp3', 'wb') as a:
                # Write in chunks: reading req.content would load the whole
                # file into memory and defeat stream=True.
                for chunk in req.iter_content(chunk_size=65536):
                    a.write(chunk)
    finally:
        # A streamed response holds its connection until closed.
        req.close()
# Source: https://habr.com/ru/post/322502/