# -*- coding:utf-8 -*-
# parser.py
from BeautifulSoup import BeautifulSoup
from urllib2 import urlopen, URLError

from draw import Drawer


class Parser(object):
    '''Crawl paginated user listings and build a graph of user -> friend links.

    Each listing page is fetched from ``address + str(page_number)``. Every
    ``<td class="user">`` cell on it is treated as one user; the user's
    profile page (the ``href`` of the cell's ``dl > dt > a`` link) is then
    fetched and every ``<a rel="friend">`` on it becomes an edge in
    ``self.drawer.graph``.
    '''

    def __init__(self, address='http://habrahabr.ru/people/page', begin = 1, end = 3098):
        '''Store the crawl range and create the Drawer that owns the graph.

        address -- URL prefix; the page number is appended to it verbatim.
        begin   -- first page number to fetch (inclusive).
        end     -- page number to stop at (exclusive, as in ``xrange``).
        '''
        self.drawer = Drawer()
        self.queue_user = []  # not used by this class; kept for callers
        self.__begin = begin
        self.__end = end
        self.__address = address

    def _fetch_soup(self, url, attempts=3):
        '''Download ``url`` and return it parsed, or None if every try fails.

        A URLError triggers another attempt, up to ``attempts`` in total.
        The HTTP response is always closed once it has been handed to the
        parser.
        '''
        for _ in xrange(attempts):
            try:
                response = urlopen(url)
                try:
                    return BeautifulSoup(response)
                finally:
                    response.close()
            except URLError:
                # Retry on the next pass of this loop. Decrementing the
                # page counter of an enclosing ``for`` cannot do this,
                # because ``for`` rebinds its variable on every iteration.
                continue
        return None

    def parse(self):
        '''Crawl pages [begin, end), fill the graph, report its size, draw it.

        Pages or profiles that still fail with URLError after the retries
        in ``_fetch_soup`` are skipped; a failed profile no longer discards
        the remaining users of the same listing page.
        '''
        graph = self.drawer.graph
        for page_number in xrange(self.__begin, self.__end):
            listing = self._fetch_soup(self.__address + str(page_number))
            if listing is None:
                continue
            # One table cell per user on the listing page.
            for user in listing.findAll('td', attrs={'class': 'user'}):
                link = user.dl.dt.a
                name = link.string
                print('Parsing for user: %s' % name)
                profile = self._fetch_soup(link['href'])
                if profile is None:
                    # Profile unavailable: leave the user out rather than
                    # recording them as having no friends.
                    continue
                # Register the user even when no friend links are found,
                # so users without friends still appear in the graph.
                graph.add_node(name)
                for friend in profile.findAll('a', attrs={'rel': 'friend'}):
                    # add_edge also creates the friend node if missing.
                    graph.add_edge(name, friend.string)
                    print("Add edge (%s, %s)" % (name, friend.string))
        # Graph.size() counts edges; the label says nodes, so count nodes.
        print('Nodes: %s' % graph.number_of_nodes())
        self.drawer.draw()


if __name__ == '__main__':
    parse = Parser(end=8)
    parse.parse()
# -*- coding:utf-8 -*-
# draw.py
import networkx as nx
import matplotlib.pyplot as plt


class Drawer(object):
    '''Hold an undirected networkx graph and render it to an image file.'''

    def __init__(self, file_name = 'graph.png'):
        '''Create an empty graph.

        file_name -- path passed to ``plt.savefig`` when ``draw`` is called.
        '''
        self.graph = nx.Graph()
        self.file_name = file_name

    def draw(self):
        '''Draw ``self.graph`` with a spring layout and save it to ``file_name``.'''
        nx.draw(self.graph,
                pos=nx.spring_layout(self.graph),
                node_size=3500,
                # The keyword is ``node_color``; ``nodecolor`` is not a
                # recognised networkx drawing argument.
                node_color='r',
                edge_color='b',
                node_shape='o')
        # Resize the current figure to 100x100 inches before saving.
        plt.gcf().set_size_inches(100, 100)
        plt.savefig(self.file_name)
Source: https://habr.com/ru/post/126417/
All Articles