ru.wikipedia.org/w/api.php?action=query&format=xml&titles=Habrahabr&prop=links
* This source code was highlighted with Source Code Highlighter .
<api>
  <query>
    <normalized>
      <n from="Habrahabr" to="Habrahabr" />
    </normalized>
    <pages>
      <page pageid="340809" ns="0" title="Habrahabr">
        <links>
          <pl ns="0" title="2006" />
          <pl ns="0" title="2006" />
          <pl ns="0" title="2007" />
          <pl ns="0" title="Digg.com" />
          <pl ns="0" title="Linux.org.ru" />
          <pl ns="0" title="News 2.0" />
          <pl ns="0" title="Newsland" />
          <pl ns="0" title="Pligg" />
          <pl ns="0" title="Slashdot" />
          <pl ns="0" title="URL" />
        </links>
      </page>
    </pages>
  </query>
  <query-continue>
    <links plcontinue="340809|0|Blog" />
  </query-continue>
</api>
class LinksListHandler(xml.sax.handler.ContentHandler):
* This source code was highlighted with Source Code Highlighter .
def get_links(page):
    """Fetch the titles linked from a wiki page through the MediaWiki API.

    Builds an ``action=query&prop=links`` request for ``page``, downloads
    the XML response and feeds it to a ``LinksListHandler`` SAX handler.

    Args:
        page: Title of the wiki page whose outgoing links are requested.

    Returns:
        The ``links`` attribute collected by the ``LinksListHandler``.

    Note:
        If the request fails, "HTTP request error!" is printed and the
        process exits with status 1. Exactly one request is issued; any
        continuation marker in the response (e.g. ``query-continue``) is
        not followed here.
    """
    # See wiki api documentation: http://en.wikipedia.org/w/api.php
    query_val = {
        'action': 'query',
        'prop': 'links',
        'titles': page,
        'format': 'xml',
    }
    url = wiki_url() + '?' + urllib.urlencode(query_val)
    request = urllib2.Request(url)
    verbose_message("Wiki url:" + url)
    try:
        response = urllib2.urlopen(request)
    except urllib2.URLError:
        # URLError is the base class of HTTPError, so this also covers
        # connection-level failures (DNS, refused connection, ...), not
        # only HTTP error statuses.
        print("HTTP request error!")
        sys.exit(1)

    # Do not read() the response for debugging before parsing: that would
    # consume the stream and leave nothing for the SAX parser.
    lh = LinksListHandler()
    saxparser = xml.sax.make_parser()
    saxparser.setContentHandler(lh)
    try:
        saxparser.parse(response)
    finally:
        # Release the underlying connection even if parsing fails.
        response.close()
    return lh.links
* This source code was highlighted with Source Code Highlighter .
def make_wiki_graph(wiki_page, depth):
    """Build a link graph around a wiki page by breadth-first expansion.

    Starting from ``wiki_page``, each level fetches the links of every page
    on the current frontier with ``get_links`` and adds a ``page -> link``
    edge for each of them. Every expanded page gets a ``fontsize``
    attribute of ``MIN_FONT * 2 * (depth - level)``, so pages reached
    earlier are drawn larger.

    Args:
        wiki_page: Title of the page the graph is built around.
        depth: Number of levels to expand; ``0`` yields a graph with only
            the start node.

    Returns:
        The populated ``AGraph``. Pages first reached on the last level are
        never expanded and therefore get no ``fontsize`` attribute.
    """
    gv = AGraph()
    verbose_message('Create graph for' + wiki_page)
    gv.add_node(wiki_page)

    frontier = [wiki_page]
    # Pages already queued for expansion. Without this, a page linked from
    # several places (or linking back to an ancestor) is downloaded again
    # on every level, and its font size is overwritten by the smaller value
    # of the deeper level.
    seen = set(frontier)
    for level in range(depth):
        print('>>>> Get' + str(level) + 'level')
        next_frontier = []
        for page in frontier:
            links = get_links(page)
            node = gv.get_node(page)
            # NOTE(review): "% i" uses the space flag and yields a leading
            # space; this looks like a paste artifact of "%i" - confirm.
            node.attr['fontsize'] = "% i" % (MIN_FONT * 2 * (depth - level))
            for link in links:
                verbose_message(page + "=>" + link)
                gv.add_edge(page, link)
                if link not in seen:
                    seen.add(link)
                    next_frontier.append(link)
        frontier = next_frontier
    return gv
Source: https://habr.com/ru/post/56209/
All Articles