📜 ⬆️ ⬇️

Visualizing directories in Python with NetworkX

While leafing through the Python section on Habr, I came across an interesting article about the NetworkX library. Impressed by the beautiful graphs, I decided to level up my Python skills and dig into networkx.
image

Prologue


The first question: where to get data to visualize? Generating random graphs is not interesting — examples of those already come bundled with the module. Then I remembered the DOS utility tree, which prints the file system's directories as a tree. So I decided to write a pretty analogue in Python and draw everything with networkx using matplotlib.

Act one


Copy Source | Copy HTML
  1. def get_tree(tree=[u"E:\\",], G=nx.Graph(), itr=0, max_itr=900):
  2.     point = tree.pop(0)
  3.     itr = itr + 1
  4.     sub_tree = [os.path.join(point, x) for x in os.listdir(point) if os.path.isdir(os.path.join(point, x)) and not is_hidden_dir(os.path.join(point, x))]
  5.     if sub_tree:
  6.         tree.extend(sub_tree)
  7.         G.add_edges_from(map(lambda b: (point, b), sub_tree))
  8.     if tree and itr <= max_itr:
  9.         return get_tree(tree, G, itr)
  10.     else:
  11.         return G

Judging by the resulting graph, it becomes clear that there is not that much music.
The code itself is nothing complicated. The function calls itself recursively until the list of directories to process is empty or the specified number of iterations is exhausted. On each call, the first element is popped from the list of directories, and its subdirectories are found using the standard Python function os.listdir(). The lists are then merged, and the resulting edges are added to the graph. The main trick is the function is_hidden_dir(), which checks whether a directory is hidden. At first I thought this trivial task could be solved by means of the language itself. But it turned out that this is simple only on Unix systems; on Windows, which I work under, the task takes on shades of masochism.

Act Two


Copy Source | Copy HTML
  1. def is_hidden_dir (d):
  2. import sys, subprocess
  3. if sys . platform .startswith ( "win" ):
  4. p = subprocess .check_output ([ "attrib" , d])
  5. return True if 'h' in p [: 12 ] else False
  6. else :
  7. return True if os .path.basename (d) [ 0 ] == '.' else False


First we check the user's operating system. If it is not Windows, determining whether a file is hidden is trivial: the first character of the directory name must be a period ".".
On Windows, everything is atypical. Thinking about how to do something similar, I realized that files there have no explicit marker like the leading dot on *nix. I had no desire to figure out WinAPI from Python, so all that remained was to resort to a dirty hack, which I did.
')
A quick Google search turned up the attrib console utility. I originally planned to simply run it through os.system(), but the documentation dissuaded me. Moreover, what mattered to me was the utility's output, not merely the fact that it ran successfully. In the manuals I found the function I needed, subprocess.check_output(), which returns the output of the command passed as its argument. The rest of the check is simple: the flag "H" must occur within the first 12 characters. But even here Windows did not let me relax. While the script was processing directories with only Latin characters in their names, everything was fine, but as soon as it reached Russian letters, it flatly refused to work. Logically enough, it was unhappy with the encoding. With a light heart I added an encoding step:
Copy Source | Copy HTML
  1. p = subprocess .check_output ([ "attrib" , d.encode ( 'cp1251' )])

But Python quickly disabused me of the idea that this would work, this time refusing to handle English-language names.
The result was a compromise solution.
Copy Source | Copy HTML
  1. p = subprocess .check_output ([ "attrib" , d.encode ( 'cp1251' ) if isinstance (d, unicode) else d])


Act three. Final


All that remains is to draw the generated graph with networkx, which is what all of this was started for. It does not take long to realize that there is much more to dig into here. The rendering function is temptingly simple:
Copy Source | Copy HTML
  1. import networkx as nx
  2. import matplotlib.pyplot as plt
  3. def main ():
  4. G = get_tree ()
  5. nx.draw (G, with_labels = False, node_color = "blue" , alpha = 0.6 , node_size = 50 )
  6. plt.savefig ( "edge_colormap.png" )
  7. plt.show ()

The parameters passed to nx.draw() here are optional, except for the graph itself. NetworkX supports two libraries for rendering data visually: matplotlib and pygraphviz. At first I decided to use pygraphviz. I downloaded Graphviz from the official site, installed it, and started installing pygraphviz, the Python wrapper for it, but then pip cursed and said that pygraphviz refuses to deal with Windows. Okay, I thought, we have an alternative. Matplotlib installed without any questions, but when I ran the script with the graph, it began to complain that I still did not have NumPy. I downloaded and installed NumPy. It did not ask me anything and simply started working.

The final code looks like this:
Copy Source | Copy HTML
  1. # -*- encoding: utf-8 -*-
  2. import networkx as nx
  3. import matplotlib.pyplot as plt
  4. import os
  5. def get_tree (tree = [u"E:\\Music" ,], G = nx.Graph (), itr = 0 , max_itr = 900 ):
  6. point = tree.pop ( 0 )
  7. itr = itr + 1
  8. sub_tree = [ os .path.join (point, x) for x in os .listdir (point) if os .path.isdir ( os .path.join (point, x)) and not is_hidden_dir ( os .path.join ( point, x))]
  9. if sub_tree:
  10. tree.extend (sub_tree)
  11. G.add_edges_from ( map ( lambda b: (point, b), sub_tree))
  12. if tree and itr <= max_itr:
  13. return get_tree (tree, G, itr)
  14. else :
  15. return G
  16. def is_hidden_dir (d):
  17. import sys, subprocess
  18. if sys . platform .startswith ( "win" ):
  19. p = subprocess .check_output ([ "attrib" , d.encode ( 'cp1251' ) if isinstance (d, unicode) else d])
  20. return True if 'h' in p [: 12 ] else False
  21. else :
  22. return True if os .path.basename (d) [ 0 ] == '.' else False
  23. def main ():
  24. G = get_tree ()
  25. nx.draw (G, with_labels = False, node_color = "blue" , alpha = 0.6 , node_size = 50 )
  26. plt.savefig ( "edge_colormap.png" )
  27. plt.show ()
  28. if __name__ == "__main__" :
  29. main ()


Curtain


I did not manage to get a snapshot of the entire file system. Either there is not enough memory or, more likely, not enough skill on my part.
Some final pictures:
<img title = "" border = "0" alt = "" src = "

Source: https://habr.com/ru/post/129344/


All Articles