Problem
This program runs through a preprocessed makefile (make --print-data-base | python make_graph.py [options]
) to assemble a directed graph of its variables.
I originally put together the program to visualize assignment redundancies in a particularly messy build, but I haven’t used it much since.
I’ve used it on fairly large builds and haven’t experienced any particular performance issues, although I’m sure it can be improved in that regard.
I’d love to know what I can do to improve the program. In particular I’d like to make sure it’s somewhat pythonic.
Comments about the overall approach are welcome, but I’m reluctant to delve into GNU Make grammar!
import argparse
import graphviz
import re
import subprocess
import sys
import unittest
def all_assignments(database):
assignments = {}
# accept target-specific variables
re_assignment = re.compile(r'.*?([^:#= ]+) :?= .*$')
re_variable = re.compile(r'$(([^:#= ]+?))')
for line in database:
if not any(assign in line for assign in (' = ', ' := ')):
continue
match_assignee = re_assignment.match(line)
if not match_assignee:
continue
assignee = match_assignee.group(1)
assignments.setdefault(assignee, set())
for match_variable in re.finditer(re_variable, line):
assignments[assignee].add(match_variable.group(1))
return assignments
def without_edges(assignments):
# not assigned other variables
singles = {assignee for (assignee, variables) in
assignments.iteritems() if len(variables) == 0}
# and not assigned to another variables
for (_, variables) in assignments.iteritems():
singles.difference_update(variables)
return singles
def trim(assignments, vars_to_trim):
for var in vars_to_trim:
assignments.pop(var, None)
return assignments
# Alternatively, can be acquired using make --print-data-base -f /dev/null
echo_internal = """
echo:
@echo $(subst <,<,$(.VARIABLES))
""" # on my system, <D is the first variable
def internal_variables():
variables = subprocess.check_output(['make', '--eval', echo_internal])
return set(variables.split())
def graph_assignments(assignments, include_internal):
qualifying_assignments = trim(assignments,
set(without_edges(assignments)))
return (qualifying_assignments if include_internal else
trim(qualifying_assignments, internal_variables()))
def nodes(assignments):
nodes = {assignee for (assignee, _) in assignments.iteritems()}
for (_, variables) in assignments.iteritems():
nodes.update(variables)
return nodes
class TestAssignments(unittest.TestCase):
# This particular edge wouldn't appear from --print-data-base
# output, since GNU Make would expand the variable immediately
def test_immediate(self):
s = ('A := an'
'B := $(A)n')
self.assertEqual(all_assignments(s.splitlines()),
{'A' : set(),
'B' : {'A'}})
def test_deferred(self):
s = ('A = an'
'B = $(A)n')
self.assertEqual(all_assignments(s.splitlines()),
{'A' : set(),
'B' : {'A'}})
def test_empty(self):
self.assertEqual(all_assignments('B = $(A)n'.splitlines()),
{'B' : {'A'}})
def test_multiple(self):
self.assertEqual(all_assignments('A = $(B)$(C) $(D)n'.splitlines()),
{'A' : {'B', 'C', 'D'}})
def test_without_edges(self):
self.assertEqual(without_edges({'A' : set(),
'B' : {'A'},
'C' : set()}), {'C'})
def test_nodes(self):
self.assertEqual(nodes({'A' : set(),
'B' : {'A'},
'C' : set()}), {'A', 'B', 'C'})
def add_nodes(dot, nodes):
for node in nodes:
dot.node(node)
def add_edges(dot, assignments):
for (assignee, variables) in assignments.iteritems():
for variable in variables:
dot.edge(assignee, variable)
def output_graph(assignments, graph_name, view):
dot = graphviz.Digraph(comment = 'GNU Make Variable Directional Graph')
add_nodes(dot, nodes(assignments))
add_edges(dot, assignments)
dot.render(graph_name, view = view)
def output_text(assignments):
for (assignee, variables) in sorted(assignments.iteritems()):
sys.stdout.write('%s = %sn' % (assignee, ' '.join(sorted(variables))))
def make_graph(database, graph_name, as_text, include_internal, view):
assignments = graph_assignments(all_assignments(database), include_internal)
if as_text:
output_text(assignments)
else:
output_graph(assignments, graph_name, view)
if __name__ == "__main__":
parser = argparse.ArgumentParser(__file__)
parser.add_argument('--database', type = argparse.FileType('r'),
help = ("GNU Make database filename; if no filename is"
" provided the database is expected on the"
" standard input stream"))
parser.add_argument('--graph-name', default = 'graph', dest = 'graph_name',
help = ("Graph name; defaults to 'graph'"))
parser.add_argument('--include-internal', action = 'store_true',
help = "Include internal and implicit variables")
parser.add_argument('--list', dest = 'as_text', action = 'store_true',
help = "Output as text to the standard output stream")
parser.add_argument('--no-view', dest = 'view', action = 'store_false',
help = "Don't open the assembled graph")
args = vars(parser.parse_args())
database = args['database'] if args['database'] else sys.stdin
make_graph(database,args['graph_name'], args['as_text'],
args['include_internal'], args['view'])
if database != sys.stdin:
database.close()
Solution
I’m only going to comment on the argparse
part.
For starter, for function calls with long names and rather long value (esp. the help
one), I mostly prefer the second indentation proposed by PEP8; but it’s mostly a matter of taste. Speaking about PEP8, it also state that you should remove spaces around the =
sign for named arguments.
Second, I like to make the parser building logic into its own function. Who knows one of your future project may want to import this one and extend its parser. (It actually hapened to me once.)
You also don’t need the dest = 'graph_name'
as it is already the default name assigned to (look at how --include-internal
is stored into include_internal
); I’d rather use metavar='NAME'
here instead. And speaking about storage of values, I don’t see any added value to using vars
on the Namespace
returned by parser.parse_args()
as accessing the attributes directly would work the same.
I would also drop the __file__
from the ArgumentParser
call as it is already pretty much what argparse
is doing by default, but add a description
to it. It is usually good practice to use the module docstring for that, so better add one too.
Finally your handling of sys.stdin
compared to a path to a file seems off. Instead of checking it manually after parsing the arguments, you could pass default=sys.stdin
to the database argument. And since you are dealing with files, rather than closing it after the fact, you should use a with
statement. In the following code I won’t make any special case for sys.stdin
as closing it does not affect the rest of your program (which is empty).
Proposed improvements:
"""GNU Make variable relationship graph.
This program runs through a preprocessed makefile
(make --print-data-base | python make_graph.py [options])
to assemble a directed graph of its variables.
"""
import sys
import argparse
...
def command_line_parser():
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
'--database', type=argparse.FileType('r'), default=sys.stdin
help="GNU Make database filename; if no filename is"
" provided the database is expected on the"
" standard input stream")
parser.add_argument(
'--graph-name', default='graph', metavar='NAME',
help="Graph name; defaults to 'graph'")
parser.add_argument(
'--include-internal', action='store_true',
help="Include internal and implicit variables")
parser.add_argument(
'--list', dest='as_text', action='store_true',
help="Output as text to the standard output stream")
parser.add_argument(
'--no-view', dest='view', action='store_false',
help="Don't open the assembled graph")
return parser
if __name__ == '__main__':
args = command_line_parser().parse_args()
with args.database as database:
make_graph(database, args.graph_name, args.as_text,
args.include_internal, args.view)