GNU Make variable relationship graph using Python

Posted on


This program runs through a preprocessed makefile (make --print-data-base | python [options]) to assemble a directed graph of its variables.
I originally put together the program to visualize assignment redundancies in a particularly messy build, but I haven’t used it much since.
I’ve used it on fairly large builds and haven’t experienced any particular performance issues, although I’m sure it can be improved in that regard.

I’d love to know what I can do to improve the program. In particular I’d like to make sure it’s somewhat pythonic.
Comments about the overall approach are welcome, but I’m reluctant to delve into GNU Make grammar!

import argparse
import graphviz
import re
import subprocess
import sys
import unittest

def all_assignments(database):
    assignments = {}

    # accept target-specific variables
    re_assignment = re.compile(r'.*?([^:#= ]+) :?= .*$')
    re_variable = re.compile(r'$(([^:#= ]+?))')
    for line in database:
        if not any(assign in line for assign in (' = ', ' := ')):

        match_assignee = re_assignment.match(line)
        if not match_assignee:

        assignee =
        assignments.setdefault(assignee, set())
        for match_variable in re.finditer(re_variable, line):

    return assignments

def without_edges(assignments):
   # not assigned other variables
    singles = {assignee for (assignee, variables) in
               assignments.iteritems() if len(variables) == 0}
    # and not assigned to another variables
    for (_, variables) in assignments.iteritems():

    return singles

def trim(assignments, vars_to_trim):
    for var in vars_to_trim:
        assignments.pop(var, None)
    return assignments

# Alternatively, can be acquired using make --print-data-base -f /dev/null
echo_internal = """
    @echo $(subst <,<,$(.VARIABLES))
"""  # on my system, <D is the first variable

def internal_variables():
    variables = subprocess.check_output(['make', '--eval', echo_internal])
    return set(variables.split())

def graph_assignments(assignments, include_internal):
    qualifying_assignments = trim(assignments,
    return (qualifying_assignments if include_internal else
            trim(qualifying_assignments, internal_variables()))

def nodes(assignments):
    nodes = {assignee for (assignee, _) in assignments.iteritems()}
    for (_, variables) in assignments.iteritems():
    return nodes

class TestAssignments(unittest.TestCase):
    # This particular edge wouldn't appear from --print-data-base
    # output, since GNU Make would expand the variable immediately
    def test_immediate(self):
        s = ('A := an'
             'B := $(A)n')
                         {'A' : set(),
                          'B' : {'A'}})

    def test_deferred(self):
        s = ('A = an'
             'B = $(A)n')
                         {'A' : set(),
                          'B' : {'A'}})

    def test_empty(self):
        self.assertEqual(all_assignments('B = $(A)n'.splitlines()),
                         {'B' : {'A'}})

    def test_multiple(self):
        self.assertEqual(all_assignments('A = $(B)$(C) $(D)n'.splitlines()),
                         {'A' : {'B', 'C', 'D'}})

    def test_without_edges(self):
        self.assertEqual(without_edges({'A' : set(),
 'B' : {'A'},
 'C' : set()}), {'C'})

    def test_nodes(self):
        self.assertEqual(nodes({'A' : set(),
                                'B' : {'A'},
                                'C' : set()}), {'A', 'B', 'C'})

def add_nodes(dot, nodes):
    for node in nodes:

def add_edges(dot, assignments):
    for (assignee, variables) in assignments.iteritems():
        for variable in variables:
            dot.edge(assignee, variable)

def output_graph(assignments, graph_name, view):
    dot = graphviz.Digraph(comment = 'GNU Make Variable Directional Graph')

    add_nodes(dot, nodes(assignments))
    add_edges(dot, assignments)

    dot.render(graph_name, view = view)

def output_text(assignments):
    for (assignee, variables) in sorted(assignments.iteritems()):
        sys.stdout.write('%s = %sn' % (assignee, ' '.join(sorted(variables))))

def make_graph(database, graph_name, as_text, include_internal, view):
    assignments = graph_assignments(all_assignments(database), include_internal)
    if as_text:
        output_graph(assignments, graph_name, view)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(__file__)
    parser.add_argument('--database', type = argparse.FileType('r'),
                        help = ("GNU Make database filename; if no filename is"
                                " provided the database is expected on the"
                                " standard input stream"))
    parser.add_argument('--graph-name', default = 'graph', dest = 'graph_name',
                        help = ("Graph name; defaults to 'graph'"))
    parser.add_argument('--include-internal', action = 'store_true',
                        help = "Include internal and implicit variables")
    parser.add_argument('--list', dest = 'as_text', action = 'store_true',
                        help = "Output as text to the standard output stream")
    parser.add_argument('--no-view', dest = 'view', action = 'store_false',
                        help = "Don't open the assembled graph")

    args = vars(parser.parse_args())
    database = args['database'] if args['database'] else sys.stdin
    make_graph(database,args['graph_name'], args['as_text'],
               args['include_internal'], args['view'])

    if database != sys.stdin:


I’m only going to comment on the argparse part.

For starter, for function calls with long names and rather long value (esp. the help one), I mostly prefer the second indentation proposed by PEP8; but it’s mostly a matter of taste. Speaking about PEP8, it also state that you should remove spaces around the = sign for named arguments.

Second, I like to make the parser building logic into its own function. Who knows one of your future project may want to import this one and extend its parser. (It actually hapened to me once.)

You also don’t need the dest = 'graph_name' as it is already the default name assigned to (look at how --include-internal is stored into include_internal); I’d rather use metavar='NAME' here instead. And speaking about storage of values, I don’t see any added value to using vars on the Namespace returned by parser.parse_args() as accessing the attributes directly would work the same.

I would also drop the __file__ from the ArgumentParser call as it is already pretty much what argparse is doing by default, but add a description to it. It is usually good practice to use the module docstring for that, so better add one too.

Finally your handling of sys.stdin compared to a path to a file seems off. Instead of checking it manually after parsing the arguments, you could pass default=sys.stdin to the database argument. And since you are dealing with files, rather than closing it after the fact, you should use a with statement. In the following code I won’t make any special case for sys.stdin as closing it does not affect the rest of your program (which is empty).

Proposed improvements:

"""GNU Make variable relationship graph.

This program runs through a preprocessed makefile
(make --print-data-base | python [options])
to assemble a directed graph of its variables.

import sys
import argparse


def command_line_parser():
    parser = argparse.ArgumentParser(description=__doc__)
            '--database', type=argparse.FileType('r'), default=sys.stdin
            help="GNU Make database filename; if no filename is"
            " provided the database is expected on the"
            " standard input stream")
            '--graph-name', default='graph', metavar='NAME',
            help="Graph name; defaults to 'graph'")
            '--include-internal', action='store_true',
            help="Include internal and implicit variables")
            '--list', dest='as_text', action='store_true',
            help="Output as text to the standard output stream")
            '--no-view', dest='view', action='store_false',
            help="Don't open the assembled graph")
    return parser

if __name__ == '__main__':
    args = command_line_parser().parse_args()
    with args.database as database:
        make_graph(database, args.graph_name, args.as_text,
                   args.include_internal, args.view)

Leave a Reply

Your email address will not be published. Required fields are marked *