···
from pprint import pprint
from collections import defaultdict
121
+
def debug(msg, *args, **kwargs):
124
+
"DEBUG: {}".format(
125
+
msg.format(*args, **kwargs)
# Find paths in the original dataset which are never referenced by
def find_roots(closures):
···
341
+
subgraphs_cache = {}
def make_graph_segment_from_root(root, lookup):
343
+
global subgraphs_cache
333
-
children[ref] = make_graph_segment_from_root(ref, lookup)
346
+
# make_graph_segment_from_root is a pure function, and will
347
+
# always return the same result based on a given input. Thus,
348
+
# cache computation.
350
+
# Python's assignment binds a reference to the cached object (no copy), preventing memory
351
+
# bloat for large graphs.
352
+
if ref not in subgraphs_cache:
353
+
debug("Subgraph Cache miss on {}".format(ref))
354
+
subgraphs_cache[ref] = make_graph_segment_from_root(ref, lookup)
356
+
debug("Subgraph Cache hit on {}".format(ref))
357
+
children[ref] = subgraphs_cache[ref]
class TestMakeGraphSegmentFromRoot(unittest.TestCase):
···
408
+
popularity_cache = {}
def graph_popularity_contest(full_graph):
410
+
global popularity_cache
popularity = defaultdict(int)
for path, subgraph in full_graph.items():
388
-
subcontest = graph_popularity_contest(subgraph)
414
+
# graph_popularity_contest is a pure function, and will
415
+
# always return the same result based on a given input. Thus,
416
+
# cache computation.
418
+
# Python's assignment binds a reference to the cached object (no copy), preventing memory
419
+
# bloat for large graphs.
420
+
if path not in popularity_cache:
421
+
debug("Popularity Cache miss on {}", path)
422
+
popularity_cache[path] = graph_popularity_contest(subgraph)
424
+
debug("Popularity Cache hit on {}", path)
426
+
subcontest = popularity_cache[path]
for subpath, subpopularity in subcontest.items():
428
+
debug("Calculating popularity for {}", subpath)
popularity[subpath] += subpopularity + 1
···
516
+
debug("Loading from {}", filename)
with open(filename) as f:
···
540
+
debug("Finding roots from {}", key)
roots = find_roots(graph);
542
+
debug("Making lookup for {}", key)
lookup = make_lookup(graph)
547
+
debug("Making full graph for {}", root)
full_graph[root] = make_graph_segment_from_root(root, lookup)
507
-
ordered = order_by_popularity(graph_popularity_contest(full_graph))
550
+
debug("Running contest")
551
+
contest = graph_popularity_contest(full_graph)
552
+
debug("Ordering by popularity")
553
+
ordered = order_by_popularity(contest)
554
+
debug("Checking for missing paths")
for path in all_paths(graph):