From 8cf0a8ca9cb387a73691bc1a7e9999ae326d858e Mon Sep 17 00:00:00 2001 From: James Bardin Date: Tue, 3 Oct 2017 11:08:32 -0400 Subject: [PATCH] faster DAG transitive reduction In the case of highly-connected graphs, the TransitiveReduction process was far too computationally intensive. Since no operations are applied to the nodes, and the walk order is not even user visible, we don't need to sort them n^2 times. --- dag/dag.go | 21 +++++++++++++++---- dag/dag_test.go | 56 +++++++++++++++++++++++++++++++++++++++++++++++++ dag/marshal.go | 12 +++++++++++ 3 files changed, 85 insertions(+), 4 deletions(-) diff --git a/dag/dag.go b/dag/dag.go index f8776bc51..b7eb10c33 100644 --- a/dag/dag.go +++ b/dag/dag.go @@ -106,7 +106,7 @@ func (g *AcyclicGraph) TransitiveReduction() { uTargets := g.DownEdges(u) vs := AsVertexList(g.DownEdges(u)) - g.DepthFirstWalk(vs, func(v Vertex, d int) error { + g.depthFirstWalk(vs, false, func(v Vertex, d int) error { shared := uTargets.Intersection(g.DownEdges(v)) for _, vPrime := range AsVertexList(shared) { g.RemoveEdge(BasicEdge(u, vPrime)) @@ -187,9 +187,18 @@ type vertexAtDepth struct { } // depthFirstWalk does a depth-first walk of the graph starting from -// the vertices in start. This is not exported now but it would make sense -// to export this publicly at some point. +// the vertices in start. func (g *AcyclicGraph) DepthFirstWalk(start []Vertex, f DepthWalkFunc) error { + return g.depthFirstWalk(start, true, f) +} + +// This internal method provides the option of not sorting the vertices during +// the walk, which we use for the Transitive reduction. +// Some configurations can lead to fully-connected subgraphs, which makes our +// transitive reduction algorithm O(n^3). This is still passable for the size +// of our graphs, but the additional n^2 sort operations would make this +// uncomputable in a reasonable amount of time. +func (g *AcyclicGraph) depthFirstWalk(start []Vertex, sorted bool, f DepthWalkFunc) error { defer g.debug.BeginOperation(typeDepthFirstWalk, "").End("") seen := make(map[Vertex]struct{}) @@ -219,7 +228,11 @@ func (g *AcyclicGraph) DepthFirstWalk(start []Vertex, f DepthWalkFunc) error { // Visit targets of this in a consistent order. targets := AsVertexList(g.DownEdges(current.Vertex)) - sort.Sort(byVertexName(targets)) + + if sorted { + sort.Sort(byVertexName(targets)) + } + for _, t := range targets { frontier = append(frontier, &vertexAtDepth{ Vertex: t, diff --git a/dag/dag_test.go b/dag/dag_test.go index 4f8aa3d0d..ae9d87244 100644 --- a/dag/dag_test.go +++ b/dag/dag_test.go @@ -7,6 +7,7 @@ import ( "log" "os" "reflect" + "strconv" "strings" "sync" "testing" @@ -106,6 +107,61 @@ func TestAyclicGraphTransReduction_more(t *testing.T) { } } +// use this to simulate slow sort operations +type counter struct { + Name string + Calls int64 +} + +func (s *counter) String() string { + s.Calls++ + return s.Name +} + +// Make sure we can reduce a sizable, fully-connected graph. +func TestAyclicGraphTransReduction_fullyConnected(t *testing.T) { + var g AcyclicGraph + + const nodeCount = 200 + nodes := make([]*counter, nodeCount) + for i := 0; i < nodeCount; i++ { + nodes[i] = &counter{Name: strconv.Itoa(i)} + } + + // Add them all to the graph + for _, n := range nodes { + g.Add(n) + } + + // connect them all + for i := range nodes { + for j := range nodes { + if i == j { + continue + } + g.Connect(BasicEdge(nodes[i], nodes[j])) + } + } + + g.TransitiveReduction() + + vertexNameCalls := int64(0) + for _, n := range nodes { + vertexNameCalls += n.Calls + } + + switch { + case vertexNameCalls > 2*nodeCount: + // Make calling it more the 2x per node fatal. + // If we were sorting this would give us roughly ln(n)(n^3) calls, or + // >59000000 calls for 200 vertices. + t.Fatalf("VertexName called %d times", vertexNameCalls) + case vertexNameCalls > 0: + // we don't expect any calls, but a change here isn't necessarily fatal + t.Logf("WARNING: VertexName called %d times", vertexNameCalls) + } +} + func TestAcyclicGraphValidate(t *testing.T) { var g AcyclicGraph g.Add(1) diff --git a/dag/marshal.go b/dag/marshal.go index 16d5dd6dd..c567d2719 100644 --- a/dag/marshal.go +++ b/dag/marshal.go @@ -273,6 +273,9 @@ func (e *encoder) Encode(i interface{}) { } func (e *encoder) Add(v Vertex) { + if e == nil { + return + } e.Encode(marshalTransform{ Type: typeTransform, AddVertex: newMarshalVertex(v), @@ -281,6 +284,9 @@ func (e *encoder) Add(v Vertex) { // Remove records the removal of Vertex v. func (e *encoder) Remove(v Vertex) { + if e == nil { + return + } e.Encode(marshalTransform{ Type: typeTransform, RemoveVertex: newMarshalVertex(v), @@ -288,6 +294,9 @@ func (e *encoder) Remove(v Vertex) { } func (e *encoder) Connect(edge Edge) { + if e == nil { + return + } e.Encode(marshalTransform{ Type: typeTransform, AddEdge: newMarshalEdge(edge), @@ -295,6 +304,9 @@ func (e *encoder) Connect(edge Edge) { } func (e *encoder) RemoveEdge(edge Edge) { + if e == nil { + return + } e.Encode(marshalTransform{ Type: typeTransform, RemoveEdge: newMarshalEdge(edge),