Set Intersection #performance (#28183)

* Set Intersection #performance

Intersection is faster for sets of different sizes if one iterates over the smaller set and checks for each element's presence in the larger one. As an edge case, consider `s` having 1M entries and `other` having no entries at all. In this case the original code iterates over all 1M entries in `s` without finding anything and then returns an empty result set. In the patched code the iteration doesn't happen at all and the result is returned immediately.

This change is inspired by profiling a relatively large terraform configuration, where the time to validate was sped up 4x with this change.
This commit is contained in:
Dennis Gursky 2021-03-24 10:04:37 -07:00 committed by GitHub
parent 07ecfb13f0
commit 550de86135
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 36 additions and 7 deletions

View File

@ -38,17 +38,18 @@ func (s Set) Include(v interface{}) bool {
// Intersection computes the set intersection with other.
func (s Set) Intersection(other Set) Set {
result := make(Set)
if s == nil {
if s == nil || other == nil {
return result
}
if other != nil {
for _, v := range s {
if other.Include(v) {
result.Add(v)
}
// Iteration over a smaller set has better performance.
if other.Len() < s.Len() {
s, other = other, s
}
for _, v := range s {
if other.Include(v) {
result.Add(v)
}
}
return result
}

View File

@ -119,3 +119,31 @@ func TestSetCopy(t *testing.T) {
}
}
// makeSet builds a Set, created with a size hint of n, and adds the integers
// 0 through n-1 to it in ascending order.
func makeSet(n int) Set {
	set := make(Set, n)
	next := 0
	for next < n {
		set.Add(next)
		next++
	}
	return set
}
// BenchmarkSetIntersection_100_100000 times Intersection called on a
// 100-element set with a 100000-element set as the argument. Both sets are
// built before the timer is reset, so construction is not measured.
func BenchmarkSetIntersection_100_100000(b *testing.B) {
	receiver, arg := makeSet(100), makeSet(100000)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		receiver.Intersection(arg)
	}
}
// BenchmarkSetIntersection_100000_100 times Intersection called on a
// 100000-element set with a 100-element set as the argument, the mirror image
// of the 100_100000 case. Both sets are built before the timer is reset, so
// construction is not measured.
func BenchmarkSetIntersection_100000_100(b *testing.B) {
	arg, receiver := makeSet(100), makeSet(100000)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		receiver.Intersection(arg)
	}
}