// labelIter walks the dot-separated labels of a domain name one at a time.
//
// The implementation originates from golang.org/x/net/idna, which uses it to
// break hostnames into labels for analysis. ForComparison uses it here for
// the same job.
type labelIter struct {
	orig     string   // the name being iterated, exactly as supplied
	slice    []string // labels split on "."; nil until set is first called
	curStart int      // byte offset in orig where the current label begins
	curEnd   int      // byte offset in orig just past the current label; written by label
	i        int      // index of the current label
}

// reset rewinds the position fields to the first label. Labels stored by set
// are left untouched.
func (it *labelIter) reset() {
	it.curStart, it.curEnd, it.i = 0, 0, 0
}

// done reports whether the iterator has moved past the final label.
func (it *labelIter) done() bool {
	return len(it.orig) <= it.curStart
}

// result returns the name with any replacements made through set applied, or
// the original string when set was never called.
func (it *labelIter) result() string {
	if it.slice == nil {
		return it.orig
	}
	return strings.Join(it.slice, ".")
}

// label returns the current label. While no replacement has been made it
// scans orig for the next '.' and records the label's end in curEnd, which
// next relies on; label must therefore be called before next in that mode.
func (it *labelIter) label() string {
	if it.slice != nil {
		return it.slice[it.i]
	}
	if dot := strings.IndexByte(it.orig[it.curStart:], '.'); dot >= 0 {
		it.curEnd = it.curStart + dot
	} else {
		// No further separator: the label runs to the end of the name.
		it.curEnd = len(it.orig)
	}
	return it.orig[it.curStart:it.curEnd]
}

// next sets the value to the next label. It skips the last label if it is empty.
func (it *labelIter) next() {
	it.i++

	if it.slice == nil {
		// Step over the separator that ended the current label.
		it.curStart = it.curEnd + 1
		lastByte := len(it.orig) - 1
		if it.curStart == lastByte && it.orig[it.curStart] == '.' {
			it.curStart = len(it.orig)
		}
		return
	}

	count := len(it.slice)
	if it.i >= count || (it.i == count-1 && it.slice[it.i] == "") {
		// Iteration state is signalled through curStart even in slice mode.
		it.curStart = len(it.orig)
	}
}

// set replaces the current label with s, splitting orig into labels on the
// first call so that individual entries can be overwritten.
func (it *labelIter) set(s string) {
	if it.slice == nil {
		it.slice = strings.Split(it.orig, ".")
	}
	it.slice[it.i] = s
}

// Package svchost deals with the representations of the so-called "friendly
// hostnames" that we use to represent systems that provide Terraform-native
// remote services, such as module registry, remote operations, etc.
//
// Friendly hostnames are specified such that, as much as possible, they
// are consistent with how web browsers think of hostnames, so that users
// can bring their intuitions about how hostnames behave when they access
// a Terraform Enterprise instance's web UI (or indeed any other website)
// and have this behave in a similar way.
package svchost

import (
	"errors"
	"fmt"
	"strconv"
	"strings"

	"golang.org/x/net/idna"
)

// Hostname is a specialized name for string that indicates that the string
// has been converted to (or was already in) the storage and comparison form.
//
// Hostname values are not suitable for display in the user interface. Use
// the ForDisplay method to obtain a form suitable for display in the UI.
//
// Unlike user-supplied hostnames, strings of type Hostname (assuming they
// were constructed by a function within this package) can be compared for
// equality using the standard Go == operator.
type Hostname string

// acePrefix is the ASCII Compatible Encoding prefix, used to indicate that
// a domain name label is in "punycode" form. ForComparison rejects any
// user-supplied label that starts with it.
const acePrefix = "xn--"

// displayProfile is a very liberal idna profile that we use to do
// normalization for display without imposing validation rules. It is used
// only by the package-level ForDisplay function.
var displayProfile = idna.New(
	idna.MapForLookup(),
	idna.Transitional(true),
)

// ForDisplay takes a user-specified hostname and returns a normalized form of
// it suitable for display in the UI.
//
// If the input is so invalid that no normalization can be performed then
// this will return the input, assuming that the caller still wants to
// display _something_. This function is, however, more tolerant than the
// other functions in this package and will make a best effort to prepare
// _any_ given hostname for display.
//
// For validation, use either IsValid (for explicit validation) or
// ForComparison (which implicitly validates, returning an error if invalid).
+func ForDisplay(given string) string { + var portPortion string + if colonPos := strings.Index(given, ":"); colonPos != -1 { + given, portPortion = given[:colonPos], given[colonPos:] + } + portPortion, _ = normalizePortPortion(portPortion) + + ascii, err := displayProfile.ToASCII(given) + if err != nil { + return given + portPortion + } + display, err := displayProfile.ToUnicode(ascii) + if err != nil { + return given + portPortion + } + return display + portPortion +} + +// IsValid returns true if the given user-specified hostname is a valid +// service hostname. +// +// Validity is determined by complying with the RFC 5891 requirements for +// names that are valid for domain lookup (section 5), with the additional +// requirement that user-supplied forms must not _already_ contain +// Punycode segments. +func IsValid(given string) bool { + _, err := ForComparison(given) + return err == nil +} + +// ForComparison takes a user-specified hostname and returns a normalized +// form of it suitable for storage and comparison. The result is not suitable +// for display to end-users because it uses Punycode to represent non-ASCII +// characters, and this form is unreadable for non-ASCII-speaking humans. +// +// The result is typed as Hostname -- a specialized name for string -- so that +// other APIs can make it clear within the type system whether they expect a +// user-specified or display-form hostname or a value already normalized for +// comparison. +// +// The returned Hostname is not valid if the returned error is non-nil. 
+func ForComparison(given string) (Hostname, error) { + var portPortion string + if colonPos := strings.Index(given, ":"); colonPos != -1 { + given, portPortion = given[:colonPos], given[colonPos:] + } + + var err error + portPortion, err = normalizePortPortion(portPortion) + if err != nil { + return Hostname(""), err + } + + if given == "" { + return Hostname(""), fmt.Errorf("empty string is not a valid hostname") + } + + // First we'll apply our additional constraint that Punycode must not + // be given directly by the user. This is not an IDN specification + // requirement, but we prohibit it to force users to use human-readable + // hostname forms within Terraform configuration. + labels := labelIter{orig: given} + for ; !labels.done(); labels.next() { + label := labels.label() + if label == "" { + return Hostname(""), fmt.Errorf( + "hostname contains empty label (two consecutive periods)", + ) + } + if strings.HasPrefix(label, acePrefix) { + return Hostname(""), fmt.Errorf( + "hostname label %q specified in punycode format; service hostnames must be given in unicode", + label, + ) + } + } + + result, err := idna.Lookup.ToASCII(given) + if err != nil { + return Hostname(""), err + } + return Hostname(result + portPortion), nil +} + +// ForDisplay returns a version of the receiver that is appropriate for display +// in the UI. This includes converting any punycode labels to their +// corresponding Unicode characters. +// +// A round-trip through ForComparison and this ForDisplay method does not +// guarantee the same result as calling this package's top-level ForDisplay +// function, since a round-trip through the Hostname type implies stricter +// handling than we do when doing basic display-only processing. 
+func (h Hostname) ForDisplay() string { + given := string(h) + var portPortion string + if colonPos := strings.Index(given, ":"); colonPos != -1 { + given, portPortion = given[:colonPos], given[colonPos:] + } + // We don't normalize the port portion here because we assume it's + // already been normalized on the way in. + + result, err := idna.Lookup.ToUnicode(given) + if err != nil { + // Should never happen, since type Hostname indicates that a string + // passed through our validation rules. + panic(fmt.Errorf("ForDisplay called on invalid Hostname: %s", err)) + } + return result + portPortion +} + +func (h Hostname) String() string { + return string(h) +} + +func (h Hostname) GoString() string { + return fmt.Sprintf("svchost.Hostname(%q)", string(h)) +} + +// normalizePortPortion attempts to normalize the "port portion" of a hostname, +// which begins with the first colon in the hostname and should be followed +// by a string of decimal digits. +// +// If the port portion is valid, a normalized version of it is returned along +// with a nil error. +// +// If the port portion is invalid, the input string is returned verbatim along +// with a non-nil error. +// +// An empty string is a valid port portion representing the absense of a port. +// If non-empty, the first character must be a colon. +func normalizePortPortion(s string) (string, error) { + if s == "" { + return s, nil + } + + if s[0] != ':' { + // should never happen, since caller tends to guarantee the presence + // of a colon due to how it's extracted from the string. 
+ return s, errors.New("port portion is missing its initial colon") + } + + numStr := s[1:] + num, err := strconv.Atoi(numStr) + if err != nil { + return s, errors.New("port portion contains non-digit characters") + } + if num == 443 { + return "", nil // ":443" is the default + } + if num > 65535 { + return s, errors.New("port number is greater than 65535") + } + return fmt.Sprintf(":%d", num), nil +} diff --git a/svchost/svchost_test.go b/svchost/svchost_test.go new file mode 100644 index 000000000..f3cfc75c6 --- /dev/null +++ b/svchost/svchost_test.go @@ -0,0 +1,213 @@ +package svchost + +import "testing" + +func TestForDisplay(t *testing.T) { + tests := []struct { + Input string + Want string + }{ + { + "", + "", + }, + { + "example.com", + "example.com", + }, + { + "invalid", + "invalid", + }, + { + "localhost", + "localhost", + }, + { + "localhost:1211", + "localhost:1211", + }, + { + "HashiCorp.com", + "hashicorp.com", + }, + { + "Испытание.com", + "испытание.com", + }, + { + "münchen.de", // this is a precomposed u with diaeresis + "münchen.de", // this is a precomposed u with diaeresis + }, + { + "münchen.de", // this is a separate u and combining diaeresis + "münchen.de", // this is a precomposed u with diaeresis + }, + { + "example.com:443", + "example.com", + }, + { + "example.com:81", + "example.com:81", + }, + { + "example.com:boo", + "example.com:boo", // invalid, but tolerated for display purposes + }, + { + "example.com:boo:boo", + "example.com:boo:boo", // invalid, but tolerated for display purposes + }, + { + "example.com:081", + "example.com:81", + }, + } + + for _, test := range tests { + t.Run(test.Input, func(t *testing.T) { + got := ForDisplay(test.Input) + if got != test.Want { + t.Errorf("wrong result\ninput: %s\ngot: %s\nwant: %s", test.Input, got, test.Want) + } + }) + } +} + +func TestForComparison(t *testing.T) { + tests := []struct { + Input string + Want string + Err bool + }{ + { + "", + "", + true, + }, + { + "example.com", + 
"example.com", + false, + }, + { + "example.com:443", + "example.com", + false, + }, + { + "example.com:81", + "example.com:81", + false, + }, + { + "example.com:081", + "example.com:81", + false, + }, + { + "invalid", + "invalid", + false, // the "invalid" TLD is, confusingly, a valid hostname syntactically + }, + { + "localhost", // supported for local testing only + "localhost", + false, + }, + { + "localhost:1211", // supported for local testing only + "localhost:1211", + false, + }, + { + "HashiCorp.com", + "hashicorp.com", + false, + }, + { + "Испытание.com", + "xn--80akhbyknj4f.com", + false, + }, + { + "münchen.de", // this is a precomposed u with diaeresis + "xn--mnchen-3ya.de", + false, + }, + { + "münchen.de", // this is a separate u and combining diaeresis + "xn--mnchen-3ya.de", + false, + }, + { + "blah..blah", + "", + true, + }, + { + "example.com:boo", + "", + true, + }, + { + "example.com:80:boo", + "", + true, + }, + } + + for _, test := range tests { + t.Run(test.Input, func(t *testing.T) { + got, err := ForComparison(test.Input) + if (err != nil) != test.Err { + if test.Err { + t.Error("unexpected success; want error") + } else { + t.Errorf("unexpected error; want success\nerror: %s", err) + } + } + if string(got) != test.Want { + t.Errorf("wrong result\ninput: %s\ngot: %s\nwant: %s", test.Input, got, test.Want) + } + }) + } +} + +func TestHostnameForDisplay(t *testing.T) { + tests := []struct { + Input string + Want string + }{ + { + "example.com", + "example.com", + }, + { + "example.com:81", + "example.com:81", + }, + { + "xn--80akhbyknj4f.com", + "испытание.com", + }, + { + "xn--80akhbyknj4f.com:8080", + "испытание.com:8080", + }, + { + "xn--mnchen-3ya.de", + "münchen.de", // this is a precomposed u with diaeresis + }, + } + + for _, test := range tests { + t.Run(test.Input, func(t *testing.T) { + got := Hostname(test.Input).ForDisplay() + if got != test.Want { + t.Errorf("wrong result\ninput: %s\ngot: %s\nwant: %s", test.Input, got, 
test.Want) + } + }) + } +}