terraform/config/lang/lex.go

package lang

import (
	"bytes"
	"fmt"
	"strconv"
	"unicode"
	"unicode/utf8"

	"github.com/hashicorp/terraform/config/lang/ast"
)

//go:generate go tool yacc -p parser lang.y

// lexEOF is the value that signals end of input; the generated parser
// expects the lexer to return 0 on EOF. It is deliberately left untyped so
// that it can be compared against runes read from the input as well as
// returned as an int token kind.
const lexEOF = 0

// parserLex is the lexer handed to the generated parser. The parser requires
// a type named <prefix>Lex that provides the methods
// Lex(*<prefix>SymType) int and Error(string).
type parserLex struct {
	// Err is the most recent error recorded through Error.
	Err error

	// Input is the complete text being lexed.
	Input string

	// mode selects between literal and interpolation lexing.
	mode parserMode

	// interpolationDepth counts the "${" sequences that have been opened
	// and not yet closed by a "}".
	interpolationDepth int

	// pos is the byte offset into Input of the next rune to read.
	pos int

	// width is the byte width of the rune most recently read by next, or 0
	// if next hit the end of the input. backup rewinds pos by this amount.
	width int

	// col and line are the running position counters maintained by next
	// and backup.
	col  int
	line int

	// lastLine holds the value of col at the moment the most recent newline
	// was read; backup restores col from it when stepping back across a
	// line boundary.
	lastLine int

	// astPos is the position captured by the first call to next after Lex
	// cleared it. Lex copies it into the token it yields.
	astPos *ast.Pos
}

// parserToken is what the lexer hands to the parser for tokens that carry
// data. The concrete type stored in Value is implied by the token kind that
// Lex returns alongside it.
type parserToken struct {
	// Value is the lexed payload: a string for STRING and IDENTIFIER
	// tokens, an int for INTEGER and a float64 for FLOAT.
	Value interface{}

	// Pos is the source position Lex recorded for the token.
	Pos ast.Pos
}

// parserMode records which of the two lexing modes is active.
//
// In literal mode text is taken verbatim without needing quotes, and
// interpolations are introduced with ${foo}. In interpolation mode strings
// have to be quoted and unquoted words are identifiers, such as foo("bar").
type parserMode uint8

// The lexing modes. parserModeInvalid is the zero value, so a freshly
// created lexer can be recognised as not having picked a mode yet.
//
// Every constant carries the parserMode type explicitly: without the type on
// the second line the iota-derived constants would be plain untyped integers
// rather than parserMode values. The numeric values are 0, 2 and 4 (iota is
// already 1 on the parserModeLiteral line).
const (
	parserModeInvalid       parserMode = 0
	parserModeLiteral       parserMode = 1 << iota
	parserModeInterpolation
)

// Lex is called by the generated parser each time it needs another token.
// It returns the token kind; tokens that carry data are stored in
// yylval.token.
func (x *parserLex) Lex(yylval *parserSymType) int {
	// A zero-valued lexer has not picked a mode yet. Programs never begin
	// inside an interpolation (compare "foo ${bar}" with a bare "bar"), so
	// literal mode is the starting point.
	if x.mode == parserModeInvalid {
		x.mode = parserModeLiteral
	}

	// Drop the previous position; next records a fresh one as soon as it
	// reads a rune for this token.
	x.astPos = nil

	// After lexing, stamp the token with that position unless it already
	// has a column set.
	defer func() {
		tok := yylval.token
		if tok == nil || tok.Pos.Column != 0 {
			return
		}
		tok.Pos = *x.astPos
	}()

	return x.lex(yylval)
}

// lex hands off to the lexing routine for the current mode. A mode that is
// neither literal nor interpolation is recorded as an error and reported to
// the parser as end of input.
func (x *parserLex) lex(yylval *parserSymType) int {
	if x.mode == parserModeLiteral {
		return x.lexModeLiteral(yylval)
	}
	if x.mode == parserModeInterpolation {
		return x.lexModeInterpolation(yylval)
	}

	msg := fmt.Sprintf("Unknown parse mode: %d", x.mode)
	x.Error(msg)
	return lexEOF
}

// lexModeLiteral lexes one token while in literal mode. It yields either
// PROGRAM_BRACKET_LEFT for the "${" that opens an interpolation, or the
// result of lexString for a run of string data. It returns lexEOF at end of
// input.
func (x *parserLex) lexModeLiteral(yylval *parserSymType) int {
	c := x.next()
	if c == lexEOF {
		return lexEOF
	}

	// Remember how many bytes c occupies. backup rewinds by x.width, and a
	// lookahead can leave a different width behind: that of the rune after
	// c, or 0 at end of input.
	width := x.width

	// Are we starting an interpolation?
	if c == '$' && x.peek() == '{' {
		x.next()
		x.interpolationDepth++
		x.mode = parserModeInterpolation
		return PROGRAM_BRACKET_LEFT
	}

	// Not an interpolation, so c belongs to ordinary string data. Put it
	// back so lexString sees it. The width is restored first so that exactly
	// c is rewound: with a stale width, a '$' followed by a multi-byte rune
	// would rewind too far (even below offset 0), and a '$' at the very end
	// of the input would not be rewound at all and would be lost.
	x.width = width
	x.backup()

	// Inside an interpolation this literal is the remainder of a quoted
	// string, so lexString must watch for the closing quote.
	nested := x.interpolationDepth > 0
	result, terminated := x.lexString(yylval, nested)
	if !terminated || !nested {
		return result
	}

	// The nested string was closed, so pop back out to interpolation mode.
	x.mode = parserModeInterpolation

	// An empty trailing piece is skipped to avoid producing empty string
	// nodes; lex the next token instead.
	if yylval.token.Value.(string) == "" {
		return x.lex(yylval)
	}

	return result
}

// lexModeInterpolation lexes one token while inside a ${...} interpolation,
// where whitespace is skipped, strings are double-quoted and anything that
// is not a number or punctuation is lexed as an identifier. It returns
// lexEOF at end of input.
func (x *parserLex) lexModeInterpolation(yylval *parserSymType) int {
	// Whitespace carries no meaning here; read past it.
	c := x.next()
	for unicode.IsSpace(c) {
		c = x.next()
	}

	switch {
	case c == lexEOF:
		return lexEOF

	case c == '"':
		// A double quote opens a string.
		result, terminated := x.lexString(yylval, true)
		if terminated {
			return result
		}

		// The string stopped before its closing quote, which means a
		// nested interpolation is starting; its opening "${" is lexed in
		// literal mode.
		x.mode = parserModeLiteral

		// Skip an empty leading piece to avoid empty string AST nodes.
		if yylval.token.Value.(string) == "" {
			return x.lex(yylval)
		}
		return result

	case '0' <= c && c <= '9':
		// A digit starts a number; hand the digit back to lexNumber.
		x.backup()
		return x.lexNumber(yylval)

	case c == '}':
		// End of the interpolation: drop one level and return to literal
		// mode.
		x.interpolationDepth--
		x.mode = parserModeLiteral
		return PROGRAM_BRACKET_RIGHT

	case c == '(':
		return PAREN_LEFT

	case c == ')':
		return PAREN_RIGHT

	case c == ',':
		return COMMA
	}

	// Everything else is treated as the start of an identifier.
	x.backup()
	return x.lexId(yylval)
}

// lexId lexes an identifier: the longest run of letters, numbers and the
// characters '_', '-', '.' and '*'. The run may be empty. The identifier is
// stored in yylval.token as a string and IDENTIFIER is returned.
func (x *parserLex) lexId(yylval *parserSymType) int {
	var name bytes.Buffer

	for r := x.next(); r != lexEOF; r = x.next() {
		var allowed bool
		switch r {
		case '_', '-', '.', '*':
			allowed = true
		default:
			allowed = unicode.IsLetter(r) || unicode.IsNumber(r)
		}

		// The first rune that cannot be part of an identifier ends it and
		// is left in the input for the next token.
		if !allowed {
			x.backup()
			break
		}

		if _, err := name.WriteRune(r); err != nil {
			x.Error(err.Error())
			return lexEOF
		}
	}

	yylval.token = &parserToken{Value: name.String()}
	return IDENTIFIER
}

// lexNumber lexes a run of ASCII digits containing at most one period. With
// no period the text is parsed as an integer and INTEGER is returned with an
// int value; with a period it is parsed as a float and FLOAT is returned
// with a float64 value. A parse failure is recorded through Error and
// lexEOF is returned.
func (x *parserLex) lexNumber(yylval *parserSymType) int {
	var digits bytes.Buffer
	isFloat := false

scan:
	for {
		r := x.next()

		switch {
		case r == lexEOF:
			break scan

		case r == '.' && isFloat:
			// A second period cannot belong to this number. Leave it in
			// the input; it will most likely surface as a syntax error.
			x.backup()
			break scan

		case r == '.':
			isFloat = true

		case r < '0' || r > '9':
			// Anything that is not a digit ends the number.
			x.backup()
			break scan
		}

		if _, err := digits.WriteRune(r); err != nil {
			x.Error(fmt.Sprintf("internal error: %s", err))
			return lexEOF
		}
	}

	text := digits.String()

	if isFloat {
		f, err := strconv.ParseFloat(text, 64)
		if err != nil {
			x.Error(fmt.Sprintf("expected float: %s", err))
			return lexEOF
		}

		yylval.token = &parserToken{Value: f}
		return FLOAT
	}

	// Base 0 lets strconv pick the base from the prefix, so digits with a
	// leading zero are read as octal.
	// NOTE(review): confirm that octal interpretation is intended here.
	v, err := strconv.ParseInt(text, 0, 0)
	if err != nil {
		x.Error(fmt.Sprintf("expected number: %s", err))
		return lexEOF
	}

	yylval.token = &parserToken{Value: int(v)}
	return INTEGER
}

// lexString lexes string data up to the start of the next interpolation, the
// closing double quote (quoted mode only) or the end of the input. The text
// is stored in yylval.token as a string and STRING is returned.
//
// When quoted is true the escapes \\, \" and \n are recognised, and reaching
// the end of the input before the closing quote records an "unterminated
// string" error. In either mode "$$" yields a single '$'.
//
// The boolean result reports whether the string was ended by its closing
// quote.
func (x *parserLex) lexString(yylval *parserSymType, quoted bool) (int, bool) {
	var text bytes.Buffer
	closed := false

scan:
	for {
		r := x.next()
		if r == lexEOF {
			if quoted {
				x.Error("unterminated string")
			}
			break scan
		}

		if quoted {
			switch r {
			case '"':
				// The closing quote ends the string.
				closed = true
				break scan

			case '\\':
				// Translate the escapes we know about. For anything else
				// the backslash is kept as-is and the rune after it is
				// put back to be read normally.
				switch esc := x.next(); esc {
				case '\\', '"':
					r = esc
				case 'n':
					r = '\n'
				default:
					x.backup()
				}
			}
		}

		if r == '$' {
			switch x.peek() {
			case '{':
				// "${" starts an interpolation: hand the '$' back and
				// stop here.
				x.backup()
				break scan

			case '$':
				// "$$" is an escaped dollar sign; swallow the second one.
				x.next()
			}
		}

		if _, err := text.WriteRune(r); err != nil {
			x.Error(err.Error())
			return lexEOF, false
		}
	}

	yylval.token = &parserToken{Value: text.String()}
	return STRING, closed
}

// next consumes and returns the next rune of the input, or lexEOF once the
// input is exhausted. It records the rune's byte width for backup, advances
// the line and column counters, and captures the current position in astPos
// if none has been captured yet.
func (x *parserLex) next() rune {
	if x.pos >= len(x.Input) {
		// Nothing was consumed, so there is no width to back up over.
		x.width = 0
		return lexEOF
	}

	r, size := utf8.DecodeRuneInString(x.Input[x.pos:])
	x.width = size
	x.pos += size

	// A line number of 0 means the counters have not been started yet.
	if x.line != 0 {
		x.col++
	} else {
		x.line, x.col = 1, 1
	}

	// On a newline, save the column so backup can restore it, then move to
	// the next line.
	// NOTE(review): col restarts at 1 here and is incremented again by the
	// next rune read, so the first rune of a later line appears to get
	// column 2 — confirm whether that is intended.
	if r == '\n' {
		x.lastLine = x.col
		x.line++
		x.col = 1
	}

	// Captured after the newline handling above, so the position reflects
	// the updated counters.
	if x.astPos == nil {
		x.astPos = &ast.Pos{Line: x.line, Column: x.col}
	}

	return r
}

// peek returns the next rune in the input without consuming it, or lexEOF
// if the input is exhausted.
//
// next overwrites x.width with the width of the rune it reads, and backup
// rewinds by x.width. The width in effect before the lookahead is therefore
// put back afterwards, so that a caller which peeks and then calls backup
// steps back over the rune it actually consumed rather than over the
// lookahead, which may have a different byte width or be the end of input.
func (x *parserLex) peek() rune {
	consumed := x.width

	r := x.next()
	x.backup()

	x.width = consumed
	return r
}

// backup steps back over the rune most recently returned by next. It can
// only be called once per call to next.
//
// If the last call to next hit the end of the input, nothing was consumed
// (the width is 0) and the position counters were not advanced, so there is
// nothing to undo and backup leaves the lexer untouched.
func (x *parserLex) backup() {
	if x.width == 0 {
		return
	}

	x.pos -= x.width
	x.col--

	// Reaching column 0 means we backed up across a line boundary: restore
	// the column saved when that newline was read and move up one line.
	if x.col == 0 {
		x.col = x.lastLine
		x.line--
	}
}

// Error is called by the generated parser when it hits a parse error, and by
// the lexer itself for lexing errors. The message is wrapped with a
// "parse error: " prefix and stored in Err, replacing any earlier error.
func (x *parserLex) Error(s string) {
	err := fmt.Errorf("parse error: %s", s)
	x.Err = err
}
config/lang: initial work 2015-01-11 21:38:45 +01:00			`package lang`

			`import (`
			`"bytes"`
			`"fmt"`
config/lang: integer and float types 2015-01-12 17:53:27 +01:00			`"strconv"`
config/lang: initial work 2015-01-11 21:38:45 +01:00			`"unicode"`
			`"unicode/utf8"`
config/lang: have position in AST 2015-01-12 09:28:47 +01:00
			`"github.com/hashicorp/terraform/config/lang/ast"`
config/lang: initial work 2015-01-11 21:38:45 +01:00			`)`

			`//go:generate go tool yacc -p parser lang.y`

			`// The parser expects the lexer to return 0 on EOF.`
			`const lexEOF = 0`

			`// The parser uses the type <prefix>Lex as a lexer. It must provide`
			`// the methods Lex(*<prefix>SymType) int and Error(string).`
			`type parserLex struct {`
			`Err error`
			`Input string`

config/lang: can parse nested interpolations 2015-01-11 22:59:24 +01:00			`mode parserMode`
config/lang: initial work 2015-01-11 21:38:45 +01:00			`interpolationDepth int`
			`pos int`
			`width int`
config/lang: have position in AST 2015-01-12 09:28:47 +01:00			`col, line int`
			`lastLine int`
			`astPos *ast.Pos`
			`}`

			`// parserToken is the token yielded to the parser. The value can be`
			`// determined within the parser type based on the enum value returned`
			`// from Lex.`
			`type parserToken struct {`
			`Value interface{}`
			`Pos ast.Pos`
config/lang: initial work 2015-01-11 21:38:45 +01:00			`}`

config/lang: can parse nested interpolations 2015-01-11 22:59:24 +01:00			`// parserMode keeps track of what mode we're in for the parser. We have`
			`// two modes: literal and interpolation. Literal mode is when strings`
			`// don't have to be quoted, and interpolations are defined as ${foo}.`
			`// Interpolation mode means that strings have to be quoted and unquoted`
			`// things are identifiers, such as foo("bar").`
			`type parserMode uint8`

			`const (`
			`parserModeInvalid parserMode = 0`
			`parserModeLiteral = 1 << iota`
			`parserModeInterpolation`
			`)`

config/lang: initial work 2015-01-11 21:38:45 +01:00			`// The parser calls this method to get each new token.`
			`func (x parserLex) Lex(yylval parserSymType) int {`
config/lang: remove unused code 2015-01-14 19:11:29 +01:00			`// We always start in literal mode, since programs don't start`
			`// in an interpolation. ex. "foo ${bar}" vs "bar" (and assuming interp.)`
config/lang: can parse nested interpolations 2015-01-11 22:59:24 +01:00			`if x.mode == parserModeInvalid {`
			`x.mode = parserModeLiteral`
			`}`

config/lang: remove unused code 2015-01-14 19:11:29 +01:00			`// Defer an update to set the proper column/line we read the next token.`
config/lang: have position in AST 2015-01-12 09:28:47 +01:00			`defer func() {`
			`if yylval.token != nil && yylval.token.Pos.Column == 0 {`
			`yylval.token.Pos = *x.astPos`
			`}`
			`}()`

			`x.astPos = nil`
			`return x.lex(yylval)`
			`}`

			`func (x parserLex) lex(yylval parserSymType) int {`
config/lang: can parse nested interpolations 2015-01-11 22:59:24 +01:00			`switch x.mode {`
			`case parserModeLiteral:`
			`return x.lexModeLiteral(yylval)`
			`case parserModeInterpolation:`
			`return x.lexModeInterpolation(yylval)`
			`default:`
config/lang: address vet reports Fixes the following vet reports: config/lang/check_types.go:98: arg n for printf verb %d of wrong type: *github.com/hashicorp/terraform/config/lang/ast.Concat config/lang/lex.go:80: arg x.mode for printf verb %s of wrong type: lang.parserMode 2015-01-14 22:04:28 +01:00			`x.Error(fmt.Sprintf("Unknown parse mode: %d", x.mode))`
config/lang: can parse nested interpolations 2015-01-11 22:59:24 +01:00			`return lexEOF`
			`}`
			`}`

			`func (x parserLex) lexModeLiteral(yylval parserSymType) int {`
config/lang: initial work 2015-01-11 21:38:45 +01:00			`for {`
			`c := x.next()`
			`if c == lexEOF {`
			`return lexEOF`
			`}`

			`// Are we starting an interpolation?`
			`if c == '$' && x.peek() == '{' {`
			`x.next()`
			`x.interpolationDepth++`
config/lang: can parse nested interpolations 2015-01-11 22:59:24 +01:00			`x.mode = parserModeInterpolation`
config/lang: initial work 2015-01-11 21:38:45 +01:00			`return PROGRAM_BRACKET_LEFT`
			`}`

config/lang: can parse nested interpolations 2015-01-11 22:59:24 +01:00			`// We're just a normal string that isn't part of any interpolation yet.`
			`x.backup()`
			`result, terminated := x.lexString(yylval, x.interpolationDepth > 0)`

			`// If the string terminated and we're within an interpolation already`
			`// then that means that we finished a nested string, so pop`
			`// back out to interpolation mode.`
			`if terminated && x.interpolationDepth > 0 {`
			`x.mode = parserModeInterpolation`

			`// If the string is empty, just skip it. We're still in`
			`// an interpolation so we do this to avoid empty nodes.`
config/lang: have position in AST 2015-01-12 09:28:47 +01:00			`if yylval.token.Value.(string) == "" {`
			`return x.lex(yylval)`
config/lang: can parse nested interpolations 2015-01-11 22:59:24 +01:00			`}`
			`}`

			`return result`
			`}`
			`}`

			`func (x parserLex) lexModeInterpolation(yylval parserSymType) int {`
			`for {`
			`c := x.next()`
			`if c == lexEOF {`
			`return lexEOF`
config/lang: more implemented 2015-01-11 22:03:37 +01:00			`}`

			`// Ignore all whitespace`
			`if unicode.IsSpace(c) {`
			`continue`
			`}`

config/lang: remove unused code 2015-01-14 19:11:29 +01:00			`// If we see a double quote then we're lexing a string since`
			`// we're in interpolation mode.`
config/lang: more implemented 2015-01-11 22:03:37 +01:00			`if c == '"' {`
config/lang: can parse nested interpolations 2015-01-11 22:59:24 +01:00			`result, terminated := x.lexString(yylval, true)`
			`if !terminated {`
			`// The string didn't end, which means that we're in the`
			`// middle of starting another interpolation.`
			`x.mode = parserModeLiteral`

			`// If the string is empty and we're starting an interpolation,`
			`// then just skip it to avoid empty string AST nodes`
config/lang: have position in AST 2015-01-12 09:28:47 +01:00			`if yylval.token.Value.(string) == "" {`
			`return x.lex(yylval)`
config/lang: can parse nested interpolations 2015-01-11 22:59:24 +01:00			`}`
			`}`

			`return result`
config/lang: initial work 2015-01-11 21:38:45 +01:00			`}`

config/lang: integer and float types 2015-01-12 17:53:27 +01:00			`// If we are seeing a number, it is the start of a number. Lex it.`
			`if c >= '0' && c <= '9' {`
			`x.backup()`
			`return x.lexNumber(yylval)`
			`}`

config/lang: initial work 2015-01-11 21:38:45 +01:00			`switch c {`
			`case '}':`
config/lang: remove unused code 2015-01-14 19:11:29 +01:00			`// '}' means we ended the interpolation. Pop back into`
			`// literal mode and reduce our interpolation depth.`
config/lang: initial work 2015-01-11 21:38:45 +01:00			`x.interpolationDepth--`
config/lang: can parse nested interpolations 2015-01-11 22:59:24 +01:00			`x.mode = parserModeLiteral`
config/lang: initial work 2015-01-11 21:38:45 +01:00			`return PROGRAM_BRACKET_RIGHT`
config/lang: more implemented 2015-01-11 22:03:37 +01:00			`case '(':`
			`return PAREN_LEFT`
			`case ')':`
			`return PAREN_RIGHT`
			`case ',':`
			`return COMMA`
config/lang: initial work 2015-01-11 21:38:45 +01:00			`default:`
			`x.backup()`
config/lang: more implemented 2015-01-11 22:03:37 +01:00			`return x.lexId(yylval)`
config/lang: initial work 2015-01-11 21:38:45 +01:00			`}`
			`}`
			`}`

			`func (x parserLex) lexId(yylval parserSymType) int {`
			`var b bytes.Buffer`
			`for {`
			`c := x.next()`
			`if c == lexEOF {`
			`break`
			`}`

			`// If this isn't a character we want in an ID, return out.`
			`// One day we should make this a regexp.`
			`if c != '_' &&`
			`c != '-' &&`
			`c != '.' &&`
			`c != '*' &&`
			`!unicode.IsLetter(c) &&`
			`!unicode.IsNumber(c) {`
			`x.backup()`
			`break`
			`}`

			`if _, err := b.WriteRune(c); err != nil {`
			`x.Error(err.Error())`
			`return lexEOF`
			`}`
			`}`

config/lang: have position in AST 2015-01-12 09:28:47 +01:00			`yylval.token = &parserToken{Value: b.String()}`
config/lang: initial work 2015-01-11 21:38:45 +01:00			`return IDENTIFIER`
			`}`

config/lang: integer and float types 2015-01-12 17:53:27 +01:00			`// lexNumber lexes out a number: an integer or a float.`
			`func (x parserLex) lexNumber(yylval parserSymType) int {`
			`var b bytes.Buffer`
			`gotPeriod := false`
			`for {`
			`c := x.next()`
			`if c == lexEOF {`
			`break`
			`}`

			`// If we see a period, we might be getting a float..`
			`if c == '.' {`
			`// If we've already seen a period, then ignore it, and`
			`// exit. This will probably result in a syntax error later.`
			`if gotPeriod {`
			`x.backup()`
			`break`
			`}`

			`gotPeriod = true`
			`} else if c < '0' \|\| c > '9' {`
			`// If we're not seeing a number, then also exit.`
			`x.backup()`
			`break`
			`}`

			`if _, err := b.WriteRune(c); err != nil {`
			`x.Error(fmt.Sprintf("internal error: %s", err))`
			`return lexEOF`
			`}`
			`}`

			`// If we didn't see a period, it is an int`
			`if !gotPeriod {`
			`v, err := strconv.ParseInt(b.String(), 0, 0)`
			`if err != nil {`
			`x.Error(fmt.Sprintf("expected number: %s", err))`
			`return lexEOF`
			`}`

			`yylval.token = &parserToken{Value: int(v)}`
			`return INTEGER`
			`}`

			`// If we did see a period, it is a float`
			`f, err := strconv.ParseFloat(b.String(), 64)`
			`if err != nil {`
			`x.Error(fmt.Sprintf("expected float: %s", err))`
			`return lexEOF`
			`}`

			`yylval.token = &parserToken{Value: f}`
			`return FLOAT`
			`}`

config/lang: can parse nested interpolations 2015-01-11 22:59:24 +01:00			`func (x parserLex) lexString(yylval parserSymType, quoted bool) (int, bool) {`
config/lang: initial work 2015-01-11 21:38:45 +01:00			`var b bytes.Buffer`
config/lang: can parse nested interpolations 2015-01-11 22:59:24 +01:00			`terminated := false`
config/lang: initial work 2015-01-11 21:38:45 +01:00			`for {`
			`c := x.next()`
			`if c == lexEOF {`
config/lang: can parse nested interpolations 2015-01-11 22:59:24 +01:00			`if quoted {`
			`x.Error("unterminated string")`
			`}`

config/lang: initial work 2015-01-11 21:38:45 +01:00			`break`
			`}`

			`// Behavior is a bit different if we're lexing within a quoted string.`
			`if quoted {`
			`// If its a double quote, we've reached the end of the string`
			`if c == '"' {`
config/lang: can parse nested interpolations 2015-01-11 22:59:24 +01:00			`terminated = true`
config/lang: initial work 2015-01-11 21:38:45 +01:00			`break`
			`}`

			`// Let's check to see if we're escaping anything.`
			`if c == '\\' {`
			`switch n := x.next(); n {`
			`case '\\':`
			`fallthrough`
			`case '"':`
			`c = n`
			`case 'n':`
			`c = '\n'`
			`default:`
			`x.backup()`
			`}`
			`}`
			`}`

			`// If we hit a dollar sign, then check if we're starting`
			`// another interpolation. If so, then we're done.`
config/lang: escaping interpolations with double dollar signs 2015-01-12 19:59:43 +01:00			`if c == '$' {`
			`n := x.peek()`

			`// If it is '{', then we're starting another interpolation`
			`if n == '{' {`
			`x.backup()`
			`break`
			`}`

			`// If it is '$', then we're escaping a dollar sign`
			`if n == '$' {`
			`x.next()`
			`}`
config/lang: initial work 2015-01-11 21:38:45 +01:00			`}`

			`if _, err := b.WriteRune(c); err != nil {`
			`x.Error(err.Error())`
config/lang: can parse nested interpolations 2015-01-11 22:59:24 +01:00			`return lexEOF, false`
config/lang: initial work 2015-01-11 21:38:45 +01:00			`}`
			`}`

config/lang: have position in AST 2015-01-12 09:28:47 +01:00			`yylval.token = &parserToken{Value: b.String()}`
config/lang: can parse nested interpolations 2015-01-11 22:59:24 +01:00			`return STRING, terminated`
config/lang: initial work 2015-01-11 21:38:45 +01:00			`}`

			`// Return the next rune for the lexer.`
			`func (x *parserLex) next() rune {`
			`if int(x.pos) >= len(x.Input) {`
			`x.width = 0`
			`return lexEOF`
			`}`

			`r, w := utf8.DecodeRuneInString(x.Input[x.pos:])`
			`x.width = w`
			`x.pos += x.width`
config/lang: have position in AST 2015-01-12 09:28:47 +01:00
			`if x.line == 0 {`
			`x.line = 1`
			`x.col = 1`
			`} else {`
			`x.col += 1`
			`}`

			`if r == '\n' {`
			`x.lastLine = x.col`
			`x.line += 1`
			`x.col = 1`
			`}`

			`if x.astPos == nil {`
			`x.astPos = &ast.Pos{Column: x.col, Line: x.line}`
			`}`

config/lang: initial work 2015-01-11 21:38:45 +01:00			`return r`
			`}`

			`// peek returns but does not consume the next rune in the input`
			`func (x *parserLex) peek() rune {`
			`r := x.next()`
			`x.backup()`
			`return r`
			`}`

			`// backup steps back one rune. Can only be called once per next.`
			`func (x *parserLex) backup() {`
			`x.pos -= x.width`
config/lang: have position in AST 2015-01-12 09:28:47 +01:00			`x.col -= 1`

			`// If we are at column 0, we're backing up across a line boundary`
			`// so we need to be careful to get the proper value.`
			`if x.col == 0 {`
			`x.col = x.lastLine`
			`x.line -= 1`
			`}`
config/lang: initial work 2015-01-11 21:38:45 +01:00			`}`

			`// The parser calls this method on a parse error.`
			`func (x *parserLex) Error(s string) {`
			`x.Err = fmt.Errorf("parse error: %s", s)`
			`}`