terraform/config/lang/lex.go

381 lines
8.1 KiB
Go

package lang
import (
"bytes"
"fmt"
"strconv"
"unicode"
"unicode/utf8"
"github.com/hashicorp/terraform/config/lang/ast"
)
//go:generate go tool yacc -p parser lang.y
// The parser expects the lexer to return 0 on EOF.
const lexEOF = 0
// The parser uses the type <prefix>Lex as a lexer. It must provide
// the methods Lex(*<prefix>SymType) int and Error(string).
type parserLex struct {
Err error
Input string
mode parserMode
interpolationDepth int
pos int
width int
col, line int
lastLine int
astPos *ast.Pos
}
// parserToken is the token yielded to the parser. The value can be
// determined within the parser type based on the enum value returned
// from Lex.
type parserToken struct {
Value interface{}
Pos ast.Pos
}
// parserMode keeps track of what mode we're in for the parser. We have
// two modes: literal and interpolation. Literal mode is when strings
// don't have to be quoted, and interpolations are defined as ${foo}.
// Interpolation mode means that strings have to be quoted and unquoted
// things are identifiers, such as foo("bar").
type parserMode uint8
const (
parserModeInvalid parserMode = 0
parserModeLiteral = 1 << iota
parserModeInterpolation
)
// The parser calls this method to get each new token.
func (x *parserLex) Lex(yylval *parserSymType) int {
// We always start in literal mode, since programs don't start
// in an interpolation. ex. "foo ${bar}" vs "bar" (and assuming interp.)
if x.mode == parserModeInvalid {
x.mode = parserModeLiteral
}
// Defer an update to set the proper column/line we read the next token.
defer func() {
if yylval.token != nil && yylval.token.Pos.Column == 0 {
yylval.token.Pos = *x.astPos
}
}()
x.astPos = nil
return x.lex(yylval)
}
func (x *parserLex) lex(yylval *parserSymType) int {
switch x.mode {
case parserModeLiteral:
return x.lexModeLiteral(yylval)
case parserModeInterpolation:
return x.lexModeInterpolation(yylval)
default:
x.Error(fmt.Sprintf("Unknown parse mode: %d", x.mode))
return lexEOF
}
}
func (x *parserLex) lexModeLiteral(yylval *parserSymType) int {
for {
c := x.next()
if c == lexEOF {
return lexEOF
}
// Are we starting an interpolation?
if c == '$' && x.peek() == '{' {
x.next()
x.interpolationDepth++
x.mode = parserModeInterpolation
return PROGRAM_BRACKET_LEFT
}
// We're just a normal string that isn't part of any interpolation yet.
x.backup()
result, terminated := x.lexString(yylval, x.interpolationDepth > 0)
// If the string terminated and we're within an interpolation already
// then that means that we finished a nested string, so pop
// back out to interpolation mode.
if terminated && x.interpolationDepth > 0 {
x.mode = parserModeInterpolation
// If the string is empty, just skip it. We're still in
// an interpolation so we do this to avoid empty nodes.
if yylval.token.Value.(string) == "" {
return x.lex(yylval)
}
}
return result
}
}
func (x *parserLex) lexModeInterpolation(yylval *parserSymType) int {
for {
c := x.next()
if c == lexEOF {
return lexEOF
}
// Ignore all whitespace
if unicode.IsSpace(c) {
continue
}
// If we see a double quote then we're lexing a string since
// we're in interpolation mode.
if c == '"' {
result, terminated := x.lexString(yylval, true)
if !terminated {
// The string didn't end, which means that we're in the
// middle of starting another interpolation.
x.mode = parserModeLiteral
// If the string is empty and we're starting an interpolation,
// then just skip it to avoid empty string AST nodes
if yylval.token.Value.(string) == "" {
return x.lex(yylval)
}
}
return result
}
// If we are seeing a number, it is the start of a number. Lex it.
if c >= '0' && c <= '9' {
x.backup()
return x.lexNumber(yylval)
}
switch c {
case '}':
// '}' means we ended the interpolation. Pop back into
// literal mode and reduce our interpolation depth.
x.interpolationDepth--
x.mode = parserModeLiteral
return PROGRAM_BRACKET_RIGHT
case '(':
return PAREN_LEFT
case ')':
return PAREN_RIGHT
case ',':
return COMMA
default:
x.backup()
return x.lexId(yylval)
}
}
}
func (x *parserLex) lexId(yylval *parserSymType) int {
var b bytes.Buffer
for {
c := x.next()
if c == lexEOF {
break
}
// If this isn't a character we want in an ID, return out.
// One day we should make this a regexp.
if c != '_' &&
c != '-' &&
c != '.' &&
c != '*' &&
!unicode.IsLetter(c) &&
!unicode.IsNumber(c) {
x.backup()
break
}
if _, err := b.WriteRune(c); err != nil {
x.Error(err.Error())
return lexEOF
}
}
yylval.token = &parserToken{Value: b.String()}
return IDENTIFIER
}
// lexNumber lexes out a number: an integer or a float.
func (x *parserLex) lexNumber(yylval *parserSymType) int {
var b bytes.Buffer
gotPeriod := false
for {
c := x.next()
if c == lexEOF {
break
}
// If we see a period, we might be getting a float..
if c == '.' {
// If we've already seen a period, then ignore it, and
// exit. This will probably result in a syntax error later.
if gotPeriod {
x.backup()
break
}
gotPeriod = true
} else if c < '0' || c > '9' {
// If we're not seeing a number, then also exit.
x.backup()
break
}
if _, err := b.WriteRune(c); err != nil {
x.Error(fmt.Sprintf("internal error: %s", err))
return lexEOF
}
}
// If we didn't see a period, it is an int
if !gotPeriod {
v, err := strconv.ParseInt(b.String(), 0, 0)
if err != nil {
x.Error(fmt.Sprintf("expected number: %s", err))
return lexEOF
}
yylval.token = &parserToken{Value: int(v)}
return INTEGER
}
// If we did see a period, it is a float
f, err := strconv.ParseFloat(b.String(), 64)
if err != nil {
x.Error(fmt.Sprintf("expected float: %s", err))
return lexEOF
}
yylval.token = &parserToken{Value: f}
return FLOAT
}
func (x *parserLex) lexString(yylval *parserSymType, quoted bool) (int, bool) {
var b bytes.Buffer
terminated := false
for {
c := x.next()
if c == lexEOF {
if quoted {
x.Error("unterminated string")
}
break
}
// Behavior is a bit different if we're lexing within a quoted string.
if quoted {
// If its a double quote, we've reached the end of the string
if c == '"' {
terminated = true
break
}
// Let's check to see if we're escaping anything.
if c == '\\' {
switch n := x.next(); n {
case '\\':
fallthrough
case '"':
c = n
case 'n':
c = '\n'
default:
x.backup()
}
}
}
// If we hit a dollar sign, then check if we're starting
// another interpolation. If so, then we're done.
if c == '$' {
n := x.peek()
// If it is '{', then we're starting another interpolation
if n == '{' {
x.backup()
break
}
// If it is '$', then we're escaping a dollar sign
if n == '$' {
x.next()
}
}
if _, err := b.WriteRune(c); err != nil {
x.Error(err.Error())
return lexEOF, false
}
}
yylval.token = &parserToken{Value: b.String()}
return STRING, terminated
}
// Return the next rune for the lexer.
func (x *parserLex) next() rune {
if int(x.pos) >= len(x.Input) {
x.width = 0
return lexEOF
}
r, w := utf8.DecodeRuneInString(x.Input[x.pos:])
x.width = w
x.pos += x.width
if x.line == 0 {
x.line = 1
x.col = 1
} else {
x.col += 1
}
if r == '\n' {
x.lastLine = x.col
x.line += 1
x.col = 1
}
if x.astPos == nil {
x.astPos = &ast.Pos{Column: x.col, Line: x.line}
}
return r
}
// peek returns but does not consume the next rune in the input
func (x *parserLex) peek() rune {
r := x.next()
x.backup()
return r
}
// backup steps back one rune. Can only be called once per next.
func (x *parserLex) backup() {
x.pos -= x.width
x.col -= 1
// If we are at column 0, we're backing up across a line boundary
// so we need to be careful to get the proper value.
if x.col == 0 {
x.col = x.lastLine
x.line -= 1
}
}
// The parser calls this method on a parse error.
func (x *parserLex) Error(s string) {
x.Err = fmt.Errorf("parse error: %s", s)
}