vetrag/vendor/github.com/mfridman/interpolate/parser.go

282 lines
7.6 KiB
Go

package interpolate
import (
"fmt"
"strconv"
"strings"
"unicode"
"unicode/utf8"
)
// This is a recursive descent parser for our grammar. Because it can contain nested expressions
// like ${LLAMAS:-${ROCK:-true}} we can't use regular expressions. The simplest possible alternative
// is a recursive parser like this. It parses a chunk and then calls a function to parse that
// further and so on and so forth. It results in a tree of objects that represent the things we've
// parsed (an AST). This means that the logic for how expansions work lives in those objects, and
// the logic for how we go from plain text to parsed objects lives here.
//
// To keep things simple, we do our "lexing" or "scanning" just as a few functions at the end of the
// file rather than as a dedicated lexer that emits tokens. This matches the simplicity of the
// format we are parsing relatively well.
//
// Below is an EBNF grammar for the language. The parser was built by basically turning this into
// functions and structs with the same names, reading the string bite by bite (peekRune and nextRune).
/*
EscapedBackslash = "\\"
EscapedDollar    = ( "\$" | "$$" )
Identifier       = letter { letter | digit | "_" }
Expansion        = "$" ( Identifier | Brace )
Brace            = "{" Identifier [ Operation ] "}"
Text             = { EscapedBackslash | EscapedDollar | all characters except "$" }
Expression       = { Text | Expansion }
EmptyValue       = ":-" { Expression }
UnsetValue       = "-" { Expression }
Substring        = ":" number [ ":" number ]
Required         = "?" { Expression }
Operation        = EmptyValue | UnsetValue | Substring | Required
*/
// eof is the sentinel that peekRune and nextRune return once the input has
// been exhausted. It is negative so it can never collide with a decoded rune.
const eof = -1
// Parser walks an input string and produces a tree of structs that describe
// its literal text and its Expansions.
type Parser struct {
	// input is the complete string being scanned.
	input string
	// pos is the current position in input, used as a byte offset when
	// slicing and decoding the next rune.
	pos int
}
// NewParser returns a Parser for str, positioned at the start of the string.
func NewParser(str string) *Parser {
	p := new(Parser) // pos starts at its zero value, i.e. the first byte
	p.input = str
	return p
}
// Parse parses the parser's input from its current position to the end and
// returns the resulting tree of Expressions, or the first error encountered.
func (p *Parser) Parse() (Expression, error) {
	// No stop runes are supplied, so only the end of the input ends the parse.
	tree, err := p.parseExpression()
	return tree, err
}
// parseExpression reads text and expansions starting at the current position
// and returns them as an Expression. Parsing ends at the end of the input or
// when the next rune is one of stop; that stop rune is left unconsumed for the
// caller to inspect.
//
// Items are recognised in this order:
//   - `\\` is emitted as the text `\\`
//   - `\$` and `$$` are emitted as the text `$`
//   - `$(` is emitted verbatim, so shell command substitutions are left alone
//   - `$` that is not the final byte of the input starts an expansion
//   - anything else is collected as plain text up to the next `$`, `\` or stop rune
func (p *Parser) parseExpression(stop ...rune) (Expression, error) {
	var items Expression
	stops := string(stop)

	// endsText reports whether r terminates a run of plain text.
	endsText := func(r rune) bool {
		return r == '$' || r == '\\' || strings.ContainsRune(stops, r)
	}

	for {
		next := p.peekRune()
		if next == eof || strings.ContainsRune(stops, next) {
			return items, nil
		}

		rest := p.input[p.pos:]
		switch {
		// Escapes are checked first, as nothing after this point is treated as escaped.
		case strings.HasPrefix(rest, `\\`):
			p.pos += 2
			items = append(items, ExpressionItem{Text: `\\`})

		case strings.HasPrefix(rest, `\$`), strings.HasPrefix(rest, `$$`):
			p.pos += 2
			items = append(items, ExpressionItem{Text: `$`})

		// Bash command substitutions are passed through untouched.
		case strings.HasPrefix(rest, `$(`):
			p.pos += 2
			items = append(items, ExpressionItem{Text: `$(`})

		// A dollar sign that is not the very last byte introduces an expansion.
		case next == '$' && p.pos < len(p.input)-1:
			expansion, err := p.parseExpansion()
			if err != nil {
				return nil, err
			}
			items = append(items, ExpressionItem{Expansion: expansion})

		default:
			// Always consume one rune first so a lone `\` or trailing `$`
			// makes progress, then take as much plain text as possible.
			first := p.nextRune()
			tail := p.scanUntil(endsText)
			items = append(items, ExpressionItem{Text: string(first) + tail})
		}
	}
}
// parseExpansion parses one expansion beginning at the current position, which
// must hold a '$'. A following '{' hands off to parseBraceExpansion; otherwise
// an identifier is scanned and returned as a VariableExpansion.
func (p *Parser) parseExpansion() (Expansion, error) {
	sigil := p.nextRune()
	if sigil != '$' {
		return nil, fmt.Errorf("Expected expansion to start with $, got %c", sigil)
	}

	// An opening brace directly after the dollar sign selects the ${...} form.
	if p.peekRune() == '{' {
		return p.parseBraceExpansion()
	}

	name, err := p.scanIdentifier()
	if err != nil {
		return nil, err
	}
	return VariableExpansion{Identifier: name}, nil
}
// parseBraceExpansion parses the braced form of an expansion, starting at the
// opening '{' and consuming through the closing '}'.
//
// After the identifier it accepts either an immediate '}' (a plain variable
// expansion) or one of the operators ":-", "-", ":" and "?", each of which is
// delegated to its own parse function. Any other rune after the identifier,
// or a missing closing brace, is an error.
func (p *Parser) parseBraceExpansion() (Expansion, error) {
	if open := p.nextRune(); open != '{' {
		return nil, fmt.Errorf("Expected brace expansion to start with {, got %c", open)
	}

	name, err := p.scanIdentifier()
	if err != nil {
		return nil, err
	}

	// Nothing between the identifier and the closing brace: plain variable.
	if p.peekRune() == '}' {
		p.nextRune()
		return VariableExpansion{Identifier: name}, nil
	}

	var result Expansion
	switch op := p.nextRune(); op {
	case ':':
		// ':' alone introduces a substring, ":-" introduces an empty-value default.
		if p.peekRune() == '-' {
			p.nextRune()
			result, err = p.parseEmptyValueExpansion(name)
		} else {
			result, err = p.parseSubstringExpansion(name)
		}
	case '-':
		result, err = p.parseUnsetValueExpansion(name)
	case '?':
		result, err = p.parseRequiredExpansion(name)
	default:
		return nil, fmt.Errorf("Expected an operator, got %c", op)
	}
	if err != nil {
		return nil, err
	}

	if closing := p.nextRune(); closing != '}' {
		return nil, fmt.Errorf("Expected brace expansion to end with }, got %c", closing)
	}
	return result, nil
}
// parseEmptyValueExpansion parses the content that follows a ":-" operator.
// The content is itself an expression (text and nested expansions) running up
// to, but not including, the closing brace.
func (p *Parser) parseEmptyValueExpansion(identifier string) (Expansion, error) {
	content, err := p.parseExpression('}')
	if err != nil {
		return nil, err
	}
	expansion := EmptyValueExpansion{Identifier: identifier, Content: content}
	return expansion, nil
}
// parseUnsetValueExpansion parses the content that follows a "-" operator.
// The content is an expression running up to, but not including, the closing
// brace.
func (p *Parser) parseUnsetValueExpansion(identifier string) (Expansion, error) {
	content, err := p.parseExpression('}')
	if err != nil {
		return nil, err
	}
	expansion := UnsetValueExpansion{Identifier: identifier, Content: content}
	return expansion, nil
}
// parseSubstringExpansion parses what follows a ":" operator: an integer
// offset, optionally followed by ":" and an integer length. Whitespace around
// either number is ignored. The closing brace is left for the caller.
//
// It returns an error when the offset or length is not a valid integer, or
// when the offset is followed by something other than ':' or '}'. Integer
// conversion errors are wrapped, so callers can inspect them with errors.As.
func (p *Parser) parseSubstringExpansion(identifier string) (Expansion, error) {
	offset := p.scanUntil(func(r rune) bool {
		return r == ':' || r == '}'
	})

	offsetInt, err := strconv.Atoi(strings.TrimSpace(offset))
	if err != nil {
		return nil, fmt.Errorf("Unable to parse offset: %w", err)
	}

	// Offset only: "${NAME:offset}".
	if p.peekRune() == '}' {
		return SubstringExpansion{Identifier: identifier, Offset: offsetInt}, nil
	}

	// The scan above can stop on something other than ':' or '}', such as a
	// rune that fails to decode, so confirm the separator before treating the
	// rest as a length. At end of input there is nothing to consume; the
	// length conversion below reports that case.
	if p.pos < len(p.input) {
		if sep := p.nextRune(); sep != ':' {
			return nil, fmt.Errorf("Expected substring offset to be followed by : or }, got %c", sep)
		}
	}

	length := p.scanUntil(func(r rune) bool {
		return r == '}'
	})

	lengthInt, err := strconv.Atoi(strings.TrimSpace(length))
	if err != nil {
		return nil, fmt.Errorf("Unable to parse length: %w", err)
	}

	return SubstringExpansion{Identifier: identifier, Offset: offsetInt, Length: lengthInt, HasLength: true}, nil
}
// parseRequiredExpansion parses the message that follows a "?" operator. The
// message is an expression running up to, but not including, the closing
// brace.
func (p *Parser) parseRequiredExpansion(identifier string) (Expansion, error) {
	message, err := p.parseExpression('}')
	if err != nil {
		return nil, err
	}
	expansion := RequiredExpansion{Identifier: identifier, Message: message}
	return expansion, nil
}
// scanUntil advances over consecutive runes and returns the text it skipped.
// Scanning stops, without consuming the offending rune, as soon as f reports
// true for the next rune or that rune decodes as utf8.RuneError; otherwise it
// runs to the end of the input.
func (p *Parser) scanUntil(f func(rune) bool) string {
	begin := p.pos
	rest := p.input[begin:]

	// Index of the first rune that ends the scan, or -1 if none does.
	width := strings.IndexFunc(rest, func(r rune) bool {
		return r == utf8.RuneError || f(r)
	})
	if width < 0 {
		width = len(rest)
	}

	p.pos = begin + width
	return rest[:width]
}
// scanIdentifier consumes and returns an identifier at the current position.
// The first rune must be a letter; after that, letters, numbers and
// underscores are accepted. Nothing is consumed when the first rune is not a
// letter, and an error is returned instead.
func (p *Parser) scanIdentifier() (string, error) {
	first := p.peekRune()
	if !unicode.IsLetter(first) {
		return "", fmt.Errorf("Expected identifier to start with a letter, got %c", first)
	}

	name := p.scanUntil(func(r rune) bool {
		isIdentifierRune := unicode.IsLetter(r) || unicode.IsNumber(r) || r == '_'
		return !isIdentifierRune
	})
	return name, nil
}
// nextRune decodes the rune at the current position, advances past it and
// returns it. When the input is exhausted it returns eof and leaves the
// position unchanged.
func (p *Parser) nextRune() rune {
	if p.pos < len(p.input) {
		r, width := utf8.DecodeRuneInString(p.input[p.pos:])
		p.pos += width
		return r
	}
	return eof
}
// peekRune returns the rune at the current position without consuming it, or
// eof when the input is exhausted.
func (p *Parser) peekRune() rune {
	if remaining := len(p.input) - p.pos; remaining <= 0 {
		return eof
	}
	upcoming, _ := utf8.DecodeRuneInString(p.input[p.pos:])
	return upcoming
}