code.dwrz.net

Go monorepo.
Log | Files | Refs

parser.go (15311B)


      1 package jmespath
      2 
      3 import (
      4 	"encoding/json"
      5 	"fmt"
      6 	"strconv"
      7 	"strings"
      8 )
      9 
     10 type astNodeType int
     11 
     12 //go:generate stringer -type astNodeType
     13 const (
     14 	ASTEmpty astNodeType = iota
     15 	ASTComparator
     16 	ASTCurrentNode
     17 	ASTExpRef
     18 	ASTFunctionExpression
     19 	ASTField
     20 	ASTFilterProjection
     21 	ASTFlatten
     22 	ASTIdentity
     23 	ASTIndex
     24 	ASTIndexExpression
     25 	ASTKeyValPair
     26 	ASTLiteral
     27 	ASTMultiSelectHash
     28 	ASTMultiSelectList
     29 	ASTOrExpression
     30 	ASTAndExpression
     31 	ASTNotExpression
     32 	ASTPipe
     33 	ASTProjection
     34 	ASTSubexpression
     35 	ASTSlice
     36 	ASTValueProjection
     37 )
     38 
     39 // ASTNode represents the abstract syntax tree of a JMESPath expression.
     40 type ASTNode struct {
     41 	nodeType astNodeType
     42 	value    interface{}
     43 	children []ASTNode
     44 }
     45 
     46 func (node ASTNode) String() string {
     47 	return node.PrettyPrint(0)
     48 }
     49 
     50 // PrettyPrint will pretty print the parsed AST.
     51 // The AST is an implementation detail and this pretty print
     52 // function is provided as a convenience method to help with
     53 // debugging.  You should not rely on its output as the internal
     54 // structure of the AST may change at any time.
     55 func (node ASTNode) PrettyPrint(indent int) string {
     56 	spaces := strings.Repeat(" ", indent)
     57 	output := fmt.Sprintf("%s%s {\n", spaces, node.nodeType)
     58 	nextIndent := indent + 2
     59 	if node.value != nil {
     60 		if converted, ok := node.value.(fmt.Stringer); ok {
     61 			// Account for things like comparator nodes
     62 			// that are enums with a String() method.
     63 			output += fmt.Sprintf("%svalue: %s\n", strings.Repeat(" ", nextIndent), converted.String())
     64 		} else {
     65 			output += fmt.Sprintf("%svalue: %#v\n", strings.Repeat(" ", nextIndent), node.value)
     66 		}
     67 	}
     68 	lastIndex := len(node.children)
     69 	if lastIndex > 0 {
     70 		output += fmt.Sprintf("%schildren: {\n", strings.Repeat(" ", nextIndent))
     71 		childIndent := nextIndent + 2
     72 		for _, elem := range node.children {
     73 			output += elem.PrettyPrint(childIndent)
     74 		}
     75 	}
     76 	output += fmt.Sprintf("%s}\n", spaces)
     77 	return output
     78 }
     79 
     80 var bindingPowers = map[tokType]int{
     81 	tEOF:                0,
     82 	tUnquotedIdentifier: 0,
     83 	tQuotedIdentifier:   0,
     84 	tRbracket:           0,
     85 	tRparen:             0,
     86 	tComma:              0,
     87 	tRbrace:             0,
     88 	tNumber:             0,
     89 	tCurrent:            0,
     90 	tExpref:             0,
     91 	tColon:              0,
     92 	tPipe:               1,
     93 	tOr:                 2,
     94 	tAnd:                3,
     95 	tEQ:                 5,
     96 	tLT:                 5,
     97 	tLTE:                5,
     98 	tGT:                 5,
     99 	tGTE:                5,
    100 	tNE:                 5,
    101 	tFlatten:            9,
    102 	tStar:               20,
    103 	tFilter:             21,
    104 	tDot:                40,
    105 	tNot:                45,
    106 	tLbrace:             50,
    107 	tLbracket:           55,
    108 	tLparen:             60,
    109 }
    110 
    111 // Parser holds state about the current expression being parsed.
    112 type Parser struct {
    113 	expression string
    114 	tokens     []token
    115 	index      int
    116 }
    117 
    118 // NewParser creates a new JMESPath parser.
    119 func NewParser() *Parser {
    120 	p := Parser{}
    121 	return &p
    122 }
    123 
    124 // Parse will compile a JMESPath expression.
    125 func (p *Parser) Parse(expression string) (ASTNode, error) {
    126 	lexer := NewLexer()
    127 	p.expression = expression
    128 	p.index = 0
    129 	tokens, err := lexer.tokenize(expression)
    130 	if err != nil {
    131 		return ASTNode{}, err
    132 	}
    133 	p.tokens = tokens
    134 	parsed, err := p.parseExpression(0)
    135 	if err != nil {
    136 		return ASTNode{}, err
    137 	}
    138 	if p.current() != tEOF {
    139 		return ASTNode{}, p.syntaxError(fmt.Sprintf(
    140 			"Unexpected token at the end of the expression: %s", p.current()))
    141 	}
    142 	return parsed, nil
    143 }
    144 
    145 func (p *Parser) parseExpression(bindingPower int) (ASTNode, error) {
    146 	var err error
    147 	leftToken := p.lookaheadToken(0)
    148 	p.advance()
    149 	leftNode, err := p.nud(leftToken)
    150 	if err != nil {
    151 		return ASTNode{}, err
    152 	}
    153 	currentToken := p.current()
    154 	for bindingPower < bindingPowers[currentToken] {
    155 		p.advance()
    156 		leftNode, err = p.led(currentToken, leftNode)
    157 		if err != nil {
    158 			return ASTNode{}, err
    159 		}
    160 		currentToken = p.current()
    161 	}
    162 	return leftNode, nil
    163 }
    164 
    165 func (p *Parser) parseIndexExpression() (ASTNode, error) {
    166 	if p.lookahead(0) == tColon || p.lookahead(1) == tColon {
    167 		return p.parseSliceExpression()
    168 	}
    169 	indexStr := p.lookaheadToken(0).value
    170 	parsedInt, err := strconv.Atoi(indexStr)
    171 	if err != nil {
    172 		return ASTNode{}, err
    173 	}
    174 	indexNode := ASTNode{nodeType: ASTIndex, value: parsedInt}
    175 	p.advance()
    176 	if err := p.match(tRbracket); err != nil {
    177 		return ASTNode{}, err
    178 	}
    179 	return indexNode, nil
    180 }
    181 
    182 func (p *Parser) parseSliceExpression() (ASTNode, error) {
    183 	parts := []*int{nil, nil, nil}
    184 	index := 0
    185 	current := p.current()
    186 	for current != tRbracket && index < 3 {
    187 		if current == tColon {
    188 			index++
    189 			p.advance()
    190 		} else if current == tNumber {
    191 			parsedInt, err := strconv.Atoi(p.lookaheadToken(0).value)
    192 			if err != nil {
    193 				return ASTNode{}, err
    194 			}
    195 			parts[index] = &parsedInt
    196 			p.advance()
    197 		} else {
    198 			return ASTNode{}, p.syntaxError(
    199 				"Expected tColon or tNumber" + ", received: " + p.current().String())
    200 		}
    201 		current = p.current()
    202 	}
    203 	if err := p.match(tRbracket); err != nil {
    204 		return ASTNode{}, err
    205 	}
    206 	return ASTNode{
    207 		nodeType: ASTSlice,
    208 		value:    parts,
    209 	}, nil
    210 }
    211 
    212 func (p *Parser) match(tokenType tokType) error {
    213 	if p.current() == tokenType {
    214 		p.advance()
    215 		return nil
    216 	}
    217 	return p.syntaxError("Expected " + tokenType.String() + ", received: " + p.current().String())
    218 }
    219 
    220 func (p *Parser) led(tokenType tokType, node ASTNode) (ASTNode, error) {
    221 	switch tokenType {
    222 	case tDot:
    223 		if p.current() != tStar {
    224 			right, err := p.parseDotRHS(bindingPowers[tDot])
    225 			return ASTNode{
    226 				nodeType: ASTSubexpression,
    227 				children: []ASTNode{node, right},
    228 			}, err
    229 		}
    230 		p.advance()
    231 		right, err := p.parseProjectionRHS(bindingPowers[tDot])
    232 		return ASTNode{
    233 			nodeType: ASTValueProjection,
    234 			children: []ASTNode{node, right},
    235 		}, err
    236 	case tPipe:
    237 		right, err := p.parseExpression(bindingPowers[tPipe])
    238 		return ASTNode{nodeType: ASTPipe, children: []ASTNode{node, right}}, err
    239 	case tOr:
    240 		right, err := p.parseExpression(bindingPowers[tOr])
    241 		return ASTNode{nodeType: ASTOrExpression, children: []ASTNode{node, right}}, err
    242 	case tAnd:
    243 		right, err := p.parseExpression(bindingPowers[tAnd])
    244 		return ASTNode{nodeType: ASTAndExpression, children: []ASTNode{node, right}}, err
    245 	case tLparen:
    246 		name := node.value
    247 		var args []ASTNode
    248 		for p.current() != tRparen {
    249 			expression, err := p.parseExpression(0)
    250 			if err != nil {
    251 				return ASTNode{}, err
    252 			}
    253 			if p.current() == tComma {
    254 				if err := p.match(tComma); err != nil {
    255 					return ASTNode{}, err
    256 				}
    257 			}
    258 			args = append(args, expression)
    259 		}
    260 		if err := p.match(tRparen); err != nil {
    261 			return ASTNode{}, err
    262 		}
    263 		return ASTNode{
    264 			nodeType: ASTFunctionExpression,
    265 			value:    name,
    266 			children: args,
    267 		}, nil
    268 	case tFilter:
    269 		return p.parseFilter(node)
    270 	case tFlatten:
    271 		left := ASTNode{nodeType: ASTFlatten, children: []ASTNode{node}}
    272 		right, err := p.parseProjectionRHS(bindingPowers[tFlatten])
    273 		return ASTNode{
    274 			nodeType: ASTProjection,
    275 			children: []ASTNode{left, right},
    276 		}, err
    277 	case tEQ, tNE, tGT, tGTE, tLT, tLTE:
    278 		right, err := p.parseExpression(bindingPowers[tokenType])
    279 		if err != nil {
    280 			return ASTNode{}, err
    281 		}
    282 		return ASTNode{
    283 			nodeType: ASTComparator,
    284 			value:    tokenType,
    285 			children: []ASTNode{node, right},
    286 		}, nil
    287 	case tLbracket:
    288 		tokenType := p.current()
    289 		var right ASTNode
    290 		var err error
    291 		if tokenType == tNumber || tokenType == tColon {
    292 			right, err = p.parseIndexExpression()
    293 			if err != nil {
    294 				return ASTNode{}, err
    295 			}
    296 			return p.projectIfSlice(node, right)
    297 		}
    298 		// Otherwise this is a projection.
    299 		if err := p.match(tStar); err != nil {
    300 			return ASTNode{}, err
    301 		}
    302 		if err := p.match(tRbracket); err != nil {
    303 			return ASTNode{}, err
    304 		}
    305 		right, err = p.parseProjectionRHS(bindingPowers[tStar])
    306 		if err != nil {
    307 			return ASTNode{}, err
    308 		}
    309 		return ASTNode{
    310 			nodeType: ASTProjection,
    311 			children: []ASTNode{node, right},
    312 		}, nil
    313 	}
    314 	return ASTNode{}, p.syntaxError("Unexpected token: " + tokenType.String())
    315 }
    316 
    317 func (p *Parser) nud(token token) (ASTNode, error) {
    318 	switch token.tokenType {
    319 	case tJSONLiteral:
    320 		var parsed interface{}
    321 		err := json.Unmarshal([]byte(token.value), &parsed)
    322 		if err != nil {
    323 			return ASTNode{}, err
    324 		}
    325 		return ASTNode{nodeType: ASTLiteral, value: parsed}, nil
    326 	case tStringLiteral:
    327 		return ASTNode{nodeType: ASTLiteral, value: token.value}, nil
    328 	case tUnquotedIdentifier:
    329 		return ASTNode{
    330 			nodeType: ASTField,
    331 			value:    token.value,
    332 		}, nil
    333 	case tQuotedIdentifier:
    334 		node := ASTNode{nodeType: ASTField, value: token.value}
    335 		if p.current() == tLparen {
    336 			return ASTNode{}, p.syntaxErrorToken("Can't have quoted identifier as function name.", token)
    337 		}
    338 		return node, nil
    339 	case tStar:
    340 		left := ASTNode{nodeType: ASTIdentity}
    341 		var right ASTNode
    342 		var err error
    343 		if p.current() == tRbracket {
    344 			right = ASTNode{nodeType: ASTIdentity}
    345 		} else {
    346 			right, err = p.parseProjectionRHS(bindingPowers[tStar])
    347 		}
    348 		return ASTNode{nodeType: ASTValueProjection, children: []ASTNode{left, right}}, err
    349 	case tFilter:
    350 		return p.parseFilter(ASTNode{nodeType: ASTIdentity})
    351 	case tLbrace:
    352 		return p.parseMultiSelectHash()
    353 	case tFlatten:
    354 		left := ASTNode{
    355 			nodeType: ASTFlatten,
    356 			children: []ASTNode{{nodeType: ASTIdentity}},
    357 		}
    358 		right, err := p.parseProjectionRHS(bindingPowers[tFlatten])
    359 		if err != nil {
    360 			return ASTNode{}, err
    361 		}
    362 		return ASTNode{nodeType: ASTProjection, children: []ASTNode{left, right}}, nil
    363 	case tLbracket:
    364 		tokenType := p.current()
    365 		//var right ASTNode
    366 		if tokenType == tNumber || tokenType == tColon {
    367 			right, err := p.parseIndexExpression()
    368 			if err != nil {
    369 				return ASTNode{}, nil
    370 			}
    371 			return p.projectIfSlice(ASTNode{nodeType: ASTIdentity}, right)
    372 		} else if tokenType == tStar && p.lookahead(1) == tRbracket {
    373 			p.advance()
    374 			p.advance()
    375 			right, err := p.parseProjectionRHS(bindingPowers[tStar])
    376 			if err != nil {
    377 				return ASTNode{}, err
    378 			}
    379 			return ASTNode{
    380 				nodeType: ASTProjection,
    381 				children: []ASTNode{{nodeType: ASTIdentity}, right},
    382 			}, nil
    383 		} else {
    384 			return p.parseMultiSelectList()
    385 		}
    386 	case tCurrent:
    387 		return ASTNode{nodeType: ASTCurrentNode}, nil
    388 	case tExpref:
    389 		expression, err := p.parseExpression(bindingPowers[tExpref])
    390 		if err != nil {
    391 			return ASTNode{}, err
    392 		}
    393 		return ASTNode{nodeType: ASTExpRef, children: []ASTNode{expression}}, nil
    394 	case tNot:
    395 		expression, err := p.parseExpression(bindingPowers[tNot])
    396 		if err != nil {
    397 			return ASTNode{}, err
    398 		}
    399 		return ASTNode{nodeType: ASTNotExpression, children: []ASTNode{expression}}, nil
    400 	case tLparen:
    401 		expression, err := p.parseExpression(0)
    402 		if err != nil {
    403 			return ASTNode{}, err
    404 		}
    405 		if err := p.match(tRparen); err != nil {
    406 			return ASTNode{}, err
    407 		}
    408 		return expression, nil
    409 	case tEOF:
    410 		return ASTNode{}, p.syntaxErrorToken("Incomplete expression", token)
    411 	}
    412 
    413 	return ASTNode{}, p.syntaxErrorToken("Invalid token: "+token.tokenType.String(), token)
    414 }
    415 
    416 func (p *Parser) parseMultiSelectList() (ASTNode, error) {
    417 	var expressions []ASTNode
    418 	for {
    419 		expression, err := p.parseExpression(0)
    420 		if err != nil {
    421 			return ASTNode{}, err
    422 		}
    423 		expressions = append(expressions, expression)
    424 		if p.current() == tRbracket {
    425 			break
    426 		}
    427 		err = p.match(tComma)
    428 		if err != nil {
    429 			return ASTNode{}, err
    430 		}
    431 	}
    432 	err := p.match(tRbracket)
    433 	if err != nil {
    434 		return ASTNode{}, err
    435 	}
    436 	return ASTNode{
    437 		nodeType: ASTMultiSelectList,
    438 		children: expressions,
    439 	}, nil
    440 }
    441 
    442 func (p *Parser) parseMultiSelectHash() (ASTNode, error) {
    443 	var children []ASTNode
    444 	for {
    445 		keyToken := p.lookaheadToken(0)
    446 		if err := p.match(tUnquotedIdentifier); err != nil {
    447 			if err := p.match(tQuotedIdentifier); err != nil {
    448 				return ASTNode{}, p.syntaxError("Expected tQuotedIdentifier or tUnquotedIdentifier")
    449 			}
    450 		}
    451 		keyName := keyToken.value
    452 		err := p.match(tColon)
    453 		if err != nil {
    454 			return ASTNode{}, err
    455 		}
    456 		value, err := p.parseExpression(0)
    457 		if err != nil {
    458 			return ASTNode{}, err
    459 		}
    460 		node := ASTNode{
    461 			nodeType: ASTKeyValPair,
    462 			value:    keyName,
    463 			children: []ASTNode{value},
    464 		}
    465 		children = append(children, node)
    466 		if p.current() == tComma {
    467 			err := p.match(tComma)
    468 			if err != nil {
    469 				return ASTNode{}, nil
    470 			}
    471 		} else if p.current() == tRbrace {
    472 			err := p.match(tRbrace)
    473 			if err != nil {
    474 				return ASTNode{}, nil
    475 			}
    476 			break
    477 		}
    478 	}
    479 	return ASTNode{
    480 		nodeType: ASTMultiSelectHash,
    481 		children: children,
    482 	}, nil
    483 }
    484 
    485 func (p *Parser) projectIfSlice(left ASTNode, right ASTNode) (ASTNode, error) {
    486 	indexExpr := ASTNode{
    487 		nodeType: ASTIndexExpression,
    488 		children: []ASTNode{left, right},
    489 	}
    490 	if right.nodeType == ASTSlice {
    491 		right, err := p.parseProjectionRHS(bindingPowers[tStar])
    492 		return ASTNode{
    493 			nodeType: ASTProjection,
    494 			children: []ASTNode{indexExpr, right},
    495 		}, err
    496 	}
    497 	return indexExpr, nil
    498 }
    499 func (p *Parser) parseFilter(node ASTNode) (ASTNode, error) {
    500 	var right, condition ASTNode
    501 	var err error
    502 	condition, err = p.parseExpression(0)
    503 	if err != nil {
    504 		return ASTNode{}, err
    505 	}
    506 	if err := p.match(tRbracket); err != nil {
    507 		return ASTNode{}, err
    508 	}
    509 	if p.current() == tFlatten {
    510 		right = ASTNode{nodeType: ASTIdentity}
    511 	} else {
    512 		right, err = p.parseProjectionRHS(bindingPowers[tFilter])
    513 		if err != nil {
    514 			return ASTNode{}, err
    515 		}
    516 	}
    517 
    518 	return ASTNode{
    519 		nodeType: ASTFilterProjection,
    520 		children: []ASTNode{node, right, condition},
    521 	}, nil
    522 }
    523 
    524 func (p *Parser) parseDotRHS(bindingPower int) (ASTNode, error) {
    525 	lookahead := p.current()
    526 	if tokensOneOf([]tokType{tQuotedIdentifier, tUnquotedIdentifier, tStar}, lookahead) {
    527 		return p.parseExpression(bindingPower)
    528 	} else if lookahead == tLbracket {
    529 		if err := p.match(tLbracket); err != nil {
    530 			return ASTNode{}, err
    531 		}
    532 		return p.parseMultiSelectList()
    533 	} else if lookahead == tLbrace {
    534 		if err := p.match(tLbrace); err != nil {
    535 			return ASTNode{}, err
    536 		}
    537 		return p.parseMultiSelectHash()
    538 	}
    539 	return ASTNode{}, p.syntaxError("Expected identifier, lbracket, or lbrace")
    540 }
    541 
    542 func (p *Parser) parseProjectionRHS(bindingPower int) (ASTNode, error) {
    543 	current := p.current()
    544 	if bindingPowers[current] < 10 {
    545 		return ASTNode{nodeType: ASTIdentity}, nil
    546 	} else if current == tLbracket {
    547 		return p.parseExpression(bindingPower)
    548 	} else if current == tFilter {
    549 		return p.parseExpression(bindingPower)
    550 	} else if current == tDot {
    551 		err := p.match(tDot)
    552 		if err != nil {
    553 			return ASTNode{}, err
    554 		}
    555 		return p.parseDotRHS(bindingPower)
    556 	} else {
    557 		return ASTNode{}, p.syntaxError("Error")
    558 	}
    559 }
    560 
    561 func (p *Parser) lookahead(number int) tokType {
    562 	return p.lookaheadToken(number).tokenType
    563 }
    564 
    565 func (p *Parser) current() tokType {
    566 	return p.lookahead(0)
    567 }
    568 
    569 func (p *Parser) lookaheadToken(number int) token {
    570 	return p.tokens[p.index+number]
    571 }
    572 
    573 func (p *Parser) advance() {
    574 	p.index++
    575 }
    576 
    577 func tokensOneOf(elements []tokType, token tokType) bool {
    578 	for _, elem := range elements {
    579 		if elem == token {
    580 			return true
    581 		}
    582 	}
    583 	return false
    584 }
    585 
    586 func (p *Parser) syntaxError(msg string) SyntaxError {
    587 	return SyntaxError{
    588 		msg:        msg,
    589 		Expression: p.expression,
    590 		Offset:     p.lookaheadToken(0).position,
    591 	}
    592 }
    593 
    594 // Create a SyntaxError based on the provided token.
    595 // This differs from syntaxError() which creates a SyntaxError
    596 // based on the current lookahead token.
    597 func (p *Parser) syntaxErrorToken(msg string, t token) SyntaxError {
    598 	return SyntaxError{
    599 		msg:        msg,
    600 		Expression: p.expression,
    601 		Offset:     t.position,
    602 	}
    603 }