parser.go (15311B)
1 package jmespath 2 3 import ( 4 "encoding/json" 5 "fmt" 6 "strconv" 7 "strings" 8 ) 9 10 type astNodeType int 11 12 //go:generate stringer -type astNodeType 13 const ( 14 ASTEmpty astNodeType = iota 15 ASTComparator 16 ASTCurrentNode 17 ASTExpRef 18 ASTFunctionExpression 19 ASTField 20 ASTFilterProjection 21 ASTFlatten 22 ASTIdentity 23 ASTIndex 24 ASTIndexExpression 25 ASTKeyValPair 26 ASTLiteral 27 ASTMultiSelectHash 28 ASTMultiSelectList 29 ASTOrExpression 30 ASTAndExpression 31 ASTNotExpression 32 ASTPipe 33 ASTProjection 34 ASTSubexpression 35 ASTSlice 36 ASTValueProjection 37 ) 38 39 // ASTNode represents the abstract syntax tree of a JMESPath expression. 40 type ASTNode struct { 41 nodeType astNodeType 42 value interface{} 43 children []ASTNode 44 } 45 46 func (node ASTNode) String() string { 47 return node.PrettyPrint(0) 48 } 49 50 // PrettyPrint will pretty print the parsed AST. 51 // The AST is an implementation detail and this pretty print 52 // function is provided as a convenience method to help with 53 // debugging. You should not rely on its output as the internal 54 // structure of the AST may change at any time. 55 func (node ASTNode) PrettyPrint(indent int) string { 56 spaces := strings.Repeat(" ", indent) 57 output := fmt.Sprintf("%s%s {\n", spaces, node.nodeType) 58 nextIndent := indent + 2 59 if node.value != nil { 60 if converted, ok := node.value.(fmt.Stringer); ok { 61 // Account for things like comparator nodes 62 // that are enums with a String() method. 63 output += fmt.Sprintf("%svalue: %s\n", strings.Repeat(" ", nextIndent), converted.String()) 64 } else { 65 output += fmt.Sprintf("%svalue: %#v\n", strings.Repeat(" ", nextIndent), node.value) 66 } 67 } 68 lastIndex := len(node.children) 69 if lastIndex > 0 { 70 output += fmt.Sprintf("%schildren: {\n", strings.Repeat(" ", nextIndent)) 71 childIndent := nextIndent + 2 72 for _, elem := range node.children { 73 output += elem.PrettyPrint(childIndent) 74 } 75 } 76 output += fmt.Sprintf("%s}\n", spaces) 77 return output 78 } 79 80 var bindingPowers = map[tokType]int{ 81 tEOF: 0, 82 tUnquotedIdentifier: 0, 83 tQuotedIdentifier: 0, 84 tRbracket: 0, 85 tRparen: 0, 86 tComma: 0, 87 tRbrace: 0, 88 tNumber: 0, 89 tCurrent: 0, 90 tExpref: 0, 91 tColon: 0, 92 tPipe: 1, 93 tOr: 2, 94 tAnd: 3, 95 tEQ: 5, 96 tLT: 5, 97 tLTE: 5, 98 tGT: 5, 99 tGTE: 5, 100 tNE: 5, 101 tFlatten: 9, 102 tStar: 20, 103 tFilter: 21, 104 tDot: 40, 105 tNot: 45, 106 tLbrace: 50, 107 tLbracket: 55, 108 tLparen: 60, 109 } 110 111 // Parser holds state about the current expression being parsed. 112 type Parser struct { 113 expression string 114 tokens []token 115 index int 116 } 117 118 // NewParser creates a new JMESPath parser. 119 func NewParser() *Parser { 120 p := Parser{} 121 return &p 122 } 123 124 // Parse will compile a JMESPath expression. 125 func (p *Parser) Parse(expression string) (ASTNode, error) { 126 lexer := NewLexer() 127 p.expression = expression 128 p.index = 0 129 tokens, err := lexer.tokenize(expression) 130 if err != nil { 131 return ASTNode{}, err 132 } 133 p.tokens = tokens 134 parsed, err := p.parseExpression(0) 135 if err != nil { 136 return ASTNode{}, err 137 } 138 if p.current() != tEOF { 139 return ASTNode{}, p.syntaxError(fmt.Sprintf( 140 "Unexpected token at the end of the expression: %s", p.current())) 141 } 142 return parsed, nil 143 } 144 145 func (p *Parser) parseExpression(bindingPower int) (ASTNode, error) { 146 var err error 147 leftToken := p.lookaheadToken(0) 148 p.advance() 149 leftNode, err := p.nud(leftToken) 150 if err != nil { 151 return ASTNode{}, err 152 } 153 currentToken := p.current() 154 for bindingPower < bindingPowers[currentToken] { 155 p.advance() 156 leftNode, err = p.led(currentToken, leftNode) 157 if err != nil { 158 return ASTNode{}, err 159 } 160 currentToken = p.current() 161 } 162 return leftNode, nil 163 } 164 165 func (p *Parser) parseIndexExpression() (ASTNode, error) { 166 if p.lookahead(0) == tColon || p.lookahead(1) == tColon { 167 return p.parseSliceExpression() 168 } 169 indexStr := p.lookaheadToken(0).value 170 parsedInt, err := strconv.Atoi(indexStr) 171 if err != nil { 172 return ASTNode{}, err 173 } 174 indexNode := ASTNode{nodeType: ASTIndex, value: parsedInt} 175 p.advance() 176 if err := p.match(tRbracket); err != nil { 177 return ASTNode{}, err 178 } 179 return indexNode, nil 180 } 181 182 func (p *Parser) parseSliceExpression() (ASTNode, error) { 183 parts := []*int{nil, nil, nil} 184 index := 0 185 current := p.current() 186 for current != tRbracket && index < 3 { 187 if current == tColon { 188 index++ 189 p.advance() 190 } else if current == tNumber { 191 parsedInt, err := strconv.Atoi(p.lookaheadToken(0).value) 192 if err != nil { 193 return ASTNode{}, err 194 } 195 parts[index] = &parsedInt 196 p.advance() 197 } else { 198 return ASTNode{}, p.syntaxError( 199 "Expected tColon or tNumber" + ", received: " + p.current().String()) 200 } 201 current = p.current() 202 } 203 if err := p.match(tRbracket); err != nil { 204 return ASTNode{}, err 205 } 206 return ASTNode{ 207 nodeType: ASTSlice, 208 value: parts, 209 }, nil 210 } 211 212 func (p *Parser) match(tokenType tokType) error { 213 if p.current() == tokenType { 214 p.advance() 215 return nil 216 } 217 return p.syntaxError("Expected " + tokenType.String() + ", received: " + p.current().String()) 218 } 219 220 func (p *Parser) led(tokenType tokType, node ASTNode) (ASTNode, error) { 221 switch tokenType { 222 case tDot: 223 if p.current() != tStar { 224 right, err := p.parseDotRHS(bindingPowers[tDot]) 225 return ASTNode{ 226 nodeType: ASTSubexpression, 227 children: []ASTNode{node, right}, 228 }, err 229 } 230 p.advance() 231 right, err := p.parseProjectionRHS(bindingPowers[tDot]) 232 return ASTNode{ 233 nodeType: ASTValueProjection, 234 children: []ASTNode{node, right}, 235 }, err 236 case tPipe: 237 right, err := p.parseExpression(bindingPowers[tPipe]) 238 return ASTNode{nodeType: ASTPipe, children: []ASTNode{node, right}}, err 239 case tOr: 240 right, err := p.parseExpression(bindingPowers[tOr]) 241 return ASTNode{nodeType: ASTOrExpression, children: []ASTNode{node, right}}, err 242 case tAnd: 243 right, err := p.parseExpression(bindingPowers[tAnd]) 244 return ASTNode{nodeType: ASTAndExpression, children: []ASTNode{node, right}}, err 245 case tLparen: 246 name := node.value 247 var args []ASTNode 248 for p.current() != tRparen { 249 expression, err := p.parseExpression(0) 250 if err != nil { 251 return ASTNode{}, err 252 } 253 if p.current() == tComma { 254 if err := p.match(tComma); err != nil { 255 return ASTNode{}, err 256 } 257 } 258 args = append(args, expression) 259 } 260 if err := p.match(tRparen); err != nil { 261 return ASTNode{}, err 262 } 263 return ASTNode{ 264 nodeType: ASTFunctionExpression, 265 value: name, 266 children: args, 267 }, nil 268 case tFilter: 269 return p.parseFilter(node) 270 case tFlatten: 271 left := ASTNode{nodeType: ASTFlatten, children: []ASTNode{node}} 272 right, err := p.parseProjectionRHS(bindingPowers[tFlatten]) 273 return ASTNode{ 274 nodeType: ASTProjection, 275 children: []ASTNode{left, right}, 276 }, err 277 case tEQ, tNE, tGT, tGTE, tLT, tLTE: 278 right, err := p.parseExpression(bindingPowers[tokenType]) 279 if err != nil { 280 return ASTNode{}, err 281 } 282 return ASTNode{ 283 nodeType: ASTComparator, 284 value: tokenType, 285 children: []ASTNode{node, right}, 286 }, nil 287 case tLbracket: 288 tokenType := p.current() 289 var right ASTNode 290 var err error 291 if tokenType == tNumber || tokenType == tColon { 292 right, err = p.parseIndexExpression() 293 if err != nil { 294 return ASTNode{}, err 295 } 296 return p.projectIfSlice(node, right) 297 } 298 // Otherwise this is a projection. 299 if err := p.match(tStar); err != nil { 300 return ASTNode{}, err 301 } 302 if err := p.match(tRbracket); err != nil { 303 return ASTNode{}, err 304 } 305 right, err = p.parseProjectionRHS(bindingPowers[tStar]) 306 if err != nil { 307 return ASTNode{}, err 308 } 309 return ASTNode{ 310 nodeType: ASTProjection, 311 children: []ASTNode{node, right}, 312 }, nil 313 } 314 return ASTNode{}, p.syntaxError("Unexpected token: " + tokenType.String()) 315 } 316 317 func (p *Parser) nud(token token) (ASTNode, error) { 318 switch token.tokenType { 319 case tJSONLiteral: 320 var parsed interface{} 321 err := json.Unmarshal([]byte(token.value), &parsed) 322 if err != nil { 323 return ASTNode{}, err 324 } 325 return ASTNode{nodeType: ASTLiteral, value: parsed}, nil 326 case tStringLiteral: 327 return ASTNode{nodeType: ASTLiteral, value: token.value}, nil 328 case tUnquotedIdentifier: 329 return ASTNode{ 330 nodeType: ASTField, 331 value: token.value, 332 }, nil 333 case tQuotedIdentifier: 334 node := ASTNode{nodeType: ASTField, value: token.value} 335 if p.current() == tLparen { 336 return ASTNode{}, p.syntaxErrorToken("Can't have quoted identifier as function name.", token) 337 } 338 return node, nil 339 case tStar: 340 left := ASTNode{nodeType: ASTIdentity} 341 var right ASTNode 342 var err error 343 if p.current() == tRbracket { 344 right = ASTNode{nodeType: ASTIdentity} 345 } else { 346 right, err = p.parseProjectionRHS(bindingPowers[tStar]) 347 } 348 return ASTNode{nodeType: ASTValueProjection, children: []ASTNode{left, right}}, err 349 case tFilter: 350 return p.parseFilter(ASTNode{nodeType: ASTIdentity}) 351 case tLbrace: 352 return p.parseMultiSelectHash() 353 case tFlatten: 354 left := ASTNode{ 355 nodeType: ASTFlatten, 356 children: []ASTNode{{nodeType: ASTIdentity}}, 357 } 358 right, err := p.parseProjectionRHS(bindingPowers[tFlatten]) 359 if err != nil { 360 return ASTNode{}, err 361 } 362 return ASTNode{nodeType: ASTProjection, children: []ASTNode{left, right}}, nil 363 case tLbracket: 364 tokenType := p.current() 365 //var right ASTNode 366 if tokenType == tNumber || tokenType == tColon { 367 right, err := p.parseIndexExpression() 368 if err != nil { 369 return ASTNode{}, nil 370 } 371 return p.projectIfSlice(ASTNode{nodeType: ASTIdentity}, right) 372 } else if tokenType == tStar && p.lookahead(1) == tRbracket { 373 p.advance() 374 p.advance() 375 right, err := p.parseProjectionRHS(bindingPowers[tStar]) 376 if err != nil { 377 return ASTNode{}, err 378 } 379 return ASTNode{ 380 nodeType: ASTProjection, 381 children: []ASTNode{{nodeType: ASTIdentity}, right}, 382 }, nil 383 } else { 384 return p.parseMultiSelectList() 385 } 386 case tCurrent: 387 return ASTNode{nodeType: ASTCurrentNode}, nil 388 case tExpref: 389 expression, err := p.parseExpression(bindingPowers[tExpref]) 390 if err != nil { 391 return ASTNode{}, err 392 } 393 return ASTNode{nodeType: ASTExpRef, children: []ASTNode{expression}}, nil 394 case tNot: 395 expression, err := p.parseExpression(bindingPowers[tNot]) 396 if err != nil { 397 return ASTNode{}, err 398 } 399 return ASTNode{nodeType: ASTNotExpression, children: []ASTNode{expression}}, nil 400 case tLparen: 401 expression, err := p.parseExpression(0) 402 if err != nil { 403 return ASTNode{}, err 404 } 405 if err := p.match(tRparen); err != nil { 406 return ASTNode{}, err 407 } 408 return expression, nil 409 case tEOF: 410 return ASTNode{}, p.syntaxErrorToken("Incomplete expression", token) 411 } 412 413 return ASTNode{}, p.syntaxErrorToken("Invalid token: "+token.tokenType.String(), token) 414 } 415 416 func (p *Parser) parseMultiSelectList() (ASTNode, error) { 417 var expressions []ASTNode 418 for { 419 expression, err := p.parseExpression(0) 420 if err != nil { 421 return ASTNode{}, err 422 } 423 expressions = append(expressions, expression) 424 if p.current() == tRbracket { 425 break 426 } 427 err = p.match(tComma) 428 if err != nil { 429 return ASTNode{}, err 430 } 431 } 432 err := p.match(tRbracket) 433 if err != nil { 434 return ASTNode{}, err 435 } 436 return ASTNode{ 437 nodeType: ASTMultiSelectList, 438 children: expressions, 439 }, nil 440 } 441 442 func (p *Parser) parseMultiSelectHash() (ASTNode, error) { 443 var children []ASTNode 444 for { 445 keyToken := p.lookaheadToken(0) 446 if err := p.match(tUnquotedIdentifier); err != nil { 447 if err := p.match(tQuotedIdentifier); err != nil { 448 return ASTNode{}, p.syntaxError("Expected tQuotedIdentifier or tUnquotedIdentifier") 449 } 450 } 451 keyName := keyToken.value 452 err := p.match(tColon) 453 if err != nil { 454 return ASTNode{}, err 455 } 456 value, err := p.parseExpression(0) 457 if err != nil { 458 return ASTNode{}, err 459 } 460 node := ASTNode{ 461 nodeType: ASTKeyValPair, 462 value: keyName, 463 children: []ASTNode{value}, 464 } 465 children = append(children, node) 466 if p.current() == tComma { 467 err := p.match(tComma) 468 if err != nil { 469 return ASTNode{}, nil 470 } 471 } else if p.current() == tRbrace { 472 err := p.match(tRbrace) 473 if err != nil { 474 return ASTNode{}, nil 475 } 476 break 477 } 478 } 479 return ASTNode{ 480 nodeType: ASTMultiSelectHash, 481 children: children, 482 }, nil 483 } 484 485 func (p *Parser) projectIfSlice(left ASTNode, right ASTNode) (ASTNode, error) { 486 indexExpr := ASTNode{ 487 nodeType: ASTIndexExpression, 488 children: []ASTNode{left, right}, 489 } 490 if right.nodeType == ASTSlice { 491 right, err := p.parseProjectionRHS(bindingPowers[tStar]) 492 return ASTNode{ 493 nodeType: ASTProjection, 494 children: []ASTNode{indexExpr, right}, 495 }, err 496 } 497 return indexExpr, nil 498 } 499 func (p *Parser) parseFilter(node ASTNode) (ASTNode, error) { 500 var right, condition ASTNode 501 var err error 502 condition, err = p.parseExpression(0) 503 if err != nil { 504 return ASTNode{}, err 505 } 506 if err := p.match(tRbracket); err != nil { 507 return ASTNode{}, err 508 } 509 if p.current() == tFlatten { 510 right = ASTNode{nodeType: ASTIdentity} 511 } else { 512 right, err = p.parseProjectionRHS(bindingPowers[tFilter]) 513 if err != nil { 514 return ASTNode{}, err 515 } 516 } 517 518 return ASTNode{ 519 nodeType: ASTFilterProjection, 520 children: []ASTNode{node, right, condition}, 521 }, nil 522 } 523 524 func (p *Parser) parseDotRHS(bindingPower int) (ASTNode, error) { 525 lookahead := p.current() 526 if tokensOneOf([]tokType{tQuotedIdentifier, tUnquotedIdentifier, tStar}, lookahead) { 527 return p.parseExpression(bindingPower) 528 } else if lookahead == tLbracket { 529 if err := p.match(tLbracket); err != nil { 530 return ASTNode{}, err 531 } 532 return p.parseMultiSelectList() 533 } else if lookahead == tLbrace { 534 if err := p.match(tLbrace); err != nil { 535 return ASTNode{}, err 536 } 537 return p.parseMultiSelectHash() 538 } 539 return ASTNode{}, p.syntaxError("Expected identifier, lbracket, or lbrace") 540 } 541 542 func (p *Parser) parseProjectionRHS(bindingPower int) (ASTNode, error) { 543 current := p.current() 544 if bindingPowers[current] < 10 { 545 return ASTNode{nodeType: ASTIdentity}, nil 546 } else if current == tLbracket { 547 return p.parseExpression(bindingPower) 548 } else if current == tFilter { 549 return p.parseExpression(bindingPower) 550 } else if current == tDot { 551 err := p.match(tDot) 552 if err != nil { 553 return ASTNode{}, err 554 } 555 return p.parseDotRHS(bindingPower) 556 } else { 557 return ASTNode{}, p.syntaxError("Error") 558 } 559 } 560 561 func (p *Parser) lookahead(number int) tokType { 562 return p.lookaheadToken(number).tokenType 563 } 564 565 func (p *Parser) current() tokType { 566 return p.lookahead(0) 567 } 568 569 func (p *Parser) lookaheadToken(number int) token { 570 return p.tokens[p.index+number] 571 } 572 573 func (p *Parser) advance() { 574 p.index++ 575 } 576 577 func tokensOneOf(elements []tokType, token tokType) bool { 578 for _, elem := range elements { 579 if elem == token { 580 return true 581 } 582 } 583 return false 584 } 585 586 func (p *Parser) syntaxError(msg string) SyntaxError { 587 return SyntaxError{ 588 msg: msg, 589 Expression: p.expression, 590 Offset: p.lookaheadToken(0).position, 591 } 592 } 593 594 // Create a SyntaxError based on the provided token. 595 // This differs from syntaxError() which creates a SyntaxError 596 // based on the current lookahead token. 597 func (p *Parser) syntaxErrorToken(msg string, t token) SyntaxError { 598 return SyntaxError{ 599 msg: msg, 600 Expression: p.expression, 601 Offset: t.position, 602 } 603 }