code.dwrz.net

Go monorepo.
Log | Files | Refs

ini_lexer.go (2819B)


      1 package ini
      2 
      3 import (
      4 	"bytes"
      5 	"io"
      6 	"io/ioutil"
      7 )
      8 
      9 // TokenType represents the various different tokens types
     10 type TokenType int
     11 
     12 func (t TokenType) String() string {
     13 	switch t {
     14 	case TokenNone:
     15 		return "none"
     16 	case TokenLit:
     17 		return "literal"
     18 	case TokenSep:
     19 		return "sep"
     20 	case TokenOp:
     21 		return "op"
     22 	case TokenWS:
     23 		return "ws"
     24 	case TokenNL:
     25 		return "newline"
     26 	case TokenComment:
     27 		return "comment"
     28 	case TokenComma:
     29 		return "comma"
     30 	default:
     31 		return ""
     32 	}
     33 }
     34 
     35 // TokenType enums
     36 const (
     37 	TokenNone = TokenType(iota)
     38 	TokenLit
     39 	TokenSep
     40 	TokenComma
     41 	TokenOp
     42 	TokenWS
     43 	TokenNL
     44 	TokenComment
     45 )
     46 
     47 type iniLexer struct{}
     48 
     49 // Tokenize will return a list of tokens during lexical analysis of the
     50 // io.Reader.
     51 func (l *iniLexer) Tokenize(r io.Reader) ([]Token, error) {
     52 	b, err := ioutil.ReadAll(r)
     53 	if err != nil {
     54 		return nil, &UnableToReadFile{Err: err}
     55 	}
     56 
     57 	return l.tokenize(b)
     58 }
     59 
     60 func (l *iniLexer) tokenize(b []byte) ([]Token, error) {
     61 	runes := bytes.Runes(b)
     62 	var err error
     63 	n := 0
     64 	tokenAmount := countTokens(runes)
     65 	tokens := make([]Token, tokenAmount)
     66 	count := 0
     67 
     68 	for len(runes) > 0 && count < tokenAmount {
     69 		switch {
     70 		case isWhitespace(runes[0]):
     71 			tokens[count], n, err = newWSToken(runes)
     72 		case isComma(runes[0]):
     73 			tokens[count], n = newCommaToken(), 1
     74 		case isComment(runes):
     75 			tokens[count], n, err = newCommentToken(runes)
     76 		case isNewline(runes):
     77 			tokens[count], n, err = newNewlineToken(runes)
     78 		case isSep(runes):
     79 			tokens[count], n, err = newSepToken(runes)
     80 		case isOp(runes):
     81 			tokens[count], n, err = newOpToken(runes)
     82 		default:
     83 			tokens[count], n, err = newLitToken(runes)
     84 		}
     85 
     86 		if err != nil {
     87 			return nil, err
     88 		}
     89 
     90 		count++
     91 
     92 		runes = runes[n:]
     93 	}
     94 
     95 	return tokens[:count], nil
     96 }
     97 
     98 func countTokens(runes []rune) int {
     99 	count, n := 0, 0
    100 	var err error
    101 
    102 	for len(runes) > 0 {
    103 		switch {
    104 		case isWhitespace(runes[0]):
    105 			_, n, err = newWSToken(runes)
    106 		case isComma(runes[0]):
    107 			_, n = newCommaToken(), 1
    108 		case isComment(runes):
    109 			_, n, err = newCommentToken(runes)
    110 		case isNewline(runes):
    111 			_, n, err = newNewlineToken(runes)
    112 		case isSep(runes):
    113 			_, n, err = newSepToken(runes)
    114 		case isOp(runes):
    115 			_, n, err = newOpToken(runes)
    116 		default:
    117 			_, n, err = newLitToken(runes)
    118 		}
    119 
    120 		if err != nil {
    121 			return 0
    122 		}
    123 
    124 		count++
    125 		runes = runes[n:]
    126 	}
    127 
    128 	return count + 1
    129 }
    130 
// Token indicates a metadata about a given value.
type Token struct {
	t         TokenType // lexical class of the token (literal, sep, ws, ...); see Type()
	ValueType ValueType // classification of the token's value; exported, set by newToken
	base      int       // NOTE(review): presumably the numeric base for integer literals — not set in this file; confirm against newLitToken
	raw       []rune    // exact runes consumed from the input for this token; see Raw()
}
    138 
    139 var emptyValue = Value{}
    140 
    141 func newToken(t TokenType, raw []rune, v ValueType) Token {
    142 	return Token{
    143 		t:         t,
    144 		raw:       raw,
    145 		ValueType: v,
    146 	}
    147 }
    148 
// Raw returns the raw runes that were consumed from the input for this
// token. The returned slice aliases the token's internal storage, so
// callers should not mutate it.
func (tok Token) Raw() []rune {
	return tok.raw
}
    153 
// Type returns the token's lexical type (literal, sep, ws, newline, ...).
func (tok Token) Type() TokenType {
	return tok.t
}