Files
kubesphere/vendor/github.com/pelletier/go-toml/lexer.go
Jeff 96d2ac4112 This is a huge commit, it does following things:
1. refactor kubesphere dependency service client creation, we can
disable dependency by config
2. dependencies can be configured by configuration file
3. refactor cmd package using cobra.Command, so we can use hypersphere
to invoke command sepearately. Later we only need to build one image to
contains all kubesphere core components. One command to rule them all!
4. live reloading configuration currently not implemented
2019-09-11 19:53:35 +08:00

751 lines
14 KiB
Go

// TOML lexer.
//
// Written using the principles developed by Rob Pike in
// http://www.youtube.com/watch?v=HxaD_trXwRE
package toml
import (
"bytes"
"errors"
"fmt"
"regexp"
"strconv"
"strings"
)
var dateRegexp *regexp.Regexp
// Define state functions
type tomlLexStateFn func() tomlLexStateFn
// Define lexer
type tomlLexer struct {
inputIdx int
input []rune // Textual source
currentTokenStart int
currentTokenStop int
tokens []token
depth int
line int
col int
endbufferLine int
endbufferCol int
}
// Basic read operations on input
func (l *tomlLexer) read() rune {
r := l.peek()
if r == '\n' {
l.endbufferLine++
l.endbufferCol = 1
} else {
l.endbufferCol++
}
l.inputIdx++
return r
}
func (l *tomlLexer) next() rune {
r := l.read()
if r != eof {
l.currentTokenStop++
}
return r
}
func (l *tomlLexer) ignore() {
l.currentTokenStart = l.currentTokenStop
l.line = l.endbufferLine
l.col = l.endbufferCol
}
func (l *tomlLexer) skip() {
l.next()
l.ignore()
}
func (l *tomlLexer) fastForward(n int) {
for i := 0; i < n; i++ {
l.next()
}
}
func (l *tomlLexer) emitWithValue(t tokenType, value string) {
l.tokens = append(l.tokens, token{
Position: Position{l.line, l.col},
typ: t,
val: value,
})
l.ignore()
}
func (l *tomlLexer) emit(t tokenType) {
l.emitWithValue(t, string(l.input[l.currentTokenStart:l.currentTokenStop]))
}
func (l *tomlLexer) peek() rune {
if l.inputIdx >= len(l.input) {
return eof
}
return l.input[l.inputIdx]
}
func (l *tomlLexer) peekString(size int) string {
maxIdx := len(l.input)
upperIdx := l.inputIdx + size // FIXME: potential overflow
if upperIdx > maxIdx {
upperIdx = maxIdx
}
return string(l.input[l.inputIdx:upperIdx])
}
func (l *tomlLexer) follow(next string) bool {
return next == l.peekString(len(next))
}
// Error management
func (l *tomlLexer) errorf(format string, args ...interface{}) tomlLexStateFn {
l.tokens = append(l.tokens, token{
Position: Position{l.line, l.col},
typ: tokenError,
val: fmt.Sprintf(format, args...),
})
return nil
}
// State functions
func (l *tomlLexer) lexVoid() tomlLexStateFn {
for {
next := l.peek()
switch next {
case '[':
return l.lexTableKey
case '#':
return l.lexComment(l.lexVoid)
case '=':
return l.lexEqual
case '\r':
fallthrough
case '\n':
l.skip()
continue
}
if isSpace(next) {
l.skip()
}
if l.depth > 0 {
return l.lexRvalue
}
if isKeyStartChar(next) {
return l.lexKey
}
if next == eof {
l.next()
break
}
}
l.emit(tokenEOF)
return nil
}
func (l *tomlLexer) lexRvalue() tomlLexStateFn {
for {
next := l.peek()
switch next {
case '.':
return l.errorf("cannot start float with a dot")
case '=':
return l.lexEqual
case '[':
l.depth++
return l.lexLeftBracket
case ']':
l.depth--
return l.lexRightBracket
case '{':
return l.lexLeftCurlyBrace
case '}':
return l.lexRightCurlyBrace
case '#':
return l.lexComment(l.lexRvalue)
case '"':
return l.lexString
case '\'':
return l.lexLiteralString
case ',':
return l.lexComma
case '\r':
fallthrough
case '\n':
l.skip()
if l.depth == 0 {
return l.lexVoid
}
return l.lexRvalue
case '_':
return l.errorf("cannot start number with underscore")
}
if l.follow("true") {
return l.lexTrue
}
if l.follow("false") {
return l.lexFalse
}
if l.follow("inf") {
return l.lexInf
}
if l.follow("nan") {
return l.lexNan
}
if isSpace(next) {
l.skip()
continue
}
if next == eof {
l.next()
break
}
possibleDate := l.peekString(35)
dateMatch := dateRegexp.FindString(possibleDate)
if dateMatch != "" {
l.fastForward(len(dateMatch))
return l.lexDate
}
if next == '+' || next == '-' || isDigit(next) {
return l.lexNumber
}
if isAlphanumeric(next) {
return l.lexKey
}
return l.errorf("no value can start with %c", next)
}
l.emit(tokenEOF)
return nil
}
func (l *tomlLexer) lexLeftCurlyBrace() tomlLexStateFn {
l.next()
l.emit(tokenLeftCurlyBrace)
return l.lexRvalue
}
func (l *tomlLexer) lexRightCurlyBrace() tomlLexStateFn {
l.next()
l.emit(tokenRightCurlyBrace)
return l.lexRvalue
}
func (l *tomlLexer) lexDate() tomlLexStateFn {
l.emit(tokenDate)
return l.lexRvalue
}
func (l *tomlLexer) lexTrue() tomlLexStateFn {
l.fastForward(4)
l.emit(tokenTrue)
return l.lexRvalue
}
func (l *tomlLexer) lexFalse() tomlLexStateFn {
l.fastForward(5)
l.emit(tokenFalse)
return l.lexRvalue
}
func (l *tomlLexer) lexInf() tomlLexStateFn {
l.fastForward(3)
l.emit(tokenInf)
return l.lexRvalue
}
func (l *tomlLexer) lexNan() tomlLexStateFn {
l.fastForward(3)
l.emit(tokenNan)
return l.lexRvalue
}
func (l *tomlLexer) lexEqual() tomlLexStateFn {
l.next()
l.emit(tokenEqual)
return l.lexRvalue
}
func (l *tomlLexer) lexComma() tomlLexStateFn {
l.next()
l.emit(tokenComma)
return l.lexRvalue
}
// Parse the key and emits its value without escape sequences.
// bare keys, basic string keys and literal string keys are supported.
func (l *tomlLexer) lexKey() tomlLexStateFn {
growingString := ""
for r := l.peek(); isKeyChar(r) || r == '\n' || r == '\r'; r = l.peek() {
if r == '"' {
l.next()
str, err := l.lexStringAsString(`"`, false, true)
if err != nil {
return l.errorf(err.Error())
}
growingString += str
l.next()
continue
} else if r == '\'' {
l.next()
str, err := l.lexLiteralStringAsString(`'`, false)
if err != nil {
return l.errorf(err.Error())
}
growingString += str
l.next()
continue
} else if r == '\n' {
return l.errorf("keys cannot contain new lines")
} else if isSpace(r) {
break
} else if !isValidBareChar(r) {
return l.errorf("keys cannot contain %c character", r)
}
growingString += string(r)
l.next()
}
l.emitWithValue(tokenKey, growingString)
return l.lexVoid
}
func (l *tomlLexer) lexComment(previousState tomlLexStateFn) tomlLexStateFn {
return func() tomlLexStateFn {
for next := l.peek(); next != '\n' && next != eof; next = l.peek() {
if next == '\r' && l.follow("\r\n") {
break
}
l.next()
}
l.ignore()
return previousState
}
}
func (l *tomlLexer) lexLeftBracket() tomlLexStateFn {
l.next()
l.emit(tokenLeftBracket)
return l.lexRvalue
}
func (l *tomlLexer) lexLiteralStringAsString(terminator string, discardLeadingNewLine bool) (string, error) {
growingString := ""
if discardLeadingNewLine {
if l.follow("\r\n") {
l.skip()
l.skip()
} else if l.peek() == '\n' {
l.skip()
}
}
// find end of string
for {
if l.follow(terminator) {
return growingString, nil
}
next := l.peek()
if next == eof {
break
}
growingString += string(l.next())
}
return "", errors.New("unclosed string")
}
func (l *tomlLexer) lexLiteralString() tomlLexStateFn {
l.skip()
// handle special case for triple-quote
terminator := "'"
discardLeadingNewLine := false
if l.follow("''") {
l.skip()
l.skip()
terminator = "'''"
discardLeadingNewLine = true
}
str, err := l.lexLiteralStringAsString(terminator, discardLeadingNewLine)
if err != nil {
return l.errorf(err.Error())
}
l.emitWithValue(tokenString, str)
l.fastForward(len(terminator))
l.ignore()
return l.lexRvalue
}
// Lex a string and return the results as a string.
// Terminator is the substring indicating the end of the token.
// The resulting string does not include the terminator.
func (l *tomlLexer) lexStringAsString(terminator string, discardLeadingNewLine, acceptNewLines bool) (string, error) {
growingString := ""
if discardLeadingNewLine {
if l.follow("\r\n") {
l.skip()
l.skip()
} else if l.peek() == '\n' {
l.skip()
}
}
for {
if l.follow(terminator) {
return growingString, nil
}
if l.follow("\\") {
l.next()
switch l.peek() {
case '\r':
fallthrough
case '\n':
fallthrough
case '\t':
fallthrough
case ' ':
// skip all whitespace chars following backslash
for strings.ContainsRune("\r\n\t ", l.peek()) {
l.next()
}
case '"':
growingString += "\""
l.next()
case 'n':
growingString += "\n"
l.next()
case 'b':
growingString += "\b"
l.next()
case 'f':
growingString += "\f"
l.next()
case '/':
growingString += "/"
l.next()
case 't':
growingString += "\t"
l.next()
case 'r':
growingString += "\r"
l.next()
case '\\':
growingString += "\\"
l.next()
case 'u':
l.next()
code := ""
for i := 0; i < 4; i++ {
c := l.peek()
if !isHexDigit(c) {
return "", errors.New("unfinished unicode escape")
}
l.next()
code = code + string(c)
}
intcode, err := strconv.ParseInt(code, 16, 32)
if err != nil {
return "", errors.New("invalid unicode escape: \\u" + code)
}
growingString += string(rune(intcode))
case 'U':
l.next()
code := ""
for i := 0; i < 8; i++ {
c := l.peek()
if !isHexDigit(c) {
return "", errors.New("unfinished unicode escape")
}
l.next()
code = code + string(c)
}
intcode, err := strconv.ParseInt(code, 16, 64)
if err != nil {
return "", errors.New("invalid unicode escape: \\U" + code)
}
growingString += string(rune(intcode))
default:
return "", errors.New("invalid escape sequence: \\" + string(l.peek()))
}
} else {
r := l.peek()
if 0x00 <= r && r <= 0x1F && !(acceptNewLines && (r == '\n' || r == '\r')) {
return "", fmt.Errorf("unescaped control character %U", r)
}
l.next()
growingString += string(r)
}
if l.peek() == eof {
break
}
}
return "", errors.New("unclosed string")
}
func (l *tomlLexer) lexString() tomlLexStateFn {
l.skip()
// handle special case for triple-quote
terminator := `"`
discardLeadingNewLine := false
acceptNewLines := false
if l.follow(`""`) {
l.skip()
l.skip()
terminator = `"""`
discardLeadingNewLine = true
acceptNewLines = true
}
str, err := l.lexStringAsString(terminator, discardLeadingNewLine, acceptNewLines)
if err != nil {
return l.errorf(err.Error())
}
l.emitWithValue(tokenString, str)
l.fastForward(len(terminator))
l.ignore()
return l.lexRvalue
}
func (l *tomlLexer) lexTableKey() tomlLexStateFn {
l.next()
if l.peek() == '[' {
// token '[[' signifies an array of tables
l.next()
l.emit(tokenDoubleLeftBracket)
return l.lexInsideTableArrayKey
}
// vanilla table key
l.emit(tokenLeftBracket)
return l.lexInsideTableKey
}
// Parse the key till "]]", but only bare keys are supported
func (l *tomlLexer) lexInsideTableArrayKey() tomlLexStateFn {
for r := l.peek(); r != eof; r = l.peek() {
switch r {
case ']':
if l.currentTokenStop > l.currentTokenStart {
l.emit(tokenKeyGroupArray)
}
l.next()
if l.peek() != ']' {
break
}
l.next()
l.emit(tokenDoubleRightBracket)
return l.lexVoid
case '[':
return l.errorf("table array key cannot contain ']'")
default:
l.next()
}
}
return l.errorf("unclosed table array key")
}
// Parse the key till "]" but only bare keys are supported
func (l *tomlLexer) lexInsideTableKey() tomlLexStateFn {
for r := l.peek(); r != eof; r = l.peek() {
switch r {
case ']':
if l.currentTokenStop > l.currentTokenStart {
l.emit(tokenKeyGroup)
}
l.next()
l.emit(tokenRightBracket)
return l.lexVoid
case '[':
return l.errorf("table key cannot contain ']'")
default:
l.next()
}
}
return l.errorf("unclosed table key")
}
func (l *tomlLexer) lexRightBracket() tomlLexStateFn {
l.next()
l.emit(tokenRightBracket)
return l.lexRvalue
}
type validRuneFn func(r rune) bool
func isValidHexRune(r rune) bool {
return r >= 'a' && r <= 'f' ||
r >= 'A' && r <= 'F' ||
r >= '0' && r <= '9' ||
r == '_'
}
func isValidOctalRune(r rune) bool {
return r >= '0' && r <= '7' || r == '_'
}
func isValidBinaryRune(r rune) bool {
return r == '0' || r == '1' || r == '_'
}
func (l *tomlLexer) lexNumber() tomlLexStateFn {
r := l.peek()
if r == '0' {
follow := l.peekString(2)
if len(follow) == 2 {
var isValidRune validRuneFn
switch follow[1] {
case 'x':
isValidRune = isValidHexRune
case 'o':
isValidRune = isValidOctalRune
case 'b':
isValidRune = isValidBinaryRune
default:
if follow[1] >= 'a' && follow[1] <= 'z' || follow[1] >= 'A' && follow[1] <= 'Z' {
return l.errorf("unknown number base: %s. possible options are x (hex) o (octal) b (binary)", string(follow[1]))
}
}
if isValidRune != nil {
l.next()
l.next()
digitSeen := false
for {
next := l.peek()
if !isValidRune(next) {
break
}
digitSeen = true
l.next()
}
if !digitSeen {
return l.errorf("number needs at least one digit")
}
l.emit(tokenInteger)
return l.lexRvalue
}
}
}
if r == '+' || r == '-' {
l.next()
if l.follow("inf") {
return l.lexInf
}
if l.follow("nan") {
return l.lexNan
}
}
pointSeen := false
expSeen := false
digitSeen := false
for {
next := l.peek()
if next == '.' {
if pointSeen {
return l.errorf("cannot have two dots in one float")
}
l.next()
if !isDigit(l.peek()) {
return l.errorf("float cannot end with a dot")
}
pointSeen = true
} else if next == 'e' || next == 'E' {
expSeen = true
l.next()
r := l.peek()
if r == '+' || r == '-' {
l.next()
}
} else if isDigit(next) {
digitSeen = true
l.next()
} else if next == '_' {
l.next()
} else {
break
}
if pointSeen && !digitSeen {
return l.errorf("cannot start float with a dot")
}
}
if !digitSeen {
return l.errorf("no digit in that number")
}
if pointSeen || expSeen {
l.emit(tokenFloat)
} else {
l.emit(tokenInteger)
}
return l.lexRvalue
}
func (l *tomlLexer) run() {
for state := l.lexVoid; state != nil; {
state = state()
}
}
func init() {
dateRegexp = regexp.MustCompile(`^\d{1,4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d{1,9})?(Z|[+-]\d{2}:\d{2})`)
}
// Entry point
func lexToml(inputBytes []byte) []token {
runes := bytes.Runes(inputBytes)
l := &tomlLexer{
input: runes,
tokens: make([]token, 0, 256),
line: 1,
col: 1,
endbufferLine: 1,
endbufferCol: 1,
}
l.run()
return l.tokens
}