Update dependencies (#5518)

This commit is contained in:
hongming
2023-02-12 23:09:20 +08:00
committed by GitHub
parent d3b35fb2da
commit a979342f56
1486 changed files with 126660 additions and 71128 deletions

View File

@@ -9,13 +9,10 @@ import (
"bytes"
"errors"
"fmt"
"regexp"
"strconv"
"strings"
)
var dateRegexp *regexp.Regexp
// Define state functions
type tomlLexStateFn func() tomlLexStateFn
@@ -26,7 +23,7 @@ type tomlLexer struct {
currentTokenStart int
currentTokenStop int
tokens []token
depth int
brackets []rune
line int
col int
endbufferLine int
@@ -123,6 +120,8 @@ func (l *tomlLexer) lexVoid() tomlLexStateFn {
for {
next := l.peek()
switch next {
case '}': // after '{'
return l.lexRightCurlyBrace
case '[':
return l.lexTableKey
case '#':
@@ -140,10 +139,6 @@ func (l *tomlLexer) lexVoid() tomlLexStateFn {
l.skip()
}
if l.depth > 0 {
return l.lexRvalue
}
if isKeyStartChar(next) {
return l.lexKey
}
@@ -167,10 +162,8 @@ func (l *tomlLexer) lexRvalue() tomlLexStateFn {
case '=':
return l.lexEqual
case '[':
l.depth++
return l.lexLeftBracket
case ']':
l.depth--
return l.lexRightBracket
case '{':
return l.lexLeftCurlyBrace
@@ -188,12 +181,10 @@ func (l *tomlLexer) lexRvalue() tomlLexStateFn {
fallthrough
case '\n':
l.skip()
if l.depth == 0 {
return l.lexVoid
if len(l.brackets) > 0 && l.brackets[len(l.brackets)-1] == '[' {
return l.lexRvalue
}
return l.lexRvalue
case '_':
return l.errorf("cannot start number with underscore")
return l.lexVoid
}
if l.follow("true") {
@@ -222,22 +213,12 @@ func (l *tomlLexer) lexRvalue() tomlLexStateFn {
break
}
possibleDate := l.peekString(35)
dateSubmatches := dateRegexp.FindStringSubmatch(possibleDate)
if dateSubmatches != nil && dateSubmatches[0] != "" {
l.fastForward(len(dateSubmatches[0]))
if dateSubmatches[2] == "" { // no timezone information => local date
return l.lexLocalDate
}
return l.lexDate
}
if next == '+' || next == '-' || isDigit(next) {
if next == '+' || next == '-' {
return l.lexNumber
}
if isAlphanumeric(next) {
return l.lexKey
if isDigit(next) {
return l.lexDateTimeOrNumber
}
return l.errorf("no value can start with %c", next)
@@ -247,28 +228,290 @@ func (l *tomlLexer) lexRvalue() tomlLexStateFn {
return nil
}
// lexDateTimeOrNumber disambiguates a value that starts with a digit by
// looking at the string returned by l.peekString(5):
//
//	HH:...   (two digits then ':')  => time, handed to lexDateTimeOrTime
//	YYYY-... (four digits then '-') => date or date-time, handed to lexDateTimeOrTime
//	anything else                   => number, handed to lexNumber
//
// The selected lexer is called directly and the state it returns is returned.
func (l *tomlLexer) lexDateTimeOrNumber() tomlLexStateFn {
	prefix := l.peekString(5)
	if len(prefix) < 3 {
		// Too short to contain either "HH:" or "YYYY-".
		return l.lexNumber()
	}

	for pos, ch := range prefix {
		if isDigit(ch) {
			continue
		}
		// The first non-digit character decides what kind of value this is.
		looksLikeTime := pos == 2 && ch == ':'
		looksLikeDate := pos == 4 && ch == '-'
		if looksLikeTime || looksLikeDate {
			return l.lexDateTimeOrTime()
		}
		return l.lexNumber()
	}

	// Every inspected character was a digit.
	return l.lexNumber()
}
// lexLeftCurlyBrace advances past the current character, emits
// tokenLeftCurlyBrace, pushes '{' onto the bracket stack and hands control
// to lexVoid.
func (l *tomlLexer) lexLeftCurlyBrace() tomlLexStateFn {
	const opening = '{'

	l.next()
	l.emit(tokenLeftCurlyBrace)
	// Remember the opening brace so that the matching '}' can be validated.
	l.brackets = append(l.brackets, opening)

	return l.lexVoid
}
// lexRightCurlyBrace advances past the current character and emits
// tokenRightCurlyBrace. It then requires the innermost open bracket to be a
// '{': if so the bracket is popped and lexing continues with lexRvalue,
// otherwise an error state is returned.
func (l *tomlLexer) lexRightCurlyBrace() tomlLexStateFn {
	l.next()
	l.emit(tokenRightCurlyBrace)

	top := len(l.brackets) - 1
	if top < 0 || l.brackets[top] != '{' {
		return l.errorf("cannot have '}' here")
	}

	// Pop the matching '{'.
	l.brackets = l.brackets[:top]
	return l.lexRvalue
}
func (l *tomlLexer) lexDate() tomlLexStateFn {
l.emit(tokenDate)
return l.lexRvalue
// lexDateTimeOrTime consumes the first three characters of a date/time value
// and dispatches on the third one: ':' means the value is a time (lexTime),
// anything else is treated as a date or date-time (lexDateTime).
//
// Values this state is meant to recognise:
//
//	1979-05-27T07:32:00Z
//	1979-05-27T00:32:00-07:00
//	1979-05-27T00:32:00.999999-07:00
//	1979-05-27 07:32:00Z
//	1979-05-27 00:32:00-07:00
//	1979-05-27 00:32:00.999999-07:00
//	1979-05-27T07:32:00
//	1979-05-27T00:32:00.999999
//	1979-05-27 07:32:00
//	1979-05-27 00:32:00.999999
//	1979-05-27
//	07:32:00
//	00:32:00.999999
func (l *tomlLexer) lexDateTimeOrTime() tomlLexStateFn {
	// The first two characters are already known to be digits.
	l.next()
	l.next()

	// With two digits read, the third character tells a time (HH:) apart
	// from a date (YYYY-).
	if third := l.next(); third == ':' {
		return l.lexTime()
	}
	return l.lexDateTime()
}
func (l *tomlLexer) lexLocalDate() tomlLexStateFn {
// lexDateTime lexes an offset date-time, a local date-time, or a local date.
//
// It is entered with the first three year digits already consumed:
//
//	   v--- cursor
//	1979-05-27T07:32:00Z
//	1979-05-27T00:32:00-07:00
//	1979-05-27T00:32:00.999999-07:00
//	1979-05-27 07:32:00Z
//	1979-05-27 00:32:00-07:00
//	1979-05-27 00:32:00.999999-07:00
//	1979-05-27T07:32:00
//	1979-05-27T00:32:00.999999
//	1979-05-27 07:32:00
//	1979-05-27 00:32:00.999999
//	1979-05-27
//
// The date part is emitted as tokenLocalDate. If a time follows (after 'T',
// or after a single space followed by two digits) it is emitted as
// tokenLocalTime and lexTimeOffset is returned to pick up an optional
// offset. If no time follows, lexRvalue is returned. Malformed input yields
// the error state produced by l.errorf.
func (l *tomlLexer) lexDateTime() tomlLexStateFn {
	// date

	// The fourth year digit and the '-' after it were already validated by
	// the look-ahead in lexDateTimeOrNumber.
	l.next() // digit
	l.next() // -

	for i := 0; i < 2; i++ {
		r := l.next()
		if !isDigit(r) {
			return l.errorf("invalid month digit in date: %c", r)
		}
	}

	r := l.next()
	if r != '-' {
		return l.errorf("expected - to separate month of a date, not %c", r)
	}

	for i := 0; i < 2; i++ {
		r := l.next()
		if !isDigit(r) {
			return l.errorf("invalid day digit in date: %c", r)
		}
	}

	l.emit(tokenLocalDate)

	r = l.peek()

	switch r {
	case eof:
		return l.lexRvalue
	case '\n', '\r', '\t', ',', ']', '}', '#':
		// A local date is a complete value on its own, so it may be directly
		// followed by the end of the line, a comment, or the next element or
		// closing delimiter of an array or inline table. Leave the character
		// for lexRvalue, which is expected to handle it.
		return l.lexRvalue
	case ' ', 'T':
		// Possibly followed by a time; handled below.
	default:
		return l.errorf("incorrect date/time separation character: %c", r)
	}

	if r == ' ' {
		// A space only introduces a time when two digits follow it;
		// otherwise the value is a plain local date followed by whitespace.
		lookAhead := l.peekString(3)[1:]
		if len(lookAhead) < 2 {
			return l.lexRvalue
		}
		for _, r := range lookAhead {
			if !isDigit(r) {
				return l.lexRvalue
			}
		}
	}

	l.skip() // skip the T or ' '

	// time

	for i := 0; i < 2; i++ {
		r := l.next()
		if !isDigit(r) {
			return l.errorf("invalid hour digit in time: %c", r)
		}
	}

	r = l.next()
	if r != ':' {
		return l.errorf("time hour/minute separator should be :, not %c", r)
	}

	for i := 0; i < 2; i++ {
		r := l.next()
		if !isDigit(r) {
			return l.errorf("invalid minute digit in time: %c", r)
		}
	}

	r = l.next()
	if r != ':' {
		return l.errorf("time minute/second separator should be :, not %c", r)
	}

	for i := 0; i < 2; i++ {
		r := l.next()
		if !isDigit(r) {
			return l.errorf("invalid second digit in time: %c", r)
		}
	}

	// Optional fractional seconds: a '.' followed by at least one digit.
	r = l.peek()
	if r == '.' {
		l.next()
		r := l.next()
		if !isDigit(r) {
			return l.errorf("expected at least one digit in time's fraction, not %c", r)
		}

		for {
			r := l.peek()
			if !isDigit(r) {
				break
			}
			l.next()
		}
	}

	l.emit(tokenLocalTime)

	return l.lexTimeOffset
}
// lexTimeOffset lexes the optional offset that may follow a time:
//
//	Z
//	-07:00
//	+07:00
//	(nothing)
//
// When an offset is present it is emitted as tokenTimeOffset. In every
// non-error case lexing continues with lexRvalue.
func (l *tomlLexer) lexTimeOffset() tomlLexStateFn {
	switch lead := l.peek(); lead {
	case 'Z':
		l.next()
		l.emit(tokenTimeOffset)

	case '+', '-':
		l.next() // sign

		// HH
		for n := 0; n < 2; n++ {
			if d := l.next(); !isDigit(d) {
				return l.errorf("invalid hour digit in time offset: %c", d)
			}
		}

		if sep := l.next(); sep != ':' {
			return l.errorf("time offset hour/minute separator should be :, not %c", sep)
		}

		// MM
		for n := 0; n < 2; n++ {
			if d := l.next(); !isDigit(d) {
				return l.errorf("invalid minute digit in time offset: %c", d)
			}
		}

		l.emit(tokenTimeOffset)
	}

	return l.lexRvalue
}
// lexTime lexes the remainder of a local time once the hour and the first
// ':' have been consumed:
//
//	   v--- cursor
//	07:32:00
//	00:32:00.999999
//
// On success the time is emitted as tokenLocalTime and lexing continues with
// lexRvalue; malformed input yields the error state produced by l.errorf.
func (l *tomlLexer) lexTime() tomlLexStateFn {
	// minutes
	for n := 0; n < 2; n++ {
		if d := l.next(); !isDigit(d) {
			return l.errorf("invalid minute digit in time: %c", d)
		}
	}

	if sep := l.next(); sep != ':' {
		return l.errorf("time minute/second separator should be :, not %c", sep)
	}

	// seconds
	for n := 0; n < 2; n++ {
		if d := l.next(); !isDigit(d) {
			return l.errorf("invalid second digit in time: %c", d)
		}
	}

	// Optional fraction: '.' followed by one or more digits.
	if l.peek() == '.' {
		l.next()
		if first := l.next(); !isDigit(first) {
			return l.errorf("expected at least one digit in time's fraction, not %c", first)
		}
		for isDigit(l.peek()) {
			l.next()
		}
	}

	l.emit(tokenLocalTime)
	return l.lexRvalue
}
func (l *tomlLexer) lexTrue() tomlLexStateFn {
l.fastForward(4)
l.emit(tokenTrue)
@@ -302,13 +545,16 @@ func (l *tomlLexer) lexEqual() tomlLexStateFn {
// lexComma advances past the current character and emits tokenComma. When the
// innermost open bracket is '{' the next state is lexVoid; in every other
// case it is lexRvalue.
func (l *tomlLexer) lexComma() tomlLexStateFn {
	l.next()
	l.emit(tokenComma)

	if depth := len(l.brackets); depth > 0 && l.brackets[depth-1] == '{' {
		return l.lexVoid
	}
	return l.lexRvalue
}
// lexKey parses the key and emits its value without escape sequences.
// Bare keys, basic string keys and literal string keys are supported.
func (l *tomlLexer) lexKey() tomlLexStateFn {
growingString := ""
var sb strings.Builder
for r := l.peek(); isKeyChar(r) || r == '\n' || r == '\r'; r = l.peek() {
if r == '"' {
@@ -317,7 +563,9 @@ func (l *tomlLexer) lexKey() tomlLexStateFn {
if err != nil {
return l.errorf(err.Error())
}
growingString += "\"" + str + "\""
sb.WriteString("\"")
sb.WriteString(str)
sb.WriteString("\"")
l.next()
continue
} else if r == '\'' {
@@ -326,22 +574,45 @@ func (l *tomlLexer) lexKey() tomlLexStateFn {
if err != nil {
return l.errorf(err.Error())
}
growingString += "'" + str + "'"
sb.WriteString("'")
sb.WriteString(str)
sb.WriteString("'")
l.next()
continue
} else if r == '\n' {
return l.errorf("keys cannot contain new lines")
} else if isSpace(r) {
break
var str strings.Builder
str.WriteString(" ")
// skip trailing whitespace
l.next()
for r = l.peek(); isSpace(r); r = l.peek() {
str.WriteRune(r)
l.next()
}
// break loop if not a dot
if r != '.' {
break
}
str.WriteString(".")
// skip trailing whitespace after dot
l.next()
for r = l.peek(); isSpace(r); r = l.peek() {
str.WriteRune(r)
l.next()
}
sb.WriteString(str.String())
continue
} else if r == '.' {
// skip
} else if !isValidBareChar(r) {
return l.errorf("keys cannot contain %c character", r)
}
growingString += string(r)
sb.WriteRune(r)
l.next()
}
l.emitWithValue(tokenKey, growingString)
l.emitWithValue(tokenKey, sb.String())
return l.lexVoid
}
@@ -361,11 +632,12 @@ func (l *tomlLexer) lexComment(previousState tomlLexStateFn) tomlLexStateFn {
// lexLeftBracket advances past the current character, emits tokenLeftBracket,
// pushes '[' onto the bracket stack and continues with lexRvalue.
func (l *tomlLexer) lexLeftBracket() tomlLexStateFn {
	const opening = '['

	l.next()
	l.emit(tokenLeftBracket)
	// Remember the opening bracket so that the matching ']' can be validated.
	l.brackets = append(l.brackets, opening)

	return l.lexRvalue
}
func (l *tomlLexer) lexLiteralStringAsString(terminator string, discardLeadingNewLine bool) (string, error) {
growingString := ""
var sb strings.Builder
if discardLeadingNewLine {
if l.follow("\r\n") {
@@ -379,14 +651,14 @@ func (l *tomlLexer) lexLiteralStringAsString(terminator string, discardLeadingNe
// find end of string
for {
if l.follow(terminator) {
return growingString, nil
return sb.String(), nil
}
next := l.peek()
if next == eof {
break
}
growingString += string(l.next())
sb.WriteRune(l.next())
}
return "", errors.New("unclosed string")
@@ -420,7 +692,7 @@ func (l *tomlLexer) lexLiteralString() tomlLexStateFn {
// Terminator is the substring indicating the end of the token.
// The resulting string does not include the terminator.
func (l *tomlLexer) lexStringAsString(terminator string, discardLeadingNewLine, acceptNewLines bool) (string, error) {
growingString := ""
var sb strings.Builder
if discardLeadingNewLine {
if l.follow("\r\n") {
@@ -433,7 +705,7 @@ func (l *tomlLexer) lexStringAsString(terminator string, discardLeadingNewLine,
for {
if l.follow(terminator) {
return growingString, nil
return sb.String(), nil
}
if l.follow("\\") {
@@ -451,72 +723,72 @@ func (l *tomlLexer) lexStringAsString(terminator string, discardLeadingNewLine,
l.next()
}
case '"':
growingString += "\""
sb.WriteString("\"")
l.next()
case 'n':
growingString += "\n"
sb.WriteString("\n")
l.next()
case 'b':
growingString += "\b"
sb.WriteString("\b")
l.next()
case 'f':
growingString += "\f"
sb.WriteString("\f")
l.next()
case '/':
growingString += "/"
sb.WriteString("/")
l.next()
case 't':
growingString += "\t"
sb.WriteString("\t")
l.next()
case 'r':
growingString += "\r"
sb.WriteString("\r")
l.next()
case '\\':
growingString += "\\"
sb.WriteString("\\")
l.next()
case 'u':
l.next()
code := ""
var code strings.Builder
for i := 0; i < 4; i++ {
c := l.peek()
if !isHexDigit(c) {
return "", errors.New("unfinished unicode escape")
}
l.next()
code = code + string(c)
code.WriteRune(c)
}
intcode, err := strconv.ParseInt(code, 16, 32)
intcode, err := strconv.ParseInt(code.String(), 16, 32)
if err != nil {
return "", errors.New("invalid unicode escape: \\u" + code)
return "", errors.New("invalid unicode escape: \\u" + code.String())
}
growingString += string(rune(intcode))
sb.WriteRune(rune(intcode))
case 'U':
l.next()
code := ""
var code strings.Builder
for i := 0; i < 8; i++ {
c := l.peek()
if !isHexDigit(c) {
return "", errors.New("unfinished unicode escape")
}
l.next()
code = code + string(c)
code.WriteRune(c)
}
intcode, err := strconv.ParseInt(code, 16, 64)
intcode, err := strconv.ParseInt(code.String(), 16, 64)
if err != nil {
return "", errors.New("invalid unicode escape: \\U" + code)
return "", errors.New("invalid unicode escape: \\U" + code.String())
}
growingString += string(rune(intcode))
sb.WriteRune(rune(intcode))
default:
return "", errors.New("invalid escape sequence: \\" + string(l.peek()))
}
} else {
r := l.peek()
if 0x00 <= r && r <= 0x1F && !(acceptNewLines && (r == '\n' || r == '\r')) {
if 0x00 <= r && r <= 0x1F && r != '\t' && !(acceptNewLines && (r == '\n' || r == '\r')) {
return "", fmt.Errorf("unescaped control character %U", r)
}
l.next()
growingString += string(r)
sb.WriteRune(r)
}
if l.peek() == eof {
@@ -543,7 +815,6 @@ func (l *tomlLexer) lexString() tomlLexStateFn {
}
str, err := l.lexStringAsString(terminator, discardLeadingNewLine, acceptNewLines)
if err != nil {
return l.errorf(err.Error())
}
@@ -615,6 +886,10 @@ func (l *tomlLexer) lexInsideTableKey() tomlLexStateFn {
// lexRightBracket advances past the current character and emits
// tokenRightBracket. It then requires the innermost open bracket to be a
// '[': if so the bracket is popped and lexing continues with lexRvalue,
// otherwise an error state is returned.
func (l *tomlLexer) lexRightBracket() tomlLexStateFn {
	l.next()
	l.emit(tokenRightBracket)

	top := len(l.brackets) - 1
	if top < 0 || l.brackets[top] != '[' {
		return l.errorf("cannot have ']' here")
	}

	// Pop the matching '['.
	l.brackets = l.brackets[:top]
	return l.lexRvalue
}
@@ -740,30 +1015,6 @@ func (l *tomlLexer) run() {
}
}
// init compiles the package-level dateRegexp.
func init() {
	// dateTimePattern matches, anchored at the start of the input, every
	// date/time format supported by TOML.
	//
	// Capture groups:
	//   1: fractional seconds (1 to 9 digits)
	//   2: timezone
	//
	// Warning: every part of the pattern is optional, so it also matches the
	// empty string.
	//
	// Example matches:
	//   1979-05-27T07:32:00Z
	//   1979-05-27T00:32:00-07:00
	//   1979-05-27T00:32:00.999999-07:00
	//   1979-05-27 07:32:00Z
	//   1979-05-27 00:32:00-07:00
	//   1979-05-27 00:32:00.999999-07:00
	//   1979-05-27T07:32:00
	//   1979-05-27T00:32:00.999999
	//   1979-05-27 07:32:00
	//   1979-05-27 00:32:00.999999
	//   1979-05-27
	//   07:32:00
	//   00:32:00.999999
	const dateTimePattern = `^(?:\d{1,4}-\d{2}-\d{2})?(?:[T ]?\d{2}:\d{2}:\d{2}(\.\d{1,9})?(Z|[+-]\d{2}:\d{2})?)?`

	dateRegexp = regexp.MustCompile(dateTimePattern)
}
// Entry point
func lexToml(inputBytes []byte) []token {
runes := bytes.Runes(inputBytes)