delve/vendor/github.com/cosiner/argv/scanner.go
2020-06-24 10:00:37 -07:00

207 lines
3.8 KiB
Go

package argv
import (
"unicode"
)
// Scanner tokenizes a command-line string.
//
// It splits the input into tokens: whitespace runs, plain and quoted strings,
// pipe characters and back-quoted strings.
type Scanner struct {
	// text is the input, held as runes so that scanning works on code
	// points rather than bytes.
	text []rune
	// rpos is the index in text of the next rune to read.
	rpos int
}
// NewScanner returns a Scanner positioned at the start of text.
//
// The text is converted to runes up front, so the scanner reads it one code
// point at a time.
func NewScanner(text string) *Scanner {
	s := new(Scanner)
	s.text = []rune(text)
	return s
}
// _RuneEOF is the sentinel the scanner uses for "no more input".
//
// It is negative so that it can never compare equal to a rune taken from the
// input: converting a string to []rune only produces code points in the range
// 0..unicode.MaxRune (invalid bytes become U+FFFD). A sentinel of 0 would make
// an embedded NUL character indistinguishable from the end of the input.
const _RuneEOF = -1
// nextRune returns the rune at the current position and advances past it.
// Once the input is exhausted it returns _RuneEOF and leaves the position
// unchanged.
func (s *Scanner) nextRune() rune {
	pos := s.rpos
	if pos >= len(s.text) {
		return _RuneEOF
	}
	s.rpos = pos + 1
	return s.text[pos]
}
// unreadRune steps the position back by one so that r, the rune most recently
// returned by nextRune, is read again.
//
// Passing _RuneEOF is a no-op: nextRune does not advance when it reports the
// end of the input, so there is nothing to undo.
func (s *Scanner) unreadRune(r rune) {
	if r == _RuneEOF {
		return
	}
	s.rpos--
}
// isEscapeChars reports whether r, as the character following a backslash, is
// a recognised escape, and returns the character that escape stands for.
//
// The recognised escapes are \a, \b, \f, \n, \r, \t, \v, \\ and \$. For any
// other rune it returns r unchanged together with false.
func (s *Scanner) isEscapeChars(r rune) (rune, bool) {
	decoded := r
	switch r {
	case 'a':
		decoded = '\a'
	case 'b':
		decoded = '\b'
	case 'f':
		decoded = '\f'
	case 'n':
		decoded = '\n'
	case 'r':
		decoded = '\r'
	case 't':
		decoded = '\t'
	case 'v':
		decoded = '\v'
	case '\\', '$':
		// These two escape to themselves.
	default:
		return r, false
	}
	return decoded, true
}
// TokenType identifies the kind of a Token produced by the scanner.
type TokenType uint32

// Token kinds. Numbering starts at 1, so the zero TokenType is not a valid
// kind.
const (
	// TokString is a string written without surrounding quotes.
	TokString TokenType = iota + 1
	// TokStringSingleQuote is a string that was enclosed in single quotes.
	TokStringSingleQuote
	// TokStringDoubleQuote is a string that was enclosed in double quotes.
	TokStringDoubleQuote
	// TokPipe is the '|' character.
	TokPipe
	// TokBackQuote is a string that was enclosed in back quotes.
	TokBackQuote
	// TokSpace is a run of one or more whitespace characters.
	TokSpace
	// TokEOF marks the end of the input.
	TokEOF
)

// Token is a single lexical element: its kind plus, for string-like kinds, the
// text it carries.
type Token struct {
	// Type is the kind of the token.
	Type TokenType
	// Value is the token text with the surrounding quotes removed. The
	// scanner leaves it nil for TokPipe, TokSpace and TokEOF.
	Value []rune
}
// Next scans and returns the next token of the input. Once the input is
// exhausted it returns a token of type TokEOF.
//
// Tokens are recognised as follows:
//   - '|' yields TokPipe.
//   - A run of whitespace yields a single TokSpace.
//   - Text between back quotes yields TokBackQuote; its content is taken
//     verbatim.
//   - Text between single or double quotes yields TokStringSingleQuote or
//     TokStringDoubleQuote. A backslash followed by a character accepted by
//     isEscapeChars is replaced by the escaped character; any other backslash
//     is kept as is. The other kind of quote is ordinary content.
//   - Anything else yields TokString, which ends at whitespace, '|', a quote
//     or back quote, or the end of the input. A backslash makes the character
//     after it literal.
//
// ErrInvalidSyntax is returned when a quoted or back-quoted string is never
// closed, or when an unquoted string ends in a backslash.
func (s *Scanner) Next() (Token, error) {
	// States of the scanning automaton. Every call starts in stStart and
	// returns as soon as one complete token has been recognised.
	const (
		stStart = iota + 1
		stSpace
		stBackQuote
		stBare
		stSingleQuote
		stDoubleQuote
	)

	var (
		result Token
		mode   uint8 = stStart
	)
	for {
		ch := s.nextRune()

		switch mode {
		case stStart:
			switch ch {
			case _RuneEOF:
				result.Type = TokEOF
				return result, nil
			case '|':
				result.Type = TokPipe
				return result, nil
			case '`':
				mode = stBackQuote
			case '\'':
				mode = stSingleQuote
			case '"':
				mode = stDoubleQuote
			default:
				// ch opens either a whitespace run or an unquoted string.
				// Push it back so the state that owns it consumes it.
				if unicode.IsSpace(ch) {
					mode = stSpace
				} else {
					mode = stBare
				}
				s.unreadRune(ch)
			}

		case stSpace:
			if ch != _RuneEOF && unicode.IsSpace(ch) {
				continue
			}
			// ch belongs to the next token; leave it for the next call.
			s.unreadRune(ch)
			result.Type = TokSpace
			return result, nil

		case stBackQuote:
			if ch == _RuneEOF {
				return result, ErrInvalidSyntax
			}
			if ch == '`' {
				result.Type = TokBackQuote
				return result, nil
			}
			result.Value = append(result.Value, ch)

		case stBare:
			if ch == '\\' {
				// Outside quotes a backslash makes whatever follows literal.
				escaped := s.nextRune()
				if escaped == _RuneEOF {
					return result, ErrInvalidSyntax
				}
				result.Value = append(result.Value, escaped)
				continue
			}
			if ch == _RuneEOF || ch == '|' || ch == '`' || ch == '\'' || ch == '"' || unicode.IsSpace(ch) {
				// The terminator starts the next token; leave it unread.
				result.Type = TokString
				s.unreadRune(ch)
				return result, nil
			}
			result.Value = append(result.Value, ch)

		case stSingleQuote, stDoubleQuote:
			// Only the quote that opened the string closes it; the other
			// quote character is ordinary content.
			closer, kind := '"', TokStringDoubleQuote
			if mode == stSingleQuote {
				closer, kind = '\'', TokStringSingleQuote
			}

			switch ch {
			case _RuneEOF:
				return result, ErrInvalidSyntax
			case closer:
				result.Type = kind
				return result, nil
			case '\\':
				following := s.nextRune()
				if mapped, ok := s.isEscapeChars(following); ok {
					result.Value = append(result.Value, mapped)
					continue
				}
				// Not a known escape: keep the backslash and rescan the
				// following rune normally. Note that this means a backslash
				// does not protect a closing quote.
				result.Value = append(result.Value, ch)
				s.unreadRune(following)
			default:
				result.Value = append(result.Value, ch)
			}
		}
	}
}
// Scan is a convenience wrapper that tokenizes the whole of text.
//
// On success the returned slice holds every token in order, ending with the
// TokEOF token. If the scanner reports an error, Scan returns a nil slice and
// that error.
func Scan(text string) ([]Token, error) {
	var (
		scanner = NewScanner(text)
		out     []Token
	)
	for {
		tok, err := scanner.Next()
		if err != nil {
			return nil, err
		}
		out = append(out, tok)
		if tok.Type == TokEOF {
			return out, nil
		}
	}
}