207 lines
3.8 KiB
Go
207 lines
3.8 KiB
Go
package argv
|
|
|
|
import (
|
|
"unicode"
|
|
)
|
|
|
|
// Scanner is a command-line string scanner.
//
// It splits a command-line string into tokens: spaces, strings (unquoted,
// single-quoted and double-quoted), pipes and back-quoted strings.
type Scanner struct {
	// text is the whole input, converted to runes when the scanner is created.
	text []rune
	// rpos is the index into text of the next rune to be read.
	rpos int
}
|
|
|
|
// NewScanner create a scanner and init it's internal states.
|
|
func NewScanner(text string) *Scanner {
|
|
return &Scanner{
|
|
text: []rune(text),
|
|
}
|
|
}
|
|
|
|
// _RuneEOF is the sentinel reported when the input is exhausted.
//
// It is deliberately negative: no rune produced by converting a string to
// []rune is ever negative, so the sentinel cannot be confused with real input.
// With a sentinel of 0, a NUL character inside the text is indistinguishable
// from the end of the input and silently truncates scanning.
const _RuneEOF = -1
|
|
|
|
func (s *Scanner) nextRune() rune {
|
|
if s.rpos >= len(s.text) {
|
|
return _RuneEOF
|
|
}
|
|
|
|
r := s.text[s.rpos]
|
|
s.rpos++
|
|
return r
|
|
}
|
|
|
|
func (s *Scanner) unreadRune(r rune) {
|
|
if r != _RuneEOF {
|
|
s.rpos--
|
|
}
|
|
}
|
|
|
|
func (s *Scanner) isEscapeChars(r rune) (rune, bool) {
|
|
switch r {
|
|
case 'a':
|
|
return '\a', true
|
|
case 'b':
|
|
return '\b', true
|
|
case 'f':
|
|
return '\f', true
|
|
case 'n':
|
|
return '\n', true
|
|
case 'r':
|
|
return '\r', true
|
|
case 't':
|
|
return '\t', true
|
|
case 'v':
|
|
return '\v', true
|
|
case '\\':
|
|
return '\\', true
|
|
case '$':
|
|
return '$', true
|
|
}
|
|
return r, false
|
|
}
|
|
|
|
// TokenType identifies the kind of a token produced by the scanner.
type TokenType uint32

// Token is a single unit produced by the scanner: its kind plus the runes
// that make up its content.
type Token struct {
	Type  TokenType
	Value []rune
}

const (
	// The zero value is skipped so that it never matches a token type below.
	_ TokenType = iota

	// TokString is an unquoted string.
	TokString
	// TokStringSingleQuote is a string enclosed in single quotes.
	TokStringSingleQuote
	// TokStringDoubleQuote is a string enclosed in double quotes.
	TokStringDoubleQuote
	// TokPipe is the '|' character.
	TokPipe
	// TokBackQuote is a string enclosed in back quotes.
	TokBackQuote
	// TokSpace represents a sequence of space characters.
	TokSpace
	// TokEOF marks the end of the input.
	TokEOF
)
|
|
|
|
// Next return next token, if it reach the end, TOK_EOF will be returned.
|
|
//
|
|
// Error is returned for invalid syntax such as unpaired quotes.
|
|
func (s *Scanner) Next() (Token, error) {
|
|
const (
|
|
Initial = iota + 1
|
|
Space
|
|
BackQuote
|
|
String
|
|
StringSingleQuote
|
|
StringDoubleQuote
|
|
)
|
|
|
|
var (
|
|
tok Token
|
|
|
|
state uint8 = Initial
|
|
)
|
|
for {
|
|
r := s.nextRune()
|
|
switch state {
|
|
case Initial:
|
|
switch {
|
|
case r == _RuneEOF:
|
|
tok.Type = TokEOF
|
|
return tok, nil
|
|
case r == '|':
|
|
tok.Type = TokPipe
|
|
return tok, nil
|
|
case r == '`':
|
|
state = BackQuote
|
|
case unicode.IsSpace(r):
|
|
state = Space
|
|
s.unreadRune(r)
|
|
case r == '\'':
|
|
state = StringSingleQuote
|
|
case r == '"':
|
|
state = StringDoubleQuote
|
|
default:
|
|
state = String
|
|
s.unreadRune(r)
|
|
}
|
|
case Space:
|
|
if r == _RuneEOF || !unicode.IsSpace(r) {
|
|
s.unreadRune(r)
|
|
tok.Type = TokSpace
|
|
return tok, nil
|
|
}
|
|
case BackQuote:
|
|
switch r {
|
|
case _RuneEOF:
|
|
return tok, ErrInvalidSyntax
|
|
case '`':
|
|
tok.Type = TokBackQuote
|
|
return tok, nil
|
|
default:
|
|
tok.Value = append(tok.Value, r)
|
|
}
|
|
case String:
|
|
switch {
|
|
case r == _RuneEOF, r == '|', r == '`', r == '\'', r == '"', unicode.IsSpace(r):
|
|
tok.Type = TokString
|
|
s.unreadRune(r)
|
|
return tok, nil
|
|
case r == '\\':
|
|
nr := s.nextRune()
|
|
if nr == _RuneEOF {
|
|
return tok, ErrInvalidSyntax
|
|
}
|
|
tok.Value = append(tok.Value, nr)
|
|
default:
|
|
tok.Value = append(tok.Value, r)
|
|
}
|
|
case StringSingleQuote, StringDoubleQuote:
|
|
switch r {
|
|
case _RuneEOF:
|
|
return tok, ErrInvalidSyntax
|
|
case '\'', '"':
|
|
if singleQuote := state == StringSingleQuote; singleQuote == (r == '\'') {
|
|
if singleQuote {
|
|
tok.Type = TokStringSingleQuote
|
|
} else {
|
|
tok.Type = TokStringDoubleQuote
|
|
}
|
|
return tok, nil
|
|
} else {
|
|
tok.Value = append(tok.Value, r)
|
|
}
|
|
case '\\':
|
|
nr := s.nextRune()
|
|
if escape, ok := s.isEscapeChars(nr); ok {
|
|
tok.Value = append(tok.Value, escape)
|
|
} else {
|
|
tok.Value = append(tok.Value, r)
|
|
s.unreadRune(nr)
|
|
}
|
|
default:
|
|
tok.Value = append(tok.Value, r)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Scan is a utility function help split input text as tokens.
|
|
func Scan(text string) ([]Token, error) {
|
|
s := NewScanner(text)
|
|
var tokens []Token
|
|
for {
|
|
tok, err := s.Next()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
tokens = append(tokens, tok)
|
|
if tok.Type == TokEOF {
|
|
break
|
|
}
|
|
}
|
|
return tokens, nil
|
|
}
|