package argv import ( "unicode" ) // Scanner is a cmdline string scanner. // // It split cmdline string to tokens: space, string, pipe, reverse quote string. type Scanner struct { text []rune rpos int } // NewScanner create a scanner and init it's internal states. func NewScanner(text string) *Scanner { return &Scanner{ text: []rune(text), } } const _RuneEOF = 0 func (s *Scanner) nextRune() rune { if s.rpos >= len(s.text) { return _RuneEOF } r := s.text[s.rpos] s.rpos++ return r } func (s *Scanner) unreadRune(r rune) { if r != _RuneEOF { s.rpos-- } } func (s *Scanner) isEscapeChars(r rune) (rune, bool) { switch r { case 'a': return '\a', true case 'b': return '\b', true case 'f': return '\f', true case 'n': return '\n', true case 'r': return '\r', true case 't': return '\t', true case 'v': return '\v', true case '\\': return '\\', true case '$': return '$', true } return r, false } // TokenType is the type of tokens recognized by the scanner. type TokenType uint32 // Token is generated by the scanner with a type and value. type Token struct { Type TokenType Value []rune } const ( // TokString for string TokString TokenType = iota + 1 TokStringSingleQuote TokStringDoubleQuote // TokPipe is the '|' character TokPipe // TokBackQuote is reverse quoted string TokBackQuote // TokSpace represent space character sequence TokSpace // TokEOF means the input end. TokEOF ) // Next return next token, if it reach the end, TOK_EOF will be returned. // // Error is returned for invalid syntax such as unpaired quotes. func (s *Scanner) Next() (Token, error) { const ( Initial = iota + 1 Space BackQuote String StringSingleQuote StringDoubleQuote ) var ( tok Token state uint8 = Initial ) for { r := s.nextRune() switch state { case Initial: switch { case r == _RuneEOF: tok.Type = TokEOF return tok, nil case r == '|': tok.Type = TokPipe return tok, nil case r == '`': state = BackQuote case unicode.IsSpace(r): state = Space s.unreadRune(r) case r == '\'': state = StringSingleQuote case r == '"': state = StringDoubleQuote default: state = String s.unreadRune(r) } case Space: if r == _RuneEOF || !unicode.IsSpace(r) { s.unreadRune(r) tok.Type = TokSpace return tok, nil } case BackQuote: switch r { case _RuneEOF: return tok, ErrInvalidSyntax case '`': tok.Type = TokBackQuote return tok, nil default: tok.Value = append(tok.Value, r) } case String: switch { case r == _RuneEOF, r == '|', r == '`', r == '\'', r == '"', unicode.IsSpace(r): tok.Type = TokString s.unreadRune(r) return tok, nil case r == '\\': nr := s.nextRune() if nr == _RuneEOF { return tok, ErrInvalidSyntax } tok.Value = append(tok.Value, nr) default: tok.Value = append(tok.Value, r) } case StringSingleQuote, StringDoubleQuote: switch r { case _RuneEOF: return tok, ErrInvalidSyntax case '\'', '"': if singleQuote := state == StringSingleQuote; singleQuote == (r == '\'') { if singleQuote { tok.Type = TokStringSingleQuote } else { tok.Type = TokStringDoubleQuote } return tok, nil } else { tok.Value = append(tok.Value, r) } case '\\': nr := s.nextRune() if escape, ok := s.isEscapeChars(nr); ok { tok.Value = append(tok.Value, escape) } else { tok.Value = append(tok.Value, r) s.unreadRune(nr) } default: tok.Value = append(tok.Value, r) } } } } // Scan is a utility function help split input text as tokens. func Scan(text string) ([]Token, error) { s := NewScanner(text) var tokens []Token for { tok, err := s.Next() if err != nil { return nil, err } tokens = append(tokens, tok) if tok.Type == TokEOF { break } } return tokens, nil }