tokenizer.go 1.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374
  1. package tokenizer
  2. type TokenType = string
  3. const (
  4. Underline TokenType = "_"
  5. Star TokenType = "*"
  6. Hash TokenType = "#"
  7. Backtick TokenType = "`"
  8. LeftSquareBracket TokenType = "["
  9. RightSquareBracket TokenType = "]"
  10. LeftParenthesis TokenType = "("
  11. RightParenthesis TokenType = ")"
  12. ExclamationMark TokenType = "!"
  13. Newline TokenType = "\n"
  14. Space TokenType = " "
  15. )
  16. const (
  17. Text TokenType = ""
  18. )
// Token is a single lexical unit: a type tag together with the text it
// carries.
type Token struct {
	// Type is the kind of the token. Tokenize sets it to one of the
	// TokenType constants.
	Type TokenType
	// Value is the text of the token.
	Value string
}
  23. func NewToken(tp, text string) *Token {
  24. return &Token{
  25. Type: tp,
  26. Value: text,
  27. }
  28. }
  29. func Tokenize(text string) []*Token {
  30. tokens := []*Token{}
  31. for _, c := range text {
  32. switch c {
  33. case '_':
  34. tokens = append(tokens, NewToken(Underline, "_"))
  35. case '*':
  36. tokens = append(tokens, NewToken(Star, "*"))
  37. case '#':
  38. tokens = append(tokens, NewToken(Hash, "#"))
  39. case '`':
  40. tokens = append(tokens, NewToken(Backtick, "`"))
  41. case '[':
  42. tokens = append(tokens, NewToken(LeftSquareBracket, "["))
  43. case ']':
  44. tokens = append(tokens, NewToken(RightSquareBracket, "]"))
  45. case '(':
  46. tokens = append(tokens, NewToken(LeftParenthesis, "("))
  47. case ')':
  48. tokens = append(tokens, NewToken(RightParenthesis, ")"))
  49. case '!':
  50. tokens = append(tokens, NewToken(ExclamationMark, "!"))
  51. case '\n':
  52. tokens = append(tokens, NewToken(Newline, "\n"))
  53. case ' ':
  54. tokens = append(tokens, NewToken(Space, " "))
  55. default:
  56. var lastToken *Token
  57. if len(tokens) > 0 {
  58. lastToken = tokens[len(tokens)-1]
  59. }
  60. if lastToken == nil || lastToken.Type != Text {
  61. tokens = append(tokens, NewToken(Text, string(c)))
  62. } else {
  63. lastToken.Value += string(c)
  64. }
  65. }
  66. }
  67. return tokens
  68. }