mbcssm.py 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661
  1. ######################## BEGIN LICENSE BLOCK ########################
  2. # The Original Code is mozilla.org code.
  3. #
  4. # The Initial Developer of the Original Code is
  5. # Netscape Communications Corporation.
  6. # Portions created by the Initial Developer are Copyright (C) 1998
  7. # the Initial Developer. All Rights Reserved.
  8. #
  9. # Contributor(s):
  10. # Mark Pilgrim - port to Python
  11. #
  12. # This library is free software; you can redistribute it and/or
  13. # modify it under the terms of the GNU Lesser General Public
  14. # License as published by the Free Software Foundation; either
  15. # version 2.1 of the License, or (at your option) any later version.
  16. #
  17. # This library is distributed in the hope that it will be useful,
  18. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  19. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  20. # Lesser General Public License for more details.
  21. #
  22. # You should have received a copy of the GNU Lesser General Public
  23. # License along with this library; if not, write to the Free Software
  24. # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
  25. # 02110-1301 USA
  26. ######################### END LICENSE BLOCK #########################
  27. from .codingstatemachinedict import CodingStateMachineDict
  28. from .enums import MachineState
# BIG5
#
# Lookup tables for the Big5 coding state machine, bundled into
# BIG5_SM_MODEL at the end of this section.

# fmt: off
# Class table: one entry per byte value 0x00-0xff, eight entries per row;
# the trailing comment on each row is the byte range that row covers.
# Each entry is the class number (0-4) assigned to that byte.
BIG5_CLS = (
    1, 1, 1, 1, 1, 1, 1, 1,  # 00 - 07    #allow 0x00 as legal value
    1, 1, 1, 1, 1, 1, 0, 0,  # 08 - 0f
    1, 1, 1, 1, 1, 1, 1, 1,  # 10 - 17
    1, 1, 1, 0, 1, 1, 1, 1,  # 18 - 1f
    1, 1, 1, 1, 1, 1, 1, 1,  # 20 - 27
    1, 1, 1, 1, 1, 1, 1, 1,  # 28 - 2f
    1, 1, 1, 1, 1, 1, 1, 1,  # 30 - 37
    1, 1, 1, 1, 1, 1, 1, 1,  # 38 - 3f
    2, 2, 2, 2, 2, 2, 2, 2,  # 40 - 47
    2, 2, 2, 2, 2, 2, 2, 2,  # 48 - 4f
    2, 2, 2, 2, 2, 2, 2, 2,  # 50 - 57
    2, 2, 2, 2, 2, 2, 2, 2,  # 58 - 5f
    2, 2, 2, 2, 2, 2, 2, 2,  # 60 - 67
    2, 2, 2, 2, 2, 2, 2, 2,  # 68 - 6f
    2, 2, 2, 2, 2, 2, 2, 2,  # 70 - 77
    2, 2, 2, 2, 2, 2, 2, 1,  # 78 - 7f
    4, 4, 4, 4, 4, 4, 4, 4,  # 80 - 87
    4, 4, 4, 4, 4, 4, 4, 4,  # 88 - 8f
    4, 4, 4, 4, 4, 4, 4, 4,  # 90 - 97
    4, 4, 4, 4, 4, 4, 4, 4,  # 98 - 9f
    4, 3, 3, 3, 3, 3, 3, 3,  # a0 - a7
    3, 3, 3, 3, 3, 3, 3, 3,  # a8 - af
    3, 3, 3, 3, 3, 3, 3, 3,  # b0 - b7
    3, 3, 3, 3, 3, 3, 3, 3,  # b8 - bf
    3, 3, 3, 3, 3, 3, 3, 3,  # c0 - c7
    3, 3, 3, 3, 3, 3, 3, 3,  # c8 - cf
    3, 3, 3, 3, 3, 3, 3, 3,  # d0 - d7
    3, 3, 3, 3, 3, 3, 3, 3,  # d8 - df
    3, 3, 3, 3, 3, 3, 3, 3,  # e0 - e7
    3, 3, 3, 3, 3, 3, 3, 3,  # e8 - ef
    3, 3, 3, 3, 3, 3, 3, 3,  # f0 - f7
    3, 3, 3, 3, 3, 3, 3, 0  # f8 - ff
)

# State table: a flat tuple of next-state values, eight per row; the
# trailing comment on each row is the flat index range (hex) it covers.
# Entries are MachineState members or bare integers for intermediate states.
# NOTE(review): presumably indexed as state * class_factor + class by the
# code that consumes the model -- confirm against that implementation.
BIG5_ST = (
    MachineState.ERROR,MachineState.START,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
    MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,#08-0f
    MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START#10-17
)
# fmt: on

# Character length per class, indexed by class number (one entry per class).
BIG5_CHAR_LEN_TABLE = (0, 1, 1, 2, 0)

# Model dictionary tying the tables above together.  "class_factor" matches
# the number of entries in BIG5_CHAR_LEN_TABLE (5 classes).
BIG5_SM_MODEL: CodingStateMachineDict = {
    "class_table": BIG5_CLS,
    "class_factor": 5,
    "state_table": BIG5_ST,
    "char_len_table": BIG5_CHAR_LEN_TABLE,
    "name": "Big5",
}
# CP949
#
# Lookup tables for the CP949 coding state machine, bundled into
# CP949_SM_MODEL at the end of this section.

# fmt: off
# Class table: one entry per byte value 0x00-0xff, sixteen entries per row;
# the trailing comment on each row is the byte range that row covers.
# Each entry is the class number (0-9) assigned to that byte.
CP949_CLS = (
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,  # 00 - 0f
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,  # 10 - 1f
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  # 20 - 2f
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  # 30 - 3f
    1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  # 40 - 4f
    4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1, 1,  # 50 - 5f
    1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,  # 60 - 6f
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1, 1,  # 70 - 7f
    0, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,  # 80 - 8f
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,  # 90 - 9f
    6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8,  # a0 - af
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,  # b0 - bf
    7, 7, 7, 7, 7, 7, 9, 2, 2, 3, 2, 2, 2, 2, 2, 2,  # c0 - cf
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  # d0 - df
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  # e0 - ef
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0,  # f0 - ff
)

# State table laid out as a grid: one row per previous state (named in the
# trailing comment), one column per class 0-9 (see the header comment).
# Each entry is the next state: a MachineState member or a bare integer
# for an intermediate state.
CP949_ST = (
    #cls= 0 1 2 3 4 5 6 7 8 9 # previous state =
    MachineState.ERROR,MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START, 4, 5,MachineState.ERROR, 6, # MachineState.START
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, # MachineState.ERROR
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME, # MachineState.ITS_ME
    MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START, # 3
    MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, # 4
    MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, # 5
    MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START, # 6
)
# fmt: on

# Character length per class, indexed by class number (one entry per class).
CP949_CHAR_LEN_TABLE = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2)

# Model dictionary tying the tables above together.  "class_factor" matches
# the number of columns in CP949_ST and entries in CP949_CHAR_LEN_TABLE (10).
CP949_SM_MODEL: CodingStateMachineDict = {
    "class_table": CP949_CLS,
    "class_factor": 10,
    "state_table": CP949_ST,
    "char_len_table": CP949_CHAR_LEN_TABLE,
    "name": "CP949",
}
# EUC-JP
#
# Lookup tables for the EUC-JP coding state machine, bundled into
# EUCJP_SM_MODEL at the end of this section.

# fmt: off
# Class table: one entry per byte value 0x00-0xff, eight entries per row;
# the trailing comment on each row is the byte range that row covers.
# Each entry is the class number (0-5) assigned to that byte.
EUCJP_CLS = (
    4, 4, 4, 4, 4, 4, 4, 4,  # 00 - 07
    4, 4, 4, 4, 4, 4, 5, 5,  # 08 - 0f
    4, 4, 4, 4, 4, 4, 4, 4,  # 10 - 17
    4, 4, 4, 5, 4, 4, 4, 4,  # 18 - 1f
    4, 4, 4, 4, 4, 4, 4, 4,  # 20 - 27
    4, 4, 4, 4, 4, 4, 4, 4,  # 28 - 2f
    4, 4, 4, 4, 4, 4, 4, 4,  # 30 - 37
    4, 4, 4, 4, 4, 4, 4, 4,  # 38 - 3f
    4, 4, 4, 4, 4, 4, 4, 4,  # 40 - 47
    4, 4, 4, 4, 4, 4, 4, 4,  # 48 - 4f
    4, 4, 4, 4, 4, 4, 4, 4,  # 50 - 57
    4, 4, 4, 4, 4, 4, 4, 4,  # 58 - 5f
    4, 4, 4, 4, 4, 4, 4, 4,  # 60 - 67
    4, 4, 4, 4, 4, 4, 4, 4,  # 68 - 6f
    4, 4, 4, 4, 4, 4, 4, 4,  # 70 - 77
    4, 4, 4, 4, 4, 4, 4, 4,  # 78 - 7f
    5, 5, 5, 5, 5, 5, 5, 5,  # 80 - 87
    5, 5, 5, 5, 5, 5, 1, 3,  # 88 - 8f
    5, 5, 5, 5, 5, 5, 5, 5,  # 90 - 97
    5, 5, 5, 5, 5, 5, 5, 5,  # 98 - 9f
    5, 2, 2, 2, 2, 2, 2, 2,  # a0 - a7
    2, 2, 2, 2, 2, 2, 2, 2,  # a8 - af
    2, 2, 2, 2, 2, 2, 2, 2,  # b0 - b7
    2, 2, 2, 2, 2, 2, 2, 2,  # b8 - bf
    2, 2, 2, 2, 2, 2, 2, 2,  # c0 - c7
    2, 2, 2, 2, 2, 2, 2, 2,  # c8 - cf
    2, 2, 2, 2, 2, 2, 2, 2,  # d0 - d7
    2, 2, 2, 2, 2, 2, 2, 2,  # d8 - df
    0, 0, 0, 0, 0, 0, 0, 0,  # e0 - e7
    0, 0, 0, 0, 0, 0, 0, 0,  # e8 - ef
    0, 0, 0, 0, 0, 0, 0, 0,  # f0 - f7
    0, 0, 0, 0, 0, 0, 0, 5  # f8 - ff
)

# State table: a flat tuple of next-state values, eight per row; the
# trailing comment on each row is the flat index range (hex) it covers.
# Entries are MachineState members or bare integers for intermediate states.
# NOTE(review): presumably indexed as state * class_factor + class by the
# code that consumes the model -- confirm against that implementation.
EUCJP_ST = (
    3, 4, 3, 5,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#10-17
    MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 3,MachineState.ERROR,#18-1f
    3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START#20-27
)
# fmt: on

# Character length per class, indexed by class number (one entry per class).
EUCJP_CHAR_LEN_TABLE = (2, 2, 2, 3, 1, 0)

# Model dictionary tying the tables above together.  "class_factor" matches
# the number of entries in EUCJP_CHAR_LEN_TABLE (6 classes).
EUCJP_SM_MODEL: CodingStateMachineDict = {
    "class_table": EUCJP_CLS,
    "class_factor": 6,
    "state_table": EUCJP_ST,
    "char_len_table": EUCJP_CHAR_LEN_TABLE,
    "name": "EUC-JP",
}
# EUC-KR
#
# Lookup tables for the EUC-KR coding state machine, bundled into
# EUCKR_SM_MODEL at the end of this section.

# fmt: off
# Class table: one entry per byte value 0x00-0xff, eight entries per row;
# the trailing comment on each row is the byte range that row covers.
# Each entry is the class number (0-3) assigned to that byte.
EUCKR_CLS = (
    1, 1, 1, 1, 1, 1, 1, 1,  # 00 - 07
    1, 1, 1, 1, 1, 1, 0, 0,  # 08 - 0f
    1, 1, 1, 1, 1, 1, 1, 1,  # 10 - 17
    1, 1, 1, 0, 1, 1, 1, 1,  # 18 - 1f
    1, 1, 1, 1, 1, 1, 1, 1,  # 20 - 27
    1, 1, 1, 1, 1, 1, 1, 1,  # 28 - 2f
    1, 1, 1, 1, 1, 1, 1, 1,  # 30 - 37
    1, 1, 1, 1, 1, 1, 1, 1,  # 38 - 3f
    1, 1, 1, 1, 1, 1, 1, 1,  # 40 - 47
    1, 1, 1, 1, 1, 1, 1, 1,  # 48 - 4f
    1, 1, 1, 1, 1, 1, 1, 1,  # 50 - 57
    1, 1, 1, 1, 1, 1, 1, 1,  # 58 - 5f
    1, 1, 1, 1, 1, 1, 1, 1,  # 60 - 67
    1, 1, 1, 1, 1, 1, 1, 1,  # 68 - 6f
    1, 1, 1, 1, 1, 1, 1, 1,  # 70 - 77
    1, 1, 1, 1, 1, 1, 1, 1,  # 78 - 7f
    0, 0, 0, 0, 0, 0, 0, 0,  # 80 - 87
    0, 0, 0, 0, 0, 0, 0, 0,  # 88 - 8f
    0, 0, 0, 0, 0, 0, 0, 0,  # 90 - 97
    0, 0, 0, 0, 0, 0, 0, 0,  # 98 - 9f
    0, 2, 2, 2, 2, 2, 2, 2,  # a0 - a7
    2, 2, 2, 2, 2, 3, 3, 3,  # a8 - af
    2, 2, 2, 2, 2, 2, 2, 2,  # b0 - b7
    2, 2, 2, 2, 2, 2, 2, 2,  # b8 - bf
    2, 2, 2, 2, 2, 2, 2, 2,  # c0 - c7
    2, 3, 2, 2, 2, 2, 2, 2,  # c8 - cf
    2, 2, 2, 2, 2, 2, 2, 2,  # d0 - d7
    2, 2, 2, 2, 2, 2, 2, 2,  # d8 - df
    2, 2, 2, 2, 2, 2, 2, 2,  # e0 - e7
    2, 2, 2, 2, 2, 2, 2, 2,  # e8 - ef
    2, 2, 2, 2, 2, 2, 2, 2,  # f0 - f7
    2, 2, 2, 2, 2, 2, 2, 0  # f8 - ff
)

# State table: a flat tuple of next-state values, eight per row; the
# trailing comment on each row is the flat index range (hex) it covers.
# Entries are MachineState members or bare integers for intermediate states.
# NOTE(review): presumably indexed as state * class_factor + class by the
# code that consumes the model -- confirm against that implementation.
EUCKR_ST = (
    MachineState.ERROR,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #08-0f
)
# fmt: on

# Character length per class, indexed by class number (one entry per class).
EUCKR_CHAR_LEN_TABLE = (0, 1, 2, 0)

# Model dictionary tying the tables above together.  "class_factor" matches
# the number of entries in EUCKR_CHAR_LEN_TABLE (4 classes).
EUCKR_SM_MODEL: CodingStateMachineDict = {
    "class_table": EUCKR_CLS,
    "class_factor": 4,
    "state_table": EUCKR_ST,
    "char_len_table": EUCKR_CHAR_LEN_TABLE,
    "name": "EUC-KR",
}
# JOHAB
#
# Lookup tables for the Johab coding state machine, bundled into
# JOHAB_SM_MODEL at the end of this section.

# fmt: off
# Class table: one entry per byte value 0x00-0xff, eight entries per row;
# the trailing comment on each row is the byte range that row covers.
# Each entry is the class number (0-9) assigned to that byte.
JOHAB_CLS = (
    4,4,4,4,4,4,4,4,  # 00 - 07
    4,4,4,4,4,4,0,0,  # 08 - 0f
    4,4,4,4,4,4,4,4,  # 10 - 17
    4,4,4,0,4,4,4,4,  # 18 - 1f
    4,4,4,4,4,4,4,4,  # 20 - 27
    4,4,4,4,4,4,4,4,  # 28 - 2f
    4,3,3,3,3,3,3,3,  # 30 - 37
    3,3,3,3,3,3,3,3,  # 38 - 3f
    3,1,1,1,1,1,1,1,  # 40 - 47
    1,1,1,1,1,1,1,1,  # 48 - 4f
    1,1,1,1,1,1,1,1,  # 50 - 57
    1,1,1,1,1,1,1,1,  # 58 - 5f
    1,1,1,1,1,1,1,1,  # 60 - 67
    1,1,1,1,1,1,1,1,  # 68 - 6f
    1,1,1,1,1,1,1,1,  # 70 - 77
    1,1,1,1,1,1,1,2,  # 78 - 7f
    6,6,6,6,8,8,8,8,  # 80 - 87
    8,8,8,8,8,8,8,8,  # 88 - 8f
    8,7,7,7,7,7,7,7,  # 90 - 97
    7,7,7,7,7,7,7,7,  # 98 - 9f
    7,7,7,7,7,7,7,7,  # a0 - a7
    7,7,7,7,7,7,7,7,  # a8 - af
    7,7,7,7,7,7,7,7,  # b0 - b7
    7,7,7,7,7,7,7,7,  # b8 - bf
    7,7,7,7,7,7,7,7,  # c0 - c7
    7,7,7,7,7,7,7,7,  # c8 - cf
    7,7,7,7,5,5,5,5,  # d0 - d7
    5,9,9,9,9,9,9,5,  # d8 - df
    9,9,9,9,9,9,9,9,  # e0 - e7
    9,9,9,9,9,9,9,9,  # e8 - ef
    9,9,9,9,9,9,9,9,  # f0 - f7
    9,9,5,5,5,5,5,0  # f8 - ff
)

# State table laid out as a grid: one row per state (named in the trailing
# comment), one column per class 0-9 (see the header comment).  Each entry
# is the next state: a MachineState member or a bare integer for an
# intermediate state.
# NOTE(review): the second and third rows are labelled ITS_ME then ERROR,
# whereas CP949_ST in this file orders its rows START, ERROR, ITS_ME.  If
# rows are selected by the numeric value of the current state, one of the
# two orderings is inconsistent -- confirm against the MachineState
# definition before relying on these two rows.
JOHAB_ST = (
    # cls = 0 1 2 3 4 5 6 7 8 9
    MachineState.ERROR ,MachineState.START ,MachineState.START ,MachineState.START ,MachineState.START ,MachineState.ERROR ,MachineState.ERROR ,3 ,3 ,4 , # MachineState.START
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME, # MachineState.ITS_ME
    MachineState.ERROR ,MachineState.ERROR ,MachineState.ERROR ,MachineState.ERROR ,MachineState.ERROR ,MachineState.ERROR ,MachineState.ERROR ,MachineState.ERROR ,MachineState.ERROR ,MachineState.ERROR , # MachineState.ERROR
    MachineState.ERROR ,MachineState.START ,MachineState.START ,MachineState.ERROR ,MachineState.ERROR ,MachineState.START ,MachineState.START ,MachineState.START ,MachineState.START ,MachineState.START , # 3
    MachineState.ERROR ,MachineState.START ,MachineState.ERROR ,MachineState.START ,MachineState.ERROR ,MachineState.START ,MachineState.ERROR ,MachineState.START ,MachineState.ERROR ,MachineState.START , # 4
)
# fmt: on

# Character length per class, indexed by class number (one entry per class).
JOHAB_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 0, 0, 2, 2, 2)

# Model dictionary tying the tables above together.  "class_factor" matches
# the number of columns in JOHAB_ST and entries in JOHAB_CHAR_LEN_TABLE (10).
JOHAB_SM_MODEL: CodingStateMachineDict = {
    "class_table": JOHAB_CLS,
    "class_factor": 10,
    "state_table": JOHAB_ST,
    "char_len_table": JOHAB_CHAR_LEN_TABLE,
    "name": "Johab",
}
# EUC-TW
#
# Lookup tables for the EUC-TW coding state machine, bundled into
# EUCTW_SM_MODEL at the end of this section.

# fmt: off
# Class table: one entry per byte value 0x00-0xff, eight entries per row;
# the trailing comment on each row is the byte range that row covers.
# Each entry is the class number (0-6) assigned to that byte.
EUCTW_CLS = (
    2, 2, 2, 2, 2, 2, 2, 2,  # 00 - 07
    2, 2, 2, 2, 2, 2, 0, 0,  # 08 - 0f
    2, 2, 2, 2, 2, 2, 2, 2,  # 10 - 17
    2, 2, 2, 0, 2, 2, 2, 2,  # 18 - 1f
    2, 2, 2, 2, 2, 2, 2, 2,  # 20 - 27
    2, 2, 2, 2, 2, 2, 2, 2,  # 28 - 2f
    2, 2, 2, 2, 2, 2, 2, 2,  # 30 - 37
    2, 2, 2, 2, 2, 2, 2, 2,  # 38 - 3f
    2, 2, 2, 2, 2, 2, 2, 2,  # 40 - 47
    2, 2, 2, 2, 2, 2, 2, 2,  # 48 - 4f
    2, 2, 2, 2, 2, 2, 2, 2,  # 50 - 57
    2, 2, 2, 2, 2, 2, 2, 2,  # 58 - 5f
    2, 2, 2, 2, 2, 2, 2, 2,  # 60 - 67
    2, 2, 2, 2, 2, 2, 2, 2,  # 68 - 6f
    2, 2, 2, 2, 2, 2, 2, 2,  # 70 - 77
    2, 2, 2, 2, 2, 2, 2, 2,  # 78 - 7f
    0, 0, 0, 0, 0, 0, 0, 0,  # 80 - 87
    0, 0, 0, 0, 0, 0, 6, 0,  # 88 - 8f
    0, 0, 0, 0, 0, 0, 0, 0,  # 90 - 97
    0, 0, 0, 0, 0, 0, 0, 0,  # 98 - 9f
    0, 3, 4, 4, 4, 4, 4, 4,  # a0 - a7
    5, 5, 1, 1, 1, 1, 1, 1,  # a8 - af
    1, 1, 1, 1, 1, 1, 1, 1,  # b0 - b7
    1, 1, 1, 1, 1, 1, 1, 1,  # b8 - bf
    1, 1, 3, 1, 3, 3, 3, 3,  # c0 - c7
    3, 3, 3, 3, 3, 3, 3, 3,  # c8 - cf
    3, 3, 3, 3, 3, 3, 3, 3,  # d0 - d7
    3, 3, 3, 3, 3, 3, 3, 3,  # d8 - df
    3, 3, 3, 3, 3, 3, 3, 3,  # e0 - e7
    3, 3, 3, 3, 3, 3, 3, 3,  # e8 - ef
    3, 3, 3, 3, 3, 3, 3, 3,  # f0 - f7
    3, 3, 3, 3, 3, 3, 3, 0  # f8 - ff
)

# State table: a flat tuple of next-state values, eight per row; the
# trailing comment on each row is the flat index range (hex) it covers.
# Entries are MachineState members or bare integers for intermediate states.
# NOTE(review): presumably indexed as state * class_factor + class by the
# code that consumes the model -- confirm against that implementation.
EUCTW_ST = (
    MachineState.ERROR,MachineState.ERROR,MachineState.START, 3, 3, 3, 4,MachineState.ERROR,#00-07
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,MachineState.ERROR,#10-17
    MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
    5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,#20-27
    MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f
)
# fmt: on

# Character length per class, indexed by class number (one entry per class).
EUCTW_CHAR_LEN_TABLE = (0, 0, 1, 2, 2, 2, 3)

# Model dictionary tying the tables above together.  "class_factor" matches
# the number of entries in EUCTW_CHAR_LEN_TABLE (7 classes).
EUCTW_SM_MODEL: CodingStateMachineDict = {
    "class_table": EUCTW_CLS,
    "class_factor": 7,
    "state_table": EUCTW_ST,
    "char_len_table": EUCTW_CHAR_LEN_TABLE,
    "name": "x-euc-tw",
}
# GB2312
#
# Lookup tables for the GB2312 coding state machine, bundled into
# GB2312_SM_MODEL at the end of this section.

# fmt: off
# Class table: one entry per byte value 0x00-0xff, eight entries per row;
# the trailing comment on each row is the byte range that row covers.
# Each entry is the class number (0-6) assigned to that byte.
GB2312_CLS = (
    1, 1, 1, 1, 1, 1, 1, 1,  # 00 - 07
    1, 1, 1, 1, 1, 1, 0, 0,  # 08 - 0f
    1, 1, 1, 1, 1, 1, 1, 1,  # 10 - 17
    1, 1, 1, 0, 1, 1, 1, 1,  # 18 - 1f
    1, 1, 1, 1, 1, 1, 1, 1,  # 20 - 27
    1, 1, 1, 1, 1, 1, 1, 1,  # 28 - 2f
    3, 3, 3, 3, 3, 3, 3, 3,  # 30 - 37
    3, 3, 1, 1, 1, 1, 1, 1,  # 38 - 3f
    2, 2, 2, 2, 2, 2, 2, 2,  # 40 - 47
    2, 2, 2, 2, 2, 2, 2, 2,  # 48 - 4f
    2, 2, 2, 2, 2, 2, 2, 2,  # 50 - 57
    2, 2, 2, 2, 2, 2, 2, 2,  # 58 - 5f
    2, 2, 2, 2, 2, 2, 2, 2,  # 60 - 67
    2, 2, 2, 2, 2, 2, 2, 2,  # 68 - 6f
    2, 2, 2, 2, 2, 2, 2, 2,  # 70 - 77
    2, 2, 2, 2, 2, 2, 2, 4,  # 78 - 7f
    5, 6, 6, 6, 6, 6, 6, 6,  # 80 - 87
    6, 6, 6, 6, 6, 6, 6, 6,  # 88 - 8f
    6, 6, 6, 6, 6, 6, 6, 6,  # 90 - 97
    6, 6, 6, 6, 6, 6, 6, 6,  # 98 - 9f
    6, 6, 6, 6, 6, 6, 6, 6,  # a0 - a7
    6, 6, 6, 6, 6, 6, 6, 6,  # a8 - af
    6, 6, 6, 6, 6, 6, 6, 6,  # b0 - b7
    6, 6, 6, 6, 6, 6, 6, 6,  # b8 - bf
    6, 6, 6, 6, 6, 6, 6, 6,  # c0 - c7
    6, 6, 6, 6, 6, 6, 6, 6,  # c8 - cf
    6, 6, 6, 6, 6, 6, 6, 6,  # d0 - d7
    6, 6, 6, 6, 6, 6, 6, 6,  # d8 - df
    6, 6, 6, 6, 6, 6, 6, 6,  # e0 - e7
    6, 6, 6, 6, 6, 6, 6, 6,  # e8 - ef
    6, 6, 6, 6, 6, 6, 6, 6,  # f0 - f7
    6, 6, 6, 6, 6, 6, 6, 0  # f8 - ff
)

# State table: a flat tuple of next-state values, eight per row; the
# trailing comment on each row is the flat index range (hex) it covers.
# Entries are MachineState members or bare integers for intermediate states.
# NOTE(review): presumably indexed as state * class_factor + class by the
# code that consumes the model -- confirm against that implementation.
GB2312_ST = (
    MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, 3,MachineState.ERROR,#00-07
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,#10-17
    4,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
    MachineState.ERROR,MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#20-27
    MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f
)
# fmt: on

# Character length per class, indexed by class number (one entry per class).
#
# To be accurate, the length of class 6 can be either 2 or 4.
# But it is not necessary to discriminate between the two, since
# the length is used for frequency analysis only, and each code
# range is validated there as well.  So it is safe to set it to
# 2 here.
GB2312_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 1, 2)

# Model dictionary tying the tables above together.  "class_factor" matches
# the number of entries in GB2312_CHAR_LEN_TABLE (7 classes).
GB2312_SM_MODEL: CodingStateMachineDict = {
    "class_table": GB2312_CLS,
    "class_factor": 7,
    "state_table": GB2312_ST,
    "char_len_table": GB2312_CHAR_LEN_TABLE,
    "name": "GB2312",
}
# Shift_JIS
#
# Lookup tables for the Shift_JIS coding state machine, bundled into
# SJIS_SM_MODEL at the end of this section.

# fmt: off
# Class table: one entry per byte value 0x00-0xff, eight entries per row;
# the trailing comment on each row is the byte range that row covers.
# Each entry is the class number assigned to that byte.  Only classes 0-4
# appear in this table, although the model below declares 6 classes.
SJIS_CLS = (
    1, 1, 1, 1, 1, 1, 1, 1,  # 00 - 07
    1, 1, 1, 1, 1, 1, 0, 0,  # 08 - 0f
    1, 1, 1, 1, 1, 1, 1, 1,  # 10 - 17
    1, 1, 1, 0, 1, 1, 1, 1,  # 18 - 1f
    1, 1, 1, 1, 1, 1, 1, 1,  # 20 - 27
    1, 1, 1, 1, 1, 1, 1, 1,  # 28 - 2f
    1, 1, 1, 1, 1, 1, 1, 1,  # 30 - 37
    1, 1, 1, 1, 1, 1, 1, 1,  # 38 - 3f
    2, 2, 2, 2, 2, 2, 2, 2,  # 40 - 47
    2, 2, 2, 2, 2, 2, 2, 2,  # 48 - 4f
    2, 2, 2, 2, 2, 2, 2, 2,  # 50 - 57
    2, 2, 2, 2, 2, 2, 2, 2,  # 58 - 5f
    2, 2, 2, 2, 2, 2, 2, 2,  # 60 - 67
    2, 2, 2, 2, 2, 2, 2, 2,  # 68 - 6f
    2, 2, 2, 2, 2, 2, 2, 2,  # 70 - 77
    2, 2, 2, 2, 2, 2, 2, 1,  # 78 - 7f
    3, 3, 3, 3, 3, 2, 2, 3,  # 80 - 87
    3, 3, 3, 3, 3, 3, 3, 3,  # 88 - 8f
    3, 3, 3, 3, 3, 3, 3, 3,  # 90 - 97
    3, 3, 3, 3, 3, 3, 3, 3,  # 98 - 9f
    # 0xa0 is illegal in the Shift_JIS encoding, but some pages do
    # contain such bytes, so we need to be more forgiving of errors here.
    2, 2, 2, 2, 2, 2, 2, 2,  # a0 - a7
    2, 2, 2, 2, 2, 2, 2, 2,  # a8 - af
    2, 2, 2, 2, 2, 2, 2, 2,  # b0 - b7
    2, 2, 2, 2, 2, 2, 2, 2,  # b8 - bf
    2, 2, 2, 2, 2, 2, 2, 2,  # c0 - c7
    2, 2, 2, 2, 2, 2, 2, 2,  # c8 - cf
    2, 2, 2, 2, 2, 2, 2, 2,  # d0 - d7
    2, 2, 2, 2, 2, 2, 2, 2,  # d8 - df
    3, 3, 3, 3, 3, 3, 3, 3,  # e0 - e7
    3, 3, 3, 3, 3, 4, 4, 4,  # e8 - ef
    3, 3, 3, 3, 3, 3, 3, 3,  # f0 - f7
    3, 3, 3, 3, 3, 0, 0, 0,  # f8 - ff
)

# State table: a flat tuple of next-state values, eight per row; the
# trailing comment on each row is the flat index range (hex) it covers.
# Entries are MachineState members or bare integers for intermediate states.
# NOTE(review): presumably indexed as state * class_factor + class by the
# code that consumes the model -- confirm against that implementation.
SJIS_ST = (
    MachineState.ERROR,MachineState.START,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START #10-17
)
# fmt: on

# Character length per class, indexed by class number (one entry per class).
SJIS_CHAR_LEN_TABLE = (0, 1, 1, 2, 0, 0)

# Model dictionary tying the tables above together.  "class_factor" matches
# the number of entries in SJIS_CHAR_LEN_TABLE (6 classes).
SJIS_SM_MODEL: CodingStateMachineDict = {
    "class_table": SJIS_CLS,
    "class_factor": 6,
    "state_table": SJIS_ST,
    "char_len_table": SJIS_CHAR_LEN_TABLE,
    "name": "Shift_JIS",
}
# UCS2-BE
#
# Lookup tables for the UCS2-BE coding state machine, bundled into
# UCS2BE_SM_MODEL (reported under the name "UTF-16BE") at the end of this
# section.

# fmt: off
# Class table: one entry per byte value 0x00-0xff, eight entries per row;
# the trailing comment on each row is the byte range that row covers.
# Almost every byte is class 0; the exceptions are 0x0a (class 1),
# 0x0d (class 2), 0x1b and 0x29-0x2d (class 3), 0xfe (class 4) and
# 0xff (class 5).
UCS2BE_CLS = (
    0, 0, 0, 0, 0, 0, 0, 0,  # 00 - 07
    0, 0, 1, 0, 0, 2, 0, 0,  # 08 - 0f
    0, 0, 0, 0, 0, 0, 0, 0,  # 10 - 17
    0, 0, 0, 3, 0, 0, 0, 0,  # 18 - 1f
    0, 0, 0, 0, 0, 0, 0, 0,  # 20 - 27
    0, 3, 3, 3, 3, 3, 0, 0,  # 28 - 2f
    0, 0, 0, 0, 0, 0, 0, 0,  # 30 - 37
    0, 0, 0, 0, 0, 0, 0, 0,  # 38 - 3f
    0, 0, 0, 0, 0, 0, 0, 0,  # 40 - 47
    0, 0, 0, 0, 0, 0, 0, 0,  # 48 - 4f
    0, 0, 0, 0, 0, 0, 0, 0,  # 50 - 57
    0, 0, 0, 0, 0, 0, 0, 0,  # 58 - 5f
    0, 0, 0, 0, 0, 0, 0, 0,  # 60 - 67
    0, 0, 0, 0, 0, 0, 0, 0,  # 68 - 6f
    0, 0, 0, 0, 0, 0, 0, 0,  # 70 - 77
    0, 0, 0, 0, 0, 0, 0, 0,  # 78 - 7f
    0, 0, 0, 0, 0, 0, 0, 0,  # 80 - 87
    0, 0, 0, 0, 0, 0, 0, 0,  # 88 - 8f
    0, 0, 0, 0, 0, 0, 0, 0,  # 90 - 97
    0, 0, 0, 0, 0, 0, 0, 0,  # 98 - 9f
    0, 0, 0, 0, 0, 0, 0, 0,  # a0 - a7
    0, 0, 0, 0, 0, 0, 0, 0,  # a8 - af
    0, 0, 0, 0, 0, 0, 0, 0,  # b0 - b7
    0, 0, 0, 0, 0, 0, 0, 0,  # b8 - bf
    0, 0, 0, 0, 0, 0, 0, 0,  # c0 - c7
    0, 0, 0, 0, 0, 0, 0, 0,  # c8 - cf
    0, 0, 0, 0, 0, 0, 0, 0,  # d0 - d7
    0, 0, 0, 0, 0, 0, 0, 0,  # d8 - df
    0, 0, 0, 0, 0, 0, 0, 0,  # e0 - e7
    0, 0, 0, 0, 0, 0, 0, 0,  # e8 - ef
    0, 0, 0, 0, 0, 0, 0, 0,  # f0 - f7
    0, 0, 0, 0, 0, 0, 4, 5  # f8 - ff
)

# State table: a flat tuple of next-state values, eight per row; the
# trailing comment on each row is the flat index range (hex) it covers.
# Entries are MachineState members or bare integers for intermediate states.
# NOTE(review): presumably indexed as state * class_factor + class by the
# code that consumes the model -- confirm against that implementation.
UCS2BE_ST = (
    5, 7, 7,MachineState.ERROR, 4, 3,MachineState.ERROR,MachineState.ERROR,#00-07
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
    MachineState.ITS_ME,MachineState.ITS_ME, 6, 6, 6, 6,MachineState.ERROR,MachineState.ERROR,#10-17
    6, 6, 6, 6, 6,MachineState.ITS_ME, 6, 6,#18-1f
    6, 6, 6, 6, 5, 7, 7,MachineState.ERROR,#20-27
    5, 8, 6, 6,MachineState.ERROR, 6, 6, 6,#28-2f
    6, 6, 6, 6,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #30-37
)
# fmt: on

# Character length per class, indexed by class number (one entry per class).
# NOTE(review): class 3 is 0 here but 2 in UCS2LE_CHAR_LEN_TABLE, although
# both encodings share the same class table -- confirm this is intentional.
UCS2BE_CHAR_LEN_TABLE = (2, 2, 2, 0, 2, 2)

# Model dictionary tying the tables above together.  "class_factor" matches
# the number of entries in UCS2BE_CHAR_LEN_TABLE (6 classes).
UCS2BE_SM_MODEL: CodingStateMachineDict = {
    "class_table": UCS2BE_CLS,
    "class_factor": 6,
    "state_table": UCS2BE_ST,
    "char_len_table": UCS2BE_CHAR_LEN_TABLE,
    "name": "UTF-16BE",
}
# UCS2-LE
#
# Lookup tables for the UCS2-LE coding state machine, bundled into
# UCS2LE_SM_MODEL (reported under the name "UTF-16LE") at the end of this
# section.

# fmt: off
# Class table: one entry per byte value 0x00-0xff, eight entries per row;
# the trailing comment on each row is the byte range that row covers.
# Almost every byte is class 0; the exceptions are 0x0a (class 1),
# 0x0d (class 2), 0x1b and 0x29-0x2d (class 3), 0xfe (class 4) and
# 0xff (class 5).
UCS2LE_CLS = (
    0, 0, 0, 0, 0, 0, 0, 0,  # 00 - 07
    0, 0, 1, 0, 0, 2, 0, 0,  # 08 - 0f
    0, 0, 0, 0, 0, 0, 0, 0,  # 10 - 17
    0, 0, 0, 3, 0, 0, 0, 0,  # 18 - 1f
    0, 0, 0, 0, 0, 0, 0, 0,  # 20 - 27
    0, 3, 3, 3, 3, 3, 0, 0,  # 28 - 2f
    0, 0, 0, 0, 0, 0, 0, 0,  # 30 - 37
    0, 0, 0, 0, 0, 0, 0, 0,  # 38 - 3f
    0, 0, 0, 0, 0, 0, 0, 0,  # 40 - 47
    0, 0, 0, 0, 0, 0, 0, 0,  # 48 - 4f
    0, 0, 0, 0, 0, 0, 0, 0,  # 50 - 57
    0, 0, 0, 0, 0, 0, 0, 0,  # 58 - 5f
    0, 0, 0, 0, 0, 0, 0, 0,  # 60 - 67
    0, 0, 0, 0, 0, 0, 0, 0,  # 68 - 6f
    0, 0, 0, 0, 0, 0, 0, 0,  # 70 - 77
    0, 0, 0, 0, 0, 0, 0, 0,  # 78 - 7f
    0, 0, 0, 0, 0, 0, 0, 0,  # 80 - 87
    0, 0, 0, 0, 0, 0, 0, 0,  # 88 - 8f
    0, 0, 0, 0, 0, 0, 0, 0,  # 90 - 97
    0, 0, 0, 0, 0, 0, 0, 0,  # 98 - 9f
    0, 0, 0, 0, 0, 0, 0, 0,  # a0 - a7
    0, 0, 0, 0, 0, 0, 0, 0,  # a8 - af
    0, 0, 0, 0, 0, 0, 0, 0,  # b0 - b7
    0, 0, 0, 0, 0, 0, 0, 0,  # b8 - bf
    0, 0, 0, 0, 0, 0, 0, 0,  # c0 - c7
    0, 0, 0, 0, 0, 0, 0, 0,  # c8 - cf
    0, 0, 0, 0, 0, 0, 0, 0,  # d0 - d7
    0, 0, 0, 0, 0, 0, 0, 0,  # d8 - df
    0, 0, 0, 0, 0, 0, 0, 0,  # e0 - e7
    0, 0, 0, 0, 0, 0, 0, 0,  # e8 - ef
    0, 0, 0, 0, 0, 0, 0, 0,  # f0 - f7
    0, 0, 0, 0, 0, 0, 4, 5  # f8 - ff
)

# State table: a flat tuple of next-state values, eight per row; the
# trailing comment on each row is the flat index range (hex) it covers.
# Entries are MachineState members or bare integers for intermediate states.
# NOTE(review): presumably indexed as state * class_factor + class by the
# code that consumes the model -- confirm against that implementation.
UCS2LE_ST = (
    6, 6, 7, 6, 4, 3,MachineState.ERROR,MachineState.ERROR,#00-07
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
    MachineState.ITS_ME,MachineState.ITS_ME, 5, 5, 5,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#10-17
    5, 5, 5,MachineState.ERROR, 5,MachineState.ERROR, 6, 6,#18-1f
    7, 6, 8, 8, 5, 5, 5,MachineState.ERROR,#20-27
    5, 5, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5, 5,#28-2f
    5, 5, 5,MachineState.ERROR, 5,MachineState.ERROR,MachineState.START,MachineState.START #30-37
)
# fmt: on

# Character length per class, indexed by class number (one entry per class).
# NOTE(review): class 3 is 2 here but 0 in UCS2BE_CHAR_LEN_TABLE, although
# both encodings share the same class table -- confirm this is intentional.
UCS2LE_CHAR_LEN_TABLE = (2, 2, 2, 2, 2, 2)

# Model dictionary tying the tables above together.  "class_factor" matches
# the number of entries in UCS2LE_CHAR_LEN_TABLE (6 classes).
UCS2LE_SM_MODEL: CodingStateMachineDict = {
    "class_table": UCS2LE_CLS,
    "class_factor": 6,
    "state_table": UCS2LE_ST,
    "char_len_table": UCS2LE_CHAR_LEN_TABLE,
    "name": "UTF-16LE",
}
# UTF-8
#
# Lookup tables for the UTF-8 coding state machine, bundled into
# UTF8_SM_MODEL at the end of this section.

# fmt: off
# Class table: one entry per byte value 0x00-0xff, eight entries per row;
# the trailing comment on each row is the byte range that row covers.
# Each entry is the class number (0-15) assigned to that byte.
UTF8_CLS = (
    1, 1, 1, 1, 1, 1, 1, 1,  # 00 - 07    #allow 0x00 as a legal value
    1, 1, 1, 1, 1, 1, 0, 0,  # 08 - 0f
    1, 1, 1, 1, 1, 1, 1, 1,  # 10 - 17
    1, 1, 1, 0, 1, 1, 1, 1,  # 18 - 1f
    1, 1, 1, 1, 1, 1, 1, 1,  # 20 - 27
    1, 1, 1, 1, 1, 1, 1, 1,  # 28 - 2f
    1, 1, 1, 1, 1, 1, 1, 1,  # 30 - 37
    1, 1, 1, 1, 1, 1, 1, 1,  # 38 - 3f
    1, 1, 1, 1, 1, 1, 1, 1,  # 40 - 47
    1, 1, 1, 1, 1, 1, 1, 1,  # 48 - 4f
    1, 1, 1, 1, 1, 1, 1, 1,  # 50 - 57
    1, 1, 1, 1, 1, 1, 1, 1,  # 58 - 5f
    1, 1, 1, 1, 1, 1, 1, 1,  # 60 - 67
    1, 1, 1, 1, 1, 1, 1, 1,  # 68 - 6f
    1, 1, 1, 1, 1, 1, 1, 1,  # 70 - 77
    1, 1, 1, 1, 1, 1, 1, 1,  # 78 - 7f
    2, 2, 2, 2, 3, 3, 3, 3,  # 80 - 87
    4, 4, 4, 4, 4, 4, 4, 4,  # 88 - 8f
    4, 4, 4, 4, 4, 4, 4, 4,  # 90 - 97
    4, 4, 4, 4, 4, 4, 4, 4,  # 98 - 9f
    5, 5, 5, 5, 5, 5, 5, 5,  # a0 - a7
    5, 5, 5, 5, 5, 5, 5, 5,  # a8 - af
    5, 5, 5, 5, 5, 5, 5, 5,  # b0 - b7
    5, 5, 5, 5, 5, 5, 5, 5,  # b8 - bf
    0, 0, 6, 6, 6, 6, 6, 6,  # c0 - c7
    6, 6, 6, 6, 6, 6, 6, 6,  # c8 - cf
    6, 6, 6, 6, 6, 6, 6, 6,  # d0 - d7
    6, 6, 6, 6, 6, 6, 6, 6,  # d8 - df
    7, 8, 8, 8, 8, 8, 8, 8,  # e0 - e7
    8, 8, 8, 8, 8, 9, 8, 8,  # e8 - ef
    10, 11, 11, 11, 11, 11, 11, 11,  # f0 - f7
    12, 13, 13, 13, 14, 15, 0, 0  # f8 - ff
)

# State table: a flat tuple of next-state values, eight per row; the
# trailing comment on each row is the flat index range (hex) it covers.
# Entries are MachineState members or bare integers for intermediate states.
# NOTE(review): presumably indexed as state * class_factor + class by the
# code that consumes the model, in which case each consecutive pair of
# rows (16 entries) would belong to one state -- confirm against that
# implementation.
UTF8_ST = (
    MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 12, 10,#00-07
    9, 11, 8, 7, 6, 5, 4, 3,#08-0f
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#10-17
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#20-27
    MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#28-2f
    MachineState.ERROR,MachineState.ERROR, 5, 5, 5, 5,MachineState.ERROR,MachineState.ERROR,#30-37
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#38-3f
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5, 5, 5,MachineState.ERROR,MachineState.ERROR,#40-47
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#48-4f
    MachineState.ERROR,MachineState.ERROR, 7, 7, 7, 7,MachineState.ERROR,MachineState.ERROR,#50-57
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#58-5f
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 7, 7,MachineState.ERROR,MachineState.ERROR,#60-67
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#68-6f
    MachineState.ERROR,MachineState.ERROR, 9, 9, 9, 9,MachineState.ERROR,MachineState.ERROR,#70-77
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#78-7f
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 9,MachineState.ERROR,MachineState.ERROR,#80-87
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#88-8f
    MachineState.ERROR,MachineState.ERROR, 12, 12, 12, 12,MachineState.ERROR,MachineState.ERROR,#90-97
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#98-9f
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 12,MachineState.ERROR,MachineState.ERROR,#a0-a7
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#a8-af
    MachineState.ERROR,MachineState.ERROR, 12, 12, 12,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#b0-b7
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#b8-bf
    MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,#c0-c7
    MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR #c8-cf
)
# fmt: on

# Character length per class, indexed by class number (one entry per class).
UTF8_CHAR_LEN_TABLE = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6)

# Model dictionary tying the tables above together.  "class_factor" matches
# the number of entries in UTF8_CHAR_LEN_TABLE (16 classes).
UTF8_SM_MODEL: CodingStateMachineDict = {
    "class_table": UTF8_CLS,
    "class_factor": 16,
    "state_table": UTF8_ST,
    "char_len_table": UTF8_CHAR_LEN_TABLE,
    "name": "UTF-8",
}