genpages.c 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193
  1. /**
  2. * @file genpages.c
  3. * @brief generate required font page files
  4. * @author Yunhui Fu (yhfudev@gmail.com)
  5. * @version 1.0
  6. * @date 2015-02-19
  7. * @copyright Yunhui Fu (2015)
  8. */
  9. #include <stdio.h>
  10. #include <stdint.h> /* uint8_t */
  11. #include <stdlib.h> /* size_t */
  12. #include <string.h>
  13. #include <assert.h>
  14. #include "getline.h"
  /**
   * @brief Decode the value of the UTF-8 sequence that starts at pstart.
   *
   * The lead byte selects one of the 1- to 6-byte forms; the bytes that
   * follow are folded in six bits at a time and are NOT validated.
   *
   * @param pstart : first byte of the sequence
   *
   * @return the decoded value, or 0 when the lead byte matches none of the
   *         known lead-byte patterns
   */
  wchar_t get_val_utf82uni(uint8_t *pstart) {
      /* Multi-byte lead patterns: bits to test, expected value, trailing byte count. */
      static const struct {
          uint8_t tag_mask;
          uint8_t tag;
          uint8_t trailing;
      } forms[] = {
          { 0xE0, 0xC0, 1 },
          { 0xF0, 0xE0, 2 },
          { 0xF8, 0xF0, 3 },
          { 0xFC, 0xF8, 4 },
          { 0xFE, 0xFC, 5 },
      };
      const uint8_t lead = pstart[0];
      size_t f;
      size_t i;

      /* Single-byte (ASCII) form: the byte is the value. */
      if ((lead & 0x80) == 0) {
          return lead;
      }

      for (f = 0; f < sizeof forms / sizeof forms[0]; f++) {
          wchar_t code;

          if ((lead & forms[f].tag_mask) != forms[f].tag) {
              continue;
          }

          /* Payload bits of the lead byte, then six bits per trailing byte. */
          code = lead & ~forms[f].tag_mask;
          for (i = 1; i <= forms[f].trailing; i++) {
              code <<= 6;
              code |= pstart[i] & 0x3F;
          }
          return code;
      }

      /* encoding error: not a recognised lead byte */
      return 0;
  }
  /**
   * @brief Convert one UTF-8 encoded character to a native Unicode character (wchar_t)
   *
   * @param pstart : pointer to the UTF-8 bytes to decode; must not be NULL
   * @param pval : address that receives the decoded Unicode value; may be NULL
   *
   * @return position of the next UTF-8 character (always past pstart)
   *
   * The 1- to 6-byte lead forms are accepted. Every trailing byte must be a
   * continuation byte (10xxxxxx). If one is not -- including the terminating
   * NUL of a truncated sequence -- decoding stops there, 0 is stored in *pval
   * and the position of the offending byte is returned, so nothing beyond the
   * terminator of a NUL-terminated string is read. A run of stray
   * continuation bytes, or of 0xFE/0xFF bytes, is skipped as a whole and
   * also yields 0.
   *
   * The former debug cross-check against get_val_utf82uni() was dropped:
   * that routine does not validate trailing bytes, so the two would
   * disagree on malformed input.
   */
  uint8_t* get_utf8_value(uint8_t *pstart, wchar_t *pval) {
      uint32_t val = 0;
      uint8_t *p = pstart;
      size_t trailing = 0; /* continuation bytes still expected after the lead byte */

      assert(NULL != pstart);

      if (0 == (0x80 & *p)) {
          val = *p;
      }
      else if (0xC0 == (0xE0 & *p)) {
          val = *p & 0x1F;
          trailing = 1;
      }
      else if (0xE0 == (0xF0 & *p)) {
          val = *p & 0x0F;
          trailing = 2;
      }
      else if (0xF0 == (0xF8 & *p)) {
          val = *p & 0x07;
          trailing = 3;
      }
      else if (0xF8 == (0xFC & *p)) {
          val = *p & 0x03;
          trailing = 4;
      }
      else if (0xFC == (0xFE & *p)) {
          val = *p & 0x01;
          trailing = 5;
      }
      else if (0x80 == (0xC0 & *p)) {
          /* error: continuation bytes without a lead byte -- skip the run */
          for (; 0x80 == (0xC0 & *p); p++);
          if (pval) *pval = 0;
          return p;
      }
      else {
          /* error: 0xFE / 0xFF never appear in UTF-8 -- skip the run */
          for (; 0xFE == (0xFE & *p); p++);
          if (pval) *pval = 0;
          return p;
      }

      p++; /* the lead byte is consumed */
      for (; trailing > 0; trailing--) {
          if (0x80 != (0xC0 & *p)) {
              /* truncated or malformed: report 0 and resume at this byte */
              val = 0;
              break;
          }
          val = (val << 6) | (uint32_t)(*p & 0x3F);
          p++;
      }

      if (pval) *pval = (wchar_t)val;
      return p;
  }
  /**
   * @brief Print the command-line help text to stderr.
   *
   * @param progname : program name shown in the usage line
   */
  void usage(char *progname) {
      FILE *const out = stderr;

      fprintf(out, "usage: %s\n", progname);
      fprintf(out, " read data from stdin\n");
  }
  /**
   * @brief Scan a UTF-8 buffer and list every character whose value is 256 or above.
   *
   * For each such character one line is written to stdout:
   * "<val / 128> <val % 128> <raw UTF-8 bytes of the character>\n".
   * Characters below 256 produce no output.
   *
   * Scanning stops early when the decoder fails, makes no progress, or
   * reports a sequence that would extend past the end of the buffer (a
   * truncated final character), so no byte outside msg[0..len) is echoed.
   *
   * @param msg : start of the UTF-8 text
   * @param len : number of bytes of msg to scan
   */
  void utf8_parse(const char *msg, unsigned int len) {
      /* get_utf8_value() takes a non-const pointer; this function never writes through it. */
      uint8_t *cur = (uint8_t *)msg;
      uint8_t *pend = cur + len;

      while (cur < pend) {
          wchar_t val = 0;
          uint8_t *next = get_utf8_value(cur, &val);

          if (NULL == next || next <= cur || next > pend) break;

          if (val >= 256) {
              /* wchar_t is not int on every platform; cast to match %d */
              fprintf(stdout, "%d %d ", (int)(val / 128), (int)(val % 128));
              fwrite(cur, 1, (size_t)(next - cur), stdout);
              fprintf(stdout, "\n");
          }
          cur = next;
      }
  }
  /**
   * @brief Read fp line by line and hand each line to utf8_parse().
   *
   * @param fp : stream to read from
   *
   * @return 0 once reading stops, -1 if the initial line buffer cannot be allocated
   */
  int load_file(FILE *fp) {
      size_t capacity = 10000;
      char *line = malloc(capacity);

      if (!line) {
          return -1;
      }

      /* getline() is given the buffer's address and capacity, so it may replace both. */
      for (;;) {
          if (getline(&line, &capacity, fp) <= 0) {
              break;
          }
          utf8_parse(line, (unsigned int)strlen(line));
      }

      free(line);
      return 0;
  }
  /**
   * @brief Program entry point: process the text supplied on stdin.
   *
   * No command-line arguments are accepted; if any is given the usage text
   * is printed and the program exits with status 1.
   *
   * @param argc : argument count
   * @param argv : argument vector; argv[0] is used as the program name
   *
   * @return 0 on success, 1 when load_file() reports a failure
   */
  int main(int argc, char * argv[]) {
      if (argc > 1) {
          usage(argv[0]);
          exit(1);
      }

      /* Report a load failure through the exit status instead of dropping it. */
      if (load_file(stdin) != 0) {
          return 1;
      }
      return 0;
  }