normalization.cpp 1.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566
  1. #include "normalization.h"
  2. static const wchar32 S_BASE = 0xAC00;
  3. static const wchar32 L_BASE = 0x1100;
  4. static const wchar32 V_BASE = 0x1161;
  5. static const wchar32 T_BASE = 0x11A7;
  6. static const int L_COUNT = 19;
  7. static const int V_COUNT = 21;
  8. static const int T_COUNT = 28;
  9. static const int N_COUNT = V_COUNT * T_COUNT; // 588
  10. static const int S_COUNT = L_COUNT * N_COUNT; // 11172
  11. static inline wchar32 ComposeHangul(wchar32 lead, wchar32 tail) {
  12. // 1. check to see if two current characters are L and V
  13. int lIndex = lead - L_BASE;
  14. if (0 <= lIndex && lIndex < L_COUNT) {
  15. int vIndex = tail - V_BASE;
  16. if (0 <= vIndex && vIndex < V_COUNT) {
  17. // make syllable of form LV
  18. lead = (wchar32)(S_BASE + (lIndex * V_COUNT + vIndex) * T_COUNT);
  19. return lead;
  20. }
  21. }
  22. // 2. check to see if two current characters are LV and T
  23. int sIndex = lead - S_BASE;
  24. if (0 <= sIndex && sIndex < S_COUNT && (sIndex % T_COUNT) == 0) {
  25. int TIndex = tail - T_BASE;
  26. if (0 < TIndex && TIndex < T_COUNT) {
  27. // make syllable of form LVT
  28. lead += TIndex;
  29. return lead;
  30. }
  31. }
  32. return 0;
  33. }
  34. NUnicode::NPrivate::TComposition::TComposition() {
  35. for (size_t i = 0; i != RawDataSize; ++i) {
  36. const TRawData& data = RawData[i];
  37. if (DecompositionCombining(data.Lead) != 0)
  38. continue;
  39. Data[TKey(data.Lead, data.Tail)] = data.Comp;
  40. }
  41. for (wchar32 s = 0xAC00; s != 0xD7A4; ++s) {
  42. const wchar32* decompBegin = NUnicode::Decomposition<true>(s);
  43. if (decompBegin == nullptr)
  44. continue;
  45. wchar32 lead = *(decompBegin++);
  46. while (*decompBegin) {
  47. wchar32 tail = *(decompBegin++);
  48. wchar32 comp = ComposeHangul(lead, tail);
  49. Y_ASSERT(comp != 0);
  50. Data[TKey(lead, tail)] = comp;
  51. lead = comp;
  52. }
  53. }
  54. }