from_unicode.php 1.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172
  1. <?php
  2. /**
  3. * UTF8::from_unicode
  4. *
  5. * @package Kohana
  6. * @author Kohana Team
  7. * @copyright (c) Kohana Team
  8. * @copyright (c) 2005 Harry Fuecks
  9. * @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
  10. */
  11. function _from_unicode($arr)
  12. {
  13. ob_start();
  14. $keys = array_keys($arr);
  15. foreach ($keys as $k)
  16. {
  17. // ASCII range (including control chars)
  18. if (($arr[$k] >= 0) AND ($arr[$k] <= 0x007f))
  19. {
  20. echo chr($arr[$k]);
  21. }
  22. // 2 byte sequence
  23. elseif ($arr[$k] <= 0x07ff)
  24. {
  25. echo chr(0xc0 | ($arr[$k] >> 6));
  26. echo chr(0x80 | ($arr[$k] & 0x003f));
  27. }
  28. // Byte order mark (skip)
  29. elseif ($arr[$k] == 0xFEFF)
  30. {
  31. // nop -- zap the BOM
  32. }
  33. // Test for illegal surrogates
  34. elseif ($arr[$k] >= 0xD800 AND $arr[$k] <= 0xDFFF)
  35. {
  36. // Found a surrogate
  37. throw new UTF8_Exception("UTF8::from_unicode: Illegal surrogate at index: ':index', value: ':value'", [
  38. ':index' => $k,
  39. ':value' => $arr[$k],
  40. ]);
  41. }
  42. // 3 byte sequence
  43. elseif ($arr[$k] <= 0xffff)
  44. {
  45. echo chr(0xe0 | ($arr[$k] >> 12));
  46. echo chr(0x80 | (($arr[$k] >> 6) & 0x003f));
  47. echo chr(0x80 | ($arr[$k] & 0x003f));
  48. }
  49. // 4 byte sequence
  50. elseif ($arr[$k] <= 0x10ffff)
  51. {
  52. echo chr(0xf0 | ($arr[$k] >> 18));
  53. echo chr(0x80 | (($arr[$k] >> 12) & 0x3f));
  54. echo chr(0x80 | (($arr[$k] >> 6) & 0x3f));
  55. echo chr(0x80 | ($arr[$k] & 0x3f));
  56. }
  57. // Out of range
  58. else
  59. {
  60. throw new UTF8_Exception("UTF8::from_unicode: Codepoint out of Unicode range at index: ':index', value: ':value'", [
  61. ':index' => $k,
  62. ':value' => $arr[$k],
  63. ]);
  64. }
  65. }
  66. $result = ob_get_contents();
  67. ob_end_clean();
  68. return $result;
  69. }