<?php
  /**
   * A port of [phputf8](http://phputf8.sourceforge.net/) to a unified set
   * of files. Provides multi-byte aware replacement string functions.
   *
   * For UTF-8 support to work correctly, the following requirements must be met:
   *
   * - PCRE needs to be compiled with UTF-8 support (--enable-utf8)
   * - Support for [Unicode properties](http://php.net/manual/reference.pcre.pattern.modifiers.php)
   *   is highly recommended (--enable-unicode-properties)
   * - The [mbstring extension](http://php.net/mbstring) is highly recommended,
   *   but must not be overloading string functions
   *
   * [!!] This file is licensed differently from the rest of Kohana. As a port of
   * [phputf8](http://phputf8.sourceforge.net/), this file is released under the LGPL.
   *
   * @package    Kohana
   * @category   Base
   * @author     Kohana Team
   * @copyright  (c) Kohana Team
   * @copyright  (c) 2005 Harry Fuecks
   * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
   */
  class Kohana_UTF8 {

      /**
       * @var  boolean  Does the server support UTF-8 natively?
       */
      public static $server_utf8 = NULL;

      /**
       * @var  array  List of called methods that have had their required file included.
       */
      public static $called = [];

      /**
       * Recursively cleans arrays, objects, and strings. Removes ASCII control
       * codes and converts to the requested charset while silently discarding
       * incompatible characters.
       *
       *     UTF8::clean($_GET); // Clean GET data
       *
       * Arrays are rebuilt, so a key that is altered by cleaning replaces its
       * unclean original. Objects are modified in place: objects implementing
       * ArrayAccess are written to with array syntax, any other object has the
       * properties visible to this class cleaned.
       *
       * @param   mixed   $var      variable to clean
       * @param   string  $charset  character set, defaults to Kohana::$charset
       * @return  mixed
       * @uses    UTF8::clean
       * @uses    UTF8::strip_ascii_ctrl
       * @uses    UTF8::is_ascii
       */
      public static function clean($var, $charset = NULL)
      {
          if ( ! $charset)
          {
              // Use the application character set
              $charset = Kohana::$charset;
          }

          if (is_array($var))
          {
              // Build a fresh array so unclean keys do not survive next to
              // their cleaned counterparts
              $cleaned = [];

              foreach ($var as $key => $val)
              {
                  // Recursion! The charset is passed on so that nested values
                  // are converted to the charset that was requested
                  $cleaned[UTF8::clean($key, $charset)] = UTF8::clean($val, $charset);
              }

              $var = $cleaned;
          }
          elseif ($var instanceof ArrayAccess)
          {
              foreach ($var as $key => $val)
              {
                  // Recursion!
                  $var[UTF8::clean($key, $charset)] = UTF8::clean($val, $charset);
              }
          }
          elseif (is_object($var))
          {
              // Array syntax is a fatal error on objects without ArrayAccess,
              // so work on a snapshot of the properties and write them back
              foreach (get_object_vars($var) as $key => $val)
              {
                  $clean_key = UTF8::clean($key, $charset);

                  if ($clean_key !== $key)
                  {
                      // The property is re-created below under its clean name
                      unset($var->{$key});
                  }

                  $var->{$clean_key} = UTF8::clean($val, $charset);
              }
          }
          elseif (is_string($var) AND $var !== '')
          {
              // Remove control characters
              $var = UTF8::strip_ascii_ctrl($var);

              if ( ! UTF8::is_ascii($var))
              {
                  // Temporarily save the mb_substitute_character() value into a variable
                  $mb_substitute_character = mb_substitute_character();

                  // Disable substituting illegal characters with the default '?' character
                  mb_substitute_character('none');

                  try
                  {
                      // Convert encoding, this is expensive, used when $var is not ASCII
                      $var = mb_convert_encoding($var, $charset, $charset);
                  }
                  finally
                  {
                      // Always restore the original setting, even if the conversion throws
                      mb_substitute_character($mb_substitute_character);
                  }
              }
          }

          return $var;
      }

      /**
       * Tests whether a string contains only 7-bit ASCII bytes. This is used to
       * determine when to use native functions or UTF-8 functions.
       *
       *     $ascii = UTF8::is_ascii($str);
       *
       * @param   mixed  $str  string or array of strings to check
       * @return  boolean
       */
      public static function is_ascii($str)
      {
          if (is_array($str))
          {
              // An array is ASCII when the concatenation of its elements is
              $str = implode($str);
          }

          return ! preg_match('/[^\x00-\x7F]/S', $str);
      }

      /**
       * Strips out device control codes in the ASCII range. Tab (0x09), line
       * feed (0x0A) and carriage return (0x0D) are not part of the pattern and
       * are therefore kept.
       *
       *     $str = UTF8::strip_ascii_ctrl($str);
       *
       * @param   string  $str  string to clean
       * @return  string
       */
      public static function strip_ascii_ctrl($str)
      {
          return preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S', '', $str);
      }

      /**
       * Strips out all non-7bit ASCII bytes.
       *
       *     $str = UTF8::strip_non_ascii($str);
       *
       * @param   string  $str  string to clean
       * @return  string
       */
      public static function strip_non_ascii($str)
      {
          return preg_replace('/[^\x00-\x7F]+/S', '', $str);
      }

      /**
       * Replaces special/accented UTF-8 characters by ASCII-7 "equivalents".
       *
       *     $ascii = UTF8::transliterate_to_ascii($utf8);
       *
       * @author  Andreas Gohr <andi@splitbrain.org>
       * @param   string   $str   string to transliterate
       * @param   integer  $case  -1 lowercase only, +1 uppercase only, 0 both cases
       * @return  string
       */
      public static function transliterate_to_ascii($str, $case = 0)
      {
          self::_load_fallback(__FUNCTION__);

          return _transliterate_to_ascii($str, $case);
      }

      /**
       * Returns the length of the given string. This is a UTF8-aware version
       * of [strlen](http://php.net/strlen).
       *
       *     $length = UTF8::strlen($str);
       *
       * @param   string  $str  string being measured for length
       * @return  integer
       * @uses    UTF8::$server_utf8
       * @uses    Kohana::$charset
       */
      public static function strlen($str)
      {
          if (UTF8::$server_utf8)
          {
              return mb_strlen($str, Kohana::$charset);
          }

          self::_load_fallback(__FUNCTION__);

          return _strlen($str);
      }

      /**
       * Finds position of first occurrence of a UTF-8 string. This is a
       * UTF8-aware version of [strpos](http://php.net/strpos).
       *
       *     $position = UTF8::strpos($str, $search);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string   $str     haystack
       * @param   string   $search  needle
       * @param   integer  $offset  offset from which character in haystack to start searching
       * @return  integer  position of needle
       * @return  boolean  FALSE if the needle is not found
       * @uses    UTF8::$server_utf8
       * @uses    Kohana::$charset
       */
      public static function strpos($str, $search, $offset = 0)
      {
          if (UTF8::$server_utf8)
          {
              return mb_strpos($str, $search, $offset, Kohana::$charset);
          }

          self::_load_fallback(__FUNCTION__);

          return _strpos($str, $search, $offset);
      }

      /**
       * Finds position of last occurrence of a string in a UTF-8 string. This is
       * a UTF8-aware version of [strrpos](http://php.net/strrpos).
       *
       *     $position = UTF8::strrpos($str, $search);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string   $str     haystack
       * @param   string   $search  needle
       * @param   integer  $offset  offset from which character in haystack to start searching
       * @return  integer  position of needle
       * @return  boolean  FALSE if the needle is not found
       * @uses    UTF8::$server_utf8
       * @uses    Kohana::$charset
       */
      public static function strrpos($str, $search, $offset = 0)
      {
          if (UTF8::$server_utf8)
          {
              return mb_strrpos($str, $search, $offset, Kohana::$charset);
          }

          self::_load_fallback(__FUNCTION__);

          return _strrpos($str, $search, $offset);
      }

      /**
       * Returns part of a UTF-8 string. This is a UTF8-aware version
       * of [substr](http://php.net/substr).
       *
       *     $sub = UTF8::substr($str, $offset);
       *
       * @author  Chris Smith <chris@jalakai.co.uk>
       * @param   string   $str     input string
       * @param   integer  $offset  offset
       * @param   integer  $length  length limit, NULL for everything up to the end of the string
       * @return  string
       * @uses    UTF8::$server_utf8
       * @uses    Kohana::$charset
       */
      public static function substr($str, $offset, $length = NULL)
      {
          if (UTF8::$server_utf8)
          {
              // A NULL length makes mb_substr() extract up to the end of the
              // string, so no length has to be measured (previously it was
              // measured without the charset, i.e. in the internal encoding)
              return mb_substr($str, $offset, $length, Kohana::$charset);
          }

          self::_load_fallback(__FUNCTION__);

          return _substr($str, $offset, $length);
      }

      /**
       * Replaces text within a portion of a UTF-8 string. This is a UTF8-aware
       * version of [substr_replace](http://php.net/substr_replace).
       *
       *     $str = UTF8::substr_replace($str, $replacement, $offset);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string   $str          input string
       * @param   string   $replacement  replacement string
       * @param   integer  $offset       offset
       * @param   integer  $length       length of the portion to replace
       * @return  string
       */
      public static function substr_replace($str, $replacement, $offset, $length = NULL)
      {
          self::_load_fallback(__FUNCTION__);

          return _substr_replace($str, $replacement, $offset, $length);
      }

      /**
       * Makes a UTF-8 string lowercase. This is a UTF8-aware version
       * of [strtolower](http://php.net/strtolower).
       *
       *     $str = UTF8::strtolower($str);
       *
       * @author  Andreas Gohr <andi@splitbrain.org>
       * @param   string  $str  mixed case string, NULL is treated as an empty string
       * @return  string
       * @uses    UTF8::$server_utf8
       * @uses    Kohana::$charset
       */
      public static function strtolower($str)
      {
          $str = $str ?? '';

          if (UTF8::$server_utf8)
          {
              return mb_strtolower($str, Kohana::$charset);
          }

          self::_load_fallback(__FUNCTION__);

          return _strtolower($str);
      }

      /**
       * Makes a UTF-8 string uppercase. This is a UTF8-aware version
       * of [strtoupper](http://php.net/strtoupper).
       *
       *     $str = UTF8::strtoupper($str);
       *
       * @author  Andreas Gohr <andi@splitbrain.org>
       * @param   string  $str  mixed case string, NULL is treated as an empty string
       * @return  string
       * @uses    UTF8::$server_utf8
       * @uses    Kohana::$charset
       */
      public static function strtoupper($str)
      {
          // Same NULL handling as strtolower()
          $str = $str ?? '';

          if (UTF8::$server_utf8)
          {
              return mb_strtoupper($str, Kohana::$charset);
          }

          self::_load_fallback(__FUNCTION__);

          return _strtoupper($str);
      }

      /**
       * Makes a UTF-8 string's first character uppercase. This is a UTF8-aware
       * version of [ucfirst](http://php.net/ucfirst).
       *
       *     $str = UTF8::ucfirst($str);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string  $str  mixed case string
       * @return  string
       */
      public static function ucfirst($str)
      {
          self::_load_fallback(__FUNCTION__);

          return _ucfirst($str);
      }

      /**
       * Makes the first character of every word in a UTF-8 string uppercase.
       * This is a UTF8-aware version of [ucwords](http://php.net/ucwords).
       *
       *     $str = UTF8::ucwords($str);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string  $str  mixed case string
       * @return  string
       */
      public static function ucwords($str)
      {
          self::_load_fallback(__FUNCTION__);

          return _ucwords($str);
      }

      /**
       * Case-insensitive UTF-8 string comparison. This is a UTF8-aware version
       * of [strcasecmp](http://php.net/strcasecmp).
       *
       *     $compare = UTF8::strcasecmp($str1, $str2);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string  $str1  string to compare
       * @param   string  $str2  string to compare
       * @return  integer  less than 0 if str1 is less than str2
       * @return  integer  greater than 0 if str1 is greater than str2
       * @return  integer  0 if they are equal
       */
      public static function strcasecmp($str1, $str2)
      {
          self::_load_fallback(__FUNCTION__);

          return _strcasecmp($str1, $str2);
      }

      /**
       * Returns a string or an array with all occurrences of search in subject
       * (ignoring case) and replaced with the given replace value. This is a
       * UTF8-aware version of [str_ireplace](http://php.net/str_ireplace).
       *
       * [!!] This function is very slow compared to the native version. Avoid
       * using it when possible.
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string|array  $search   text to replace
       * @param   string|array  $replace  replacement text
       * @param   string|array  $str      subject text
       * @param   integer       $count    number of matched and replaced needles will be returned via this parameter which is passed by reference
       * @return  string  if the input was a string
       * @return  array   if the input was an array
       */
      public static function str_ireplace($search, $replace, $str, & $count = NULL)
      {
          self::_load_fallback(__FUNCTION__);

          return _str_ireplace($search, $replace, $str, $count);
      }

      /**
       * Case-insensitive UTF-8 version of strstr. Returns all of input string
       * from the first occurrence of needle to the end. This is a UTF8-aware
       * version of [stristr](http://php.net/stristr).
       *
       *     $found = UTF8::stristr($str, $search);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string  $str     input string
       * @param   string  $search  needle
       * @return  string  matched substring if found
       * @return  FALSE   if the substring was not found
       */
      public static function stristr($str, $search)
      {
          self::_load_fallback(__FUNCTION__);

          return _stristr($str, $search);
      }

      /**
       * Finds the length of the initial segment matching mask. This is a
       * UTF8-aware version of [strspn](http://php.net/strspn).
       *
       *     $found = UTF8::strspn($str, $mask);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string   $str     input string
       * @param   string   $mask    mask for search
       * @param   integer  $offset  start position of the string to examine
       * @param   integer  $length  length of the string to examine
       * @return  integer  length of the initial segment that contains characters in the mask
       */
      public static function strspn($str, $mask, $offset = NULL, $length = NULL)
      {
          self::_load_fallback(__FUNCTION__);

          return _strspn($str, $mask, $offset, $length);
      }

      /**
       * Finds the length of the initial segment not matching mask. This is a
       * UTF8-aware version of [strcspn](http://php.net/strcspn).
       *
       *     $found = UTF8::strcspn($str, $mask);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string   $str     input string
       * @param   string   $mask    mask for search
       * @param   integer  $offset  start position of the string to examine
       * @param   integer  $length  length of the string to examine
       * @return  integer  length of the initial segment that contains characters not in the mask
       */
      public static function strcspn($str, $mask, $offset = NULL, $length = NULL)
      {
          self::_load_fallback(__FUNCTION__);

          return _strcspn($str, $mask, $offset, $length);
      }

      /**
       * Pads a UTF-8 string to a certain length with another string. This is a
       * UTF8-aware version of [str_pad](http://php.net/str_pad).
       *
       *     $str = UTF8::str_pad($str, $length);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string   $str               input string
       * @param   integer  $final_str_length  desired string length after padding
       * @param   string   $pad_str           string to use as padding
       * @param   string   $pad_type          padding type: STR_PAD_RIGHT, STR_PAD_LEFT, or STR_PAD_BOTH
       * @return  string
       */
      public static function str_pad($str, $final_str_length, $pad_str = ' ', $pad_type = STR_PAD_RIGHT)
      {
          self::_load_fallback(__FUNCTION__);

          return _str_pad($str, $final_str_length, $pad_str, $pad_type);
      }

      /**
       * Converts a UTF-8 string to an array. This is a UTF8-aware version of
       * [str_split](http://php.net/str_split).
       *
       *     $array = UTF8::str_split($str);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string   $str           input string
       * @param   integer  $split_length  maximum length of each chunk
       * @return  array
       */
      public static function str_split($str, $split_length = 1)
      {
          self::_load_fallback(__FUNCTION__);

          return _str_split($str, $split_length);
      }

      /**
       * Reverses a UTF-8 string. This is a UTF8-aware version of [strrev](http://php.net/strrev).
       *
       *     $str = UTF8::strrev($str);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string  $str  string to be reversed
       * @return  string
       */
      public static function strrev($str)
      {
          self::_load_fallback(__FUNCTION__);

          return _strrev($str);
      }

      /**
       * Strips whitespace (or other UTF-8 characters) from the beginning and
       * end of a string. This is a UTF8-aware version of [trim](http://php.net/trim).
       *
       *     $str = UTF8::trim($str);
       *
       * @author  Andreas Gohr <andi@splitbrain.org>
       * @param   string  $str       input string
       * @param   string  $charlist  string of characters to remove
       * @return  string
       */
      public static function trim($str, $charlist = NULL)
      {
          self::_load_fallback(__FUNCTION__);

          return _trim($str, $charlist);
      }

      /**
       * Strips whitespace (or other UTF-8 characters) from the beginning of
       * a string. This is a UTF8-aware version of [ltrim](http://php.net/ltrim).
       *
       *     $str = UTF8::ltrim($str);
       *
       * @author  Andreas Gohr <andi@splitbrain.org>
       * @param   string  $str       input string
       * @param   string  $charlist  string of characters to remove
       * @return  string
       */
      public static function ltrim($str, $charlist = NULL)
      {
          self::_load_fallback(__FUNCTION__);

          return _ltrim($str, $charlist);
      }

      /**
       * Strips whitespace (or other UTF-8 characters) from the end of a string.
       * This is a UTF8-aware version of [rtrim](http://php.net/rtrim).
       *
       *     $str = UTF8::rtrim($str);
       *
       * @author  Andreas Gohr <andi@splitbrain.org>
       * @param   string  $str       input string
       * @param   string  $charlist  string of characters to remove
       * @return  string
       */
      public static function rtrim($str, $charlist = NULL)
      {
          self::_load_fallback(__FUNCTION__);

          return _rtrim($str, $charlist);
      }

      /**
       * Returns the unicode ordinal for a character. This is a UTF8-aware
       * version of [ord](http://php.net/ord).
       *
       *     $digit = UTF8::ord($character);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string  $chr  UTF-8 encoded character
       * @return  integer
       */
      public static function ord($chr)
      {
          self::_load_fallback(__FUNCTION__);

          return _ord($chr);
      }

      /**
       * Takes an UTF-8 string and returns an array of ints representing the Unicode characters.
       * Astral planes are supported i.e. the ints in the output can be > 0xFFFF.
       * Occurrences of the BOM are ignored. Surrogates are not allowed.
       *
       *     $array = UTF8::to_unicode($str);
       *
       * The Original Code is Mozilla Communicator client code.
       * The Initial Developer of the Original Code is Netscape Communications Corporation.
       * Portions created by the Initial Developer are Copyright (C) 1998 the Initial Developer.
       * Ported to PHP by Henri Sivonen <hsivonen@iki.fi>, see <http://hsivonen.iki.fi/php-utf8/>
       * Slight modifications to fit with phputf8 library by Harry Fuecks <hfuecks@gmail.com>
       *
       * @param   string  $str  UTF-8 encoded string
       * @return  array   unicode code points
       * @return  FALSE   if the string is invalid
       */
      public static function to_unicode($str)
      {
          self::_load_fallback(__FUNCTION__);

          return _to_unicode($str);
      }

      /**
       * Takes an array of ints representing the Unicode characters and returns a UTF-8 string.
       * Astral planes are supported i.e. the ints in the input can be > 0xFFFF.
       * Occurrences of the BOM are ignored. Surrogates are not allowed.
       *
       *     $str = UTF8::from_unicode($array);
       *
       * The Original Code is Mozilla Communicator client code.
       * The Initial Developer of the Original Code is Netscape Communications Corporation.
       * Portions created by the Initial Developer are Copyright (C) 1998 the Initial Developer.
       * Ported to PHP by Henri Sivonen <hsivonen@iki.fi>, see <http://hsivonen.iki.fi/php-utf8/>
       * Slight modifications to fit with phputf8 library by Harry Fuecks <hfuecks@gmail.com>.
       *
       * @param   array    $arr  unicode code points representing a string
       * @return  string   utf8 string of characters
       * @return  boolean  FALSE if a code point cannot be found
       */
      public static function from_unicode($arr)
      {
          self::_load_fallback(__FUNCTION__);

          return _from_unicode($arr);
      }

      /**
       * Includes the file that provides the global `_<method>()` implementation
       * of a method, the first time that method is used. Loaded methods are
       * recorded in UTF8::$called so each file is included only once.
       *
       * NOTE(review): the include now runs in this helper's scope instead of the
       * calling method's scope; this assumes the utf8/* files only declare
       * functions and do not read the caller's local variables - confirm.
       *
       * @param   string  $method  method name, also the name of the file in the "utf8" directory
       * @return  void
       * @uses    UTF8::$called
       * @uses    Kohana::find_file
       */
      private static function _load_fallback($method)
      {
          if ( ! isset(UTF8::$called[$method]))
          {
              require Kohana::find_file('utf8', $method);

              // Function has been called
              UTF8::$called[$method] = TRUE;
          }
      }

  }
  // Keep a value that was already set; otherwise treat a loaded mbstring
  // extension as the indicator for native UTF-8 support on this server
  Kohana_UTF8::$server_utf8 = Kohana_UTF8::$server_utf8 ?? extension_loaded('mbstring');