UTF8.php 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769
  1. <?php
  2. /**
  3. * A port of [phputf8](http://phputf8.sourceforge.net/) to a unified set
  4. * of files. Provides multi-byte aware replacement string functions.
  5. *
  6. * For UTF-8 support to work correctly, the following requirements must be met:
  7. *
  8. * - PCRE needs to be compiled with UTF-8 support (--enable-utf8)
  9. * - Support for [Unicode properties](http://php.net/manual/reference.pcre.pattern.modifiers.php)
  10. * is highly recommended (--enable-unicode-properties)
  11. * - The [mbstring extension](http://php.net/mbstring) is highly recommended,
  12. * but must not be overloading string functions
  13. *
  14. * [!!] This file is licensed differently from the rest of Kohana. As a port of
  15. * [phputf8](http://phputf8.sourceforge.net/), this file is released under the LGPL.
  16. *
  17. * @package Kohana
  18. * @category Base
  19. * @author Kohana Team
  20. * @copyright (c) Kohana Team
  21. * @copyright (c) 2005 Harry Fuecks
  22. * @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
  23. */
  class Kohana_UTF8 {

      /**
       * @var  boolean  Whether the native mbstring functions should be used.
       *                NULL means "not decided yet"; code following the class
       *                definition fills it in from extension_loaded('mbstring').
       */
      public static $server_utf8 = NULL;

      /**
       * @var  array  Map of method name => TRUE for every method whose
       *              implementation file has already been required.
       */
      public static $called = [];

      /**
       * Recursively cleans arrays, objects, and strings. Removes ASCII control
       * codes and converts to the requested charset while silently discarding
       * incompatible characters.
       *
       *     UTF8::clean($_GET); // Clean GET data
       *
       * Arrays are rebuilt, so a key that changes while being cleaned is
       * replaced by its cleaned form instead of being kept next to it.
       * Objects are handles and are therefore modified in place: objects
       * implementing ArrayAccess are written through array syntax, all other
       * objects have the properties returned by get_object_vars() cleaned.
       * The requested charset is passed down to every nested key and value.
       *
       * @param   mixed   $var      variable to clean
       * @param   string  $charset  character set, defaults to Kohana::$charset
       * @return  mixed
       * @uses    UTF8::clean
       * @uses    UTF8::strip_ascii_ctrl
       * @uses    UTF8::is_ascii
       */
      public static function clean($var, $charset = NULL)
      {
          if ( ! $charset)
          {
              // Use the application character set
              $charset = Kohana::$charset;
          }

          if (is_array($var))
          {
              // Build a fresh array so that dirty keys do not survive
              $cleaned = [];

              foreach ($var as $key => $val)
              {
                  // Recursion! Keep using the charset the caller asked for
                  $cleaned[UTF8::clean($key, $charset)] = UTF8::clean($val, $charset);
              }

              return $cleaned;
          }

          if (is_object($var))
          {
              if ($var instanceof ArrayAccess)
              {
                  foreach ($var as $key => $val)
                  {
                      // Array syntax is only valid on ArrayAccess objects
                      $var[UTF8::clean($key, $charset)] = UTF8::clean($val, $charset);
                  }
              }
              else
              {
                  // Iterate over a snapshot, the object itself is changed below
                  foreach (get_object_vars($var) as $key => $val)
                  {
                      $clean_key = (string) UTF8::clean($key, $charset);

                      if ($clean_key !== (string) $key)
                      {
                          // The property is re-created under its cleaned name
                          unset($var->{$key});
                      }

                      $var->{$clean_key} = UTF8::clean($val, $charset);
                  }
              }

              return $var;
          }

          if (is_string($var) AND $var !== '')
          {
              // Remove control characters
              $var = UTF8::strip_ascii_ctrl($var);

              if ( ! UTF8::is_ascii($var))
              {
                  // Temporarily save the mb_substitute_character() value into a variable
                  $mb_substitute_character = mb_substitute_character();

                  // Disable substituting illegal characters with the default '?' character
                  mb_substitute_character('none');

                  // Convert encoding, this is expensive, used when $var is not ASCII
                  $var = mb_convert_encoding($var, $charset, $charset);

                  // Reset mb_substitute_character() value back to the original setting
                  mb_substitute_character($mb_substitute_character);
              }
          }

          return $var;
      }

      /**
       * Tests whether a string contains only 7-bit ASCII bytes. This is used to
       * determine when to use native functions or UTF-8 functions.
       *
       *     $ascii = UTF8::is_ascii($str);
       *
       * @param   mixed  $str  string or array of strings to check
       * @return  boolean
       */
      public static function is_ascii($str)
      {
          if (is_array($str))
          {
              // An array is checked as the concatenation of its elements
              $str = implode($str);
          }

          return ! preg_match('/[^\x00-\x7F]/S', $str);
      }

      /**
       * Strips out device control codes in the ASCII range. Tab (0x09),
       * line feed (0x0A) and carriage return (0x0D) are kept.
       *
       *     $str = UTF8::strip_ascii_ctrl($str);
       *
       * @param   string  $str  string to clean
       * @return  string
       */
      public static function strip_ascii_ctrl($str)
      {
          return preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S', '', $str);
      }

      /**
       * Strips out all non-7bit ASCII bytes.
       *
       *     $str = UTF8::strip_non_ascii($str);
       *
       * @param   string  $str  string to clean
       * @return  string
       */
      public static function strip_non_ascii($str)
      {
          return preg_replace('/[^\x00-\x7F]+/S', '', $str);
      }

      /**
       * Replaces special/accented UTF-8 characters by ASCII-7 "equivalents".
       *
       *     $ascii = UTF8::transliterate_to_ascii($utf8);
       *
       * The implementation file is located with Kohana::find_file() and
       * required on the first call only; the work is then delegated to
       * _transliterate_to_ascii().
       *
       * @author  Andreas Gohr <andi@splitbrain.org>
       * @param   string   $str   string to transliterate
       * @param   integer  $case  -1 lowercase only, +1 uppercase only, 0 both cases
       * @return  string
       */
      public static function transliterate_to_ascii($str, $case = 0)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require Kohana::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _transliterate_to_ascii($str, $case);
      }

      /**
       * Returns the length of the given string. This is a UTF8-aware version
       * of [strlen](http://php.net/strlen).
       *
       *     $length = UTF8::strlen($str);
       *
       * Uses mb_strlen() when UTF8::$server_utf8 is set, otherwise the
       * lazily loaded _strlen() implementation.
       *
       * @param   string  $str  string being measured for length
       * @return  integer
       * @uses    UTF8::$server_utf8
       * @uses    Kohana::$charset
       */
      public static function strlen($str)
      {
          if (UTF8::$server_utf8)
          {
              return mb_strlen($str, Kohana::$charset);
          }

          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require Kohana::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _strlen($str);
      }

      /**
       * Finds position of first occurrence of a UTF-8 string. This is a
       * UTF8-aware version of [strpos](http://php.net/strpos).
       *
       *     $position = UTF8::strpos($str, $search);
       *
       * Uses mb_strpos() when UTF8::$server_utf8 is set, otherwise the
       * lazily loaded _strpos() implementation.
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string   $str     haystack
       * @param   string   $search  needle
       * @param   integer  $offset  offset from which character in haystack to start searching
       * @return  integer  position of needle
       * @return  boolean  FALSE if the needle is not found
       * @uses    UTF8::$server_utf8
       * @uses    Kohana::$charset
       */
      public static function strpos($str, $search, $offset = 0)
      {
          if (UTF8::$server_utf8)
          {
              return mb_strpos($str, $search, $offset, Kohana::$charset);
          }

          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require Kohana::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _strpos($str, $search, $offset);
      }

      /**
       * Finds position of last occurrence of a char in a UTF-8 string. This is
       * a UTF8-aware version of [strrpos](http://php.net/strrpos).
       *
       *     $position = UTF8::strrpos($str, $search);
       *
       * Uses mb_strrpos() when UTF8::$server_utf8 is set, otherwise the
       * lazily loaded _strrpos() implementation.
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string   $str     haystack
       * @param   string   $search  needle
       * @param   integer  $offset  offset from which character in haystack to start searching
       * @return  integer  position of needle
       * @return  boolean  FALSE if the needle is not found
       * @uses    UTF8::$server_utf8
       * @uses    Kohana::$charset
       */
      public static function strrpos($str, $search, $offset = 0)
      {
          if (UTF8::$server_utf8)
          {
              return mb_strrpos($str, $search, $offset, Kohana::$charset);
          }

          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require Kohana::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _strrpos($str, $search, $offset);
      }

      /**
       * Returns part of a UTF-8 string. This is a UTF8-aware version
       * of [substr](http://php.net/substr).
       *
       *     $sub = UTF8::substr($str, $offset);
       *
       * Uses mb_substr() when UTF8::$server_utf8 is set, otherwise the
       * lazily loaded _substr() implementation.
       *
       * @author  Chris Smith <chris@jalakai.co.uk>
       * @param   string   $str     input string
       * @param   integer  $offset  offset
       * @param   integer  $length  length limit, NULL for "up to the end of the string"
       * @return  string
       * @uses    UTF8::$server_utf8
       * @uses    Kohana::$charset
       */
      public static function substr($str, $offset, $length = NULL)
      {
          if (UTF8::$server_utf8)
          {
              if ($length === NULL)
              {
                  // Measure in the same charset mb_substr() works in, so the
                  // "rest of the string" length can never come out too short
                  $length = mb_strlen($str, Kohana::$charset);
              }

              return mb_substr($str, $offset, $length, Kohana::$charset);
          }

          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require Kohana::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _substr($str, $offset, $length);
      }

      /**
       * Replaces text within a portion of a UTF-8 string. This is a UTF8-aware
       * version of [substr_replace](http://php.net/substr_replace).
       *
       *     $str = UTF8::substr_replace($str, $replacement, $offset);
       *
       * The implementation file is required on the first call only; the work
       * is then delegated to _substr_replace().
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string   $str          input string
       * @param   string   $replacement  replacement string
       * @param   integer  $offset       offset
       * @param   integer  $length       passed through unchanged to the implementation;
       *                                 presumably the length of the replaced portion
       *                                 as in native substr_replace (verify against
       *                                 the utf8/substr_replace file)
       * @return  string
       */
      public static function substr_replace($str, $replacement, $offset, $length = NULL)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require Kohana::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _substr_replace($str, $replacement, $offset, $length);
      }

      /**
       * Makes a UTF-8 string lowercase. This is a UTF8-aware version
       * of [strtolower](http://php.net/strtolower).
       *
       *     $str = UTF8::strtolower($str);
       *
       * Uses mb_strtolower() when UTF8::$server_utf8 is set, otherwise the
       * lazily loaded _strtolower() implementation.
       *
       * @author  Andreas Gohr <andi@splitbrain.org>
       * @param   string  $str  mixed case string
       * @return  string
       * @uses    UTF8::$server_utf8
       * @uses    Kohana::$charset
       */
      public static function strtolower($str)
      {
          if (UTF8::$server_utf8)
          {
              return mb_strtolower($str, Kohana::$charset);
          }

          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require Kohana::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _strtolower($str);
      }

      /**
       * Makes a UTF-8 string uppercase. This is a UTF8-aware version
       * of [strtoupper](http://php.net/strtoupper).
       *
       *     $str = UTF8::strtoupper($str);
       *
       * Uses mb_strtoupper() when UTF8::$server_utf8 is set, otherwise the
       * lazily loaded _strtoupper() implementation.
       *
       * @author  Andreas Gohr <andi@splitbrain.org>
       * @param   string  $str  mixed case string
       * @return  string
       * @uses    UTF8::$server_utf8
       * @uses    Kohana::$charset
       */
      public static function strtoupper($str)
      {
          if (UTF8::$server_utf8)
          {
              return mb_strtoupper($str, Kohana::$charset);
          }

          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require Kohana::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _strtoupper($str);
      }

      /**
       * Makes a UTF-8 string's first character uppercase. This is a UTF8-aware
       * version of [ucfirst](http://php.net/ucfirst).
       *
       *     $str = UTF8::ucfirst($str);
       *
       * The implementation file is required on the first call only; the work
       * is then delegated to _ucfirst().
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string  $str  mixed case string
       * @return  string
       */
      public static function ucfirst($str)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require Kohana::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _ucfirst($str);
      }

      /**
       * Makes the first character of every word in a UTF-8 string uppercase.
       * This is a UTF8-aware version of [ucwords](http://php.net/ucwords).
       *
       *     $str = UTF8::ucwords($str);
       *
       * The implementation file is required on the first call only; the work
       * is then delegated to _ucwords().
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string  $str  mixed case string
       * @return  string
       */
      public static function ucwords($str)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require Kohana::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _ucwords($str);
      }

      /**
       * Case-insensitive UTF-8 string comparison. This is a UTF8-aware version
       * of [strcasecmp](http://php.net/strcasecmp).
       *
       *     $compare = UTF8::strcasecmp($str1, $str2);
       *
       * The implementation file is required on the first call only; the work
       * is then delegated to _strcasecmp().
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string  $str1  string to compare
       * @param   string  $str2  string to compare
       * @return  integer  less than 0 if str1 is less than str2
       * @return  integer  greater than 0 if str1 is greater than str2
       * @return  integer  0 if they are equal
       */
      public static function strcasecmp($str1, $str2)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require Kohana::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _strcasecmp($str1, $str2);
      }

      /**
       * Returns a string or an array with all occurrences of search in subject
       * (ignoring case) and replaced with the given replace value. This is a
       * UTF8-aware version of [str_ireplace](http://php.net/str_ireplace).
       *
       * [!!] This function is very slow compared to the native version. Avoid
       * using it when possible.
       *
       * The implementation file is required on the first call only; the work
       * is then delegated to _str_ireplace().
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string|array  $search   text to replace
       * @param   string|array  $replace  replacement text
       * @param   string|array  $str      subject text
       * @param   integer       $count    number of matched and replaced needles will be returned via this parameter which is passed by reference
       * @return  string  if the input was a string
       * @return  array   if the input was an array
       */
      public static function str_ireplace($search, $replace, $str, & $count = NULL)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require Kohana::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _str_ireplace($search, $replace, $str, $count);
      }

      /**
       * Case-insensitive UTF-8 version of strstr. Returns all of input string
       * from the first occurrence of needle to the end. This is a UTF8-aware
       * version of [stristr](http://php.net/stristr).
       *
       *     $found = UTF8::stristr($str, $search);
       *
       * The implementation file is required on the first call only; the work
       * is then delegated to _stristr().
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string  $str     input string
       * @param   string  $search  needle
       * @return  string  matched substring if found
       * @return  FALSE   if the substring was not found
       */
      public static function stristr($str, $search)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require Kohana::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _stristr($str, $search);
      }

      /**
       * Finds the length of the initial segment matching mask. This is a
       * UTF8-aware version of [strspn](http://php.net/strspn).
       *
       *     $found = UTF8::strspn($str, $mask);
       *
       * The implementation file is required on the first call only; the work
       * is then delegated to _strspn().
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string   $str     input string
       * @param   string   $mask    mask for search
       * @param   integer  $offset  start position of the string to examine
       * @param   integer  $length  length of the string to examine
       * @return  integer  length of the initial segment that contains characters in the mask
       */
      public static function strspn($str, $mask, $offset = NULL, $length = NULL)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require Kohana::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _strspn($str, $mask, $offset, $length);
      }

      /**
       * Finds the length of the initial segment not matching mask. This is a
       * UTF8-aware version of [strcspn](http://php.net/strcspn).
       *
       *     $found = UTF8::strcspn($str, $mask);
       *
       * The implementation file is required on the first call only; the work
       * is then delegated to _strcspn().
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string   $str     input string
       * @param   string   $mask    mask for search
       * @param   integer  $offset  start position of the string to examine
       * @param   integer  $length  length of the string to examine
       * @return  integer  length of the initial segment that contains characters not in the mask
       */
      public static function strcspn($str, $mask, $offset = NULL, $length = NULL)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require Kohana::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _strcspn($str, $mask, $offset, $length);
      }

      /**
       * Pads a UTF-8 string to a certain length with another string. This is a
       * UTF8-aware version of [str_pad](http://php.net/str_pad).
       *
       *     $str = UTF8::str_pad($str, $length);
       *
       * The implementation file is required on the first call only; the work
       * is then delegated to _str_pad().
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string   $str               input string
       * @param   integer  $final_str_length  desired string length after padding
       * @param   string   $pad_str           string to use as padding
       * @param   string   $pad_type          padding type: STR_PAD_RIGHT, STR_PAD_LEFT, or STR_PAD_BOTH
       * @return  string
       */
      public static function str_pad($str, $final_str_length, $pad_str = ' ', $pad_type = STR_PAD_RIGHT)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require Kohana::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _str_pad($str, $final_str_length, $pad_str, $pad_type);
      }

      /**
       * Converts a UTF-8 string to an array. This is a UTF8-aware version of
       * [str_split](http://php.net/str_split).
       *
       *     $array = UTF8::str_split($str);
       *
       * The implementation file is required on the first call only; the work
       * is then delegated to _str_split().
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string   $str           input string
       * @param   integer  $split_length  maximum length of each chunk
       * @return  array
       */
      public static function str_split($str, $split_length = 1)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require Kohana::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _str_split($str, $split_length);
      }

      /**
       * Reverses a UTF-8 string. This is a UTF8-aware version of [strrev](http://php.net/strrev).
       *
       *     $str = UTF8::strrev($str);
       *
       * The implementation file is required on the first call only; the work
       * is then delegated to _strrev().
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string  $str  string to be reversed
       * @return  string
       */
      public static function strrev($str)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require Kohana::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _strrev($str);
      }

      /**
       * Strips whitespace (or other UTF-8 characters) from the beginning and
       * end of a string. This is a UTF8-aware version of [trim](http://php.net/trim).
       *
       *     $str = UTF8::trim($str);
       *
       * The implementation file is required on the first call only; the work
       * is then delegated to _trim().
       *
       * @author  Andreas Gohr <andi@splitbrain.org>
       * @param   string  $str       input string
       * @param   string  $charlist  string of characters to remove
       * @return  string
       */
      public static function trim($str, $charlist = NULL)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require Kohana::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _trim($str, $charlist);
      }

      /**
       * Strips whitespace (or other UTF-8 characters) from the beginning of
       * a string. This is a UTF8-aware version of [ltrim](http://php.net/ltrim).
       *
       *     $str = UTF8::ltrim($str);
       *
       * The implementation file is required on the first call only; the work
       * is then delegated to _ltrim().
       *
       * @author  Andreas Gohr <andi@splitbrain.org>
       * @param   string  $str       input string
       * @param   string  $charlist  string of characters to remove
       * @return  string
       */
      public static function ltrim($str, $charlist = NULL)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require Kohana::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _ltrim($str, $charlist);
      }

      /**
       * Strips whitespace (or other UTF-8 characters) from the end of a string.
       * This is a UTF8-aware version of [rtrim](http://php.net/rtrim).
       *
       *     $str = UTF8::rtrim($str);
       *
       * The implementation file is required on the first call only; the work
       * is then delegated to _rtrim().
       *
       * @author  Andreas Gohr <andi@splitbrain.org>
       * @param   string  $str       input string
       * @param   string  $charlist  string of characters to remove
       * @return  string
       */
      public static function rtrim($str, $charlist = NULL)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require Kohana::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _rtrim($str, $charlist);
      }

      /**
       * Returns the unicode ordinal for a character. This is a UTF8-aware
       * version of [ord](http://php.net/ord).
       *
       *     $digit = UTF8::ord($character);
       *
       * The implementation file is required on the first call only; the work
       * is then delegated to _ord().
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string  $chr  UTF-8 encoded character
       * @return  integer
       */
      public static function ord($chr)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require Kohana::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _ord($chr);
      }

      /**
       * Takes an UTF-8 string and returns an array of ints representing the Unicode characters.
       * Astral planes are supported i.e. the ints in the output can be > 0xFFFF.
       * Occurrences of the BOM are ignored. Surrogates are not allowed.
       *
       *     $array = UTF8::to_unicode($str);
       *
       * The Original Code is Mozilla Communicator client code.
       * The Initial Developer of the Original Code is Netscape Communications Corporation.
       * Portions created by the Initial Developer are Copyright (C) 1998 the Initial Developer.
       * Ported to PHP by Henri Sivonen <hsivonen@iki.fi>, see <http://hsivonen.iki.fi/php-utf8/>
       * Slight modifications to fit with phputf8 library by Harry Fuecks <hfuecks@gmail.com>
       *
       * The implementation file is required on the first call only; the work
       * is then delegated to _to_unicode().
       *
       * @param   string  $str  UTF-8 encoded string
       * @return  array   unicode code points
       * @return  FALSE   if the string is invalid
       */
      public static function to_unicode($str)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require Kohana::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _to_unicode($str);
      }

      /**
       * Takes an array of ints representing the Unicode characters and returns a UTF-8 string.
       * Astral planes are supported i.e. the ints in the input can be > 0xFFFF.
       * Occurrences of the BOM are ignored. Surrogates are not allowed.
       *
       *     $str = UTF8::from_unicode($array);
       *
       * The Original Code is Mozilla Communicator client code.
       * The Initial Developer of the Original Code is Netscape Communications Corporation.
       * Portions created by the Initial Developer are Copyright (C) 1998 the Initial Developer.
       * Ported to PHP by Henri Sivonen <hsivonen@iki.fi>, see http://hsivonen.iki.fi/php-utf8/
       * Slight modifications to fit with phputf8 library by Harry Fuecks <hfuecks@gmail.com>.
       *
       * The implementation file is required on the first call only; the work
       * is then delegated to _from_unicode().
       *
       * @param   array    $arr  unicode code points representing a string
       * @return  string   utf8 string of characters
       * @return  boolean  FALSE if a code point cannot be found
       */
      public static function from_unicode($arr)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require Kohana::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _from_unicode($arr);
      }

  }
  // Only decide on native UTF-8 support when nothing has preset the flag:
  // the mbstring extension being loaded is what counts as "native" here.
  if (is_null(Kohana_UTF8::$server_utf8))
  {
      Kohana_UTF8::$server_utf8 = extension_loaded('mbstring');
  }