UTF8.php 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818
  1. <?php
  2. /**
  3. * A port of [phputf8](http://phputf8.sourceforge.net/) to a unified set
  4. * of files. Provides multi-byte aware replacement string functions.
  5. *
  6. * For UTF-8 support to work correctly, the following requirements must be met:
  7. *
  8. * - PCRE needs to be compiled with UTF-8 support (--enable-utf8)
  9. * - Support for [Unicode properties](http://php.net/manual/reference.pcre.pattern.modifiers.php)
  10. * is highly recommended (--enable-unicode-properties)
  11. * - The [mbstring extension](http://php.net/mbstring) is highly recommended,
  12. * but must not be overloading string functions
  13. *
  14. * [!!] This file is licensed differently from the rest of Kohana. As a port of
  15. * [phputf8](http://phputf8.sourceforge.net/), this file is released under the LGPL.
  16. *
  17. * @package Kohana
  18. * @category Base
  19. * @author Kohana Team
  20. * @copyright (c) Kohana Team
  21. * @copyright (c) 2005 Harry Fuecks
  22. * @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
  23. */
  24. class Kohana_UTF8 {
  25. /**
  26. * @var boolean Does the server support UTF-8 natively?
  27. */
  28. public static $server_utf8 = NULL;
  29. /**
  30. * @var array List of called methods that have had their required file included.
  31. */
  32. public static $called = [];
  /**
   * Recursively cleans arrays, objects, and strings. Removes ASCII control
   * codes and converts to the requested charset while silently discarding
   * incompatible characters.
   *
   *     UTF8::clean($_GET); // Clean GET data
   *
   * Arrays and objects are walked with foreach and both keys and values are
   * cleaned. The requested charset is applied at every nesting level. When
   * cleaning changes a key, the value is stored under the cleaned key; the
   * entry under the original key is not removed. Values that are neither
   * arrays, objects, nor non-empty strings are returned unchanged.
   *
   * @param   mixed   $var      variable to clean
   * @param   string  $charset  character set, defaults to Kohana::$charset
   * @return  mixed
   * @uses    UTF8::clean
   * @uses    UTF8::strip_ascii_ctrl
   * @uses    UTF8::is_ascii
   */
  public static function clean($var, $charset = NULL)
  {
      if ( ! $charset)
      {
          // Use the application character set
          $charset = Kohana::$charset;
      }

      if (is_array($var) OR $var instanceof ArrayAccess)
      {
          foreach ($var as $key => $val)
          {
              // Recursion! The charset is passed along so nested data is
              // converted with the charset the caller asked for
              $var[UTF8::clean($key, $charset)] = UTF8::clean($val, $charset);
          }
      }
      elseif (is_object($var))
      {
          foreach ($var as $key => $val)
          {
              // Objects without ArrayAccess do not support array syntax,
              // so the cleaned value is written back as a property
              $var->{UTF8::clean($key, $charset)} = UTF8::clean($val, $charset);
          }
      }
      elseif (is_string($var) AND $var !== '')
      {
          // Remove control characters
          $var = UTF8::strip_ascii_ctrl($var);

          if ( ! UTF8::is_ascii($var))
          {
              // Remember the current substitute character so it can be restored
              $mb_substitute_character = mb_substitute_character();

              // Drop illegal characters instead of replacing them with '?'
              mb_substitute_character('none');

              // Converting from the charset to itself discards invalid byte
              // sequences; this is expensive, so it only runs for non-ASCII input
              $var = mb_convert_encoding($var, $charset, $charset);

              // Restore the previous substitute character setting
              mb_substitute_character($mb_substitute_character);
          }
      }

      return $var;
  }
  /**
   * Reports whether the input consists solely of 7-bit ASCII bytes. Callers
   * use this to choose between native functions and UTF-8 aware functions.
   *
   *     $ascii = UTF8::is_ascii($str);
   *
   * NULL is treated as an empty string. An array is joined into a single
   * string before it is checked.
   *
   * @param   mixed  $str  string or array of strings to check
   * @return  boolean
   */
  public static function is_ascii($str)
  {
      if ($str === NULL)
      {
          // Nothing to inspect; behaves like an empty string
          $str = '';
      }
      elseif (is_array($str))
      {
          // Check all elements in one pass
          $str = implode('', $str);
      }

      // Look for any byte outside the 0x00-0x7F range
      $has_high_bytes = preg_match('/[^\x00-\x7F]/S', $str);

      return ! $has_high_bytes;
  }
  /**
   * Removes device control codes in the ASCII range from a string. Tab,
   * line feed and carriage return are not part of the removed set.
   *
   *     $str = UTF8::strip_ascii_ctrl($str);
   *
   * @param   string  $str  string to clean; NULL is treated as ''
   * @return  string
   */
  public static function strip_ascii_ctrl($str)
  {
      if ($str === NULL)
      {
          $str = '';
      }

      // Delete every run of control bytes
      $stripped = preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S', '', $str);

      return $stripped;
  }
  /**
   * Removes every byte that is not 7-bit ASCII from a string.
   *
   *     $str = UTF8::strip_non_ascii($str);
   *
   * @param   string  $str  string to clean; NULL is treated as ''
   * @return  string
   */
  public static function strip_non_ascii($str)
  {
      if ($str === NULL)
      {
          $str = '';
      }

      // Delete every run of bytes above 0x7F
      $ascii_only = preg_replace('/[^\x00-\x7F]+/S', '', $str);

      return $ascii_only;
  }
  /**
   * Replaces special/accented UTF-8 characters by ASCII-7 "equivalents".
   *
   *     $ascii = UTF8::transliterate_to_ascii($utf8);
   *
   * On first use the file located by Kohana::find_file('utf8', ...) for this
   * method is included; the work is delegated to _transliterate_to_ascii().
   *
   * @author  Andreas Gohr <andi@splitbrain.org>
   * @param   string   $str   string to transliterate; NULL is treated as ''
   * @param   integer  $case  -1 lowercase only, +1 uppercase only, 0 both cases
   * @return  string
   */
  public static function transliterate_to_ascii($str, $case = 0)
  {
      // Normalise NULL to an empty string, as the sibling wrappers do
      $str = $str ?? '';

      if ( ! isset(UTF8::$called[__FUNCTION__]))
      {
          require Kohana::find_file('utf8', __FUNCTION__);

          // Function has been called
          UTF8::$called[__FUNCTION__] = TRUE;
      }

      return _transliterate_to_ascii($str, $case);
  }
  /**
   * Returns the length of the given string. This is a UTF8-aware version
   * of [strlen](http://php.net/strlen).
   *
   *     $length = UTF8::strlen($str);
   *
   * Uses mb_strlen() with Kohana::$charset when UTF8::$server_utf8 is set;
   * otherwise the fallback file is included once and _strlen() is called.
   *
   * @param   string  $str  string being measured for length; NULL is treated as ''
   * @return  integer
   * @uses    UTF8::$server_utf8
   * @uses    Kohana::$charset
   */
  public static function strlen($str)
  {
      // Normalise NULL to an empty string, as the sibling wrappers do, so
      // NULL is never handed to mb_strlen()
      $str = $str ?? '';

      if (UTF8::$server_utf8)
          return mb_strlen($str, Kohana::$charset);

      if ( ! isset(UTF8::$called[__FUNCTION__]))
      {
          require Kohana::find_file('utf8', __FUNCTION__);

          // Function has been called
          UTF8::$called[__FUNCTION__] = TRUE;
      }

      return _strlen($str);
  }
  /**
   * Locates the first occurrence of a needle in a UTF-8 string. UTF8-aware
   * counterpart of [strpos](http://php.net/strpos).
   *
   *     $position = UTF8::strpos($str, $search);
   *
   * NULL arguments are treated as empty strings. mb_strpos() is used when
   * UTF8::$server_utf8 is set; otherwise the work is handed to _strpos().
   *
   * @author  Harry Fuecks <hfuecks@gmail.com>
   * @param   string   $str     haystack
   * @param   string   $search  needle
   * @param   integer  $offset  offset from which character in haystack to start searching
   * @return  integer  position of needle
   * @return  boolean  FALSE if the needle is not found
   * @uses    UTF8::$server_utf8
   * @uses    Kohana::$charset
   */
  public static function strpos($str, $search, $offset = 0)
  {
      if ($str === NULL)
      {
          $str = '';
      }

      if ($search === NULL)
      {
          $search = '';
      }

      // Native multibyte path
      if (UTF8::$server_utf8)
      {
          return mb_strpos($str, $search, $offset, Kohana::$charset);
      }

      // Fallback path: include the implementation file on first use only
      if (isset(UTF8::$called[__FUNCTION__]) === FALSE)
      {
          require Kohana::find_file('utf8', __FUNCTION__);
          UTF8::$called[__FUNCTION__] = TRUE;
      }

      return _strpos($str, $search, $offset);
  }
  /**
   * Locates the last occurrence of a needle in a UTF-8 string. UTF8-aware
   * counterpart of [strrpos](http://php.net/strrpos).
   *
   *     $position = UTF8::strrpos($str, $search);
   *
   * NULL arguments are treated as empty strings. mb_strrpos() is used when
   * UTF8::$server_utf8 is set; otherwise the work is handed to _strrpos().
   *
   * @author  Harry Fuecks <hfuecks@gmail.com>
   * @param   string   $str     haystack
   * @param   string   $search  needle
   * @param   integer  $offset  offset from which character in haystack to start searching
   * @return  integer  position of needle
   * @return  boolean  FALSE if the needle is not found
   * @uses    UTF8::$server_utf8
   * @uses    Kohana::$charset
   */
  public static function strrpos($str, $search, $offset = 0)
  {
      if ($str === NULL)
      {
          $str = '';
      }

      if ($search === NULL)
      {
          $search = '';
      }

      // Native multibyte path
      if (UTF8::$server_utf8)
      {
          return mb_strrpos($str, $search, $offset, Kohana::$charset);
      }

      // Fallback path: include the implementation file on first use only
      if (isset(UTF8::$called[__FUNCTION__]) === FALSE)
      {
          require Kohana::find_file('utf8', __FUNCTION__);
          UTF8::$called[__FUNCTION__] = TRUE;
      }

      return _strrpos($str, $search, $offset);
  }
  /**
   * Returns part of a UTF-8 string. This is a UTF8-aware version
   * of [substr](http://php.net/substr).
   *
   *     $sub = UTF8::substr($str, $offset);
   *
   * Uses mb_substr() with Kohana::$charset when UTF8::$server_utf8 is set;
   * otherwise the fallback file is included once and _substr() is called.
   *
   * @author  Chris Smith <chris@jalakai.co.uk>
   * @param   string   $str     input string; NULL is treated as ''
   * @param   integer  $offset  offset
   * @param   integer  $length  length limit; NULL means up to the end of the string
   * @return  string
   * @uses    UTF8::$server_utf8
   * @uses    Kohana::$charset
   */
  public static function substr($str, $offset, $length = NULL)
  {
      $str = $str ?? '';

      if (UTF8::$server_utf8)
      {
          // mb_substr() reads a NULL length as "to the end of the string",
          // so no separate length calculation (which previously ignored
          // Kohana::$charset) is needed
          return mb_substr($str, $offset, $length, Kohana::$charset);
      }

      if ( ! isset(UTF8::$called[__FUNCTION__]))
      {
          require Kohana::find_file('utf8', __FUNCTION__);

          // Function has been called
          UTF8::$called[__FUNCTION__] = TRUE;
      }

      return _substr($str, $offset, $length);
  }
  /**
   * Replaces text within a portion of a UTF-8 string. This is a UTF8-aware
   * version of [substr_replace](http://php.net/substr_replace).
   *
   *     $str = UTF8::substr_replace($str, $replacement, $offset);
   *
   * On first use the file located by Kohana::find_file('utf8', ...) for this
   * method is included; the work is delegated to _substr_replace().
   *
   * @author  Harry Fuecks <hfuecks@gmail.com>
   * @param   string   $str          input string; NULL is treated as ''
   * @param   string   $replacement  replacement string; NULL is treated as ''
   * @param   integer  $offset       offset
   * @param   integer  $length       passed through to _substr_replace() unchanged (NULL by default)
   * @return  string
   */
  public static function substr_replace($str, $replacement, $offset, $length = NULL)
  {
      // Normalise NULL to an empty string, as the sibling wrappers do
      $str = $str ?? '';
      $replacement = $replacement ?? '';

      if ( ! isset(UTF8::$called[__FUNCTION__]))
      {
          require Kohana::find_file('utf8', __FUNCTION__);

          // Function has been called
          UTF8::$called[__FUNCTION__] = TRUE;
      }

      return _substr_replace($str, $replacement, $offset, $length);
  }
  /**
   * Converts a UTF-8 string to lowercase. UTF8-aware counterpart of
   * [strtolower](http://php.net/strtolower).
   *
   *     $str = UTF8::strtolower($str);
   *
   * mb_strtolower() is used when UTF8::$server_utf8 is set; otherwise the
   * work is handed to _strtolower().
   *
   * @author  Andreas Gohr <andi@splitbrain.org>
   * @param   string  $str  mixed case string; NULL is treated as ''
   * @return  string
   * @uses    UTF8::$server_utf8
   * @uses    Kohana::$charset
   */
  public static function strtolower($str)
  {
      if ($str === NULL)
      {
          $str = '';
      }

      // Native multibyte path
      if (UTF8::$server_utf8)
      {
          return mb_strtolower($str, Kohana::$charset);
      }

      // Fallback path: include the implementation file on first use only
      if (isset(UTF8::$called[__FUNCTION__]) === FALSE)
      {
          require Kohana::find_file('utf8', __FUNCTION__);
          UTF8::$called[__FUNCTION__] = TRUE;
      }

      return _strtolower($str);
  }
  /**
   * Converts a UTF-8 string to uppercase. UTF8-aware counterpart of
   * [strtoupper](http://php.net/strtoupper).
   *
   *     $str = UTF8::strtoupper($str);
   *
   * mb_strtoupper() is used when UTF8::$server_utf8 is set; otherwise the
   * work is handed to _strtoupper().
   *
   * @author  Andreas Gohr <andi@splitbrain.org>
   * @param   string  $str  mixed case string; NULL is treated as ''
   * @return  string
   * @uses    UTF8::$server_utf8
   * @uses    Kohana::$charset
   */
  public static function strtoupper($str)
  {
      if ($str === NULL)
      {
          $str = '';
      }

      // Native multibyte path
      if (UTF8::$server_utf8)
      {
          return mb_strtoupper($str, Kohana::$charset);
      }

      // Fallback path: include the implementation file on first use only
      if (isset(UTF8::$called[__FUNCTION__]) === FALSE)
      {
          require Kohana::find_file('utf8', __FUNCTION__);
          UTF8::$called[__FUNCTION__] = TRUE;
      }

      return _strtoupper($str);
  }
  /**
   * Uppercases the first character of a UTF-8 string. UTF8-aware counterpart
   * of [ucfirst](http://php.net/ucfirst).
   *
   *     $str = UTF8::ucfirst($str);
   *
   * The work is delegated to _ucfirst(), whose file is included on first use.
   *
   * @author  Harry Fuecks <hfuecks@gmail.com>
   * @param   string  $str  mixed case string; NULL is treated as ''
   * @return  string
   */
  public static function ucfirst($str)
  {
      if ($str === NULL)
      {
          $str = '';
      }

      // Include the implementation file on first use only
      if (isset(UTF8::$called[__FUNCTION__]) === FALSE)
      {
          require Kohana::find_file('utf8', __FUNCTION__);
          UTF8::$called[__FUNCTION__] = TRUE;
      }

      return _ucfirst($str);
  }
  /**
   * Uppercases the first character of every word in a UTF-8 string.
   * UTF8-aware counterpart of [ucwords](http://php.net/ucwords).
   *
   *     $str = UTF8::ucwords($str);
   *
   * The work is delegated to _ucwords(), whose file is included on first use.
   *
   * @author  Harry Fuecks <hfuecks@gmail.com>
   * @param   string  $str  mixed case string; NULL is treated as ''
   * @return  string
   */
  public static function ucwords($str)
  {
      if ($str === NULL)
      {
          $str = '';
      }

      // Include the implementation file on first use only
      if (isset(UTF8::$called[__FUNCTION__]) === FALSE)
      {
          require Kohana::find_file('utf8', __FUNCTION__);
          UTF8::$called[__FUNCTION__] = TRUE;
      }

      return _ucwords($str);
  }
  /**
   * Compares two UTF-8 strings without regard to case. UTF8-aware
   * counterpart of [strcasecmp](http://php.net/strcasecmp).
   *
   *     $compare = UTF8::strcasecmp($str1, $str2);
   *
   * NULL arguments are treated as empty strings. The work is delegated to
   * _strcasecmp(), whose file is included on first use.
   *
   * @author  Harry Fuecks <hfuecks@gmail.com>
   * @param   string  $str1  string to compare
   * @param   string  $str2  string to compare
   * @return  integer  less than 0 if str1 is less than str2
   * @return  integer  greater than 0 if str1 is greater than str2
   * @return  integer  0 if they are equal
   */
  public static function strcasecmp($str1, $str2)
  {
      if ($str1 === NULL)
      {
          $str1 = '';
      }

      if ($str2 === NULL)
      {
          $str2 = '';
      }

      // Include the implementation file on first use only
      if (isset(UTF8::$called[__FUNCTION__]) === FALSE)
      {
          require Kohana::find_file('utf8', __FUNCTION__);
          UTF8::$called[__FUNCTION__] = TRUE;
      }

      return _strcasecmp($str1, $str2);
  }
  /**
   * Replaces all occurrences of search in subject, ignoring case, with the
   * given replace value and returns a string or an array. UTF8-aware
   * counterpart of [str_ireplace](http://php.net/str_ireplace).
   *
   * [!!] This function is very slow compared to the native version. Avoid
   * using it when possible.
   *
   * NULL values for search, replace and subject are treated as empty strings.
   * The work is delegated to _str_ireplace(), whose file is included on
   * first use.
   *
   * @author  Harry Fuecks <hfuecks@gmail.com>
   * @param   string|array  $search   text to replace
   * @param   string|array  $replace  replacement text
   * @param   string|array  $str      subject text
   * @param   integer       $count    number of matched and replaced needles will be returned via this parameter which is passed by reference
   * @return  string  if the input was a string
   * @return  array   if the input was an array
   */
  public static function str_ireplace($search, $replace, $str, & $count = NULL)
  {
      if ($search === NULL)
      {
          $search = '';
      }

      if ($replace === NULL)
      {
          $replace = '';
      }

      if ($str === NULL)
      {
          $str = '';
      }

      // Include the implementation file on first use only
      if (isset(UTF8::$called[__FUNCTION__]) === FALSE)
      {
          require Kohana::find_file('utf8', __FUNCTION__);
          UTF8::$called[__FUNCTION__] = TRUE;
      }

      return _str_ireplace($search, $replace, $str, $count);
  }
  /**
   * Case-insensitive UTF-8 version of strstr. Returns the input string from
   * the first occurrence of the needle to the end. UTF8-aware counterpart of
   * [stristr](http://php.net/stristr).
   *
   *     $found = UTF8::stristr($str, $search);
   *
   * NULL arguments are treated as empty strings. The work is delegated to
   * _stristr(), whose file is included on first use.
   *
   * @author  Harry Fuecks <hfuecks@gmail.com>
   * @param   string  $str     input string
   * @param   string  $search  needle
   * @return  string  matched substring if found
   * @return  FALSE   if the substring was not found
   */
  public static function stristr($str, $search)
  {
      if ($str === NULL)
      {
          $str = '';
      }

      if ($search === NULL)
      {
          $search = '';
      }

      // Include the implementation file on first use only
      if (isset(UTF8::$called[__FUNCTION__]) === FALSE)
      {
          require Kohana::find_file('utf8', __FUNCTION__);
          UTF8::$called[__FUNCTION__] = TRUE;
      }

      return _stristr($str, $search);
  }
  /**
   * Measures the initial segment of a string that matches the mask.
   * UTF8-aware counterpart of [strspn](http://php.net/strspn).
   *
   *     $found = UTF8::strspn($str, $mask);
   *
   * A NULL string or mask is treated as an empty string. The work is
   * delegated to _strspn(), whose file is included on first use.
   *
   * @author  Harry Fuecks <hfuecks@gmail.com>
   * @param   string   $str     input string
   * @param   string   $mask    mask for search
   * @param   integer  $offset  start position of the string to examine
   * @param   integer  $length  length of the string to examine
   * @return  integer  length of the initial segment that contains characters in the mask
   */
  public static function strspn($str, $mask, $offset = NULL, $length = NULL)
  {
      if ($str === NULL)
      {
          $str = '';
      }

      if ($mask === NULL)
      {
          $mask = '';
      }

      // Include the implementation file on first use only
      if (isset(UTF8::$called[__FUNCTION__]) === FALSE)
      {
          require Kohana::find_file('utf8', __FUNCTION__);
          UTF8::$called[__FUNCTION__] = TRUE;
      }

      return _strspn($str, $mask, $offset, $length);
  }
  /**
   * Measures the initial segment of a string that does not match the mask.
   * UTF8-aware counterpart of [strcspn](http://php.net/strcspn).
   *
   *     $found = UTF8::strcspn($str, $mask);
   *
   * A NULL string or mask is treated as an empty string. The work is
   * delegated to _strcspn(), whose file is included on first use.
   *
   * @author  Harry Fuecks <hfuecks@gmail.com>
   * @param   string   $str     input string
   * @param   string   $mask    mask for search
   * @param   integer  $offset  start position of the string to examine
   * @param   integer  $length  length of the string to examine
   * @return  integer  length of the initial segment that contains characters not in the mask
   */
  public static function strcspn($str, $mask, $offset = NULL, $length = NULL)
  {
      if ($str === NULL)
      {
          $str = '';
      }

      if ($mask === NULL)
      {
          $mask = '';
      }

      // Include the implementation file on first use only
      if (isset(UTF8::$called[__FUNCTION__]) === FALSE)
      {
          require Kohana::find_file('utf8', __FUNCTION__);
          UTF8::$called[__FUNCTION__] = TRUE;
      }

      return _strcspn($str, $mask, $offset, $length);
  }
  /**
   * Pads a UTF-8 string with another string up to a given length.
   * UTF8-aware counterpart of [str_pad](http://php.net/str_pad).
   *
   *     $str = UTF8::str_pad($str, $length);
   *
   * The work is delegated to _str_pad(), whose file is included on first use.
   *
   * @author  Harry Fuecks <hfuecks@gmail.com>
   * @param   string   $str               input string; NULL is treated as ''
   * @param   integer  $final_str_length  desired string length after padding
   * @param   string   $pad_str           string to use as padding
   * @param   string   $pad_type          padding type: STR_PAD_RIGHT, STR_PAD_LEFT, or STR_PAD_BOTH
   * @return  string
   */
  public static function str_pad($str, $final_str_length, $pad_str = ' ', $pad_type = STR_PAD_RIGHT)
  {
      if ($str === NULL)
      {
          $str = '';
      }

      // Include the implementation file on first use only
      if (isset(UTF8::$called[__FUNCTION__]) === FALSE)
      {
          require Kohana::find_file('utf8', __FUNCTION__);
          UTF8::$called[__FUNCTION__] = TRUE;
      }

      return _str_pad($str, $final_str_length, $pad_str, $pad_type);
  }
  /**
   * Splits a UTF-8 string into an array. UTF8-aware counterpart of
   * [str_split](http://php.net/str_split).
   *
   *     $array = UTF8::str_split($str);
   *
   * The work is delegated to _str_split(), whose file is included on first use.
   *
   * @author  Harry Fuecks <hfuecks@gmail.com>
   * @param   string   $str           input string; NULL is treated as ''
   * @param   integer  $split_length  maximum length of each chunk
   * @return  array
   */
  public static function str_split($str, $split_length = 1)
  {
      if ($str === NULL)
      {
          $str = '';
      }

      // Include the implementation file on first use only
      if (isset(UTF8::$called[__FUNCTION__]) === FALSE)
      {
          require Kohana::find_file('utf8', __FUNCTION__);
          UTF8::$called[__FUNCTION__] = TRUE;
      }

      return _str_split($str, $split_length);
  }
  /**
   * Reverses a UTF-8 string. UTF8-aware counterpart of
   * [strrev](http://php.net/strrev).
   *
   *     $str = UTF8::strrev($str);
   *
   * The work is delegated to _strrev(), whose file is included on first use.
   *
   * @author  Harry Fuecks <hfuecks@gmail.com>
   * @param   string  $str  string to be reversed; NULL is treated as ''
   * @return  string
   */
  public static function strrev($str)
  {
      if ($str === NULL)
      {
          $str = '';
      }

      // Include the implementation file on first use only
      if (isset(UTF8::$called[__FUNCTION__]) === FALSE)
      {
          require Kohana::find_file('utf8', __FUNCTION__);
          UTF8::$called[__FUNCTION__] = TRUE;
      }

      return _strrev($str);
  }
  /**
   * Strips whitespace (or other UTF-8 characters) from both ends of a
   * string. UTF8-aware counterpart of [trim](http://php.net/trim).
   *
   *     $str = UTF8::trim($str);
   *
   * The work is delegated to _trim(), whose file is included on first use.
   *
   * @author  Andreas Gohr <andi@splitbrain.org>
   * @param   string  $str       input string; NULL is treated as ''
   * @param   string  $charlist  string of characters to remove
   * @return  string
   */
  public static function trim($str, $charlist = NULL)
  {
      if ($str === NULL)
      {
          $str = '';
      }

      // Include the implementation file on first use only
      if (isset(UTF8::$called[__FUNCTION__]) === FALSE)
      {
          require Kohana::find_file('utf8', __FUNCTION__);
          UTF8::$called[__FUNCTION__] = TRUE;
      }

      return _trim($str, $charlist);
  }
  /**
   * Strips whitespace (or other UTF-8 characters) from the start of a
   * string. UTF8-aware counterpart of [ltrim](http://php.net/ltrim).
   *
   *     $str = UTF8::ltrim($str);
   *
   * The work is delegated to _ltrim(), whose file is included on first use.
   *
   * @author  Andreas Gohr <andi@splitbrain.org>
   * @param   string  $str       input string; NULL is treated as ''
   * @param   string  $charlist  string of characters to remove
   * @return  string
   */
  public static function ltrim($str, $charlist = NULL)
  {
      if ($str === NULL)
      {
          $str = '';
      }

      // Include the implementation file on first use only
      if (isset(UTF8::$called[__FUNCTION__]) === FALSE)
      {
          require Kohana::find_file('utf8', __FUNCTION__);
          UTF8::$called[__FUNCTION__] = TRUE;
      }

      return _ltrim($str, $charlist);
  }
  /**
   * Strips whitespace (or other UTF-8 characters) from the end of a string.
   * UTF8-aware counterpart of [rtrim](http://php.net/rtrim).
   *
   *     $str = UTF8::rtrim($str);
   *
   * The work is delegated to _rtrim(), whose file is included on first use.
   *
   * @author  Andreas Gohr <andi@splitbrain.org>
   * @param   string  $str       input string; NULL is treated as ''
   * @param   string  $charlist  string of characters to remove
   * @return  string
   */
  public static function rtrim($str, $charlist = NULL)
  {
      if ($str === NULL)
      {
          $str = '';
      }

      // Include the implementation file on first use only
      if (isset(UTF8::$called[__FUNCTION__]) === FALSE)
      {
          require Kohana::find_file('utf8', __FUNCTION__);
          UTF8::$called[__FUNCTION__] = TRUE;
      }

      return _rtrim($str, $charlist);
  }
  /**
   * Returns the unicode ordinal for a character. This is a UTF8-aware
   * version of [ord](http://php.net/ord).
   *
   *     $digit = UTF8::ord($character);
   *
   * On first use the file located by Kohana::find_file('utf8', ...) for this
   * method is included; the work is delegated to _ord().
   *
   * @author  Harry Fuecks <hfuecks@gmail.com>
   * @param   string  $chr  UTF-8 encoded character; NULL is treated as ''
   * @return  integer
   */
  public static function ord($chr)
  {
      // Normalise the actual parameter; the guard previously assigned to an
      // unrelated, undefined variable and so never took effect
      $chr = $chr ?? '';

      if ( ! isset(UTF8::$called[__FUNCTION__]))
      {
          require Kohana::find_file('utf8', __FUNCTION__);

          // Function has been called
          UTF8::$called[__FUNCTION__] = TRUE;
      }

      return _ord($chr);
  }
  /**
   * Converts a UTF-8 string into an array of ints representing its Unicode characters.
   * Astral planes are supported i.e. the ints in the output can be > 0xFFFF.
   * Occurrences of the BOM are ignored. Surrogates are not allowed.
   *
   *     $array = UTF8::to_unicode($str);
   *
   * The Original Code is Mozilla Communicator client code.
   * The Initial Developer of the Original Code is Netscape Communications Corporation.
   * Portions created by the Initial Developer are Copyright (C) 1998 the Initial Developer.
   * Ported to PHP by Henri Sivonen <hsivonen@iki.fi>, see <http://hsivonen.iki.fi/php-utf8/>
   * Slight modifications to fit with phputf8 library by Harry Fuecks <hfuecks@gmail.com>
   *
   * The work is delegated to _to_unicode(), whose file is included on first use.
   *
   * @param   string  $str  UTF-8 encoded string; NULL is treated as ''
   * @return  array   unicode code points
   * @return  FALSE   if the string is invalid
   */
  public static function to_unicode($str)
  {
      if ($str === NULL)
      {
          $str = '';
      }

      // Include the implementation file on first use only
      if (isset(UTF8::$called[__FUNCTION__]) === FALSE)
      {
          require Kohana::find_file('utf8', __FUNCTION__);
          UTF8::$called[__FUNCTION__] = TRUE;
      }

      return _to_unicode($str);
  }
  /**
   * Converts an array of ints representing Unicode characters into a UTF-8 string.
   * Astral planes are supported i.e. the ints in the input can be > 0xFFFF.
   * Occurrences of the BOM are ignored. Surrogates are not allowed.
   *
   *     $str = UTF8::from_unicode($array);
   *
   * The Original Code is Mozilla Communicator client code.
   * The Initial Developer of the Original Code is Netscape Communications Corporation.
   * Portions created by the Initial Developer are Copyright (C) 1998 the Initial Developer.
   * Ported to PHP by Henri Sivonen <hsivonen@iki.fi>, see http://hsivonen.iki.fi/php-utf8/
   * Slight modifications to fit with phputf8 library by Harry Fuecks <hfuecks@gmail.com>.
   *
   * The work is delegated to _from_unicode(), whose file is included on first use.
   *
   * @param   array    $arr  unicode code points representing a string
   * @return  string   utf8 string of characters
   * @return  boolean  FALSE if a code point cannot be found
   */
  public static function from_unicode($arr)
  {
      // Include the implementation file on first use only
      if (isset(UTF8::$called[__FUNCTION__]) === FALSE)
      {
          require Kohana::find_file('utf8', __FUNCTION__);
          UTF8::$called[__FUNCTION__] = TRUE;
      }

      return _from_unicode($arr);
  }
  700. }
  // Detect native UTF-8 support once, unless a value was assigned beforehand
  if (is_null(Kohana_UTF8::$server_utf8))
  {
      // The flag mirrors whether the mbstring extension is loaded
      Kohana_UTF8::$server_utf8 = extension_loaded('mbstring');
  }