UTF8.php 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768
  1. <?php
  2. /**
  3. * A port of [phputf8](http://phputf8.sourceforge.net/) to a unified set
  4. * of files. Provides multi-byte aware replacement string functions.
  5. *
  6. * For UTF-8 support to work correctly, the following requirements must be met:
  7. *
  8. * - PCRE needs to be compiled with UTF-8 support (--enable-utf8)
  9. * - Support for [Unicode properties](http://php.net/manual/reference.pcre.pattern.modifiers.php)
  10. * is highly recommended (--enable-unicode-properties)
  11. * - The [mbstring extension](http://php.net/mbstring) is highly recommended,
  12. * but must not be overloading string functions
  13. *
  14. * [!!] This file is licensed differently from the rest of KO7. As a port of
  15. * [phputf8](http://phputf8.sourceforge.net/), this file is released under the LGPL.
  16. *
  17. * @package KO7
  18. * @category Base
  19. *
  20. * @copyright (c) 2007-2016 Kohana Team
  21. * @copyright (c) since 2016 Koseven Team
  22. * @copyright (c) 2005 Harry Fuecks
  23. * @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
  24. */
  class KO7_UTF8 {

      /**
       * @var bool|null Whether the native `mb_*` functions should be used. Starts as
       *                NULL; the check that follows the class definition fills it in
       *                from `extension_loaded('mbstring')` unless it was set earlier.
       */
      public static $server_utf8;

      /**
       * @var array Method names (as keys) whose implementation file has already
       *            been included via `KO7::find_file('utf8', ...)`.
       */
      public static $called = [];

      /**
       * Recursively cleans iterables and strings. Removes ASCII control codes
       * and converts to the requested charset while silently discarding
       * incompatible characters.
       *
       *     UTF8::clean($_GET); // Clean GET data
       *
       * Any iterable (array or Traversable) is rebuilt as a plain array with both
       * keys and values cleaned. Values that are neither iterable nor a non-empty
       * string are returned untouched.
       *
       * [!!] Non-ASCII strings are converted with the `mb_*` functions
       * unconditionally; there is no fallback for servers without mbstring.
       *
       * @param   mixed        $var      variable to clean
       * @param   string|null  $charset  character set, defaults to KO7::$charset
       * @return  mixed
       * @uses    UTF8::strip_ascii_ctrl
       * @uses    UTF8::is_ascii
       */
      public static function clean($var, ?string $charset = NULL)
      {
          if ( ! $charset)
          {
              // Use the application character set
              $charset = KO7::$charset;
          }

          if (is_iterable($var))
          {
              $vars = [];
              foreach ($var as $key => $val)
              {
                  $vars[UTF8::clean($key, $charset)] = UTF8::clean($val, $charset);
              }
              $var = $vars;
          }
          elseif (is_string($var) AND $var !== '')
          {
              // Remove control characters
              $var = UTF8::strip_ascii_ctrl($var);

              if ( ! UTF8::is_ascii($var))
              {
                  // Remember the current substitute character so it can be put back
                  $substitute_character = mb_substitute_character();

                  // Drop illegal characters instead of replacing them with '?'
                  mb_substitute_character('none');

                  try
                  {
                      // Re-encoding onto the same charset strips invalid sequences.
                      // This is expensive, hence only done for non-ASCII input.
                      $var = mb_convert_encoding($var, $charset, $charset);
                  }
                  finally
                  {
                      // Restore the global setting even when the conversion throws
                      // (e.g. unknown charset), so later mb_* calls are unaffected
                      mb_substitute_character($substitute_character);
                  }
              }
          }

          return $var;
      }

      /**
       * Tests whether a string contains only 7-bit ASCII bytes. This is used to
       * determine when to use native functions or UTF-8 functions.
       *
       *     $ascii = UTF8::is_ascii($str);
       *
       * @param   mixed  $str  string or array of strings to check
       * @return  bool
       */
      public static function is_ascii($str)
      {
          if (is_array($str))
          {
              // Check all elements at once by joining them together
              $str = implode($str);
          }

          return ! preg_match('/[^\x00-\x7F]/S', (string) $str);
      }

      /**
       * Strips out device control codes in the ASCII range. Tab (0x09),
       * line feed (0x0A) and carriage return (0x0D) are kept.
       *
       *     $str = UTF8::strip_ascii_ctrl($str);
       *
       * @param   string  $str  string to clean
       * @return  string
       */
      public static function strip_ascii_ctrl($str)
      {
          return preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S', '', (string) $str);
      }

      /**
       * Strips out all non-7bit ASCII bytes.
       *
       *     $str = UTF8::strip_non_ascii($str);
       *
       * @param   string  $str  string to clean
       * @return  string
       */
      public static function strip_non_ascii($str)
      {
          return preg_replace('/[^\x00-\x7F]+/S', '', (string) $str);
      }

      /**
       * Replaces special/accented UTF-8 characters by ASCII-7 "equivalents".
       *
       *     $ascii = UTF8::transliterate_to_ascii($utf8);
       *
       * @author  Andreas Gohr <andi@splitbrain.org>
       * @param   string  $str   string to transliterate
       * @param   int     $case  -1 lowercase only, +1 uppercase only, 0 both cases
       * @return  string
       */
      public static function transliterate_to_ascii($str, int $case = 0)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require KO7::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _transliterate_to_ascii((string) $str, $case);
      }

      /**
       * Returns the length of the given string. This is a UTF8-aware version
       * of [strlen](http://php.net/strlen).
       *
       *     $length = UTF8::strlen($str);
       *
       * @param   string  $str  string being measured for length
       * @return  int
       * @uses    UTF8::$server_utf8
       * @uses    KO7::$charset
       */
      public static function strlen($str)
      {
          if (UTF8::$server_utf8)
          {
              return mb_strlen((string) $str, KO7::$charset);
          }

          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require KO7::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _strlen((string) $str);
      }

      /**
       * Finds position of first occurrence of a UTF-8 string. This is a
       * UTF8-aware version of [strpos](http://php.net/strpos).
       *
       *     $position = UTF8::strpos($str, $search);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string  $str     haystack
       * @param   string  $search  needle
       * @param   int     $offset  offset from which character in haystack to start searching
       * @return  int|false  position of needle, or FALSE if the needle is not found
       * @uses    UTF8::$server_utf8
       * @uses    KO7::$charset
       */
      public static function strpos($str, $search, int $offset = 0)
      {
          if (UTF8::$server_utf8)
          {
              return mb_strpos((string) $str, (string) $search, $offset, KO7::$charset);
          }

          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require KO7::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _strpos((string) $str, (string) $search, $offset);
      }

      /**
       * Finds position of last occurrence of a char in a UTF-8 string. This is
       * a UTF8-aware version of [strrpos](http://php.net/strrpos).
       *
       *     $position = UTF8::strrpos($str, $search);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string  $str     haystack
       * @param   string  $search  needle
       * @param   int     $offset  offset from which character in haystack to start searching
       * @return  int|false  position of needle, or FALSE if the needle is not found
       * @uses    UTF8::$server_utf8
       * @uses    KO7::$charset
       */
      public static function strrpos($str, $search, int $offset = 0)
      {
          if (UTF8::$server_utf8)
          {
              return mb_strrpos((string) $str, (string) $search, $offset, KO7::$charset);
          }

          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require KO7::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _strrpos((string) $str, (string) $search, $offset);
      }

      /**
       * Returns part of a UTF-8 string. This is a UTF8-aware version
       * of [substr](http://php.net/substr).
       *
       *     $sub = UTF8::substr($str, $offset);
       *
       * @author  Chris Smith <chris@jalakai.co.uk>
       * @param   string    $str     input string
       * @param   int       $offset  offset
       * @param   int|null  $length  length limit, NULL for "up to the end of the string"
       * @return  string
       * @uses    UTF8::$server_utf8
       * @uses    KO7::$charset
       */
      public static function substr($str, int $offset, ?int $length = NULL)
      {
          $str = (string) $str;

          if (UTF8::$server_utf8)
          {
              // Without an explicit length, use the full character count so the
              // result always runs to the end of the string
              return mb_substr($str, $offset, $length ?? mb_strlen($str, KO7::$charset), KO7::$charset);
          }

          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require KO7::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _substr($str, $offset, $length);
      }

      /**
       * Replaces text within a portion of a UTF-8 string. This is a UTF8-aware
       * version of [substr_replace](http://php.net/substr_replace).
       *
       *     $str = UTF8::substr_replace($str, $replacement, $offset);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string    $str          input string
       * @param   string    $replacement  replacement string
       * @param   int       $offset       offset
       * @param   int|null  $length       length
       * @return  string
       */
      public static function substr_replace($str, $replacement, int $offset, ?int $length = NULL)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require KO7::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _substr_replace((string) $str, (string) $replacement, $offset, $length);
      }

      /**
       * Makes a UTF-8 string lowercase. This is a UTF8-aware version
       * of [strtolower](http://php.net/strtolower).
       *
       *     $str = UTF8::strtolower($str);
       *
       * @author  Andreas Gohr <andi@splitbrain.org>
       * @param   string  $str  mixed case string
       * @return  string
       * @uses    UTF8::$server_utf8
       * @uses    KO7::$charset
       */
      public static function strtolower($str)
      {
          if (UTF8::$server_utf8)
          {
              return mb_strtolower((string) $str, KO7::$charset);
          }

          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require KO7::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _strtolower((string) $str);
      }

      /**
       * Makes a UTF-8 string uppercase. This is a UTF8-aware version
       * of [strtoupper](http://php.net/strtoupper).
       *
       *     $str = UTF8::strtoupper($str);
       *
       * @author  Andreas Gohr <andi@splitbrain.org>
       * @param   string  $str  mixed case string
       * @return  string
       * @uses    UTF8::$server_utf8
       * @uses    KO7::$charset
       */
      public static function strtoupper($str)
      {
          if (UTF8::$server_utf8)
          {
              return mb_strtoupper((string) $str, KO7::$charset);
          }

          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require KO7::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _strtoupper((string) $str);
      }

      /**
       * Makes a UTF-8 string's first character uppercase. This is a UTF8-aware
       * version of [ucfirst](http://php.net/ucfirst).
       *
       *     $str = UTF8::ucfirst($str);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string  $str  mixed case string
       * @return  string
       */
      public static function ucfirst($str)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require KO7::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _ucfirst((string) $str);
      }

      /**
       * Makes the first character of every word in a UTF-8 string uppercase.
       * This is a UTF8-aware version of [ucwords](http://php.net/ucwords).
       *
       *     $str = UTF8::ucwords($str);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string  $str  mixed case string
       * @return  string
       */
      public static function ucwords($str)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require KO7::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _ucwords((string) $str);
      }

      /**
       * Case-insensitive UTF-8 string comparison. This is a UTF8-aware version
       * of [strcasecmp](http://php.net/strcasecmp).
       *
       *     $compare = UTF8::strcasecmp($str1, $str2);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string  $str1  string to compare
       * @param   string  $str2  string to compare
       * @return  int  less than 0 if str1 is less than str2, greater than 0 if
       *               str1 is greater than str2, 0 if they are equal
       */
      public static function strcasecmp($str1, $str2)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require KO7::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _strcasecmp((string) $str1, (string) $str2);
      }

      /**
       * Returns a string or an array with all occurrences of search in subject
       * (ignoring case) and replaced with the given replace value. This is a
       * UTF8-aware version of [str_ireplace](http://php.net/str_ireplace).
       *
       *     $str = UTF8::str_ireplace($search, $replace, $str, $count);
       *
       * [!!] This function is very slow compared to the native version. Avoid
       * using it when possible.
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string|array  $search   text to replace
       * @param   string|array  $replace  replacement text
       * @param   string|array  $str      subject text
       * @param   int|null      $count    number of matched and replaced needles will be
       *                                  returned via this parameter which is passed by
       *                                  reference; a not-yet-defined variable is accepted
       * @return  string|array  a string if the input was a string, an array if it was an array
       */
      public static function str_ireplace($search, $replace, $str, ?int & $count = 0)
      {
          // $count is nullable on purpose: a by-reference argument that does not
          // exist yet arrives as NULL, which a plain `int` declaration rejects.

          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require KO7::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _str_ireplace($search, $replace, $str, $count);
      }

      /**
       * Case-insensitive UTF-8 version of strstr. Returns all of input string
       * from the first occurrence of needle to the end. This is a UTF8-aware
       * version of [stristr](http://php.net/stristr).
       *
       *     $found = UTF8::stristr($str, $search);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string  $str     input string
       * @param   string  $search  needle
       * @return  string|false  matched substring if found, FALSE if it was not found
       */
      public static function stristr($str, $search)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require KO7::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _stristr((string) $str, (string) $search);
      }

      /**
       * Finds the length of the initial segment matching mask. This is a
       * UTF8-aware version of [strspn](http://php.net/strspn).
       *
       *     $found = UTF8::strspn($str, $mask);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string    $str     input string
       * @param   string    $mask    mask for search
       * @param   int|null  $offset  start position of the string to examine
       * @param   int|null  $length  length of the string to examine
       * @return  int  length of the initial segment that contains characters in the mask
       */
      public static function strspn($str, $mask, ?int $offset = NULL, ?int $length = NULL)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require KO7::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _strspn((string) $str, (string) $mask, $offset, $length);
      }

      /**
       * Finds the length of the initial segment not matching mask. This is a
       * UTF8-aware version of [strcspn](http://php.net/strcspn).
       *
       *     $found = UTF8::strcspn($str, $mask);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string    $str     input string
       * @param   string    $mask    mask for search
       * @param   int|null  $offset  start position of the string to examine
       * @param   int|null  $length  length of the string to examine
       * @return  int  length of the initial segment that contains characters not in the mask
       */
      public static function strcspn($str, $mask, ?int $offset = NULL, ?int $length = NULL)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require KO7::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _strcspn((string) $str, (string) $mask, $offset, $length);
      }

      /**
       * Pads a UTF-8 string to a certain length with another string. This is a
       * UTF8-aware version of [str_pad](http://php.net/str_pad).
       *
       *     $str = UTF8::str_pad($str, $length);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string  $str               input string
       * @param   int     $final_str_length  desired string length after padding
       * @param   string  $pad_str           string to use as padding
       * @param   int     $pad_type          padding type: STR_PAD_RIGHT, STR_PAD_LEFT, or STR_PAD_BOTH
       * @return  string
       */
      public static function str_pad($str, int $final_str_length, string $pad_str = ' ', int $pad_type = STR_PAD_RIGHT)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require KO7::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _str_pad((string) $str, $final_str_length, $pad_str, $pad_type);
      }

      /**
       * Converts a UTF-8 string to an array. This is a UTF8-aware version of
       * [str_split](http://php.net/str_split).
       *
       *     $array = UTF8::str_split($str);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string  $str           input string
       * @param   int     $split_length  maximum length of each chunk
       * @return  array
       */
      public static function str_split($str, int $split_length = 1)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require KO7::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _str_split((string) $str, $split_length);
      }

      /**
       * Reverses a UTF-8 string. This is a UTF8-aware version of [strrev](http://php.net/strrev).
       *
       *     $str = UTF8::strrev($str);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string  $str  string to be reversed
       * @return  string
       */
      public static function strrev($str)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require KO7::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _strrev((string) $str);
      }

      /**
       * Strips whitespace (or other UTF-8 characters) from the beginning and
       * end of a string. This is a UTF8-aware version of [trim](http://php.net/trim).
       *
       *     $str = UTF8::trim($str);
       *
       * @author  Andreas Gohr <andi@splitbrain.org>
       * @param   string       $str       input string
       * @param   string|null  $charlist  string of characters to remove
       * @return  string
       */
      public static function trim($str, ?string $charlist = NULL)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require KO7::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _trim((string) $str, $charlist);
      }

      /**
       * Strips whitespace (or other UTF-8 characters) from the beginning of
       * a string. This is a UTF8-aware version of [ltrim](http://php.net/ltrim).
       *
       *     $str = UTF8::ltrim($str);
       *
       * @author  Andreas Gohr <andi@splitbrain.org>
       * @param   string       $str       input string
       * @param   string|null  $charlist  string of characters to remove
       * @return  string
       */
      public static function ltrim($str, ?string $charlist = NULL)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require KO7::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _ltrim((string) $str, $charlist);
      }

      /**
       * Strips whitespace (or other UTF-8 characters) from the end of a string.
       * This is a UTF8-aware version of [rtrim](http://php.net/rtrim).
       *
       *     $str = UTF8::rtrim($str);
       *
       * @author  Andreas Gohr <andi@splitbrain.org>
       * @param   string       $str       input string
       * @param   string|null  $charlist  string of characters to remove
       * @return  string
       */
      public static function rtrim($str, ?string $charlist = NULL)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require KO7::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _rtrim((string) $str, $charlist);
      }

      /**
       * Returns the unicode ordinal for a character. This is a UTF8-aware
       * version of [ord](http://php.net/ord).
       *
       *     $digit = UTF8::ord($character);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string  $chr  UTF-8 encoded character
       * @return  int
       */
      public static function ord($chr)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require KO7::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _ord((string) $chr);
      }

      /**
       * Takes an UTF-8 string and returns an array of ints representing the Unicode characters.
       * Astral planes are supported i.e. the ints in the output can be > 0xFFFF.
       * Occurrences of the BOM are ignored. Surrogates are not allowed.
       *
       *     $array = UTF8::to_unicode($str);
       *
       * The Original Code is Mozilla Communicator client code.
       * The Initial Developer of the Original Code is Netscape Communications Corporation.
       * Portions created by the Initial Developer are Copyright (C) 1998 the Initial Developer.
       * Ported to PHP by Henri Sivonen <hsivonen@iki.fi>, see <http://hsivonen.iki.fi/php-utf8/>
       * Slight modifications to fit with phputf8 library by Harry Fuecks <hfuecks@gmail.com>
       *
       * @param   string  $str  UTF-8 encoded string
       * @return  array|false  unicode code points, or FALSE if the string is invalid
       */
      public static function to_unicode($str)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require KO7::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _to_unicode((string) $str);
      }

      /**
       * Takes an array of ints representing the Unicode characters and returns a UTF-8 string.
       * Astral planes are supported i.e. the ints in the input can be > 0xFFFF.
       * Occurrences of the BOM are ignored. Surrogates are not allowed.
       *
       *     $str = UTF8::from_unicode($array);
       *
       * The Original Code is Mozilla Communicator client code.
       * The Initial Developer of the Original Code is Netscape Communications Corporation.
       * Portions created by the Initial Developer are Copyright (C) 1998 the Initial Developer.
       * Ported to PHP by Henri Sivonen <hsivonen@iki.fi>, see <http://hsivonen.iki.fi/php-utf8/>
       * Slight modifications to fit with phputf8 library by Harry Fuecks <hfuecks@gmail.com>.
       *
       * @param   array  $arr  unicode code points representing a string
       * @return  string|false  UTF-8 string of characters, or FALSE if a code point cannot be found
       */
      public static function from_unicode($arr)
      {
          if ( ! isset(UTF8::$called[__FUNCTION__]))
          {
              require KO7::find_file('utf8', __FUNCTION__);

              // Function has been called
              UTF8::$called[__FUNCTION__] = TRUE;
          }

          return _from_unicode($arr);
      }

  }
  // Decide once, at load time, whether the native mb_* functions can be used.
  // A value assigned before this point (TRUE or FALSE) is left untouched; only
  // an unset (NULL) flag is filled in from the presence of the mbstring extension.
  KO7_UTF8::$server_utf8 = KO7_UTF8::$server_utf8 ?? extension_loaded('mbstring');