<?php
/**
 * Tests the UTF8 class
 *
 * @group ko7
 * @group ko7.core
 * @group ko7.core.utf8
 *
 * @package KO7
 * @category Tests
 *
 * @copyright (c) 2007-2016 Kohana Team
 * @copyright (c) since 2016 Koseven Team
 * @license https://koseven.dev/LICENSE
 */
  16. class KO7_UTF8Test extends Unittest_TestCase
  17. {
  18. /**
  19. * Provides test data for test_clean()
  20. */
  21. public function provider_clean()
  22. {
  23. return [
  24. ["\0", ''],
  25. ["→foo\021", '→foo'],
  26. ["\x7Fbar", 'bar'],
  27. ["\xFF", ''],
  28. ["\x41", 'A'],
  29. [["→foo\021", "\x41"], ['→foo', 'A']],
  30. ];
  31. }
  32. /**
  33. * Tests UTF8::clean
  34. *
  35. * @test
  36. * @dataProvider provider_clean
  37. */
  38. public function test_clean($input, $expected)
  39. {
  40. $this->assertSame($expected, UTF8::clean($input));
  41. }
  42. /**
  43. * Provides test data for test_is_ascii()
  44. */
  45. public function provider_is_ascii()
  46. {
  47. return [
  48. ["\0", TRUE],
  49. ["\$eno\r", TRUE],
  50. ['Señor', FALSE],
  51. [['Se', 'nor'], TRUE],
  52. [['Se', 'ñor'], FALSE],
  53. ];
  54. }
  55. /**
  56. * Tests UTF8::is_ascii
  57. *
  58. * @test
  59. * @dataProvider provider_is_ascii
  60. */
  61. public function test_is_ascii($input, $expected)
  62. {
  63. $this->assertSame($expected, UTF8::is_ascii($input));
  64. }
  65. /**
  66. * Provides test data for test_strip_ascii_ctrl()
  67. */
  68. public function provider_strip_ascii_ctrl()
  69. {
  70. return [
  71. ["\0", ''],
  72. ["→foo\021", '→foo'],
  73. ["\x7Fbar", 'bar'],
  74. ["\xFF", "\xFF"],
  75. ["\x41", 'A'],
  76. ];
  77. }
  78. /**
  79. * Tests UTF8::strip_ascii_ctrl
  80. *
  81. * @test
  82. * @dataProvider provider_strip_ascii_ctrl
  83. */
  84. public function test_strip_ascii_ctrl($input, $expected)
  85. {
  86. $this->assertSame($expected, UTF8::strip_ascii_ctrl($input));
  87. }
  88. /**
  89. * Provides test data for test_strip_non_ascii()
  90. */
  91. public function provider_strip_non_ascii()
  92. {
  93. return [
  94. ["\0\021\x7F", "\0\021\x7F"],
  95. ['I ♥ cocoñùт', 'I coco'],
  96. ];
  97. }
  98. /**
  99. * Tests UTF8::strip_non_ascii
  100. *
  101. * @test
  102. * @dataProvider provider_strip_non_ascii
  103. */
  104. public function test_strip_non_ascii($input, $expected)
  105. {
  106. $this->assertSame($expected, UTF8::strip_non_ascii($input));
  107. }
  108. /**
  109. * Provides test data for test_transliterate_to_ascii()
  110. */
  111. public function provider_transliterate_to_ascii()
  112. {
  113. return [
  114. ['Cocoñùт', -1, 'Coconuт'],
  115. ['COCOÑÙТ', -1, 'COCOÑÙТ'],
  116. ['Cocoñùт', 0, 'Coconuт'],
  117. ['COCOÑÙТ', 0, 'COCONUТ'],
  118. ['Cocoñùт', 1, 'Cocoñùт'],
  119. ['COCOÑÙТ', 1, 'COCONUТ'],
  120. ];
  121. }
  122. /**
  123. * Tests UTF8::transliterate_to_ascii
  124. *
  125. * @test
  126. * @dataProvider provider_transliterate_to_ascii
  127. */
  128. public function test_transliterate_to_ascii($input, $case, $expected)
  129. {
  130. $this->assertSame($expected, UTF8::transliterate_to_ascii($input, $case));
  131. }
  132. /**
  133. * Provides test data for test_strlen()
  134. */
  135. public function provider_strlen()
  136. {
  137. return [
  138. ['Cocoñùт', 7],
  139. ['Coconut', 7],
  140. ];
  141. }
  142. /**
  143. * Tests UTF8::strlen
  144. *
  145. * @test
  146. * @dataProvider provider_strlen
  147. */
  148. public function test_strlen($input, $expected)
  149. {
  150. $this->assertSame($expected, UTF8::strlen($input));
  151. }
  152. /**
  153. * Provides test data for test_strpos()
  154. */
  155. public function provider_strpos()
  156. {
  157. return [
  158. ['Cocoñùт', 'o', 0, 1],
  159. ['Cocoñùт', 'ñ', 1, 4],
  160. ];
  161. }
  162. /**
  163. * Tests UTF8::strpos
  164. *
  165. * @test
  166. * @dataProvider provider_strpos
  167. */
  168. public function test_strpos($input, $str, $offset, $expected)
  169. {
  170. $this->assertSame($expected, UTF8::strpos($input, $str, $offset));
  171. }
  172. /**
  173. * Provides test data for test_strrpos()
  174. */
  175. public function provider_strrpos()
  176. {
  177. return [
  178. ['Cocoñùт', 'o', 0, 3],
  179. ['Cocoñùт', 'ñ', 2, 4],
  180. ];
  181. }
  182. /**
  183. * Tests UTF8::strrpos
  184. *
  185. * @test
  186. * @dataProvider provider_strrpos
  187. */
  188. public function test_strrpos($input, $str, $offset, $expected)
  189. {
  190. $this->assertSame($expected, UTF8::strrpos($input, $str, $offset));
  191. }
  192. /**
  193. * Provides test data for test_substr()
  194. */
  195. public function provider_substr()
  196. {
  197. return [
  198. ['Cocoñùт', 3, 2, 'oñ'],
  199. ['Cocoñùт', 3, 9, 'oñùт'],
  200. ['Cocoñùт', 3, NULL, 'oñùт'],
  201. ['Cocoñùт', 3, -2, 'oñ'],
  202. ];
  203. }
  204. /**
  205. * Tests UTF8::substr
  206. *
  207. * @test
  208. * @dataProvider provider_substr
  209. */
  210. public function test_substr($input, $offset, $length, $expected)
  211. {
  212. $this->assertSame($expected, UTF8::substr($input, $offset, $length));
  213. }
  214. /**
  215. * Provides test data for test_substr_replace()
  216. */
  217. public function provider_substr_replace()
  218. {
  219. return [
  220. ['Cocoñùт', 'šš', 3, 2, 'Cocššùт'],
  221. ['Cocoñùт', 'šš', 3, 9, 'Cocšš'],
  222. ];
  223. }
  224. /**
  225. * Tests UTF8::substr_replace
  226. *
  227. * @test
  228. * @dataProvider provider_substr_replace
  229. */
  230. public function test_substr_replace($input, $replacement, $offset, $length, $expected)
  231. {
  232. $this->assertSame($expected, UTF8::substr_replace($input, $replacement, $offset, $length));
  233. }
  234. /**
  235. * Provides test data for test_strtolower()
  236. */
  237. public function provider_strtolower()
  238. {
  239. return [
  240. ['COCOÑÙТ', 'cocoñùт'],
  241. ['JÄGER', 'jäger'],
  242. ];
  243. }
  244. /**
  245. * Tests UTF8::strtolower
  246. *
  247. * @test
  248. * @dataProvider provider_strtolower
  249. */
  250. public function test_strtolower($input, $expected)
  251. {
  252. $this->assertSame($expected, UTF8::strtolower($input));
  253. }
  254. /**
  255. * Provides test data for test_strtoupper()
  256. */
  257. public function provider_strtoupper()
  258. {
  259. return [
  260. ['Cocoñùт', 'COCOÑÙТ'],
  261. ['jäger', 'JÄGER'],
  262. ];
  263. }
  264. /**
  265. * Tests UTF8::strtoupper
  266. *
  267. * @test
  268. * @dataProvider provider_strtoupper
  269. */
  270. public function test_strtoupper($input, $expected)
  271. {
  272. $this->assertSame($expected, UTF8::strtoupper($input));
  273. }
  274. /**
  275. * Provides test data for test_ucfirst()
  276. */
  277. public function provider_ucfirst()
  278. {
  279. return [
  280. ['ñùт', 'Ñùт'],
  281. ];
  282. }
  283. /**
  284. * Tests UTF8::ucfirst
  285. *
  286. * @test
  287. * @dataProvider provider_ucfirst
  288. */
  289. public function test_ucfirst($input, $expected)
  290. {
  291. $this->assertSame($expected, UTF8::ucfirst($input));
  292. }
  293. /**
  294. * Provides test data for test_strip_non_ascii()
  295. */
  296. public function provider_ucwords()
  297. {
  298. return [
  299. ['ExAmple', 'ExAmple'],
  300. ['i ♥ Cocoñùт', 'I ♥ Cocoñùт'],
  301. ];
  302. }
  303. /**
  304. * Tests UTF8::ucwords
  305. *
  306. * @test
  307. * @dataProvider provider_ucwords
  308. */
  309. public function test_ucwords($input, $expected)
  310. {
  311. $this->assertSame($expected, UTF8::ucwords($input));
  312. }
  313. /**
  314. * Provides test data for test_strcasecmp()
  315. */
  316. public function provider_strcasecmp()
  317. {
  318. //8.2.0 This function now returns -1 or 1, where it previously returned a negative or positive number.
  319. if ( version_compare(PHP_VERSION, '8.2.0', '>=') ) {
  320. return [
  321. ['Cocoñùт', 'Cocoñùт', 0],
  322. ['Čau', 'Čauo', -1],
  323. ['Čau', 'Ča', 1],
  324. ['Cocoñùт', 'Cocoñ', 1],
  325. ['Cocoñùт', 'Coco', 1],
  326. ];
  327. } else {
  328. return [
  329. ['Cocoñùт', 'Cocoñùт', 0],
  330. ['Čau', 'Čauo', -1],
  331. ['Čau', 'Ča', 1],
  332. ['Cocoñùт', 'Cocoñ', 4],
  333. ['Cocoñùт', 'Coco', 6],
  334. ];
  335. }
  336. }
  337. /**
  338. * Tests UTF8::strcasecmp
  339. *
  340. * @test
  341. * @dataProvider provider_strcasecmp
  342. */
  343. public function test_strcasecmp($input, $input2, $expected)
  344. {
  345. $this->assertSame($expected, UTF8::strcasecmp($input, $input2));
  346. }
  347. /**
  348. * Provides test data for test_str_ireplace()
  349. */
  350. public function provider_str_ireplace()
  351. {
  352. return [
  353. ['т', 't', 'cocoñuт', 'cocoñut'],
  354. ['Ñ', 'N', 'cocoñuт', 'cocoNuт'],
  355. [['т', 'Ñ', 'k' => 'k'], ['t', 'N', 'K'], ['cocoñuт'], ['cocoNut']],
  356. [['ñ'], 'n', 'cocoñuт', 'coconuт'],
  357. ];
  358. }
  359. /**
  360. * Tests UTF8::str_ireplace
  361. *
  362. * @test
  363. * @dataProvider provider_str_ireplace
  364. */
  365. public function test_str_ireplace($search, $replace, $subject, $expected)
  366. {
  367. $this->assertSame($expected, UTF8::str_ireplace($search, $replace, $subject));
  368. }
  369. /**
  370. * Provides test data for test_stristr()
  371. */
  372. public function provider_stristr()
  373. {
  374. return [
  375. ['Cocoñùт', 'oñ', 'oñùт'],
  376. ['Cocoñùт', 'o', 'ocoñùт'],
  377. ['Cocoñùт', 'k', FALSE],
  378. ];
  379. }
  380. /**
  381. * Tests UTF8::stristr
  382. *
  383. * @test
  384. * @dataProvider provider_stristr
  385. */
  386. public function test_stristr($input, $input2, $expected)
  387. {
  388. $this->assertSame($expected, UTF8::stristr($input, $input2));
  389. }
  390. /**
  391. * Provides test data for test_strspn()
  392. */
  393. public function provider_strspn()
  394. {
  395. return [
  396. ["foo", "o", 1, 2, 2],
  397. ['Cocoñùт', 'oñ', NULL, NULL, 1],
  398. ['Cocoñùт', 'oñ', 2, 4, 1],
  399. ['Cocoñùт', 'šš', 3, 9, 4],
  400. ];
  401. }
  402. /**
  403. * Tests UTF8::strspn
  404. *
  405. * @test
  406. * @dataProvider provider_strspn
  407. */
  408. public function test_strspn($input, $mask, $offset, $length, $expected)
  409. {
  410. $this->assertSame($expected, UTF8::strspn($input, $mask, $offset, $length));
  411. }
  412. /**
  413. * Provides test data for test_strcspn()
  414. */
  415. public function provider_strcspn()
  416. {
  417. return [
  418. ['Cocoñùт', 'oñ', NULL, NULL, 1],
  419. ['Cocoñùт', 'oñ', 2, 4, 1],
  420. ['Cocoñùт', 'šš', 3, 9, 4],
  421. ];
  422. }
  423. /**
  424. * Tests UTF8::strcspn
  425. *
  426. * @test
  427. * @dataProvider provider_strcspn
  428. */
  429. public function test_strcspn($input, $mask, $offset, $length, $expected)
  430. {
  431. $this->assertSame($expected, UTF8::strcspn($input, $mask, $offset, $length));
  432. }
  433. /**
  434. * Provides test data for test_str_pad()
  435. */
  436. public function provider_str_pad()
  437. {
  438. return [
  439. ['Cocoñùт', 10, 'š', STR_PAD_RIGHT, 'Cocoñùтššš'],
  440. ['Cocoñùт', 10, 'š', STR_PAD_LEFT, 'šššCocoñùт'],
  441. ['Cocoñùт', 10, 'š', STR_PAD_BOTH, 'šCocoñùтšš'],
  442. ];
  443. }
  444. /**
  445. * Tests UTF8::str_pad
  446. *
  447. * @test
  448. * @dataProvider provider_str_pad
  449. */
  450. public function test_str_pad($input, $length, $pad, $type, $expected)
  451. {
  452. $this->assertSame($expected, UTF8::str_pad($input, $length, $pad, $type));
  453. }
  454. /**
  455. * Tests UTF8::str_pad error
  456. * @test
  457. * @throws ValueError
  458. * @throws UTF8_Exception
  459. */
  460. public function test_str_pad_error()
  461. {
  462. {
  463. if ( version_compare(PHP_VERSION, '8.3.0', '>=') ) {
  464. $this->expectException(ValueError::class);
  465. } else {
  466. $this->expectException(UTF8_Exception::class);
  467. }
  468. UTF8::str_pad('Cocoñùт', 10, 'š', 15, 'šCocoñùтšš');
  469. }
  470. }
  471. /**
  472. * Provides test data for test_str_split()
  473. */
  474. public function provider_str_split()
  475. {
  476. return [
  477. ['Bár', 1, ['B', 'á', 'r']],
  478. ['Cocoñùт', 2, ['Co', 'co', 'ñù', 'т']],
  479. ['Cocoñùт', 3, ['Coc', 'oñù', 'т']],
  480. ];
  481. }
  482. /**
  483. * Tests UTF8::str_split
  484. *
  485. * @test
  486. * @dataProvider provider_str_split
  487. */
  488. public function test_str_split($input, $split_length, $expected)
  489. {
  490. $this->assertSame($expected, UTF8::str_split($input, $split_length));
  491. }
  492. /**
  493. * Provides test data for test_strrev()
  494. */
  495. public function provider_strrev()
  496. {
  497. return [
  498. ['Cocoñùт', 'тùñocoC'],
  499. ];
  500. }
  501. /**
  502. * Tests UTF8::strrev
  503. *
  504. * @test
  505. * @dataProvider provider_strrev
  506. */
  507. public function test_strrev($input, $expected)
  508. {
  509. $this->assertSame($expected, UTF8::strrev($input));
  510. }
  511. /**
  512. * Provides test data for test_trim()
  513. */
  514. public function provider_trim()
  515. {
  516. return [
  517. [' bar ', NULL, 'bar'],
  518. ['bar', 'b', 'ar'],
  519. ['barb', 'b', 'ar'],
  520. ];
  521. }
  522. /**
  523. * Tests UTF8::trim
  524. *
  525. * @test
  526. * @dataProvider provider_trim
  527. */
  528. public function test_trim($input, $input2, $expected)
  529. {
  530. $this->assertSame($expected, UTF8::trim($input, $input2));
  531. }
  532. /**
  533. * Provides test data for test_ltrim()
  534. */
  535. public function provider_ltrim()
  536. {
  537. return [
  538. [' bar ', NULL, 'bar '],
  539. ['bar', 'b', 'ar'],
  540. ['barb', 'b', 'arb'],
  541. ['ñùт', 'ñ', 'ùт'],
  542. ];
  543. }
  544. /**
  545. * Tests UTF8::ltrim
  546. *
  547. * @test
  548. * @dataProvider provider_ltrim
  549. */
  550. public function test_ltrim($input, $charlist, $expected)
  551. {
  552. $this->assertSame($expected, UTF8::ltrim($input, $charlist));
  553. }
  554. /**
  555. * Provides test data for test_rtrim()
  556. */
  557. public function provider_rtrim()
  558. {
  559. return [
  560. [' bar ', NULL, ' bar'],
  561. ['bar', 'b', 'bar'],
  562. ['barb', 'b', 'bar'],
  563. ['Cocoñùт', 'т', 'Cocoñù'],
  564. ];
  565. }
  566. /**
  567. * Tests UTF8::rtrim
  568. *
  569. * @test
  570. * @dataProvider provider_rtrim
  571. */
  572. public function test_rtrim($input, $input2, $expected)
  573. {
  574. $this->assertSame($expected, UTF8::rtrim($input, $input2));
  575. }
  576. /**
  577. * Provides test data for test_ord()
  578. */
  579. public function provider_ord()
  580. {
  581. return [
  582. ['f', 102],
  583. ['ñ', 241],
  584. ['Ñ', 209],
  585. ];
  586. }
  587. /**
  588. * Tests UTF8::ord
  589. *
  590. * @test
  591. * @dataProvider provider_ord
  592. */
  593. public function test_ord($input, $expected)
  594. {
  595. $this->assertSame($expected, UTF8::ord($input));
  596. }
  597. }