UTF8Test.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630
  1. <?php
  2. /**
  3. * Tests the UTF8 class
  4. *
  5. * @group ko7
  6. * @group ko7.core
  7. * @group ko7.core.utf8
  8. *
  9. * @package KO7
  10. * @category Tests
  11. *
  12. * @copyright (c) 2007-2016 Kohana Team
  13. * @copyright (c) since 2016 Koseven Team
  14. * @license https://koseven.dev/LICENSE
  15. */
  class KO7_UTF8Test extends Unittest_TestCase
  {
      // NOTE: every data provider below is declared static. None of them touches
      // $this, static providers are accepted by older PHPUnit releases, and
      // PHPUnit 10+ expects data providers to be static.

      /**
       * Provides test data for test_clean()
       *
       * @return array rows of [input, expected]; input is a string or an array of strings
       */
      public static function provider_clean()
      {
          return [
              ["\0", ''],
              ["→foo\021", '→foo'],
              ["\x7Fbar", 'bar'],
              ["\xFF", ''],
              ["\x41", 'A'],
              [["→foo\021", "\x41"], ['→foo', 'A']],
          ];
      }

      /**
       * Tests UTF8::clean
       *
       * @test
       * @dataProvider provider_clean
       *
       * @param string|array $input    value handed to UTF8::clean
       * @param string|array $expected value UTF8::clean must return
       */
      public function test_clean($input, $expected)
      {
          $this->assertSame($expected, UTF8::clean($input));
      }

      /**
       * Provides test data for test_is_ascii()
       *
       * @return array rows of [input, expected]; input is a string or an array of strings
       */
      public static function provider_is_ascii()
      {
          return [
              ["\0", TRUE],
              ["\$eno\r", TRUE],
              ['Señor', FALSE],
              [['Se', 'nor'], TRUE],
              [['Se', 'ñor'], FALSE],
          ];
      }

      /**
       * Tests UTF8::is_ascii
       *
       * @test
       * @dataProvider provider_is_ascii
       *
       * @param string|array $input    value handed to UTF8::is_ascii
       * @param bool         $expected result UTF8::is_ascii must return
       */
      public function test_is_ascii($input, $expected)
      {
          $this->assertSame($expected, UTF8::is_ascii($input));
      }

      /**
       * Provides test data for test_strip_ascii_ctrl()
       *
       * @return array rows of [input, expected]
       */
      public static function provider_strip_ascii_ctrl()
      {
          return [
              ["\0", ''],
              ["→foo\021", '→foo'],
              ["\x7Fbar", 'bar'],
              // Unlike provider_clean(), the lone 0xFF byte is expected to survive here.
              ["\xFF", "\xFF"],
              ["\x41", 'A'],
          ];
      }

      /**
       * Tests UTF8::strip_ascii_ctrl
       *
       * @test
       * @dataProvider provider_strip_ascii_ctrl
       *
       * @param string $input    value handed to UTF8::strip_ascii_ctrl
       * @param string $expected value UTF8::strip_ascii_ctrl must return
       */
      public function test_strip_ascii_ctrl($input, $expected)
      {
          $this->assertSame($expected, UTF8::strip_ascii_ctrl($input));
      }

      /**
       * Provides test data for test_strip_non_ascii()
       *
       * @return array rows of [input, expected]
       */
      public static function provider_strip_non_ascii()
      {
          return [
              ["\0\021\x7F", "\0\021\x7F"],
              ['I ♥ cocoñùт', 'I coco'],
          ];
      }

      /**
       * Tests UTF8::strip_non_ascii
       *
       * @test
       * @dataProvider provider_strip_non_ascii
       *
       * @param string $input    value handed to UTF8::strip_non_ascii
       * @param string $expected value UTF8::strip_non_ascii must return
       */
      public function test_strip_non_ascii($input, $expected)
      {
          $this->assertSame($expected, UTF8::strip_non_ascii($input));
      }

      /**
       * Provides test data for test_transliterate_to_ascii()
       *
       * Judging by the expectations below, case -1 affects only lowercase
       * characters, 1 only uppercase characters and 0 both (confirm against
       * the UTF8 class before relying on this).
       *
       * @return array rows of [input, case, expected]
       */
      public static function provider_transliterate_to_ascii()
      {
          return [
              ['Cocoñùт', -1, 'Coconuт'],
              ['COCOÑÙТ', -1, 'COCOÑÙТ'],
              ['Cocoñùт', 0, 'Coconuт'],
              ['COCOÑÙТ', 0, 'COCONUТ'],
              ['Cocoñùт', 1, 'Cocoñùт'],
              ['COCOÑÙТ', 1, 'COCONUТ'],
          ];
      }

      /**
       * Tests UTF8::transliterate_to_ascii
       *
       * @test
       * @dataProvider provider_transliterate_to_ascii
       *
       * @param string $input    value handed to UTF8::transliterate_to_ascii
       * @param int    $case     case selector passed as the second argument
       * @param string $expected value UTF8::transliterate_to_ascii must return
       */
      public function test_transliterate_to_ascii($input, $case, $expected)
      {
          $this->assertSame($expected, UTF8::transliterate_to_ascii($input, $case));
      }

      /**
       * Provides test data for test_strlen()
       *
       * @return array rows of [input, expected length]
       */
      public static function provider_strlen()
      {
          return [
              ['Cocoñùт', 7],
              ['Coconut', 7],
          ];
      }

      /**
       * Tests UTF8::strlen
       *
       * @test
       * @dataProvider provider_strlen
       *
       * @param string $input    value handed to UTF8::strlen
       * @param int    $expected length UTF8::strlen must return
       */
      public function test_strlen($input, $expected)
      {
          $this->assertSame($expected, UTF8::strlen($input));
      }

      /**
       * Provides test data for test_strpos()
       *
       * @return array rows of [input, search string, offset, expected position]
       */
      public static function provider_strpos()
      {
          return [
              ['Cocoñùт', 'o', 0, 1],
              ['Cocoñùт', 'ñ', 1, 4],
          ];
      }

      /**
       * Tests UTF8::strpos
       *
       * @test
       * @dataProvider provider_strpos
       *
       * @param string $input    string to search in
       * @param string $str      string to search for
       * @param int    $offset   offset passed as the third argument
       * @param int    $expected position UTF8::strpos must return
       */
      public function test_strpos($input, $str, $offset, $expected)
      {
          $this->assertSame($expected, UTF8::strpos($input, $str, $offset));
      }

      /**
       * Provides test data for test_strrpos()
       *
       * @return array rows of [input, search string, offset, expected position]
       */
      public static function provider_strrpos()
      {
          return [
              ['Cocoñùт', 'o', 0, 3],
              ['Cocoñùт', 'ñ', 2, 4],
          ];
      }

      /**
       * Tests UTF8::strrpos
       *
       * @test
       * @dataProvider provider_strrpos
       *
       * @param string $input    string to search in
       * @param string $str      string to search for
       * @param int    $offset   offset passed as the third argument
       * @param int    $expected position UTF8::strrpos must return
       */
      public function test_strrpos($input, $str, $offset, $expected)
      {
          $this->assertSame($expected, UTF8::strrpos($input, $str, $offset));
      }

      /**
       * Provides test data for test_substr()
       *
       * @return array rows of [input, offset, length, expected]
       */
      public static function provider_substr()
      {
          return [
              ['Cocoñùт', 3, 2, 'oñ'],
              ['Cocoñùт', 3, 9, 'oñùт'],
              ['Cocoñùт', 3, NULL, 'oñùт'],
              ['Cocoñùт', 3, -2, 'oñ'],
          ];
      }

      /**
       * Tests UTF8::substr
       *
       * @test
       * @dataProvider provider_substr
       *
       * @param string   $input    value handed to UTF8::substr
       * @param int      $offset   offset passed as the second argument
       * @param int|null $length   length passed as the third argument
       * @param string   $expected value UTF8::substr must return
       */
      public function test_substr($input, $offset, $length, $expected)
      {
          $this->assertSame($expected, UTF8::substr($input, $offset, $length));
      }

      /**
       * Provides test data for test_substr_replace()
       *
       * @return array rows of [input, replacement, offset, length, expected]
       */
      public static function provider_substr_replace()
      {
          return [
              ['Cocoñùт', 'šš', 3, 2, 'Cocššùт'],
              ['Cocoñùт', 'šš', 3, 9, 'Cocšš'],
          ];
      }

      /**
       * Tests UTF8::substr_replace
       *
       * @test
       * @dataProvider provider_substr_replace
       *
       * @param string $input       value handed to UTF8::substr_replace
       * @param string $replacement replacement passed as the second argument
       * @param int    $offset      offset passed as the third argument
       * @param int    $length      length passed as the fourth argument
       * @param string $expected    value UTF8::substr_replace must return
       */
      public function test_substr_replace($input, $replacement, $offset, $length, $expected)
      {
          $this->assertSame($expected, UTF8::substr_replace($input, $replacement, $offset, $length));
      }

      /**
       * Provides test data for test_strtolower()
       *
       * @return array rows of [input, expected]
       */
      public static function provider_strtolower()
      {
          return [
              ['COCOÑÙТ', 'cocoñùт'],
              ['JÄGER', 'jäger'],
          ];
      }

      /**
       * Tests UTF8::strtolower
       *
       * @test
       * @dataProvider provider_strtolower
       *
       * @param string $input    value handed to UTF8::strtolower
       * @param string $expected value UTF8::strtolower must return
       */
      public function test_strtolower($input, $expected)
      {
          $this->assertSame($expected, UTF8::strtolower($input));
      }

      /**
       * Provides test data for test_strtoupper()
       *
       * @return array rows of [input, expected]
       */
      public static function provider_strtoupper()
      {
          return [
              ['Cocoñùт', 'COCOÑÙТ'],
              ['jäger', 'JÄGER'],
          ];
      }

      /**
       * Tests UTF8::strtoupper
       *
       * @test
       * @dataProvider provider_strtoupper
       *
       * @param string $input    value handed to UTF8::strtoupper
       * @param string $expected value UTF8::strtoupper must return
       */
      public function test_strtoupper($input, $expected)
      {
          $this->assertSame($expected, UTF8::strtoupper($input));
      }

      /**
       * Provides test data for test_ucfirst()
       *
       * @return array rows of [input, expected]
       */
      public static function provider_ucfirst()
      {
          return [
              ['ñùт', 'Ñùт'],
          ];
      }

      /**
       * Tests UTF8::ucfirst
       *
       * @test
       * @dataProvider provider_ucfirst
       *
       * @param string $input    value handed to UTF8::ucfirst
       * @param string $expected value UTF8::ucfirst must return
       */
      public function test_ucfirst($input, $expected)
      {
          $this->assertSame($expected, UTF8::ucfirst($input));
      }

      /**
       * Provides test data for test_ucwords()
       *
       * @return array rows of [input, expected]
       */
      public static function provider_ucwords()
      {
          return [
              ['ExAmple', 'ExAmple'],
              ['i ♥ Cocoñùт', 'I ♥ Cocoñùт'],
          ];
      }

      /**
       * Tests UTF8::ucwords
       *
       * @test
       * @dataProvider provider_ucwords
       *
       * @param string $input    value handed to UTF8::ucwords
       * @param string $expected value UTF8::ucwords must return
       */
      public function test_ucwords($input, $expected)
      {
          $this->assertSame($expected, UTF8::ucwords($input));
      }

      /**
       * Provides test data for test_strcasecmp()
       *
       * @return array rows of [first string, second string, expected comparison result]
       */
      public static function provider_strcasecmp()
      {
          return [
              ['Cocoñùт', 'Cocoñùт', 0],
              ['Čau', 'Čauo', -1],
              ['Čau', 'Ča', 1],
              ['Cocoñùт', 'Cocoñ', 4],
              ['Cocoñùт', 'Coco', 6],
          ];
      }

      /**
       * Tests UTF8::strcasecmp
       *
       * The exact integer is compared (assertSame), not just its sign.
       *
       * @test
       * @dataProvider provider_strcasecmp
       *
       * @param string $input    first string
       * @param string $input2   second string
       * @param int    $expected result UTF8::strcasecmp must return
       */
      public function test_strcasecmp($input, $input2, $expected)
      {
          $this->assertSame($expected, UTF8::strcasecmp($input, $input2));
      }

      /**
       * Provides test data for test_str_ireplace()
       *
       * @return array rows of [search, replace, subject, expected]; each element is a string or an array
       */
      public static function provider_str_ireplace()
      {
          return [
              ['т', 't', 'cocoñuт', 'cocoñut'],
              ['Ñ', 'N', 'cocoñuт', 'cocoNuт'],
              [['т', 'Ñ', 'k' => 'k'], ['t', 'N', 'K'], ['cocoñuт'], ['cocoNut']],
              [['ñ'], 'n', 'cocoñuт', 'coconuт'],
          ];
      }

      /**
       * Tests UTF8::str_ireplace
       *
       * @test
       * @dataProvider provider_str_ireplace
       *
       * @param string|array $search   text(s) to look for
       * @param string|array $replace  replacement text(s)
       * @param string|array $subject  text(s) to run the replacement on
       * @param string|array $expected value UTF8::str_ireplace must return
       */
      public function test_str_ireplace($search, $replace, $subject, $expected)
      {
          $this->assertSame($expected, UTF8::str_ireplace($search, $replace, $subject));
      }

      /**
       * Provides test data for test_stristr()
       *
       * @return array rows of [input, search string, expected]; expected is FALSE when nothing is found
       */
      public static function provider_stristr()
      {
          return [
              ['Cocoñùт', 'oñ', 'oñùт'],
              ['Cocoñùт', 'o', 'ocoñùт'],
              ['Cocoñùт', 'k', FALSE],
          ];
      }

      /**
       * Tests UTF8::stristr
       *
       * @test
       * @dataProvider provider_stristr
       *
       * @param string       $input    string to search in
       * @param string       $input2   string to search for
       * @param string|false $expected value UTF8::stristr must return
       */
      public function test_stristr($input, $input2, $expected)
      {
          $this->assertSame($expected, UTF8::stristr($input, $input2));
      }

      /**
       * Provides test data for test_strspn()
       *
       * @return array rows of [input, mask, offset, length, expected]
       */
      public static function provider_strspn()
      {
          return [
              ["foo", "o", 1, 2, 2],
              ['Cocoñùт', 'oñ', NULL, NULL, 1],
              ['Cocoñùт', 'oñ', 2, 4, 1],
              ['Cocoñùт', 'šš', 3, 9, 4],
          ];
      }

      /**
       * Tests UTF8::strspn
       *
       * @test
       * @dataProvider provider_strspn
       *
       * @param string   $input    value handed to UTF8::strspn
       * @param string   $mask     mask passed as the second argument
       * @param int|null $offset   offset passed as the third argument
       * @param int|null $length   length passed as the fourth argument
       * @param int      $expected result UTF8::strspn must return
       */
      public function test_strspn($input, $mask, $offset, $length, $expected)
      {
          $this->assertSame($expected, UTF8::strspn($input, $mask, $offset, $length));
      }

      /**
       * Provides test data for test_strcspn()
       *
       * @return array rows of [input, mask, offset, length, expected]
       */
      public static function provider_strcspn()
      {
          return [
              ['Cocoñùт', 'oñ', NULL, NULL, 1],
              ['Cocoñùт', 'oñ', 2, 4, 1],
              ['Cocoñùт', 'šš', 3, 9, 4],
          ];
      }

      /**
       * Tests UTF8::strcspn
       *
       * @test
       * @dataProvider provider_strcspn
       *
       * @param string   $input    value handed to UTF8::strcspn
       * @param string   $mask     mask passed as the second argument
       * @param int|null $offset   offset passed as the third argument
       * @param int|null $length   length passed as the fourth argument
       * @param int      $expected result UTF8::strcspn must return
       */
      public function test_strcspn($input, $mask, $offset, $length, $expected)
      {
          $this->assertSame($expected, UTF8::strcspn($input, $mask, $offset, $length));
      }

      /**
       * Provides test data for test_str_pad()
       *
       * @return array rows of [input, final length, pad string, STR_PAD_* type, expected]
       */
      public static function provider_str_pad()
      {
          return [
              ['Cocoñùт', 10, 'š', STR_PAD_RIGHT, 'Cocoñùтššš'],
              ['Cocoñùт', 10, 'š', STR_PAD_LEFT, 'šššCocoñùт'],
              ['Cocoñùт', 10, 'š', STR_PAD_BOTH, 'šCocoñùтšš'],
          ];
      }

      /**
       * Tests UTF8::str_pad
       *
       * @test
       * @dataProvider provider_str_pad
       *
       * @param string $input    value handed to UTF8::str_pad
       * @param int    $length   length passed as the second argument
       * @param string $pad      pad string passed as the third argument
       * @param int    $type     pad type passed as the fourth argument
       * @param string $expected value UTF8::str_pad must return
       */
      public function test_str_pad($input, $length, $pad, $type, $expected)
      {
          $this->assertSame($expected, UTF8::str_pad($input, $length, $pad, $type));
      }

      /**
       * Tests UTF8::str_pad error
       *
       * Passes 15 as the pad type, which is none of STR_PAD_RIGHT, STR_PAD_LEFT
       * or STR_PAD_BOTH, and expects a UTF8_Exception.
       */
      public function test_str_pad_error()
      {
          $this->expectException(UTF8_Exception::class);

          // Same four-argument call shape as test_str_pad(); only the pad type is invalid.
          UTF8::str_pad('Cocoñùт', 10, 'š', 15);
      }

      /**
       * Provides test data for test_str_split()
       *
       * @return array rows of [input, split length, expected chunks]
       */
      public static function provider_str_split()
      {
          return [
              ['Bár', 1, ['B', 'á', 'r']],
              ['Cocoñùт', 2, ['Co', 'co', 'ñù', 'т']],
              ['Cocoñùт', 3, ['Coc', 'oñù', 'т']],
          ];
      }

      /**
       * Tests UTF8::str_split
       *
       * @test
       * @dataProvider provider_str_split
       *
       * @param string $input        value handed to UTF8::str_split
       * @param int    $split_length chunk length passed as the second argument
       * @param array  $expected     chunks UTF8::str_split must return
       */
      public function test_str_split($input, $split_length, $expected)
      {
          $this->assertSame($expected, UTF8::str_split($input, $split_length));
      }

      /**
       * Provides test data for test_strrev()
       *
       * @return array rows of [input, expected]
       */
      public static function provider_strrev()
      {
          return [
              ['Cocoñùт', 'тùñocoC'],
          ];
      }

      /**
       * Tests UTF8::strrev
       *
       * @test
       * @dataProvider provider_strrev
       *
       * @param string $input    value handed to UTF8::strrev
       * @param string $expected value UTF8::strrev must return
       */
      public function test_strrev($input, $expected)
      {
          $this->assertSame($expected, UTF8::strrev($input));
      }

      /**
       * Provides test data for test_trim()
       *
       * @return array rows of [input, character list or NULL, expected]
       */
      public static function provider_trim()
      {
          return [
              [' bar ', NULL, 'bar'],
              ['bar', 'b', 'ar'],
              ['barb', 'b', 'ar'],
          ];
      }

      /**
       * Tests UTF8::trim
       *
       * @test
       * @dataProvider provider_trim
       *
       * @param string      $input    value handed to UTF8::trim
       * @param string|null $input2   character list passed as the second argument
       * @param string      $expected value UTF8::trim must return
       */
      public function test_trim($input, $input2, $expected)
      {
          $this->assertSame($expected, UTF8::trim($input, $input2));
      }

      /**
       * Provides test data for test_ltrim()
       *
       * @return array rows of [input, character list or NULL, expected]
       */
      public static function provider_ltrim()
      {
          return [
              [' bar ', NULL, 'bar '],
              ['bar', 'b', 'ar'],
              ['barb', 'b', 'arb'],
              ['ñùт', 'ñ', 'ùт'],
          ];
      }

      /**
       * Tests UTF8::ltrim
       *
       * @test
       * @dataProvider provider_ltrim
       *
       * @param string      $input    value handed to UTF8::ltrim
       * @param string|null $charlist character list passed as the second argument
       * @param string      $expected value UTF8::ltrim must return
       */
      public function test_ltrim($input, $charlist, $expected)
      {
          $this->assertSame($expected, UTF8::ltrim($input, $charlist));
      }

      /**
       * Provides test data for test_rtrim()
       *
       * @return array rows of [input, character list or NULL, expected]
       */
      public static function provider_rtrim()
      {
          return [
              [' bar ', NULL, ' bar'],
              ['bar', 'b', 'bar'],
              ['barb', 'b', 'bar'],
              ['Cocoñùт', 'т', 'Cocoñù'],
          ];
      }

      /**
       * Tests UTF8::rtrim
       *
       * @test
       * @dataProvider provider_rtrim
       *
       * @param string      $input    value handed to UTF8::rtrim
       * @param string|null $input2   character list passed as the second argument
       * @param string      $expected value UTF8::rtrim must return
       */
      public function test_rtrim($input, $input2, $expected)
      {
          $this->assertSame($expected, UTF8::rtrim($input, $input2));
      }

      /**
       * Provides test data for test_ord()
       *
       * @return array rows of [single character, expected integer]
       */
      public static function provider_ord()
      {
          return [
              ['f', 102],
              ['ñ', 241],
              ['Ñ', 209],
          ];
      }

      /**
       * Tests UTF8::ord
       *
       * @test
       * @dataProvider provider_ord
       *
       * @param string $input    character handed to UTF8::ord
       * @param int    $expected value UTF8::ord must return
       */
      public function test_ord($input, $expected)
      {
          $this->assertSame($expected, UTF8::ord($input));
      }
  }