UTF8Test.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631
  1. <?php
  2. /**
  3. * Tests Kohana_UTF8 class
  4. *
  5. * @group kohana
  6. * @group kohana.core
  7. * @group kohana.core.utf8
  8. *
  9. * @package Kohana
  10. * @category Tests
  11. * @author Kohana Team
  12. * @copyright (c) Kohana Team
  13. * @license https://koseven.ga/LICENSE.md
  14. */
  15. class Kohana_UTF8Test extends Unittest_TestCase
  16. {
  17. /**
  18. * Provides test data for test_clean()
  19. */
  20. public function provider_clean()
  21. {
  22. return [
  23. ["\0", ''],
  24. ["→foo\021", '→foo'],
  25. ["\x7Fbar", 'bar'],
  26. ["\xFF", ''],
  27. ["\x41", 'A'],
  28. [["→foo\021", "\x41"], ['→foo', 'A']],
  29. ];
  30. }
  31. /**
  32. * Tests UTF8::clean
  33. *
  34. * @test
  35. * @dataProvider provider_clean
  36. */
  37. public function test_clean($input, $expected)
  38. {
  39. $this->assertSame($expected, UTF8::clean($input));
  40. }
  41. /**
  42. * Provides test data for test_is_ascii()
  43. */
  44. public function provider_is_ascii()
  45. {
  46. return [
  47. ["\0", TRUE],
  48. ["\$eno\r", TRUE],
  49. ['Señor', FALSE],
  50. [['Se', 'nor'], TRUE],
  51. [['Se', 'ñor'], FALSE],
  52. ];
  53. }
  54. /**
  55. * Tests UTF8::is_ascii
  56. *
  57. * @test
  58. * @dataProvider provider_is_ascii
  59. */
  60. public function test_is_ascii($input, $expected)
  61. {
  62. $this->assertSame($expected, UTF8::is_ascii($input));
  63. }
  64. /**
  65. * Provides test data for test_strip_ascii_ctrl()
  66. */
  67. public function provider_strip_ascii_ctrl()
  68. {
  69. return [
  70. ["\0", ''],
  71. ["→foo\021", '→foo'],
  72. ["\x7Fbar", 'bar'],
  73. ["\xFF", "\xFF"],
  74. ["\x41", 'A'],
  75. ];
  76. }
  77. /**
  78. * Tests UTF8::strip_ascii_ctrl
  79. *
  80. * @test
  81. * @dataProvider provider_strip_ascii_ctrl
  82. */
  83. public function test_strip_ascii_ctrl($input, $expected)
  84. {
  85. $this->assertSame($expected, UTF8::strip_ascii_ctrl($input));
  86. }
  87. /**
  88. * Provides test data for test_strip_non_ascii()
  89. */
  90. public function provider_strip_non_ascii()
  91. {
  92. return [
  93. ["\0\021\x7F", "\0\021\x7F"],
  94. ['I ♥ cocoñùт', 'I coco'],
  95. ];
  96. }
  97. /**
  98. * Tests UTF8::strip_non_ascii
  99. *
  100. * @test
  101. * @dataProvider provider_strip_non_ascii
  102. */
  103. public function test_strip_non_ascii($input, $expected)
  104. {
  105. $this->assertSame($expected, UTF8::strip_non_ascii($input));
  106. }
  107. /**
  108. * Provides test data for test_transliterate_to_ascii()
  109. */
  110. public function provider_transliterate_to_ascii()
  111. {
  112. return [
  113. ['Cocoñùт', -1, 'Coconuт'],
  114. ['COCOÑÙТ', -1, 'COCOÑÙТ'],
  115. ['Cocoñùт', 0, 'Coconuт'],
  116. ['COCOÑÙТ', 0, 'COCONUТ'],
  117. ['Cocoñùт', 1, 'Cocoñùт'],
  118. ['COCOÑÙТ', 1, 'COCONUТ'],
  119. ];
  120. }
  121. /**
  122. * Tests UTF8::transliterate_to_ascii
  123. *
  124. * @test
  125. * @dataProvider provider_transliterate_to_ascii
  126. */
  127. public function test_transliterate_to_ascii($input, $case, $expected)
  128. {
  129. $this->assertSame($expected, UTF8::transliterate_to_ascii($input, $case));
  130. }
  131. /**
  132. * Provides test data for test_strlen()
  133. */
  134. public function provider_strlen()
  135. {
  136. return [
  137. ['Cocoñùт', 7],
  138. ['Coconut', 7],
  139. ];
  140. }
  141. /**
  142. * Tests UTF8::strlen
  143. *
  144. * @test
  145. * @dataProvider provider_strlen
  146. */
  147. public function test_strlen($input, $expected)
  148. {
  149. $this->assertSame($expected, UTF8::strlen($input));
  150. }
  151. /**
  152. * Provides test data for test_strpos()
  153. */
  154. public function provider_strpos()
  155. {
  156. return [
  157. ['Cocoñùт', 'o', 0, 1],
  158. ['Cocoñùт', 'ñ', 1, 4],
  159. ];
  160. }
  161. /**
  162. * Tests UTF8::strpos
  163. *
  164. * @test
  165. * @dataProvider provider_strpos
  166. */
  167. public function test_strpos($input, $str, $offset, $expected)
  168. {
  169. $this->assertSame($expected, UTF8::strpos($input, $str, $offset));
  170. }
  171. /**
  172. * Provides test data for test_strrpos()
  173. */
  174. public function provider_strrpos()
  175. {
  176. return [
  177. ['Cocoñùт', 'o', 0, 3],
  178. ['Cocoñùт', 'ñ', 2, 4],
  179. ];
  180. }
  181. /**
  182. * Tests UTF8::strrpos
  183. *
  184. * @test
  185. * @dataProvider provider_strrpos
  186. */
  187. public function test_strrpos($input, $str, $offset, $expected)
  188. {
  189. $this->assertSame($expected, UTF8::strrpos($input, $str, $offset));
  190. }
  191. /**
  192. * Provides test data for test_substr()
  193. */
  194. public function provider_substr()
  195. {
  196. return [
  197. ['Cocoñùт', 3, 2, 'oñ'],
  198. ['Cocoñùт', 3, 9, 'oñùт'],
  199. ['Cocoñùт', 3, NULL, 'oñùт'],
  200. ['Cocoñùт', 3, -2, 'oñ'],
  201. ];
  202. }
  203. /**
  204. * Tests UTF8::substr
  205. *
  206. * @test
  207. * @dataProvider provider_substr
  208. */
  209. public function test_substr($input, $offset, $length, $expected)
  210. {
  211. $this->assertSame($expected, UTF8::substr($input, $offset, $length));
  212. }
  213. /**
  214. * Provides test data for test_substr_replace()
  215. */
  216. public function provider_substr_replace()
  217. {
  218. return [
  219. ['Cocoñùт', 'šš', 3, 2, 'Cocššùт'],
  220. ['Cocoñùт', 'šš', 3, 9, 'Cocšš'],
  221. ];
  222. }
  223. /**
  224. * Tests UTF8::substr_replace
  225. *
  226. * @test
  227. * @dataProvider provider_substr_replace
  228. */
  229. public function test_substr_replace($input, $replacement, $offset, $length, $expected)
  230. {
  231. $this->assertSame($expected, UTF8::substr_replace($input, $replacement, $offset, $length));
  232. }
  233. /**
  234. * Provides test data for test_strtolower()
  235. */
  236. public function provider_strtolower()
  237. {
  238. return [
  239. ['COCOÑÙТ', 'cocoñùт'],
  240. ['JÄGER', 'jäger'],
  241. ];
  242. }
  243. /**
  244. * Tests UTF8::strtolower
  245. *
  246. * @test
  247. * @dataProvider provider_strtolower
  248. */
  249. public function test_strtolower($input, $expected)
  250. {
  251. $this->assertSame($expected, UTF8::strtolower($input));
  252. }
  253. /**
  254. * Provides test data for test_strtoupper()
  255. */
  256. public function provider_strtoupper()
  257. {
  258. return [
  259. ['Cocoñùт', 'COCOÑÙТ'],
  260. ['jäger', 'JÄGER'],
  261. ];
  262. }
  263. /**
  264. * Tests UTF8::strtoupper
  265. *
  266. * @test
  267. * @dataProvider provider_strtoupper
  268. */
  269. public function test_strtoupper($input, $expected)
  270. {
  271. $this->assertSame($expected, UTF8::strtoupper($input));
  272. }
  273. /**
  274. * Provides test data for test_ucfirst()
  275. */
  276. public function provider_ucfirst()
  277. {
  278. return [
  279. ['ñùт', 'Ñùт'],
  280. ];
  281. }
  282. /**
  283. * Tests UTF8::ucfirst
  284. *
  285. * @test
  286. * @dataProvider provider_ucfirst
  287. */
  288. public function test_ucfirst($input, $expected)
  289. {
  290. $this->assertSame($expected, UTF8::ucfirst($input));
  291. }
  292. /**
  293. * Provides test data for test_strip_non_ascii()
  294. */
  295. public function provider_ucwords()
  296. {
  297. return [
  298. ['ExAmple', 'ExAmple'],
  299. ['i ♥ Cocoñùт', 'I ♥ Cocoñùт'],
  300. ];
  301. }
  302. /**
  303. * Tests UTF8::ucwords
  304. *
  305. * @test
  306. * @dataProvider provider_ucwords
  307. */
  308. public function test_ucwords($input, $expected)
  309. {
  310. $this->assertSame($expected, UTF8::ucwords($input));
  311. }
  312. /**
  313. * Provides test data for test_strcasecmp()
  314. */
  315. public function provider_strcasecmp()
  316. {
  317. return [
  318. ['Cocoñùт', 'Cocoñùт', 0],
  319. ['Čau', 'Čauo', -1],
  320. ['Čau', 'Ča', 1],
  321. ['Cocoñùт', 'Cocoñ', 4],
  322. ['Cocoñùт', 'Coco', 6],
  323. ];
  324. }
  325. /**
  326. * Tests UTF8::strcasecmp
  327. *
  328. * @test
  329. * @dataProvider provider_strcasecmp
  330. */
  331. public function test_strcasecmp($input, $input2, $expected)
  332. {
  333. $this->assertSame($expected, UTF8::strcasecmp($input, $input2));
  334. }
  335. /**
  336. * Provides test data for test_str_ireplace()
  337. */
  338. public function provider_str_ireplace()
  339. {
  340. return [
  341. ['т', 't', 'cocoñuт', 'cocoñut'],
  342. ['Ñ', 'N', 'cocoñuт', 'cocoNuт'],
  343. [['т', 'Ñ', 'k' => 'k'], ['t', 'N', 'K'], ['cocoñuт'], ['cocoNut']],
  344. [['ñ'], 'n', 'cocoñuт', 'coconuт'],
  345. ];
  346. }
  347. /**
  348. * Tests UTF8::str_ireplace
  349. *
  350. * @test
  351. * @dataProvider provider_str_ireplace
  352. */
  353. public function test_str_ireplace($search, $replace, $subject, $expected)
  354. {
  355. $this->assertSame($expected, UTF8::str_ireplace($search, $replace, $subject));
  356. }
  357. /**
  358. * Provides test data for test_stristr()
  359. */
  360. public function provider_stristr()
  361. {
  362. return [
  363. ['Cocoñùт', 'oñ', 'oñùт'],
  364. ['Cocoñùт', 'o', 'ocoñùт'],
  365. ['Cocoñùт', 'k', FALSE],
  366. ];
  367. }
  368. /**
  369. * Tests UTF8::stristr
  370. *
  371. * @test
  372. * @dataProvider provider_stristr
  373. */
  374. public function test_stristr($input, $input2, $expected)
  375. {
  376. $this->assertSame($expected, UTF8::stristr($input, $input2));
  377. }
  378. /**
  379. * Provides test data for test_strspn()
  380. */
  381. public function provider_strspn()
  382. {
  383. return [
  384. ["foo", "o", 1, 2, 2],
  385. ['Cocoñùт', 'oñ', NULL, NULL, 1],
  386. ['Cocoñùт', 'oñ', 2, 4, 1],
  387. ['Cocoñùт', 'šš', 3, 9, 4],
  388. ];
  389. }
  390. /**
  391. * Tests UTF8::strspn
  392. *
  393. * @test
  394. * @dataProvider provider_strspn
  395. */
  396. public function test_strspn($input, $mask, $offset, $length, $expected)
  397. {
  398. $this->assertSame($expected, UTF8::strspn($input, $mask, $offset, $length));
  399. }
  400. /**
  401. * Provides test data for test_strcspn()
  402. */
  403. public function provider_strcspn()
  404. {
  405. return [
  406. ['Cocoñùт', 'oñ', NULL, NULL, 1],
  407. ['Cocoñùт', 'oñ', 2, 4, 1],
  408. ['Cocoñùт', 'šš', 3, 9, 4],
  409. ];
  410. }
  411. /**
  412. * Tests UTF8::strcspn
  413. *
  414. * @test
  415. * @dataProvider provider_strcspn
  416. */
  417. public function test_strcspn($input, $mask, $offset, $length, $expected)
  418. {
  419. $this->assertSame($expected, UTF8::strcspn($input, $mask, $offset, $length));
  420. }
  421. /**
  422. * Provides test data for test_str_pad()
  423. */
  424. public function provider_str_pad()
  425. {
  426. return [
  427. ['Cocoñùт', 10, 'š', STR_PAD_RIGHT, 'Cocoñùтššš'],
  428. ['Cocoñùт', 10, 'š', STR_PAD_LEFT, 'šššCocoñùт'],
  429. ['Cocoñùт', 10, 'š', STR_PAD_BOTH, 'šCocoñùтšš'],
  430. ];
  431. }
  432. /**
  433. * Tests UTF8::str_pad
  434. *
  435. * @test
  436. * @dataProvider provider_str_pad
  437. */
  438. public function test_str_pad($input, $length, $pad, $type, $expected)
  439. {
  440. $this->assertSame($expected, UTF8::str_pad($input, $length, $pad, $type));
  441. }
  442. /**
  443. * Tests UTF8::str_pad error
  444. *
  445. * @test
  446. * @expectedException UTF8_Exception
  447. */
  448. public function test_str_pad_error()
  449. {
  450. UTF8::str_pad('Cocoñùт', 10, 'š', 15, 'šCocoñùтšš');
  451. }
  452. /**
  453. * Provides test data for test_str_split()
  454. */
  455. public function provider_str_split()
  456. {
  457. return [
  458. ['Bár', 1, ['B', 'á', 'r']],
  459. ['Cocoñùт', 2, ['Co', 'co', 'ñù', 'т']],
  460. ['Cocoñùт', 3, ['Coc', 'oñù', 'т']],
  461. ];
  462. }
  463. /**
  464. * Tests UTF8::str_split
  465. *
  466. * @test
  467. * @dataProvider provider_str_split
  468. */
  469. public function test_str_split($input, $split_length, $expected)
  470. {
  471. $this->assertSame($expected, UTF8::str_split($input, $split_length));
  472. }
  473. /**
  474. * Provides test data for test_strrev()
  475. */
  476. public function provider_strrev()
  477. {
  478. return [
  479. ['Cocoñùт', 'тùñocoC'],
  480. ];
  481. }
  482. /**
  483. * Tests UTF8::strrev
  484. *
  485. * @test
  486. * @dataProvider provider_strrev
  487. */
  488. public function test_strrev($input, $expected)
  489. {
  490. $this->assertSame($expected, UTF8::strrev($input));
  491. }
  492. /**
  493. * Provides test data for test_trim()
  494. */
  495. public function provider_trim()
  496. {
  497. return [
  498. [' bar ', NULL, 'bar'],
  499. ['bar', 'b', 'ar'],
  500. ['barb', 'b', 'ar'],
  501. ];
  502. }
  503. /**
  504. * Tests UTF8::trim
  505. *
  506. * @test
  507. * @dataProvider provider_trim
  508. */
  509. public function test_trim($input, $input2, $expected)
  510. {
  511. $this->assertSame($expected, UTF8::trim($input, $input2));
  512. }
  513. /**
  514. * Provides test data for test_ltrim()
  515. */
  516. public function provider_ltrim()
  517. {
  518. return [
  519. [' bar ', NULL, 'bar '],
  520. ['bar', 'b', 'ar'],
  521. ['barb', 'b', 'arb'],
  522. ['ñùт', 'ñ', 'ùт'],
  523. ];
  524. }
  525. /**
  526. * Tests UTF8::ltrim
  527. *
  528. * @test
  529. * @dataProvider provider_ltrim
  530. */
  531. public function test_ltrim($input, $charlist, $expected)
  532. {
  533. $this->assertSame($expected, UTF8::ltrim($input, $charlist));
  534. }
  535. /**
  536. * Provides test data for test_rtrim()
  537. */
  538. public function provider_rtrim()
  539. {
  540. return [
  541. [' bar ', NULL, ' bar'],
  542. ['bar', 'b', 'bar'],
  543. ['barb', 'b', 'bar'],
  544. ['Cocoñùт', 'т', 'Cocoñù'],
  545. ];
  546. }
  547. /**
  548. * Tests UTF8::rtrim
  549. *
  550. * @test
  551. * @dataProvider provider_rtrim
  552. */
  553. public function test_rtrim($input, $input2, $expected)
  554. {
  555. $this->assertSame($expected, UTF8::rtrim($input, $input2));
  556. }
  557. /**
  558. * Provides test data for test_ord()
  559. */
  560. public function provider_ord()
  561. {
  562. return [
  563. ['f', 102],
  564. ['ñ', 241],
  565. ['Ñ', 209],
  566. ];
  567. }
  568. /**
  569. * Tests UTF8::ord
  570. *
  571. * @test
  572. * @dataProvider provider_ord
  573. */
  574. public function test_ord($input, $expected)
  575. {
  576. $this->assertSame($expected, UTF8::ord($input));
  577. }
  578. }