Text.php 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696
  1. <?php
  2. /**
  3. * Text helper class. Provides simple methods for working with text.
  4. *
  5. * @package KO7
  6. * @category Helpers
  7. *
  8. * @copyright (c) 2007-2016 Kohana Team
  9. * @copyright (c) since 2016 Koseven Team
  10. * @license https://koseven.dev/LICENSE
  11. */
  12. class KO7_Text {
  /**
   * Lookup table mapping a numeric unit to its English name.
   *
   * The entries are declared from the largest unit to the smallest;
   * Text::number() walks the table in this order and subtracts each unit
   * that fits into the remaining value.
   *
   * @var array number units and text equivalents
   */
  public static $units = [
    1000000000 => 'billion',
    1000000 => 'million',
    1000 => 'thousand',
    100 => 'hundred',
    90 => 'ninety',
    80 => 'eighty',
    70 => 'seventy',
    60 => 'sixty',
    50 => 'fifty',
    // Spelled "forty" (not "fourty")
    40 => 'forty',
    30 => 'thirty',
    20 => 'twenty',
    19 => 'nineteen',
    18 => 'eighteen',
    17 => 'seventeen',
    16 => 'sixteen',
    15 => 'fifteen',
    14 => 'fourteen',
    13 => 'thirteen',
    12 => 'twelve',
    11 => 'eleven',
    10 => 'ten',
    9 => 'nine',
    8 => 'eight',
    7 => 'seven',
    6 => 'six',
    5 => 'five',
    4 => 'four',
    3 => 'three',
    2 => 'two',
    1 => 'one',
  ];
  /**
   * Truncates a phrase after a maximum number of words.
   *
   *     $text = Text::limit_words($text);
   *
   * @param   string   $str       phrase to limit words of
   * @param   integer  $limit     number of words to limit to
   * @param   string   $end_char  end character or entity appended when the phrase was cut (defaults to "…")
   * @return  string
   */
  public static function limit_words($str, $limit = 100, $end_char = NULL)
  {
    $max_words = (int) $limit;

    if ($end_char === NULL)
    {
      $end_char = '…';
    }

    // Blank input is handed back untouched
    if (trim($str) === '')
    {
      return $str;
    }

    // A non-positive limit leaves room for nothing but the end character
    if ($max_words <= 0)
    {
      return $end_char;
    }

    // Grab optional leading whitespace followed by at most $max_words words
    preg_match('/^\s*+(?:\S++\s*+){1,'.$max_words.'}/u', $str, $found);
    $head = $found[0];

    // The end character is only attached when something was actually cut off
    $suffix = (strlen($head) === strlen($str)) ? '' : $end_char;

    return rtrim($head).$suffix;
  }
  /**
   * Truncates a phrase after a maximum number of characters.
   *
   *     $text = Text::limit_chars($text);
   *
   * @param   string   $str             phrase to limit characters of
   * @param   integer  $limit           number of characters to limit to
   * @param   string   $end_char        end character or entity appended when the phrase was cut (defaults to "…")
   * @param   boolean  $preserve_words  enable or disable the preservation of words while limiting
   * @return  string
   * @uses    UTF8::strlen
   * @uses    UTF8::substr
   */
  public static function limit_chars($str, $limit = 100, $end_char = NULL, $preserve_words = FALSE)
  {
    if ($end_char === NULL)
    {
      $end_char = '…';
    }

    $max_chars = (int) $limit;

    // Blank input, or input that already fits, is returned as-is
    if (trim($str) === '' OR UTF8::strlen($str) <= $max_chars)
    {
      return $str;
    }

    if ($max_chars <= 0)
    {
      return $end_char;
    }

    // Plain mode: cut at exactly $max_chars characters
    if ($preserve_words === FALSE)
    {
      return rtrim(UTF8::substr($str, 0, $max_chars)).$end_char;
    }

    // Word-preserving mode: the limit is an upper bound, so cut at the last
    // whitespace reachable within it. Without such whitespace nothing fits.
    if ( ! preg_match('/^.{0,'.$max_chars.'}\s/us', $str, $found))
    {
      return $end_char;
    }

    $kept = $found[0];

    if (strlen($kept) === strlen($str))
    {
      // The whole string was matched, nothing was dropped
      return rtrim($kept);
    }

    return rtrim($kept).$end_char;
  }
  /**
   * Cycles through the given strings, returning the next one on every call.
   *
   *     echo Text::alternate('one', 'two'); // "one"
   *     echo Text::alternate('one', 'two'); // "two"
   *     echo Text::alternate('one', 'two'); // "one"
   *
   * Calling the method without arguments resets the internal counter and
   * returns an empty string. The counter is shared by all callers, so mixing
   * several different sets of strings may produce unexpected results.
   *
   * @param   string  $str,...  strings to alternate between
   * @return  string
   */
  public static function alternate()
  {
    // Call counter, kept between invocations
    static $i;

    $choices = func_get_args();

    if (count($choices) === 0)
    {
      // No arguments: rewind the cycle
      $i = 0;

      return '';
    }

    $pick = $choices[$i % count($choices)];
    $i++;

    return $pick;
  }
  /**
   * Generates a random string of a given type and length.
   *
   *     $str = Text::random(); // 8 character random string
   *
   * The following types are supported:
   *
   * alnum
   * :  Upper and lower case a-z, 0-9 (default)
   *
   * alpha
   * :  Upper and lower case a-z
   *
   * hexdec
   * :  Hexadecimal characters a-f, 0-9
   *
   * numeric
   * :  Digits 0-9
   *
   * nozero
   * :  Digits 1-9
   *
   * distinct
   * :  Uppercase characters and numbers that cannot be confused
   *
   * You can also create a custom type by providing the "pool" of characters
   * as the type. An empty pool yields an empty string.
   *
   * Characters are picked with random_int(), so the result no longer depends
   * on the mt_rand() seed.
   *
   * @param   string   $type    a type of pool, or a string of characters to use as the pool
   * @param   integer  $length  length of string to return
   * @return  string
   * @throws  Exception  if random_int() cannot find a source of randomness
   * @uses    UTF8::is_ascii
   * @uses    UTF8::str_split
   */
  public static function random($type = NULL, $length = 8)
  {
    if ($type === NULL)
    {
      // Default is to generate an alphanumeric string
      $type = 'alnum';
    }

    $utf8 = FALSE;

    switch ($type)
    {
      case 'alnum':
        $pool = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
      break;
      case 'alpha':
        $pool = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
      break;
      case 'hexdec':
        $pool = '0123456789abcdef';
      break;
      case 'numeric':
        $pool = '0123456789';
      break;
      case 'nozero':
        $pool = '123456789';
      break;
      case 'distinct':
        $pool = '2345679ACDEFHJKLMNPRSTUVWXYZ';
      break;
      default:
        $pool = (string) $type;

        if ($pool === '')
        {
          // Nothing to pick from; avoids asking for a random index in an empty range
          return '';
        }

        $utf8 = ! UTF8::is_ascii($pool);
      break;
    }

    // Split the pool into an array of characters
    $pool = ($utf8 === TRUE) ? UTF8::str_split($pool, 1) : str_split($pool, 1);

    // Largest pool key
    $max = count($pool) - 1;

    $str = '';
    for ($i = 0; $i < $length; $i++)
    {
      // Select a random character from the pool and add it to the string
      $str .= $pool[random_int(0, $max)];
    }

    // Make sure alnum strings contain at least one letter and one digit
    if ($type === 'alnum' AND $length > 1)
    {
      // The alnum pool is pure ASCII, so byte offsets equal character offsets
      $last = strlen($str) - 1;

      if (ctype_alpha($str))
      {
        // Overwrite a random position with a random digit (ASCII 48-57)
        $str[random_int(0, $last)] = chr(random_int(48, 57));
      }
      elseif (ctype_digit($str))
      {
        // Overwrite a random position with a random uppercase letter (ASCII 65-90)
        $str[random_int(0, $last)] = chr(random_int(65, 90));
      }
    }

    return $str;
  }
  /**
   * Uppercases the first letter of every delimiter-separated segment of a
   * string, e.g. for words that are joined by dashes instead of spaces.
   *
   *     $str = Text::ucfirst('content-type'); // returns "Content-Type"
   *
   * @param   string  $string     string to transform
   * @param   string  $delimiter  delimiter to use
   * @uses    UTF8::ucfirst
   * @return  string
   */
  public static function ucfirst($string, $delimiter = '-')
  {
    $segments = explode($delimiter, $string);

    foreach ($segments as $index => $segment)
    {
      $segments[$index] = UTF8::ucfirst($segment);
    }

    // Glue the segments back together with the same delimiter
    return implode($delimiter, $segments);
  }
  /**
   * Collapses runs of two or more slashes into a single slash. A run that
   * directly follows a colon (as in "http://") is left alone.
   *
   *     $str = Text::reduce_slashes('foo//bar/baz'); // "foo/bar/baz"
   *
   * @param   string  $str  string to reduce slashes of
   * @return  string
   */
  public static function reduce_slashes($str)
  {
    // Two or more slashes, not preceded by ":"
    $repeated_slashes = '#(?<!:)//+#';

    $reduced = preg_replace($repeated_slashes, '/', $str);

    return $reduced;
  }
  /**
   * Replaces the given words with a string.
   *
   *     // Displays "What the #####, man!"
   *     echo Text::censor('What the frick, man!', ['frick']);
   *
   * A "*" inside a bad word acts as a wildcard for any run of non-whitespace
   * characters. When $replacement is a single character, every character of
   * a matched word is replaced by it; otherwise the whole match is replaced
   * by $replacement.
   *
   * If no (non-empty) bad words are given the string is returned unchanged.
   *
   * @param   string   $str                    phrase to replace words in
   * @param   array    $badwords               words to replace (a single word may be passed as a string)
   * @param   string   $replacement            replacement string
   * @param   boolean  $replace_partial_words  replace words across word boundaries (space, period, etc)
   * @return  string
   * @uses    UTF8::strlen
   */
  public static function censor($str, $badwords, $replacement = '#', $replace_partial_words = TRUE)
  {
    $patterns = [];

    foreach ( (array) $badwords as $badword)
    {
      $badword = (string) $badword;

      if ($badword === '')
      {
        // An empty alternative would match at every position of the string
        continue;
      }

      // Escape the word for use in the regex, then turn "*" into a wildcard
      $patterns[] = str_replace('\*', '\S*?', preg_quote($badword, '!'));
    }

    if ($patterns === [])
    {
      // Nothing to censor; an empty "()" group would match everywhere
      return $str;
    }

    $regex = '('.implode('|', $patterns).')';

    if ($replace_partial_words === FALSE)
    {
      // Just using \b isn't sufficient when we need to replace a badword that already contains word boundaries itself
      $regex = '(?<=\b|\s|^)'.$regex.'(?=\b|\s|$)';
    }

    $regex = '!'.$regex.'!ui';

    // if $replacement is a single character: replace each of the characters of the badword with $replacement
    if (UTF8::strlen($replacement) == 1)
    {
      return preg_replace_callback($regex, function($matches) use ($replacement) {
        return str_repeat($replacement, UTF8::strlen($matches[1]));
      }, $str);
    }

    // if $replacement is not a single character, fully replace the badword with $replacement
    return preg_replace($regex, $replacement, $str);
  }
  /**
   * Finds the leading text shared by every word in a set. The comparison is
   * done byte by byte against the first word.
   *
   *     $match = Text::similar(['fred', 'fran', 'free']); // "fr"
   *
   * @param   array  $words  words to find similar text of
   * @return  string
   */
  public static function similar(array $words)
  {
    // Every word is compared against the first one
    $reference = current($words);
    $length = strlen($reference);

    // Number of leading positions known to be identical in all words
    $common = 0;

    while ($common < $length)
    {
      foreach ($words as $candidate)
      {
        // Stop scanning entirely at the first word that is too short or differs
        if ( ! isset($candidate[$common]) OR $candidate[$common] !== $reference[$common])
        {
          break 2;
        }
      }

      $common++;
    }

    return substr($reference, 0, $common);
  }
  /**
   * Turns plain email addresses and URLs found in a text into links.
   * Existing links will not be altered.
   *
   *     echo Text::auto_link($text);
   *
   * [!!] This method is not foolproof since it uses regex to parse HTML.
   *
   * @param   string  $text  text to auto link
   * @return  string
   * @uses    Text::auto_link_urls
   * @uses    Text::auto_link_emails
   */
  public static function auto_link($text)
  {
    // Emails go first to prevent problems with "www.domain.com@example.com"
    $with_emails = Text::auto_link_emails($text);

    return Text::auto_link_urls($with_emails);
  }
  /**
   * Turns plain URLs found in a text into links. Existing links will not be
   * altered.
   *
   *     echo Text::auto_link_urls($text);
   *
   * [!!] This method is not foolproof since it uses regex to parse HTML.
   *
   * @param   string  $text  text to auto link
   * @return  string
   * @uses    HTML::anchor
   */
  public static function auto_link_urls($text)
  {
    // http/https/ftp/ftps links that are not part of an existing html anchor
    $with_scheme = '~\b(?<!href="|">)(?:ht|f)tps?://[^<\s]+(?:/|\b)~i';

    // Naked www.links.com (without http://)
    $without_scheme = '~\b(?<!://|">)www(?:\.[a-z0-9][-a-z0-9]*+)+\.[a-z]{2,6}[^<\s]*\b~i';

    // First pass: links that already carry a scheme
    $text = preg_replace_callback($with_scheme, 'Text::_auto_link_urls_callback1', $text);

    // Second pass: links that need "http://" prepended
    $text = preg_replace_callback($without_scheme, 'Text::_auto_link_urls_callback2', $text);

    return $text;
  }
  /**
   * preg_replace_callback handler used by Text::auto_link_urls for URLs
   * that include a scheme: the whole match becomes the link target.
   *
   * @param   array  $matches  regex match; index 0 holds the full URL
   * @return  string
   * @uses    HTML::anchor
   */
  protected static function _auto_link_urls_callback1($matches)
  {
    $url = $matches[0];

    return HTML::anchor($url);
  }
  /**
   * preg_replace_callback handler used by Text::auto_link_urls for "www."
   * URLs without a scheme: "http://" is prepended for the link target while
   * the matched text is passed on unchanged as the second argument.
   *
   * @param   array  $matches  regex match; index 0 holds the full URL
   * @return  string
   * @uses    HTML::anchor
   */
  protected static function _auto_link_urls_callback2($matches)
  {
    $shown = $matches[0];
    $target = 'http://'.$shown;

    return HTML::anchor($target, $shown);
  }
  /**
   * Turns plain email addresses found in a text into mailto links. Existing
   * links will not be altered.
   *
   *     echo Text::auto_link_emails($text);
   *
   * [!!] This method is not foolproof since it uses regex to parse HTML.
   *
   * @param   string  $text  text to auto link
   * @return  string
   * @uses    HTML::mailto
   */
  public static function auto_link_emails($text)
  {
    // Matches email addresses that are not part of an existing html mailto anchor.
    // The "58;" negative lookbehind prevents matching of existing encoded html
    // mailto anchors: the html entity for a colon (:) is &#58; or &#058; or &#0058; etc.
    $email_pattern = '~\b(?<!href="mailto:|58;)(?!\.)[-+_a-z0-9.]++(?<!\.)@(?![-.])[-a-z0-9.]+(?<!\.)\.[a-z]{2,6}\b(?!</a>)~i';

    $linked = preg_replace_callback($email_pattern, 'Text::_auto_link_emails_callback', $text);

    return $linked;
  }
  /**
   * preg_replace_callback handler used by Text::auto_link_emails: hands the
   * matched address to HTML::mailto.
   *
   * @param   array  $matches  regex match; index 0 holds the full email address
   * @return  string
   * @uses    HTML::mailto
   */
  protected static function _auto_link_emails_callback($matches)
  {
    $address = $matches[0];

    return HTML::mailto($address);
  }
  /**
   * Automatically applies "p" and "br" markup to text.
   * Basically [nl2br](http://php.net/nl2br) on steroids.
   *
   *     echo Text::auto_p($text);
   *
   * Blank-line separated chunks become paragraphs; block-level elements
   * that are already present are not wrapped in "p" tags.
   *
   * [!!] This method is not foolproof since it uses regex to parse HTML.
   *
   * @param   string   $str  subject
   * @param   boolean  $br   convert single linebreaks to <br />
   * @return  string
   */
  public static function auto_p($str, $br = TRUE)
  {
    $str = trim($str);

    // Nothing but whitespace: nothing to mark up
    if ($str === '')
    {
      return '';
    }

    // Standardize newlines
    $str = str_replace(["\r\n", "\r"], "\n", $str);

    // Strip leading, then trailing, spaces and tabs on each line
    $str = preg_replace(['~^[ \t]+~m', '~[ \t]+$~m'], '', $str);

    // The html-specific regexes are skipped for strings without any tag
    $html_found = (strpos($str, '<') !== FALSE);

    // Elements that should not be surrounded by p tags
    $no_p = '(?:p|div|h[1-6r]|ul|ol|li|blockquote|d[dlt]|pre|t[dhr]|t(?:able|body|foot|head)|c(?:aption|olgroup)|form|s(?:elect|tyle)|a(?:ddress|rea)|ma(?:p|th))';

    if ($html_found)
    {
      // Put a linebreak before opening and after closing $no_p elements
      // that start, respectively end, a line
      $str = preg_replace('~^<'.$no_p.'[^>]*+>~im', "\n".'$0', $str);
      $str = preg_replace('~</'.$no_p.'\s*+>$~im', '$0'."\n", $str);
    }

    // Wrap everything in one paragraph, then split it on blank lines
    $str = '<p>'.trim($str).'</p>';
    $str = preg_replace('~\n{2,}~', "</p>\n\n<p>", $str);

    if ($html_found)
    {
      // Remove p tags around $no_p elements
      $str = preg_replace('~<p>(?=</?'.$no_p.'[^>]*+>)~i', '', $str);
      $str = preg_replace('~(</?'.$no_p.'[^>]*+>)</p>~i', '$1', $str);
    }

    if ($br === TRUE)
    {
      // Convert single linebreaks to <br />
      $str = preg_replace('~(?<!\n)\n(?!\n)~', "<br />\n", $str);
    }

    return $str;
  }
  /**
   * Returns human readable sizes. Based on original functions written by
   * [Aidan Lister](http://aidanlister.com/repos/v/function.size_readable.php)
   * and [Quentin Zervaas](http://www.phpriot.com/d/code/strings/filesize-format/).
   *
   *     echo Text::bytes(filesize($file));
   *
   * When no valid unit is forced, the largest unit that keeps the value at
   * or above 1 is chosen. Sizes beyond the largest known unit are expressed
   * in that unit (PB or PiB). Zero and negative sizes are expressed in bytes.
   *
   * @param   integer  $bytes       size in bytes
   * @param   string   $force_unit  a definitive unit
   * @param   string   $format      the return string format
   * @param   boolean  $si          whether to use SI prefixes or IEC
   * @return  string
   */
  public static function bytes($bytes, $force_unit = NULL, $format = NULL, $si = TRUE)
  {
    // Format string
    $format = ($format === NULL) ? '%01.2f %s' : (string) $format;

    // Normalize once so NULL is never handed to the string functions below
    $force_unit = (string) $force_unit;

    // IEC prefixes (binary)
    if ($si == FALSE OR strpos($force_unit, 'i') !== FALSE)
    {
      $units = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB'];
      $mod = 1024;
    }
    // SI prefixes (decimal)
    else
    {
      $units = ['B', 'kB', 'MB', 'GB', 'TB', 'PB'];
      $mod = 1000;
    }

    // Determine unit to use
    $power = array_search($force_unit, $units, TRUE);

    if ($power === FALSE)
    {
      // Step up one unit at a time instead of using floor(log()): the
      // logarithm is subject to floating-point rounding at exact powers and
      // is not bounded by the number of known units.
      $power = 0;
      $largest = count($units) - 1;
      $scaled = $bytes;

      while ($scaled >= $mod AND $power < $largest)
      {
        $scaled /= $mod;
        $power++;
      }
    }

    return sprintf($format, $bytes / pow($mod, $power), $units[$power]);
  }
  /**
   * Spells out an integer as English text.
   *
   *     // Display: one thousand and twenty-four
   *     echo Text::number(1024);
   *
   *     // Display: five million, six hundred and thirty-two
   *     echo Text::number(5000632);
   *
   * @param   integer  $number  number to format
   * @return  string
   * @since   3.0.8
   * @uses    Text::$units
   */
  public static function number($number)
  {
    // The number must always be an integer
    $number = (int) $number;

    // Textual pieces collected so far, largest unit first
    $parts = [];

    // Unit and text of the most recently added piece
    $previous_unit = NULL;
    $previous_part = '';

    foreach (Text::$units as $unit => $name)
    {
      // Skip units that do not fit into what is left of the number
      if ($number / $unit < 1)
      {
        continue;
      }

      // How many times the unit fits; take that amount off the number
      $count = (int) floor($number / $unit);
      $number -= $unit * $count;

      if ($unit >= 100)
      {
        // Hundreds and up carry their own spelled-out multiplier
        $part = Text::number($count).' '.$name;
      }
      elseif ($previous_unit < 100 AND $previous_unit >= 20)
      {
        // Composite number (i.e. twenty-three): merge into the previous piece
        array_pop($parts);
        $part = $previous_part.'-'.$name;
      }
      else
      {
        $part = $name;
      }

      $parts[] = $part;
      $previous_part = $part;
      $previous_unit = $unit;
    }

    // With more than one piece, the last one is joined with "and"
    $final = (count($parts) > 1) ? array_pop($parts) : NULL;

    $text = implode(', ', $parts);

    if ($final !== NULL)
    {
      $text .= ' and '.$final;
    }

    return $text;
  }
  /**
   * Prevents [widow words](http://www.shauninman.com/archive/2006/08/22/widont_wordpress_plugin)
   * by replacing the whitespace between the last two words with a
   * non-breaking space entity.
   *
   *     echo Text::widont($text);
   *
   * regex courtesy of the Typogrify project
   * @link http://code.google.com/p/typogrify/
   *
   * @param   string  $str  text to remove widows from
   * @return  string
   */
  public static function widont($str)
  {
    // Extended ("x") pattern delimited by "%": whitespace and "#" comments
    // inside the pattern are ignored by the regex engine
    $pattern = "%
      ((?:</?(?:a|em|span|strong|i|b)[^>]*>)|[^<>\s]) # must be proceeded by an approved inline opening or closing tag or a nontag/nonspace
      \s+ # the space to replace
      ([^<>\s]+ # must be flollowed by non-tag non-space characters
      \s* # optional white space!
      (</(a|em|span|strong|i|b)>\s*)* # optional closing inline tags with optional white space after each
      ((</(p|h[1-6]|li|dt|dd)>)|$)) # end with a closing p, h1-6, li or the end of the string
      %x";

    // Keep what surrounds the space and swap the space itself for &nbsp;
    $glued = preg_replace($pattern, '$1&nbsp;$2', $str);

    return $glued;
  }
  /**
   * Returns information about the client user agent.
   *
   *     // Returns "Chrome" when using Google Chrome
   *     $browser = Text::user_agent($agent, 'browser');
   *
   * Multiple values can be returned at once by using an array:
   *
   *     // Get the browser and platform with a single call
   *     $info = Text::user_agent($agent, ['browser', 'platform']);
   *
   * When using an array for the value, an associative array will be returned.
   *
   * The search terms are read from the "user_agents" config group; the
   * first term found in the agent string wins.
   *
   * @param   string  $agent  user_agent
   * @param   mixed   $value  array or string to return: browser, version, robot, mobile, platform
   * @return  mixed   requested information, FALSE if nothing is found
   * @uses    KO7::$config
   */
  public static function user_agent($agent, $value)
  {
    if (is_array($value))
    {
      // Resolve every requested part separately and key the result by part
      $result = [];

      foreach ($value as $key)
      {
        $result[$key] = Text::user_agent($agent, $key);
      }

      return $result;
    }

    if ($value !== 'browser' AND $value != 'version')
    {
      // Generic lookup: the config group is named after the requested value
      foreach (KO7::$config->load('user_agents')->$value as $needle => $label)
      {
        if (stripos($agent, $needle) !== FALSE)
        {
          return $label;
        }
      }

      // The value requested could not be found
      return FALSE;
    }

    // Browser name and version are both derived from the browser group
    foreach (KO7::$config->load('user_agents')->browser as $needle => $label)
    {
      if (stripos($agent, $needle) === FALSE)
      {
        continue;
      }

      // The version is the first number-like token after the search term
      $has_version = preg_match('#'.preg_quote($needle).'[^0-9.]*+([0-9.][0-9.a-z]*)#i', $agent, $hit);

      $info = [
        'browser' => $label,
        'version' => $has_version ? $hit[1] : FALSE,
      ];

      return $info[$value];
    }

    // The value requested could not be found
    return FALSE;
  }
  617. }