Slug.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321
  1. <?php
  2. /**
  3. * @package Kohana/ORM
  4. * @author Koseven Team
  5. * @copyright (c) 2016-2018 Koseven Team
  6. * @license https://koseven.ga/LICENSE.md
  7. */
  8. class ORM_Behavior_Slug extends ORM_Behavior {
  9. /**
  10. * Preferred column to generate slug
  11. * @var string
  12. */
  13. protected $_slug_source_column = 'name';
  14. /**
  15. * Table column for slug value
  16. * @var string
  17. */
  18. protected $_slug_column = 'slug';
  /**
   * Builds the behavior and applies the optional column overrides.
   *
   * Two keys are read from $config through Arr::get():
   *  - 'source': the column whose value is turned into a slug
   *  - 'column': the column the slug is written to
   * The current property value is handed to Arr::get() as the fallback
   * for each key.
   *
   * @param array $config Configuration parameters
   */
  protected function __construct($config)
  {
      parent::__construct($config);

      // Resolve both settings first, then store them
      $source_column = Arr::get($config, 'source', $this->_slug_source_column);
      $slug_column = Arr::get($config, 'column', $this->_slug_column);

      $this->_slug_source_column = $source_column;
      $this->_slug_column = $slug_column;
  }
  /**
   * Loads the model by its slug when a non-numeric string identifier is given.
   *
   * Only strings are considered slug candidates. ctype_digit() is reliable
   * for strings only: an integer between -128 and 255 is interpreted as an
   * ASCII code (so the integer 5 is "not a digit"), which previously sent
   * small integer ids down the slug path. Integers, arrays, objects and
   * NULL are now always left to the caller.
   *
   * @param ORM $model The model
   * @param mixed $id Parameter for find or object to load
   * @return bool FALSE when the record lookup by slug was done here and
   *              further record loading should be prevented, TRUE otherwise
   */
  public function on_construct($model, $id)
  {
      // A string made only of digits is treated as an id, not as a slug
      if (is_string($id) AND ! ctype_digit($id))
      {
          $model->where($this->_slug_column, '=', $id)->find();

          // Prevent further record loading
          return FALSE;
      }

      return TRUE;
  }
  /**
   * Update handler: delegates to create_slug().
   *
   * create_slug() rewrites the slug column whenever the stored slug differs
   * from the slugified source value, not only when the slug is empty.
   *
   * @param ORM $model The model being updated
   */
  public function on_update($model)
  {
      $this->create_slug($model);
  }
  /**
   * Create handler: delegates to create_slug() so the new model gets a
   * slug value.
   *
   * @param ORM $model The model being created
   */
  public function on_create($model)
  {
      $this->create_slug($model);
  }
  /**
   * Generates a unique slug for the model and stores it in the slug column.
   *
   * The base slug is the slugified value of the source column. When that
   * value is empty, or slugifies to an empty string (for instance a title
   * made only of punctuation or of characters slugify() strips), the
   * model's object name is used instead so the slug is never blank.
   *
   * If the base slug is already used by another row, "-0", "-1", ... is
   * appended until a free value is found. Nothing is queried or written
   * when the stored slug already equals the base slug.
   *
   * @param ORM $model The model
   */
  private function create_slug($model)
  {
      $current_slug = $model->get($this->_slug_column);

      // Create a valid slug name
      $source = $model->get($this->_slug_source_column);
      $slug_base = empty($source) ? '' : (string) $this->slugify($source);

      if ($slug_base === '')
      {
          // No usable source text, fall back to the object name
          $slug_base = (string) $this->slugify($model->object_name());
      }

      if ($current_slug === $slug_base)
      {
          // Slug is already in sync with its source
          return;
      }

      // Looks for any other row using the candidate slug.
      // NOTE(review): for a new record pk() is presumably NULL - confirm the
      // query builder compiles the "!=" comparison as intended in that case.
      $pk_column = $model->primary_key();
      $query = DB::select()->from($model->table_name())
          ->where($this->_slug_column, '=', ':slug')
          ->where($pk_column, '!=', $model->pk())
          ->limit(1);

      // Try the bare slug first, then "<base>-0", "<base>-1", ...
      $candidate = $slug_base;

      for ($index = 0; ; $index++)
      {
          $query->param(':slug', $candidate);

          if ($query->execute()->get($pk_column, FALSE) === FALSE)
          {
              // Candidate is free
              break;
          }

          $candidate = sprintf('%s-%d', $slug_base, $index);
      }

      $model->set($this->_slug_column, $candidate);
  }
  /**
   * Creates a safe, lowercase, hyphen separated path segment from a text.
   *
   * Steps: accents are replaced by plain letters, every run of characters
   * that is not a letter, digit or dot becomes a single hyphen, the text is
   * lowercased, remaining characters outside [-A-Za-z0-9_.] are dropped,
   * dots are removed in strict mode, and finally repeated hyphens are
   * collapsed and hyphens at both ends are trimmed.
   *
   * Collapsing and trimming happen last because dropping characters can
   * itself leave leading, trailing or doubled hyphens behind.
   *
   * @param string $text Text to convert
   * @param bool $strict When TRUE dots are removed from the result
   * @return string
   */
  protected function slugify($text, $strict = TRUE)
  {
      $text = $this->remove_accents($text);

      // replace non letter or digits by -
      $slug = preg_replace('~[^\\pL\d.]+~u', '-', $text);

      if ($slug === NULL)
      {
          // The "u" modifier rejects invalid UTF-8; retry byte-wise
          $slug = preg_replace('~[^A-Za-z\d.]+~', '-', $text);
      }

      // lowercase
      $slug = strtolower((string) $slug);

      // remove unwanted characters
      $slug = preg_replace('~[^-\w.]+~', '', $slug);

      if ($strict)
      {
          $slug = str_replace('.', '', $slug);
      }

      // collapse hyphen runs left behind by the removals above
      $slug = preg_replace('~-{2,}~', '-', $slug);

      // trim
      return trim($slug, '-');
  }
  /**
   * Checks whether the input string is valid UTF-8.
   *
   * Validation is delegated to PCRE: with the "u" modifier the subject is
   * checked as UTF-8 before matching, so the empty pattern matches every
   * valid string and fails on an invalid one. This rejects truncated
   * sequences, stray continuation bytes, overlong encodings and the
   * obsolete 5 and 6 byte forms, and agrees with the "u" regex that
   * slugify() applies to the same text.
   *
   * @param string $str String to check
   * @return bool TRUE when $str is valid UTF-8 (an empty string is valid)
   */
  function seems_utf8($str)
  {
      return preg_match('//u', (string) $str) === 1;
  }
  /**
   * Replaces accented characters with plain ASCII equivalents.
   *
   * Strings without any byte in the 0x80-0xFF range are returned untouched.
   * Otherwise, when seems_utf8() accepts the string, a UTF-8 lookup table is
   * applied; if not, the string is assumed to be ISO-8859-1 and converted
   * byte by byte. Characters that are not in the tables are left as they are.
   *
   * @param string $string Text to convert
   * @return string
   */
  function remove_accents($string)
  {
      if ( ! preg_match('/[\x80-\xff]/', $string))
          return $string;

      if ($this->seems_utf8($string))
      {
          $chars = [
              // Decompositions for Latin-1 Supplement
              chr(195).chr(128) => 'A', chr(195).chr(129) => 'A',
              chr(195).chr(130) => 'A', chr(195).chr(131) => 'A',
              chr(195).chr(132) => 'A', chr(195).chr(133) => 'A',
              chr(195).chr(134) => 'AE', chr(195).chr(135) => 'C',
              chr(195).chr(136) => 'E', chr(195).chr(137) => 'E',
              chr(195).chr(138) => 'E', chr(195).chr(139) => 'E',
              chr(195).chr(140) => 'I', chr(195).chr(141) => 'I',
              chr(195).chr(142) => 'I', chr(195).chr(143) => 'I',
              chr(195).chr(144) => 'D', chr(195).chr(145) => 'N',
              chr(195).chr(146) => 'O', chr(195).chr(147) => 'O',
              chr(195).chr(148) => 'O', chr(195).chr(149) => 'O',
              chr(195).chr(150) => 'O',
              // O with stroke, the uppercase counterpart of chr(195).chr(184)
              chr(195).chr(152) => 'O',
              chr(195).chr(153) => 'U',
              chr(195).chr(154) => 'U', chr(195).chr(155) => 'U',
              chr(195).chr(156) => 'U', chr(195).chr(157) => 'Y',
              chr(195).chr(158) => 'TH', chr(195).chr(159) => 's',
              chr(195).chr(160) => 'a', chr(195).chr(161) => 'a',
              chr(195).chr(162) => 'a', chr(195).chr(163) => 'a',
              chr(195).chr(164) => 'a', chr(195).chr(165) => 'a',
              chr(195).chr(166) => 'ae', chr(195).chr(167) => 'c',
              chr(195).chr(168) => 'e', chr(195).chr(169) => 'e',
              chr(195).chr(170) => 'e', chr(195).chr(171) => 'e',
              chr(195).chr(172) => 'i', chr(195).chr(173) => 'i',
              chr(195).chr(174) => 'i', chr(195).chr(175) => 'i',
              chr(195).chr(176) => 'd', chr(195).chr(177) => 'n',
              chr(195).chr(178) => 'o', chr(195).chr(179) => 'o',
              chr(195).chr(180) => 'o', chr(195).chr(181) => 'o',
              chr(195).chr(182) => 'o', chr(195).chr(184) => 'o',
              chr(195).chr(185) => 'u', chr(195).chr(186) => 'u',
              chr(195).chr(187) => 'u', chr(195).chr(188) => 'u',
              chr(195).chr(189) => 'y', chr(195).chr(190) => 'th',
              chr(195).chr(191) => 'y',
              // Decompositions for Latin Extended-A
              chr(196).chr(128) => 'A', chr(196).chr(129) => 'a',
              chr(196).chr(130) => 'A', chr(196).chr(131) => 'a',
              chr(196).chr(132) => 'A', chr(196).chr(133) => 'a',
              chr(196).chr(134) => 'C', chr(196).chr(135) => 'c',
              chr(196).chr(136) => 'C', chr(196).chr(137) => 'c',
              chr(196).chr(138) => 'C', chr(196).chr(139) => 'c',
              chr(196).chr(140) => 'C', chr(196).chr(141) => 'c',
              chr(196).chr(142) => 'D', chr(196).chr(143) => 'd',
              chr(196).chr(144) => 'D', chr(196).chr(145) => 'd',
              chr(196).chr(146) => 'E', chr(196).chr(147) => 'e',
              chr(196).chr(148) => 'E', chr(196).chr(149) => 'e',
              chr(196).chr(150) => 'E', chr(196).chr(151) => 'e',
              chr(196).chr(152) => 'E', chr(196).chr(153) => 'e',
              chr(196).chr(154) => 'E', chr(196).chr(155) => 'e',
              chr(196).chr(156) => 'G', chr(196).chr(157) => 'g',
              chr(196).chr(158) => 'G', chr(196).chr(159) => 'g',
              chr(196).chr(160) => 'G', chr(196).chr(161) => 'g',
              chr(196).chr(162) => 'G', chr(196).chr(163) => 'g',
              chr(196).chr(164) => 'H', chr(196).chr(165) => 'h',
              chr(196).chr(166) => 'H', chr(196).chr(167) => 'h',
              chr(196).chr(168) => 'I', chr(196).chr(169) => 'i',
              chr(196).chr(170) => 'I', chr(196).chr(171) => 'i',
              chr(196).chr(172) => 'I', chr(196).chr(173) => 'i',
              chr(196).chr(174) => 'I', chr(196).chr(175) => 'i',
              chr(196).chr(176) => 'I', chr(196).chr(177) => 'i',
              chr(196).chr(178) => 'IJ', chr(196).chr(179) => 'ij',
              chr(196).chr(180) => 'J', chr(196).chr(181) => 'j',
              chr(196).chr(182) => 'K', chr(196).chr(183) => 'k',
              chr(196).chr(184) => 'k', chr(196).chr(185) => 'L',
              chr(196).chr(186) => 'l', chr(196).chr(187) => 'L',
              chr(196).chr(188) => 'l', chr(196).chr(189) => 'L',
              chr(196).chr(190) => 'l', chr(196).chr(191) => 'L',
              chr(197).chr(128) => 'l', chr(197).chr(129) => 'L',
              chr(197).chr(130) => 'l', chr(197).chr(131) => 'N',
              chr(197).chr(132) => 'n', chr(197).chr(133) => 'N',
              chr(197).chr(134) => 'n', chr(197).chr(135) => 'N',
              // U+0149 and U+014B are lowercase, U+014A is uppercase
              chr(197).chr(136) => 'n', chr(197).chr(137) => 'n',
              chr(197).chr(138) => 'N', chr(197).chr(139) => 'n',
              chr(197).chr(140) => 'O', chr(197).chr(141) => 'o',
              chr(197).chr(142) => 'O', chr(197).chr(143) => 'o',
              chr(197).chr(144) => 'O', chr(197).chr(145) => 'o',
              chr(197).chr(146) => 'OE', chr(197).chr(147) => 'oe',
              chr(197).chr(148) => 'R', chr(197).chr(149) => 'r',
              chr(197).chr(150) => 'R', chr(197).chr(151) => 'r',
              chr(197).chr(152) => 'R', chr(197).chr(153) => 'r',
              chr(197).chr(154) => 'S', chr(197).chr(155) => 's',
              chr(197).chr(156) => 'S', chr(197).chr(157) => 's',
              chr(197).chr(158) => 'S', chr(197).chr(159) => 's',
              chr(197).chr(160) => 'S', chr(197).chr(161) => 's',
              chr(197).chr(162) => 'T', chr(197).chr(163) => 't',
              chr(197).chr(164) => 'T', chr(197).chr(165) => 't',
              chr(197).chr(166) => 'T', chr(197).chr(167) => 't',
              chr(197).chr(168) => 'U', chr(197).chr(169) => 'u',
              chr(197).chr(170) => 'U', chr(197).chr(171) => 'u',
              chr(197).chr(172) => 'U', chr(197).chr(173) => 'u',
              chr(197).chr(174) => 'U', chr(197).chr(175) => 'u',
              chr(197).chr(176) => 'U', chr(197).chr(177) => 'u',
              chr(197).chr(178) => 'U', chr(197).chr(179) => 'u',
              chr(197).chr(180) => 'W', chr(197).chr(181) => 'w',
              chr(197).chr(182) => 'Y', chr(197).chr(183) => 'y',
              chr(197).chr(184) => 'Y', chr(197).chr(185) => 'Z',
              chr(197).chr(186) => 'z', chr(197).chr(187) => 'Z',
              chr(197).chr(188) => 'z', chr(197).chr(189) => 'Z',
              chr(197).chr(190) => 'z', chr(197).chr(191) => 's',
              // Decompositions for Latin Extended-B
              chr(200).chr(152) => 'S', chr(200).chr(153) => 's',
              chr(200).chr(154) => 'T', chr(200).chr(155) => 't',
              // Euro Sign
              chr(226).chr(130).chr(172) => 'E',
              // GBP (Pound) Sign
              chr(194).chr(163) => ''
          ];

          // Array form of strtr(): longest keys are matched first
          return strtr($string, $chars);
      }

      // Assume ISO-8859-1 if not UTF-8
      // Single byte to single character map; both strings have equal length
      $single_in = chr(128).chr(131).chr(138).chr(142).chr(154).chr(158)
          . chr(159).chr(162).chr(165).chr(181).chr(192).chr(193).chr(194)
          . chr(195).chr(196).chr(197).chr(199).chr(200).chr(201).chr(202)
          . chr(203).chr(204).chr(205).chr(206).chr(207).chr(209).chr(210)
          . chr(211).chr(212).chr(213).chr(214).chr(216).chr(217).chr(218)
          . chr(219).chr(220).chr(221).chr(224).chr(225).chr(226).chr(227)
          . chr(228).chr(229).chr(231).chr(232).chr(233).chr(234).chr(235)
          . chr(236).chr(237).chr(238).chr(239).chr(241).chr(242).chr(243)
          . chr(244).chr(245).chr(246).chr(248).chr(249).chr(250).chr(251)
          . chr(252).chr(253).chr(255);
      $single_out = "EfSZszYcYuAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy";
      $string = strtr($string, $single_in, $single_out);

      // Bytes that expand to two characters
      $double_in = [chr(140), chr(156), chr(198), chr(208), chr(222), chr(223), chr(230), chr(240), chr(254)];
      $double_out = ['OE', 'oe', 'AE', 'DH', 'TH', 'ss', 'ae', 'dh', 'th'];

      return str_replace($double_in, $double_out, $string);
  }
  287. }