glob.c 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127
  1. /*-
  2. * SPDX-License-Identifier: BSD-3-Clause
  3. *
  4. * Copyright (c) 1989, 1993
  5. * The Regents of the University of California. All rights reserved.
  6. *
  7. * This code is derived from software contributed to Berkeley by
  8. * Guido van Rossum.
  9. *
  10. * Copyright (c) 2011 The FreeBSD Foundation
  11. * All rights reserved.
  12. * Portions of this software were developed by David Chisnall
  13. * under sponsorship from the FreeBSD Foundation.
  14. *
  15. * Redistribution and use in source and binary forms, with or without
  16. * modification, are permitted provided that the following conditions
  17. * are met:
  18. * 1. Redistributions of source code must retain the above copyright
  19. * notice, this list of conditions and the following disclaimer.
  20. * 2. Redistributions in binary form must reproduce the above copyright
  21. * notice, this list of conditions and the following disclaimer in the
  22. * documentation and/or other materials provided with the distribution.
  23. * 3. Neither the name of the University nor the names of its contributors
  24. * may be used to endorse or promote products derived from this software
  25. * without specific prior written permission.
  26. *
  27. * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  28. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  29. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  30. * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  31. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  32. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  33. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  34. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  35. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  36. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  37. * SUCH DAMAGE.
  38. */
  39. #include <sys/cdefs.h>
  40. __SCCSID("@(#)glob.c 8.3 (Berkeley) 10/13/93");
  41. __FBSDID("$FreeBSD$");
  42. /*
  43. * glob(3) -- a superset of the one defined in POSIX 1003.2.
  44. *
  45. * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
  46. *
  47. * Optional extra services, controlled by flags not defined by POSIX:
  48. *
  49. * GLOB_QUOTE:
  50. * Escaping convention: \ inhibits any special meaning the following
  51. * character might have (except \ at end of string is retained).
  52. * GLOB_MAGCHAR:
  53. * Set in gl_flags if pattern contained a globbing character.
  54. * GLOB_NOMAGIC:
  55. * Same as GLOB_NOCHECK, but it will only append pattern if it did
  56. * not contain any magic characters. [Used in csh style globbing]
  57. * GLOB_ALTDIRFUNC:
  58. * Use alternately specified directory access functions.
  59. * GLOB_TILDE:
  60. * expand ~user/foo to the /home/dir/of/user/foo
  61. * GLOB_BRACE:
  62. * expand {1,2}{a,b} to 1a 1b 2a 2b
  63. * gl_matchc:
  64. * Number of matches in the current invocation of glob.
  65. */
  /*
   * Some notes on multibyte character support:
   * 1. Patterns with illegal byte sequences match nothing - even if
   *    GLOB_NOCHECK is specified.
   * 2. Illegal byte sequences in filenames are handled by treating each
   *    byte of the sequence as a single-byte character whose value is
   *    that byte cast to wchar_t.
   * 3. State-dependent encodings are not currently supported.
   */
  75. #include <sys/param.h>
  76. #include <sys/stat.h>
  77. #include <ctype.h>
  78. #include <dirent.h>
  79. #include <errno.h>
  80. #include "glob.h"
  81. #include <limits.h>
  82. #include <pwd.h>
  83. #include <stdint.h>
  84. #include <stdio.h>
  85. #include "stdlib.h"
  86. #include <string.h>
  87. #include "unistd.h"
  88. #include <wchar.h>
  89. #ifdef USE_LOCALE_COLLATE
  90. #include "collate.h"
  91. #endif
  /*
   * glob(3) expansion limits.  Expansion stops as soon as any one of
   * these limits is reached, which caps the runtime in the face of DoS
   * attacks.  See also CVE-2010-2632.
   */
  #define GLOB_LIMIT_BRACE      128         /* number of brace calls */
  #define GLOB_LIMIT_PATH       65536       /* number of path elements */
  #define GLOB_LIMIT_READDIR    16384       /* number of readdirs */
  #define GLOB_LIMIT_STAT       1024        /* number of stat system calls */
  #define GLOB_LIMIT_STRING     ARG_MAX     /* maximum total size for paths */

  /*
   * Running totals for one glob() invocation, compared against the
   * GLOB_LIMIT_* values above.  l_path_lim is the exception: it holds
   * the match-count ceiling rather than a running count.
   */
  struct glob_limit {
      size_t l_brace_cnt;
      size_t l_path_lim;
      size_t l_readdir_cnt;
      size_t l_stat_cnt;
      size_t l_string_cnt;
  };

  /* Pattern characters with special meaning, as wide characters. */
  #define DOT         L'.'
  #define EOS         L'\0'
  #define LBRACKET    L'['
  #define NOT         L'!'
  #define QUESTION    L'?'
  #define QUOTE       L'\\'
  #define RANGE       L'-'
  #define RBRACKET    L']'
  #define SEP         L'/'
  #define STAR        L'*'
  #define TILDE       L'~'
  #define LBRACE      L'{'
  #define RBRACE      L'}'
  #define COMMA       L','

  /*
   * A Char is a wide character value (the M_CHAR bits) plus two flag
   * bits above it: M_QUOTE tags a compiled meta character and M_PROTECT
   * tags a character that was escaped in the pattern.
   */
  #define M_QUOTE     0x8000000000ULL
  #define M_PROTECT   0x4000000000ULL
  #define M_MASK      0xffffffffffULL
  #define M_CHAR      0x00ffffffffULL

  typedef uint_fast64_t Char;

  #define CHAR(c)     ((Char)((c)&M_CHAR))        /* strip both flag bits */
  #define META(c)     ((Char)((c)|M_QUOTE))       /* tag as meta character */
  #define UNPROT(c)   ((c) & ~M_PROTECT)          /* drop the escape flag */

  /* Compiled forms of the meta characters, as stored by glob0(). */
  #define M_ALL       META(L'*')
  #define M_END       META(L']')
  #define M_NOT       META(L'!')
  #define M_ONE       META(L'?')
  #define M_RNG       META(L'-')
  #define M_SET       META(L'[')

  #define ismeta(c)   (((c)&M_QUOTE) != 0)
  #ifdef DEBUG
  #define isprot(c)   (((c)&M_PROTECT) != 0)
  #endif
  141. static int compare(const void *, const void *);
  142. static int g_Ctoc(const Char *, char *, size_t);
  143. static int g_lstat(Char *, struct stat *, glob_t *);
  144. static DIR *g_opendir(Char *, glob_t *);
  145. static const Char *g_strchr(const Char *, wchar_t);
  146. #ifdef notdef
  147. static Char *g_strcat(Char *, const Char *);
  148. #endif
  149. static int g_stat(Char *, struct stat *, glob_t *);
  150. static int glob0(const Char *, glob_t *, struct glob_limit *,
  151. const char *);
  152. static int glob1(Char *, glob_t *, struct glob_limit *);
  153. static int glob2(Char *, Char *, Char *, Char *, glob_t *,
  154. struct glob_limit *);
  155. static int glob3(Char *, Char *, Char *, Char *, Char *, glob_t *,
  156. struct glob_limit *);
  157. static int globextend(const Char *, glob_t *, struct glob_limit *,
  158. const char *);
  159. static const Char *
  160. globtilde(const Char *, Char *, size_t, glob_t *);
  161. static int globexp0(const Char *, glob_t *, struct glob_limit *,
  162. const char *);
  163. static int globexp1(const Char *, glob_t *, struct glob_limit *);
  164. static int globexp2(const Char *, const Char *, glob_t *,
  165. struct glob_limit *);
  166. static int globfinal(glob_t *, struct glob_limit *, size_t,
  167. const char *);
  168. static int match(Char *, Char *, Char *);
  169. static int err_nomatch(glob_t *, struct glob_limit *, const char *);
  170. static int err_aborted(glob_t *, int, char *);
  171. #ifdef DEBUG
  172. static void qprintf(const char *, Char *);
  173. #endif
  174. int
  175. glob(const char * __restrict pattern, int flags,
  176. int (*errfunc)(const char *, int), glob_t * __restrict pglob)
  177. {
  178. struct glob_limit limit = { 0, 0, 0, 0, 0 };
  179. const char *patnext;
  180. Char *bufnext, *bufend, patbuf[MAXPATHLEN], prot;
  181. mbstate_t mbs;
  182. wchar_t wc;
  183. size_t clen;
  184. int too_long;
  185. patnext = pattern;
  186. if (!(flags & GLOB_APPEND)) {
  187. pglob->gl_pathc = 0;
  188. pglob->gl_pathv = NULL;
  189. if (!(flags & GLOB_DOOFFS))
  190. pglob->gl_offs = 0;
  191. }
  192. if (flags & GLOB_LIMIT) {
  193. limit.l_path_lim = pglob->gl_matchc;
  194. if (limit.l_path_lim == 0)
  195. limit.l_path_lim = GLOB_LIMIT_PATH;
  196. }
  197. pglob->gl_flags = flags & ~GLOB_MAGCHAR;
  198. pglob->gl_errfunc = errfunc;
  199. pglob->gl_matchc = 0;
  200. bufnext = patbuf;
  201. bufend = bufnext + MAXPATHLEN - 1;
  202. too_long = 1;
  203. if (flags & GLOB_NOESCAPE) {
  204. memset(&mbs, 0, sizeof(mbs));
  205. while (bufnext <= bufend) {
  206. clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
  207. if (clen == (size_t)-1 || clen == (size_t)-2)
  208. return (err_nomatch(pglob, &limit, pattern));
  209. else if (clen == 0) {
  210. too_long = 0;
  211. break;
  212. }
  213. *bufnext++ = wc;
  214. patnext += clen;
  215. }
  216. } else {
  217. /* Protect the quoted characters. */
  218. memset(&mbs, 0, sizeof(mbs));
  219. while (bufnext <= bufend) {
  220. if (*patnext == '\\') {
  221. if (*++patnext == '\0') {
  222. *bufnext++ = QUOTE;
  223. continue;
  224. }
  225. prot = M_PROTECT;
  226. } else
  227. prot = 0;
  228. clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
  229. if (clen == (size_t)-1 || clen == (size_t)-2)
  230. return (err_nomatch(pglob, &limit, pattern));
  231. else if (clen == 0) {
  232. too_long = 0;
  233. break;
  234. }
  235. *bufnext++ = wc | prot;
  236. patnext += clen;
  237. }
  238. }
  239. if (too_long)
  240. return (err_nomatch(pglob, &limit, pattern));
  241. *bufnext = EOS;
  242. if (flags & GLOB_BRACE)
  243. return (globexp0(patbuf, pglob, &limit, pattern));
  244. else
  245. return (glob0(patbuf, pglob, &limit, pattern));
  246. }
  247. static int
  248. globexp0(const Char *pattern, glob_t *pglob, struct glob_limit *limit,
  249. const char *origpat) {
  250. int rv;
  251. size_t oldpathc;
  252. /* Protect a single {}, for find(1), like csh */
  253. if (pattern[0] == LBRACE && pattern[1] == RBRACE && pattern[2] == EOS) {
  254. if ((pglob->gl_flags & GLOB_LIMIT) &&
  255. limit->l_brace_cnt++ >= GLOB_LIMIT_BRACE) {
  256. errno = E2BIG;
  257. return (GLOB_NOSPACE);
  258. }
  259. return (glob0(pattern, pglob, limit, origpat));
  260. }
  261. oldpathc = pglob->gl_pathc;
  262. if ((rv = globexp1(pattern, pglob, limit)) != 0)
  263. return rv;
  264. return (globfinal(pglob, limit, oldpathc, origpat));
  265. }
  266. /*
  267. * Expand recursively a glob {} pattern. When there is no more expansion
  268. * invoke the standard globbing routine to glob the rest of the magic
  269. * characters
  270. */
  271. static int
  272. globexp1(const Char *pattern, glob_t *pglob, struct glob_limit *limit)
  273. {
  274. const Char* ptr;
  275. if ((ptr = g_strchr(pattern, LBRACE)) != NULL) {
  276. if ((pglob->gl_flags & GLOB_LIMIT) &&
  277. limit->l_brace_cnt++ >= GLOB_LIMIT_BRACE) {
  278. errno = E2BIG;
  279. return (GLOB_NOSPACE);
  280. }
  281. return (globexp2(ptr, pattern, pglob, limit));
  282. }
  283. return (glob0(pattern, pglob, limit, NULL));
  284. }
  285. /*
  286. * Recursive brace globbing helper. Tries to expand a single brace.
  287. * If it succeeds then it invokes globexp1 with the new pattern.
  288. * If it fails then it tries to glob the rest of the pattern and returns.
  289. */
  290. static int
  291. globexp2(const Char *ptr, const Char *pattern, glob_t *pglob,
  292. struct glob_limit *limit)
  293. {
  294. int i, rv;
  295. Char *lm, *ls;
  296. const Char *pe, *pm, *pm1, *pl;
  297. Char patbuf[MAXPATHLEN];
  298. /* copy part up to the brace */
  299. for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++)
  300. continue;
  301. *lm = EOS;
  302. ls = lm;
  303. /* Find the balanced brace */
  304. for (i = 0, pe = ++ptr; *pe != EOS; pe++)
  305. if (*pe == LBRACKET) {
  306. /* Ignore everything between [] */
  307. for (pm = pe++; *pe != RBRACKET && *pe != EOS; pe++)
  308. continue;
  309. if (*pe == EOS) {
  310. /*
  311. * We could not find a matching RBRACKET.
  312. * Ignore and just look for RBRACE
  313. */
  314. pe = pm;
  315. }
  316. }
  317. else if (*pe == LBRACE)
  318. i++;
  319. else if (*pe == RBRACE) {
  320. if (i == 0)
  321. break;
  322. i--;
  323. }
  324. /* Non matching braces; just glob the pattern */
  325. if (i != 0 || *pe == EOS)
  326. return (glob0(pattern, pglob, limit, NULL));
  327. for (i = 0, pl = pm = ptr; pm <= pe; pm++)
  328. switch (*pm) {
  329. case LBRACKET:
  330. /* Ignore everything between [] */
  331. for (pm1 = pm++; *pm != RBRACKET && *pm != EOS; pm++)
  332. continue;
  333. if (*pm == EOS) {
  334. /*
  335. * We could not find a matching RBRACKET.
  336. * Ignore and just look for RBRACE
  337. */
  338. pm = pm1;
  339. }
  340. break;
  341. case LBRACE:
  342. i++;
  343. break;
  344. case RBRACE:
  345. if (i) {
  346. i--;
  347. break;
  348. }
  349. /* FALLTHROUGH */
  350. case COMMA:
  351. if (i && *pm == COMMA)
  352. break;
  353. else {
  354. /* Append the current string */
  355. for (lm = ls; (pl < pm); *lm++ = *pl++)
  356. continue;
  357. /*
  358. * Append the rest of the pattern after the
  359. * closing brace
  360. */
  361. for (pl = pe + 1; (*lm++ = *pl++) != EOS;)
  362. continue;
  363. /* Expand the current pattern */
  364. #ifdef DEBUG
  365. qprintf("globexp2:", patbuf);
  366. #endif
  367. rv = globexp1(patbuf, pglob, limit);
  368. if (rv)
  369. return (rv);
  370. /* move after the comma, to the next string */
  371. pl = pm + 1;
  372. }
  373. break;
  374. default:
  375. break;
  376. }
  377. return (0);
  378. }
  379. /*
  380. * expand tilde from the passwd file.
  381. */
  382. static const Char *
  383. globtilde(const Char *pattern, Char *patbuf, size_t patbuf_len, glob_t *pglob)
  384. {
  385. struct passwd *pwd;
  386. char *h, *sc;
  387. const Char *p;
  388. Char *b, *eb;
  389. wchar_t wc;
  390. wchar_t wbuf[MAXPATHLEN];
  391. wchar_t *wbufend, *dc;
  392. size_t clen;
  393. mbstate_t mbs;
  394. int too_long;
  395. if (*pattern != TILDE || !(pglob->gl_flags & GLOB_TILDE))
  396. return (pattern);
  397. /*
  398. * Copy up to the end of the string or /
  399. */
  400. eb = &patbuf[patbuf_len - 1];
  401. for (p = pattern + 1, b = patbuf;
  402. b < eb && *p != EOS && UNPROT(*p) != SEP; *b++ = *p++)
  403. continue;
  404. if (*p != EOS && UNPROT(*p) != SEP)
  405. return (NULL);
  406. *b = EOS;
  407. h = NULL;
  408. if (patbuf[0] == EOS) {
  409. /*
  410. * handle a plain ~ or ~/ by expanding $HOME first (iff
  411. * we're not running setuid or setgid) and then trying
  412. * the password file
  413. */
  414. if (issetugid() != 0 ||
  415. (h = getenv("HOME")) == NULL) {
  416. if (((h = getlogin()) != NULL &&
  417. (pwd = getpwnam(h)) != NULL) ||
  418. (pwd = getpwuid(getuid())) != NULL)
  419. h = pwd->pw_dir;
  420. else
  421. return (pattern);
  422. }
  423. }
  424. else {
  425. /*
  426. * Expand a ~user
  427. */
  428. if (g_Ctoc(patbuf, (char *)wbuf, sizeof(wbuf)))
  429. return (NULL);
  430. if ((pwd = getpwnam((char *)wbuf)) == NULL)
  431. return (pattern);
  432. else
  433. h = pwd->pw_dir;
  434. }
  435. /* Copy the home directory */
  436. dc = wbuf;
  437. sc = h;
  438. wbufend = wbuf + MAXPATHLEN - 1;
  439. too_long = 1;
  440. memset(&mbs, 0, sizeof(mbs));
  441. while (dc <= wbufend) {
  442. clen = mbrtowc(&wc, sc, MB_LEN_MAX, &mbs);
  443. if (clen == (size_t)-1 || clen == (size_t)-2) {
  444. /* XXX See initial comment #2. */
  445. wc = (unsigned char)*sc;
  446. clen = 1;
  447. memset(&mbs, 0, sizeof(mbs));
  448. }
  449. if ((*dc++ = wc) == EOS) {
  450. too_long = 0;
  451. break;
  452. }
  453. sc += clen;
  454. }
  455. if (too_long)
  456. return (NULL);
  457. dc = wbuf;
  458. for (b = patbuf; b < eb && *dc != EOS; *b++ = *dc++ | M_PROTECT)
  459. continue;
  460. if (*dc != EOS)
  461. return (NULL);
  462. /* Append the rest of the pattern */
  463. if (*p != EOS) {
  464. too_long = 1;
  465. while (b <= eb) {
  466. if ((*b++ = *p++) == EOS) {
  467. too_long = 0;
  468. break;
  469. }
  470. }
  471. if (too_long)
  472. return (NULL);
  473. } else
  474. *b = EOS;
  475. return (patbuf);
  476. }
  477. /*
  478. * The main glob() routine: compiles the pattern (optionally processing
  479. * quotes), calls glob1() to do the real pattern matching, and finally
  480. * sorts the list (unless unsorted operation is requested). Returns 0
  481. * if things went well, nonzero if errors occurred.
  482. */
  483. static int
  484. glob0(const Char *pattern, glob_t *pglob, struct glob_limit *limit,
  485. const char *origpat) {
  486. const Char *qpatnext;
  487. int err;
  488. size_t oldpathc;
  489. Char *bufnext, c, patbuf[MAXPATHLEN];
  490. qpatnext = globtilde(pattern, patbuf, MAXPATHLEN, pglob);
  491. if (qpatnext == NULL) {
  492. errno = E2BIG;
  493. return (GLOB_NOSPACE);
  494. }
  495. oldpathc = pglob->gl_pathc;
  496. bufnext = patbuf;
  497. /* We don't need to check for buffer overflow any more. */
  498. while ((c = *qpatnext++) != EOS) {
  499. switch (c) {
  500. case LBRACKET:
  501. c = *qpatnext;
  502. if (c == NOT)
  503. ++qpatnext;
  504. if (*qpatnext == EOS ||
  505. g_strchr(qpatnext+1, RBRACKET) == NULL) {
  506. *bufnext++ = LBRACKET;
  507. if (c == NOT)
  508. --qpatnext;
  509. break;
  510. }
  511. *bufnext++ = M_SET;
  512. if (c == NOT)
  513. *bufnext++ = M_NOT;
  514. c = *qpatnext++;
  515. do {
  516. *bufnext++ = CHAR(c);
  517. if (*qpatnext == RANGE &&
  518. (c = qpatnext[1]) != RBRACKET) {
  519. *bufnext++ = M_RNG;
  520. *bufnext++ = CHAR(c);
  521. qpatnext += 2;
  522. }
  523. } while ((c = *qpatnext++) != RBRACKET);
  524. pglob->gl_flags |= GLOB_MAGCHAR;
  525. *bufnext++ = M_END;
  526. break;
  527. case QUESTION:
  528. pglob->gl_flags |= GLOB_MAGCHAR;
  529. *bufnext++ = M_ONE;
  530. break;
  531. case STAR:
  532. pglob->gl_flags |= GLOB_MAGCHAR;
  533. /* collapse adjacent stars to one,
  534. * to ensure "**" at the end continues to match the
  535. * empty string
  536. */
  537. if (bufnext == patbuf || bufnext[-1] != M_ALL)
  538. *bufnext++ = M_ALL;
  539. break;
  540. default:
  541. *bufnext++ = CHAR(c);
  542. break;
  543. }
  544. }
  545. *bufnext = EOS;
  546. #ifdef DEBUG
  547. qprintf("glob0:", patbuf);
  548. #endif
  549. if ((err = glob1(patbuf, pglob, limit)) != 0)
  550. return(err);
  551. if (origpat != NULL)
  552. return (globfinal(pglob, limit, oldpathc, origpat));
  553. return (0);
  554. }
  555. static int
  556. globfinal(glob_t *pglob, struct glob_limit *limit, size_t oldpathc,
  557. const char *origpat) {
  558. if (pglob->gl_pathc == oldpathc)
  559. return (err_nomatch(pglob, limit, origpat));
  560. if (!(pglob->gl_flags & GLOB_NOSORT))
  561. qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
  562. pglob->gl_pathc - oldpathc, sizeof(char *), compare);
  563. return (0);
  564. }
  /*
   * qsort(3) comparison callback for the path vector: p and q each
   * point at a string pointer, and the strings are ordered with
   * strcoll(3).
   */
  static int
  compare(const void *p, const void *q)
  {
      /* Keep the const qualifier qsort() hands us; nothing is modified. */
      const char *const *lhs = p;
      const char *const *rhs = q;

      return (strcoll(*lhs, *rhs));
  }
  570. static int
  571. glob1(Char *pattern, glob_t *pglob, struct glob_limit *limit)
  572. {
  573. Char pathbuf[MAXPATHLEN];
  574. /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
  575. if (*pattern == EOS)
  576. return (0);
  577. return (glob2(pathbuf, pathbuf, pathbuf + MAXPATHLEN - 1,
  578. pattern, pglob, limit));
  579. }
  580. /*
  581. * The functions glob2 and glob3 are mutually recursive; there is one level
  582. * of recursion for each segment in the pattern that contains one or more
  583. * meta characters.
  584. */
  585. static int
  586. glob2(Char *pathbuf, Char *pathend, Char *pathend_last, Char *pattern,
  587. glob_t *pglob, struct glob_limit *limit)
  588. {
  589. struct stat sb;
  590. Char *p, *q;
  591. int anymeta;
  592. /*
  593. * Loop over pattern segments until end of pattern or until
  594. * segment with meta character found.
  595. */
  596. for (anymeta = 0;;) {
  597. if (*pattern == EOS) { /* End of pattern? */
  598. *pathend = EOS;
  599. if (g_lstat(pathbuf, &sb, pglob))
  600. return (0);
  601. if ((pglob->gl_flags & GLOB_LIMIT) &&
  602. limit->l_stat_cnt++ >= GLOB_LIMIT_STAT) {
  603. errno = E2BIG;
  604. return (GLOB_NOSPACE);
  605. }
  606. if ((pglob->gl_flags & GLOB_MARK) &&
  607. UNPROT(pathend[-1]) != SEP &&
  608. (S_ISDIR(sb.st_mode) ||
  609. (S_ISLNK(sb.st_mode) &&
  610. g_stat(pathbuf, &sb, pglob) == 0 &&
  611. S_ISDIR(sb.st_mode)))) {
  612. if (pathend + 1 > pathend_last) {
  613. errno = E2BIG;
  614. return (GLOB_NOSPACE);
  615. }
  616. *pathend++ = SEP;
  617. *pathend = EOS;
  618. }
  619. ++pglob->gl_matchc;
  620. return (globextend(pathbuf, pglob, limit, NULL));
  621. }
  622. /* Find end of next segment, copy tentatively to pathend. */
  623. q = pathend;
  624. p = pattern;
  625. while (*p != EOS && UNPROT(*p) != SEP) {
  626. if (ismeta(*p))
  627. anymeta = 1;
  628. if (q + 1 > pathend_last) {
  629. errno = E2BIG;
  630. return (GLOB_NOSPACE);
  631. }
  632. *q++ = *p++;
  633. }
  634. if (!anymeta) { /* No expansion, do next segment. */
  635. pathend = q;
  636. pattern = p;
  637. while (UNPROT(*pattern) == SEP) {
  638. if (pathend + 1 > pathend_last) {
  639. errno = E2BIG;
  640. return (GLOB_NOSPACE);
  641. }
  642. *pathend++ = *pattern++;
  643. }
  644. } else /* Need expansion, recurse. */
  645. return (glob3(pathbuf, pathend, pathend_last, pattern,
  646. p, pglob, limit));
  647. }
  648. /* NOTREACHED */
  649. }
  650. static int
  651. glob3(Char *pathbuf, Char *pathend, Char *pathend_last,
  652. Char *pattern, Char *restpattern,
  653. glob_t *pglob, struct glob_limit *limit)
  654. {
  655. struct dirent *dp;
  656. DIR *dirp;
  657. int err, too_long, saverrno, saverrno2;
  658. char buf[MAXPATHLEN + MB_LEN_MAX - 1];
  659. struct dirent *(*readdirfunc)(DIR *);
  660. if (pathend > pathend_last) {
  661. errno = E2BIG;
  662. return (GLOB_NOSPACE);
  663. }
  664. *pathend = EOS;
  665. if (pglob->gl_errfunc != NULL &&
  666. g_Ctoc(pathbuf, buf, sizeof(buf))) {
  667. errno = E2BIG;
  668. return (GLOB_NOSPACE);
  669. }
  670. saverrno = errno;
  671. errno = 0;
  672. if ((dirp = g_opendir(pathbuf, pglob)) == NULL) {
  673. if (errno == ENOENT || errno == ENOTDIR)
  674. return (0);
  675. err = err_aborted(pglob, errno, buf);
  676. if (errno == 0)
  677. errno = saverrno;
  678. return (err);
  679. }
  680. err = 0;
  681. /* pglob->gl_readdir takes a void *, fix this manually */
  682. if (pglob->gl_flags & GLOB_ALTDIRFUNC)
  683. readdirfunc = (struct dirent *(*)(DIR *))pglob->gl_readdir;
  684. else
  685. readdirfunc = readdir;
  686. errno = 0;
  687. /* Search directory for matching names. */
  688. while ((dp = (*readdirfunc)(dirp)) != NULL) {
  689. char *sc;
  690. Char *dc;
  691. wchar_t wc;
  692. size_t clen;
  693. mbstate_t mbs;
  694. if ((pglob->gl_flags & GLOB_LIMIT) &&
  695. limit->l_readdir_cnt++ >= GLOB_LIMIT_READDIR) {
  696. errno = E2BIG;
  697. err = GLOB_NOSPACE;
  698. break;
  699. }
  700. /* Initial DOT must be matched literally. */
  701. if (dp->d_name[0] == '.' && UNPROT(*pattern) != DOT) {
  702. errno = 0;
  703. continue;
  704. }
  705. memset(&mbs, 0, sizeof(mbs));
  706. dc = pathend;
  707. sc = dp->d_name;
  708. too_long = 1;
  709. while (dc <= pathend_last) {
  710. clen = mbrtowc(&wc, sc, MB_LEN_MAX, &mbs);
  711. if (clen == (size_t)-1 || clen == (size_t)-2) {
  712. /* XXX See initial comment #2. */
  713. wc = (unsigned char)*sc;
  714. clen = 1;
  715. memset(&mbs, 0, sizeof(mbs));
  716. }
  717. if ((*dc++ = wc) == EOS) {
  718. too_long = 0;
  719. break;
  720. }
  721. sc += clen;
  722. }
  723. if (too_long && (err = err_aborted(pglob, ENAMETOOLONG,
  724. buf))) {
  725. errno = ENAMETOOLONG;
  726. break;
  727. }
  728. if (too_long || !match(pathend, pattern, restpattern)) {
  729. *pathend = EOS;
  730. errno = 0;
  731. continue;
  732. }
  733. if (errno == 0)
  734. errno = saverrno;
  735. err = glob2(pathbuf, --dc, pathend_last, restpattern,
  736. pglob, limit);
  737. if (err)
  738. break;
  739. errno = 0;
  740. }
  741. saverrno2 = errno;
  742. if (pglob->gl_flags & GLOB_ALTDIRFUNC)
  743. (*pglob->gl_closedir)(dirp);
  744. else
  745. closedir(dirp);
  746. errno = saverrno2;
  747. if (err)
  748. return (err);
  749. if (dp == NULL && errno != 0 &&
  750. (err = err_aborted(pglob, errno, buf)))
  751. return (err);
  752. if (errno == 0)
  753. errno = saverrno;
  754. return (0);
  755. }
  756. /*
  757. * Extend the gl_pathv member of a glob_t structure to accommodate a new item,
  758. * add the new item, and update gl_pathc.
  759. *
  760. * This assumes the BSD realloc, which only copies the block when its size
  761. * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
  762. * behavior.
  763. *
  764. * Return 0 if new item added, error code if memory couldn't be allocated.
  765. *
  766. * Invariant of the glob_t structure:
  767. * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
  768. * gl_pathv points to (gl_offs + gl_pathc + 1) items.
  769. */
  770. static int
  771. globextend(const Char *path, glob_t *pglob, struct glob_limit *limit,
  772. const char *origpat)
  773. {
  774. char **pathv;
  775. size_t i, newn, len;
  776. char *copy;
  777. const Char *p;
  778. if ((pglob->gl_flags & GLOB_LIMIT) &&
  779. pglob->gl_matchc > limit->l_path_lim) {
  780. errno = E2BIG;
  781. return (GLOB_NOSPACE);
  782. }
  783. newn = 2 + pglob->gl_pathc + pglob->gl_offs;
  784. /* reallocarray(NULL, newn, size) is equivalent to malloc(newn*size). */
  785. pathv = reallocarray(pglob->gl_pathv, newn, sizeof(*pathv));
  786. if (pathv == NULL)
  787. return (GLOB_NOSPACE);
  788. if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
  789. /* first time around -- clear initial gl_offs items */
  790. pathv += pglob->gl_offs;
  791. for (i = pglob->gl_offs + 1; --i > 0; )
  792. *--pathv = NULL;
  793. }
  794. pglob->gl_pathv = pathv;
  795. if (origpat != NULL)
  796. copy = strdup(origpat);
  797. else {
  798. for (p = path; *p++ != EOS;)
  799. continue;
  800. len = MB_CUR_MAX * (size_t)(p - path); /* XXX overallocation */
  801. if ((copy = malloc(len)) != NULL) {
  802. if (g_Ctoc(path, copy, len)) {
  803. free(copy);
  804. errno = E2BIG;
  805. return (GLOB_NOSPACE);
  806. }
  807. }
  808. }
  809. if (copy != NULL) {
  810. limit->l_string_cnt += strlen(copy) + 1;
  811. if ((pglob->gl_flags & GLOB_LIMIT) &&
  812. limit->l_string_cnt >= GLOB_LIMIT_STRING) {
  813. free(copy);
  814. errno = E2BIG;
  815. return (GLOB_NOSPACE);
  816. }
  817. pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
  818. }
  819. pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
  820. return (copy == NULL ? GLOB_NOSPACE : 0);
  821. }
  822. /*
  823. * pattern matching function for filenames.
  824. */
  825. static int
  826. match(Char *name, Char *pat, Char *patend)
  827. {
  828. int ok, negate_range;
  829. Char c, k, *nextp, *nextn;
  830. #ifdef USE_LOCALE_COLLATE
  831. struct xlocale_collate *table =
  832. (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
  833. #endif
  834. nextn = NULL;
  835. nextp = NULL;
  836. while (1) {
  837. while (pat < patend) {
  838. c = *pat++;
  839. switch (c & M_MASK) {
  840. case M_ALL:
  841. if (pat == patend)
  842. return (1);
  843. if (*name == EOS)
  844. return (0);
  845. nextn = name + 1;
  846. nextp = pat - 1;
  847. break;
  848. case M_ONE:
  849. if (*name++ == EOS)
  850. goto fail;
  851. break;
  852. case M_SET:
  853. ok = 0;
  854. if ((k = *name++) == EOS)
  855. goto fail;
  856. negate_range = ((*pat & M_MASK) == M_NOT);
  857. if (negate_range != 0)
  858. ++pat;
  859. while (((c = *pat++) & M_MASK) != M_END)
  860. if ((*pat & M_MASK) == M_RNG) {
  861. #ifdef USE_LOCALE_COLLATE
  862. if (table->__collate_load_error ?
  863. CHAR(c) <= CHAR(k) &&
  864. CHAR(k) <= CHAR(pat[1]) :
  865. __wcollate_range_cmp(CHAR(c),
  866. CHAR(k)) <= 0 &&
  867. __wcollate_range_cmp(CHAR(k),
  868. CHAR(pat[1])) <= 0)
  869. #else
  870. if (c <= k && k <= pat[1])
  871. #endif
  872. ok = 1;
  873. pat += 2;
  874. } else if (c == k)
  875. ok = 1;
  876. if (ok == negate_range)
  877. goto fail;
  878. break;
  879. default:
  880. if (*name++ != c)
  881. goto fail;
  882. break;
  883. }
  884. }
  885. if (*name == EOS)
  886. return (1);
  887. fail:
  888. if (nextn == NULL)
  889. break;
  890. pat = nextp;
  891. name = nextn;
  892. }
  893. return (0);
  894. }
  /*
   * Free allocated data belonging to a glob_t structure: the gl_pathc
   * strings stored after the first gl_offs slots, then the vector
   * itself.  gl_pathv is reset to NULL afterwards; a structure whose
   * gl_pathv is already NULL is left untouched.
   */
  void
  globfree(glob_t *pglob)
  {
      char **paths;
      size_t i;

      if (pglob->gl_pathv == NULL)
          return;

      paths = pglob->gl_pathv + pglob->gl_offs;
      /* free(NULL) is a no-op, so empty slots need no special case. */
      for (i = 0; i < pglob->gl_pathc; i++)
          free(paths[i]);

      free(pglob->gl_pathv);
      pglob->gl_pathv = NULL;
  }
  910. static DIR *
  911. g_opendir(Char *str, glob_t *pglob)
  912. {
  913. char buf[MAXPATHLEN + MB_LEN_MAX - 1];
  914. if (*str == EOS)
  915. strcpy(buf, ".");
  916. else {
  917. if (g_Ctoc(str, buf, sizeof(buf))) {
  918. errno = ENAMETOOLONG;
  919. return (NULL);
  920. }
  921. }
  922. if (pglob->gl_flags & GLOB_ALTDIRFUNC)
  923. return ((*pglob->gl_opendir)(buf));
  924. return (opendir(buf));
  925. }
  926. static int
  927. g_lstat(Char *fn, struct stat *sb, glob_t *pglob)
  928. {
  929. char buf[MAXPATHLEN + MB_LEN_MAX - 1];
  930. if (g_Ctoc(fn, buf, sizeof(buf))) {
  931. errno = ENAMETOOLONG;
  932. return (-1);
  933. }
  934. if (pglob->gl_flags & GLOB_ALTDIRFUNC)
  935. return((*pglob->gl_lstat)(buf, sb));
  936. return (lstat(buf, sb));
  937. }
  938. static int
  939. g_stat(Char *fn, struct stat *sb, glob_t *pglob)
  940. {
  941. char buf[MAXPATHLEN + MB_LEN_MAX - 1];
  942. if (g_Ctoc(fn, buf, sizeof(buf))) {
  943. errno = ENAMETOOLONG;
  944. return (-1);
  945. }
  946. if (pglob->gl_flags & GLOB_ALTDIRFUNC)
  947. return ((*pglob->gl_stat)(buf, sb));
  948. return (stat(buf, sb));
  949. }
  950. static const Char *
  951. g_strchr(const Char *str, wchar_t ch)
  952. {
  953. do {
  954. if (*str == ch)
  955. return (str);
  956. } while (*str++);
  957. return (NULL);
  958. }
  959. static int
  960. g_Ctoc(const Char *str, char *buf, size_t len)
  961. {
  962. mbstate_t mbs;
  963. size_t clen;
  964. memset(&mbs, 0, sizeof(mbs));
  965. while (len >= MB_CUR_MAX) {
  966. clen = wcrtomb(buf, CHAR(*str), &mbs);
  967. if (clen == (size_t)-1) {
  968. /* XXX See initial comment #2. */
  969. *buf = (char)CHAR(*str);
  970. clen = 1;
  971. memset(&mbs, 0, sizeof(mbs));
  972. }
  973. if (CHAR(*str) == EOS)
  974. return (0);
  975. str++;
  976. buf += clen;
  977. len -= clen;
  978. }
  979. return (1);
  980. }
  /*
   * Handle a pattern that matched nothing.
   *
   * The original pattern origpat is appended to the results if
   * GLOB_NOCHECK was specified, or if GLOB_NOMAGIC was specified and the
   * pattern did not contain any magic characters (GLOB_NOMAGIC is there
   * just for compatibility with csh).  Otherwise GLOB_NOMATCH is
   * returned.
   */
  static int
  err_nomatch(glob_t *pglob, struct glob_limit *limit, const char *origpat)
  {
      const int flags = pglob->gl_flags;

      if (flags & GLOB_NOCHECK)
          return (globextend(NULL, pglob, limit, origpat));

      if ((flags & GLOB_NOMAGIC) && !(flags & GLOB_MAGCHAR))
          return (globextend(NULL, pglob, limit, origpat));

      return (GLOB_NOMATCH);
  }
  995. static int
  996. err_aborted(glob_t *pglob, int err, char *buf) {
  997. if ((pglob->gl_errfunc != NULL && pglob->gl_errfunc(buf, err)) ||
  998. (pglob->gl_flags & GLOB_ERR))
  999. return (GLOB_ABORTED);
  1000. return (0);
  1001. }
  #ifdef DEBUG
  /*
   * Debugging aid: print the label str, then (when s is not NULL) three
   * lines describing the Char string s -- its characters, a backslash
   * under every protected character, and an underscore under every
   * meta character.
   */
  static void
  qprintf(const char *str, Char *s)
  {
      Char *cur;

      (void)printf("%s\n", str);
      if (s == NULL)
          return;

      for (cur = s; *cur != EOS; cur++)
          (void)printf("%c", (char)CHAR(*cur));
      (void)printf("\n");

      for (cur = s; *cur != EOS; cur++)
          (void)printf("%c", (isprot(*cur) ? '\\' : ' '));
      (void)printf("\n");

      for (cur = s; *cur != EOS; cur++)
          (void)printf("%c", (ismeta(*cur) ? '_' : ' '));
      (void)printf("\n");
  }
  #endif