git
/
wildmatch.c
287 строк · 8.0 Кб
/*
** Do shell-style pattern matching for ?, \, [], and * characters.
** It is 8bit clean.
**
** Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
** Rich $alz is now <rsalz@bbn.com>.
**
** Modified by Wayne Davison to special-case '/' matching, to make '**'
** work differently than '*', and to fix the character-class code.
*/
11
12#include "git-compat-util.h"13#include "wildmatch.h"14
/* Pattern and text bytes are handled as unsigned so the code is 8bit clean. */
typedef unsigned char uchar;

/*
 * Internal return values of dowild(). They never reach callers:
 * wildmatch() reports anything other than WM_MATCH as WM_NOMATCH.
 * The values are parenthesized so they expand safely in any expression.
 */
#define WM_ABORT_ALL (-1)
#define WM_ABORT_TO_STARSTAR (-2)

/* What character marks an inverted character class? */
#define NEGATE_CLASS '!'
/* Optional second spelling of the negation marker; code tests it with #ifdef. */
#define NEGATE_CLASS2 '^'
/*
 * True when the "len" bytes starting at "class" spell exactly the string
 * "litmatch". "litmatch" must be a string literal (or char array) because
 * its length is taken with sizeof. The first-byte comparison is a cheap
 * reject before calling strncmp(). Note that "class" and "len" are
 * evaluated more than once.
 */
#define CC_EQ(class, len, litmatch) ((len) == sizeof(litmatch) - 1 \
	&& *(class) == *(litmatch) \
	&& strncmp((const char *)(class), (litmatch), (len)) == 0)
/*
 * Character-classification wrappers used by the [:class:] matching code.
 *
 * ISASCII() is a constant 1 when STDC_HEADERS is defined or when the
 * platform does not provide isascii as a macro; otherwise it defers to
 * isascii(). Every other wrapper ANDs ISASCII() with the matching <ctype>
 * predicate. The argument "c" may be evaluated more than once.
 */
#if defined STDC_HEADERS || !defined isascii
# define ISASCII(c) 1
#else
# define ISASCII(c) isascii(c)
#endif

/* Use isblank() when it is available as a macro, else test space and tab. */
#ifdef isblank
# define ISBLANK(c) (ISASCII(c) && isblank(c))
#else
# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
#endif

/* Use isgraph() when it is available as a macro, else printable non-space. */
#ifdef isgraph
# define ISGRAPH(c) (ISASCII(c) && isgraph(c))
#else
# define ISGRAPH(c) (ISASCII(c) && isprint(c) && !isspace(c))
#endif

#define ISPRINT(c) (ISASCII(c) && isprint(c))
#define ISDIGIT(c) (ISASCII(c) && isdigit(c))
#define ISALNUM(c) (ISASCII(c) && isalnum(c))
#define ISALPHA(c) (ISASCII(c) && isalpha(c))
#define ISCNTRL(c) (ISASCII(c) && iscntrl(c))
#define ISLOWER(c) (ISASCII(c) && islower(c))
#define ISPUNCT(c) (ISASCII(c) && ispunct(c))
#define ISSPACE(c) (ISASCII(c) && isspace(c))
#define ISUPPER(c) (ISASCII(c) && isupper(c))
#define ISXDIGIT(c) (ISASCII(c) && isxdigit(c))
58/* Match pattern "p" against "text" */
59static int dowild(const uchar *p, const uchar *text, unsigned int flags)60{
61uchar p_ch;62const uchar *pattern = p;63
64for ( ; (p_ch = *p) != '\0'; text++, p++) {65int matched, match_slash, negated;66uchar t_ch, prev_ch;67if ((t_ch = *text) == '\0' && p_ch != '*')68return WM_ABORT_ALL;69if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))70t_ch = tolower(t_ch);71if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))72p_ch = tolower(p_ch);73switch (p_ch) {74case '\\':75/* Literal match with following character. Note that the test76* in "default" handles the p[1] == '\0' failure case. */
77p_ch = *++p;78/* FALLTHROUGH */79default:80if (t_ch != p_ch)81return WM_NOMATCH;82continue;83case '?':84/* Match anything but '/'. */85if ((flags & WM_PATHNAME) && t_ch == '/')86return WM_NOMATCH;87continue;88case '*':89if (*++p == '*') {90const uchar *prev_p = p;91while (*++p == '*') {}92if (!(flags & WM_PATHNAME))93/* without WM_PATHNAME, '*' == '**' */94match_slash = 1;95else if ((prev_p - pattern < 2 || *(prev_p - 2) == '/') &&96(*p == '\0' || *p == '/' ||97(p[0] == '\\' && p[1] == '/'))) {98/*99* Assuming we already match 'foo/' and are at
100* <star star slash>, just assume it matches
101* nothing and go ahead match the rest of the
102* pattern with the remaining string. This
103* helps make foo/<*><*>/bar (<> because
104* otherwise it breaks C comment syntax) match
105* both foo/bar and foo/a/bar.
106*/
107if (p[0] == '/' &&108dowild(p + 1, text, flags) == WM_MATCH)109return WM_MATCH;110match_slash = 1;111} else /* WM_PATHNAME is set */112match_slash = 0;113} else114/* without WM_PATHNAME, '*' == '**' */115match_slash = flags & WM_PATHNAME ? 0 : 1;116if (*p == '\0') {117/* Trailing "**" matches everything. Trailing "*" matches118* only if there are no more slash characters. */
119if (!match_slash) {120if (strchr((char *)text, '/'))121return WM_ABORT_TO_STARSTAR;122}123return WM_MATCH;124} else if (!match_slash && *p == '/') {125/*126* _one_ asterisk followed by a slash
127* with WM_PATHNAME matches the next
128* directory
129*/
130const char *slash = strchr((char*)text, '/');131if (!slash)132return WM_ABORT_ALL;133text = (const uchar*)slash;134/* the slash is consumed by the top-level for loop */135break;136}137while (1) {138if (t_ch == '\0')139break;140/*141* Try to advance faster when an asterisk is
142* followed by a literal. We know in this case
143* that the string before the literal
144* must belong to "*".
145* If match_slash is false, do not look past
146* the first slash as it cannot belong to '*'.
147*/
148if (!is_glob_special(*p)) {149p_ch = *p;150if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))151p_ch = tolower(p_ch);152while ((t_ch = *text) != '\0' &&153(match_slash || t_ch != '/')) {154if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))155t_ch = tolower(t_ch);156if (t_ch == p_ch)157break;158text++;159}160if (t_ch != p_ch) {161if (match_slash)162return WM_ABORT_ALL;163else164return WM_ABORT_TO_STARSTAR;165}166}167if ((matched = dowild(p, text, flags)) != WM_NOMATCH) {168if (!match_slash || matched != WM_ABORT_TO_STARSTAR)169return matched;170} else if (!match_slash && t_ch == '/')171return WM_ABORT_TO_STARSTAR;172t_ch = *++text;173}174return WM_ABORT_ALL;175case '[':176p_ch = *++p;177#ifdef NEGATE_CLASS2178if (p_ch == NEGATE_CLASS2)179p_ch = NEGATE_CLASS;180#endif181/* Assign literal 1/0 because of "matched" comparison. */182negated = p_ch == NEGATE_CLASS ? 1 : 0;183if (negated) {184/* Inverted character class. */185p_ch = *++p;186}187prev_ch = 0;188matched = 0;189do {190if (!p_ch)191return WM_ABORT_ALL;192if (p_ch == '\\') {193p_ch = *++p;194if (!p_ch)195return WM_ABORT_ALL;196if (t_ch == p_ch)197matched = 1;198} else if (p_ch == '-' && prev_ch && p[1] && p[1] != ']') {199p_ch = *++p;200if (p_ch == '\\') {201p_ch = *++p;202if (!p_ch)203return WM_ABORT_ALL;204}205if (t_ch <= p_ch && t_ch >= prev_ch)206matched = 1;207else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch)) {208uchar t_ch_upper = toupper(t_ch);209if (t_ch_upper <= p_ch && t_ch_upper >= prev_ch)210matched = 1;211}212p_ch = 0; /* This makes "prev_ch" get set to 0. */213} else if (p_ch == '[' && p[1] == ':') {214const uchar *s;215int i;216for (s = p += 2; (p_ch = *p) && p_ch != ']'; p++) {} /*SHARED ITERATOR*/217if (!p_ch)218return WM_ABORT_ALL;219i = p - s - 1;220if (i < 0 || p[-1] != ':') {221/* Didn't find ":]", so treat like a normal set. 
*/222p = s - 2;223p_ch = '[';224if (t_ch == p_ch)225matched = 1;226continue;227}228if (CC_EQ(s,i, "alnum")) {229if (ISALNUM(t_ch))230matched = 1;231} else if (CC_EQ(s,i, "alpha")) {232if (ISALPHA(t_ch))233matched = 1;234} else if (CC_EQ(s,i, "blank")) {235if (ISBLANK(t_ch))236matched = 1;237} else if (CC_EQ(s,i, "cntrl")) {238if (ISCNTRL(t_ch))239matched = 1;240} else if (CC_EQ(s,i, "digit")) {241if (ISDIGIT(t_ch))242matched = 1;243} else if (CC_EQ(s,i, "graph")) {244if (ISGRAPH(t_ch))245matched = 1;246} else if (CC_EQ(s,i, "lower")) {247if (ISLOWER(t_ch))248matched = 1;249} else if (CC_EQ(s,i, "print")) {250if (ISPRINT(t_ch))251matched = 1;252} else if (CC_EQ(s,i, "punct")) {253if (ISPUNCT(t_ch))254matched = 1;255} else if (CC_EQ(s,i, "space")) {256if (ISSPACE(t_ch))257matched = 1;258} else if (CC_EQ(s,i, "upper")) {259if (ISUPPER(t_ch))260matched = 1;261else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch))262matched = 1;263} else if (CC_EQ(s,i, "xdigit")) {264if (ISXDIGIT(t_ch))265matched = 1;266} else /* malformed [:class:] string */267return WM_ABORT_ALL;268p_ch = 0; /* This makes "prev_ch" get set to 0. */269} else if (t_ch == p_ch)270matched = 1;271} while (prev_ch = p_ch, (p_ch = *++p) != ']');272if (matched == negated ||273((flags & WM_PATHNAME) && t_ch == '/'))274return WM_NOMATCH;275continue;276}277}278
279return *text ? WM_NOMATCH : WM_MATCH;280}
281
282/* Match the "pattern" against the "text" string. */
283int wildmatch(const char *pattern, const char *text, unsigned int flags)284{
285int res = dowild((const uchar*)pattern, (const uchar*)text, flags);286return res == WM_MATCH ? WM_MATCH : WM_NOMATCH;287}
288