mirror of
https://github.com/git/git.git
synced 2025-03-27 18:19:41 +00:00
Merge branch 'nd/pathspec-wildcard'
Optimize matching paths with common forms of pathspecs that contain wildcard characters. * nd/pathspec-wildcard: tree_entry_interesting: do basedir compare on wildcard patterns when possible pathspec: apply "*.c" optimization from exclude pathspec: do exact comparison on the leading non-wildcard part pathspec: save the non-wildcard length part
This commit is contained in:
commit
990a4fea96
@ -337,7 +337,7 @@ void overlay_tree_on_cache(const char *tree_name, const char *prefix)
|
|||||||
matchbuf[0] = prefix;
|
matchbuf[0] = prefix;
|
||||||
matchbuf[1] = NULL;
|
matchbuf[1] = NULL;
|
||||||
init_pathspec(&pathspec, matchbuf);
|
init_pathspec(&pathspec, matchbuf);
|
||||||
pathspec.items[0].use_wildcard = 0;
|
pathspec.items[0].nowildcard_len = pathspec.items[0].len;
|
||||||
} else
|
} else
|
||||||
init_pathspec(&pathspec, NULL);
|
init_pathspec(&pathspec, NULL);
|
||||||
if (read_tree(tree, 1, &pathspec))
|
if (read_tree(tree, 1, &pathspec))
|
||||||
|
@ -168,7 +168,7 @@ int cmd_ls_tree(int argc, const char **argv, const char *prefix)
|
|||||||
|
|
||||||
init_pathspec(&pathspec, get_pathspec(prefix, argv + 1));
|
init_pathspec(&pathspec, get_pathspec(prefix, argv + 1));
|
||||||
for (i = 0; i < pathspec.nr; i++)
|
for (i = 0; i < pathspec.nr; i++)
|
||||||
pathspec.items[i].use_wildcard = 0;
|
pathspec.items[i].nowildcard_len = pathspec.items[i].len;
|
||||||
pathspec.has_wildcard = 0;
|
pathspec.has_wildcard = 0;
|
||||||
tree = parse_tree_indirect(sha1);
|
tree = parse_tree_indirect(sha1);
|
||||||
if (!tree)
|
if (!tree)
|
||||||
|
5
cache.h
5
cache.h
@ -473,6 +473,8 @@ extern int index_name_is_other(const struct index_state *, const char *, int);
|
|||||||
extern int ie_match_stat(const struct index_state *, struct cache_entry *, struct stat *, unsigned int);
|
extern int ie_match_stat(const struct index_state *, struct cache_entry *, struct stat *, unsigned int);
|
||||||
extern int ie_modified(const struct index_state *, struct cache_entry *, struct stat *, unsigned int);
|
extern int ie_modified(const struct index_state *, struct cache_entry *, struct stat *, unsigned int);
|
||||||
|
|
||||||
|
#define PATHSPEC_ONESTAR 1 /* the pathspec pattern satisfies GFNM_ONESTAR */
|
||||||
|
|
||||||
struct pathspec {
|
struct pathspec {
|
||||||
const char **raw; /* get_pathspec() result, not freed by free_pathspec() */
|
const char **raw; /* get_pathspec() result, not freed by free_pathspec() */
|
||||||
int nr;
|
int nr;
|
||||||
@ -482,7 +484,8 @@ struct pathspec {
|
|||||||
struct pathspec_item {
|
struct pathspec_item {
|
||||||
const char *match;
|
const char *match;
|
||||||
int len;
|
int len;
|
||||||
unsigned int use_wildcard:1;
|
int nowildcard_len;
|
||||||
|
int flags;
|
||||||
} *items;
|
} *items;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
36
dir.c
36
dir.c
@ -34,6 +34,28 @@ int fnmatch_icase(const char *pattern, const char *string, int flags)
|
|||||||
return fnmatch(pattern, string, flags | (ignore_case ? FNM_CASEFOLD : 0));
|
return fnmatch(pattern, string, flags | (ignore_case ? FNM_CASEFOLD : 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline int git_fnmatch(const char *pattern, const char *string,
|
||||||
|
int flags, int prefix)
|
||||||
|
{
|
||||||
|
int fnm_flags = 0;
|
||||||
|
if (flags & GFNM_PATHNAME)
|
||||||
|
fnm_flags |= FNM_PATHNAME;
|
||||||
|
if (prefix > 0) {
|
||||||
|
if (strncmp(pattern, string, prefix))
|
||||||
|
return FNM_NOMATCH;
|
||||||
|
pattern += prefix;
|
||||||
|
string += prefix;
|
||||||
|
}
|
||||||
|
if (flags & GFNM_ONESTAR) {
|
||||||
|
int pattern_len = strlen(++pattern);
|
||||||
|
int string_len = strlen(string);
|
||||||
|
return string_len < pattern_len ||
|
||||||
|
strcmp(pattern,
|
||||||
|
string + string_len - pattern_len);
|
||||||
|
}
|
||||||
|
return fnmatch(pattern, string, fnm_flags);
|
||||||
|
}
|
||||||
|
|
||||||
static size_t common_prefix_len(const char **pathspec)
|
static size_t common_prefix_len(const char **pathspec)
|
||||||
{
|
{
|
||||||
const char *n, *first;
|
const char *n, *first;
|
||||||
@ -230,7 +252,10 @@ static int match_pathspec_item(const struct pathspec_item *item, int prefix,
|
|||||||
return MATCHED_RECURSIVELY;
|
return MATCHED_RECURSIVELY;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (item->use_wildcard && !fnmatch(match, name, 0))
|
if (item->nowildcard_len < item->len &&
|
||||||
|
!git_fnmatch(match, name,
|
||||||
|
item->flags & PATHSPEC_ONESTAR ? GFNM_ONESTAR : 0,
|
||||||
|
item->nowildcard_len - prefix))
|
||||||
return MATCHED_FNMATCH;
|
return MATCHED_FNMATCH;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@ -1429,9 +1454,14 @@ int init_pathspec(struct pathspec *pathspec, const char **paths)
|
|||||||
|
|
||||||
item->match = path;
|
item->match = path;
|
||||||
item->len = strlen(path);
|
item->len = strlen(path);
|
||||||
item->use_wildcard = !no_wildcard(path);
|
item->nowildcard_len = simple_length(path);
|
||||||
if (item->use_wildcard)
|
item->flags = 0;
|
||||||
|
if (item->nowildcard_len < item->len) {
|
||||||
pathspec->has_wildcard = 1;
|
pathspec->has_wildcard = 1;
|
||||||
|
if (path[item->nowildcard_len] == '*' &&
|
||||||
|
no_wildcard(path + item->nowildcard_len + 1))
|
||||||
|
item->flags |= PATHSPEC_ONESTAR;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
qsort(pathspec->items, pathspec->nr,
|
qsort(pathspec->items, pathspec->nr,
|
||||||
|
9
dir.h
9
dir.h
@ -139,4 +139,13 @@ extern int strcmp_icase(const char *a, const char *b);
|
|||||||
extern int strncmp_icase(const char *a, const char *b, size_t count);
|
extern int strncmp_icase(const char *a, const char *b, size_t count);
|
||||||
extern int fnmatch_icase(const char *pattern, const char *string, int flags);
|
extern int fnmatch_icase(const char *pattern, const char *string, int flags);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The prefix part of pattern must not contain wildcards.
|
||||||
|
*/
|
||||||
|
#define GFNM_PATHNAME 1 /* similar to FNM_PATHNAME */
|
||||||
|
#define GFNM_ONESTAR 2 /* there is only _one_ wildcard, a star */
|
||||||
|
|
||||||
|
extern int git_fnmatch(const char *pattern, const char *string,
|
||||||
|
int flags, int prefix);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
77
tree-walk.c
77
tree-walk.c
@ -572,6 +572,54 @@ static int match_dir_prefix(const char *base,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Perform matching on the leading non-wildcard part of
|
||||||
|
* pathspec. item->nowildcard_len must be greater than zero. Return
|
||||||
|
* non-zero if base is matched.
|
||||||
|
*/
|
||||||
|
static int match_wildcard_base(const struct pathspec_item *item,
|
||||||
|
const char *base, int baselen,
|
||||||
|
int *matched)
|
||||||
|
{
|
||||||
|
const char *match = item->match;
|
||||||
|
/* the wildcard part is not considered in this function */
|
||||||
|
int matchlen = item->nowildcard_len;
|
||||||
|
|
||||||
|
if (baselen) {
|
||||||
|
int dirlen;
|
||||||
|
/*
|
||||||
|
* Return early if base is longer than the
|
||||||
|
* non-wildcard part but it does not match.
|
||||||
|
*/
|
||||||
|
if (baselen >= matchlen) {
|
||||||
|
*matched = matchlen;
|
||||||
|
return !strncmp(base, match, matchlen);
|
||||||
|
}
|
||||||
|
|
||||||
|
dirlen = matchlen;
|
||||||
|
while (dirlen && match[dirlen - 1] != '/')
|
||||||
|
dirlen--;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Return early if base is shorter than the
|
||||||
|
* non-wildcard part but it does not match. Note that
|
||||||
|
* base ends with '/' so we are sure it really matches
|
||||||
|
* directory
|
||||||
|
*/
|
||||||
|
if (strncmp(base, match, baselen))
|
||||||
|
return 0;
|
||||||
|
*matched = baselen;
|
||||||
|
} else
|
||||||
|
*matched = 0;
|
||||||
|
/*
|
||||||
|
* we could have checked entry against the non-wildcard part
|
||||||
|
* that is not in base and does similar never_interesting
|
||||||
|
* optimization as in match_entry. For now just be happy with
|
||||||
|
* base comparison.
|
||||||
|
*/
|
||||||
|
return entry_interesting;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Is a tree entry interesting given the pathspec we have?
|
* Is a tree entry interesting given the pathspec we have?
|
||||||
*
|
*
|
||||||
@ -602,7 +650,7 @@ enum interesting tree_entry_interesting(const struct name_entry *entry,
|
|||||||
const struct pathspec_item *item = ps->items+i;
|
const struct pathspec_item *item = ps->items+i;
|
||||||
const char *match = item->match;
|
const char *match = item->match;
|
||||||
const char *base_str = base->buf + base_offset;
|
const char *base_str = base->buf + base_offset;
|
||||||
int matchlen = item->len;
|
int matchlen = item->len, matched = 0;
|
||||||
|
|
||||||
if (baselen >= matchlen) {
|
if (baselen >= matchlen) {
|
||||||
/* If it doesn't match, move along... */
|
/* If it doesn't match, move along... */
|
||||||
@ -626,8 +674,10 @@ enum interesting tree_entry_interesting(const struct name_entry *entry,
|
|||||||
&never_interesting))
|
&never_interesting))
|
||||||
return entry_interesting;
|
return entry_interesting;
|
||||||
|
|
||||||
if (item->use_wildcard) {
|
if (item->nowildcard_len < item->len) {
|
||||||
if (!fnmatch(match + baselen, entry->path, 0))
|
if (!git_fnmatch(match + baselen, entry->path,
|
||||||
|
item->flags & PATHSPEC_ONESTAR ? GFNM_ONESTAR : 0,
|
||||||
|
item->nowildcard_len - baselen))
|
||||||
return entry_interesting;
|
return entry_interesting;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -642,17 +692,34 @@ enum interesting tree_entry_interesting(const struct name_entry *entry,
|
|||||||
}
|
}
|
||||||
|
|
||||||
match_wildcards:
|
match_wildcards:
|
||||||
if (!item->use_wildcard)
|
if (item->nowildcard_len == item->len)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
if (item->nowildcard_len &&
|
||||||
|
!match_wildcard_base(item, base_str, baselen, &matched))
|
||||||
|
return entry_not_interesting;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Concatenate base and entry->path into one and do
|
* Concatenate base and entry->path into one and do
|
||||||
* fnmatch() on it.
|
* fnmatch() on it.
|
||||||
|
*
|
||||||
|
* While we could avoid concatenation in certain cases
|
||||||
|
* [1], which saves a memcpy and potentially a
|
||||||
|
* realloc, it turns out not worth it. Measurement on
|
||||||
|
* linux-2.6 does not show any clear improvements,
|
||||||
|
* partly because of the nowildcard_len optimization
|
||||||
|
* in git_fnmatch(). Avoid micro-optimizations here.
|
||||||
|
*
|
||||||
|
* [1] if match_wildcard_base() says the base
|
||||||
|
* directory is already matched, we only need to match
|
||||||
|
* the rest, which is shorter so _in theory_ faster.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
strbuf_add(base, entry->path, pathlen);
|
strbuf_add(base, entry->path, pathlen);
|
||||||
|
|
||||||
if (!fnmatch(match, base->buf + base_offset, 0)) {
|
if (!git_fnmatch(match, base->buf + base_offset,
|
||||||
|
item->flags & PATHSPEC_ONESTAR ? GFNM_ONESTAR : 0,
|
||||||
|
item->nowildcard_len)) {
|
||||||
strbuf_setlen(base, base_offset + baselen);
|
strbuf_setlen(base, base_offset + baselen);
|
||||||
return entry_interesting;
|
return entry_interesting;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user