diff options
Diffstat (limited to 'websdk/mercurial/fileset.py')
-rw-r--r-- | websdk/mercurial/fileset.py | 440 |
1 files changed, 440 insertions, 0 deletions
# fileset.py - file set queries for mercurial
#
# Copyright 2010 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

import parser, error, util, merge, re
from i18n import _

# Grammar table handed to parser.parser().
# NOTE(review): each entry looks like (binding strength, prefix action,
# infix action) -- confirm against the parser module.
elements = {
    "(": (20, ("group", 1, ")"), ("func", 1, ")")),
    "-": (5, ("negate", 19), ("minus", 5)),
    "not": (10, ("not", 10)),
    "!": (10, ("not", 10)),
    "and": (5, None, ("and", 5)),
    "&": (5, None, ("and", 5)),
    "or": (4, None, ("or", 4)),
    "|": (4, None, ("or", 4)),
    "+": (4, None, ("or", 4)),
    ",": (2, None, ("list", 2)),
    ")": (0, None, None),
    "symbol": (0, ("symbol",), None),
    "string": (0, ("string",), None),
    "end": (0, None, None),
}

# symbols that are emitted as operator tokens rather than 'symbol' tokens
keywords = set(['and', 'or', 'not'])

# non-alphanumeric characters that may appear inside an unquoted symbol
globchars = ".*{}[]?/\\"

def tokenize(program):
    """Split a fileset expression into tokens.

    Yields (type, value, position) tuples. Operators and keywords are
    yielded with their own text as type and None as value; quoted
    strings as ('string', text, pos); anything else made of
    alphanumerics, globchars or characters above 127 as
    ('symbol', text, pos). A final ('end', None, pos) is always yielded.

    Raises error.ParseError on an unterminated string or on a character
    that cannot start any token.
    """
    pos, l = 0, len(program)
    while pos < l:
        c = program[pos]
        if c.isspace(): # skip inter-token whitespace
            pass
        elif c in "(),-|&+!": # handle simple operators
            yield (c, None, pos)
        elif (c in '"\'' or c == 'r' and
              program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
            if c == 'r':
                # raw string: keep the text between the quotes untouched
                pos += 1
                c = program[pos]
                decode = lambda x: x
            else:
                decode = lambda x: x.decode('string-escape')
            pos += 1
            s = pos
            while pos < l: # find closing quote
                d = program[pos]
                if d == '\\': # skip over escaped characters
                    pos += 2
                    continue
                if d == c:
                    yield ('string', decode(program[s:pos]), s)
                    break
                pos += 1
            else:
                # ran off the end of the input without seeing the quote
                raise error.ParseError(_("unterminated string"), s)
        elif c.isalnum() or c in globchars or ord(c) > 127:
            # gather up a symbol/keyword
            s = pos
            pos += 1
            while pos < l: # find end of symbol
                d = program[pos]
                if not (d.isalnum() or d in globchars or ord(d) > 127):
                    break
                pos += 1
            sym = program[s:pos]
            if sym in keywords: # operator keywords
                yield (sym, None, s)
            else:
                yield ('symbol', sym, s)
            # step back so the shared increment below lands on the first
            # character after the symbol
            pos -= 1
        else:
            raise error.ParseError(_("syntax error"), pos)
        pos += 1
    yield ('end', None, pos)

parse = parser.parser(tokenize, elements).parse

def getstring(x, err):
    """Return the text of a 'string' or 'symbol' node.

    Raises error.ParseError(err) when x is empty or of any other type.
    """
    if x and (x[0] == 'string' or x[0] == 'symbol'):
        return x[1]
    raise error.ParseError(err)

def getset(mctx, x):
    """Evaluate parse tree node x against mctx and return the file list.

    Dispatches on the node type x[0] through the methods table.
    """
    if not x:
        raise error.ParseError(_("missing argument"))
    return methods[x[0]](mctx, *x[1:])

def stringset(mctx, x):
    """Treat x as a file pattern and return the subset files it matches."""
    m = mctx.matcher([x])
    return [f for f in mctx.subset if m(f)]

def andset(mctx, x, y):
    """Evaluate y against only the files selected by x."""
    return getset(mctx.narrow(getset(mctx, x)), y)

def orset(mctx, x, y):
    """Return the files of x followed by the files of y not already in x."""
    xl = getset(mctx, x)
    yl = getset(mctx, y)
    # set membership keeps this linear; order of the result is unchanged
    seen = set(xl)
    return xl + [f for f in yl if f not in seen]

def minusset(mctx, x, y):
    """Return the files of x that are not in y.

    The grammar produces 'minus' nodes for the infix '-' operator, so an
    evaluator has to exist for them.
    """
    xl = getset(mctx, x)
    yl = set(getset(mctx, y))
    return [f for f in xl if f not in yl]

def notset(mctx, x):
    """Return the subset files that x does not select."""
    s = set(getset(mctx, x))
    return [r for r in mctx.subset if r not in s]

def listset(mctx, a, b):
    """Reject a comma list used where a file set is expected."""
    raise error.ParseError(_("can't use a list in this context"))

def _statusfiles(mctx, index):
    """Return the subset files listed in entry ``index`` of the status.

    The index values used by the predicates below are 0 modified,
    1 added, 2 removed, 3 deleted, 4 unknown, 5 ignored and 6 clean.
    """
    wanted = set(mctx.status()[index])
    return [f for f in mctx.subset if f in wanted]

def modified(mctx, x):
    """``modified()``
    File that is modified according to status.
    """
    # i18n: "modified" is a keyword
    getargs(x, 0, 0, _("modified takes no arguments"))
    return _statusfiles(mctx, 0)

def added(mctx, x):
    """``added()``
    File that is added according to status.
    """
    # i18n: "added" is a keyword
    getargs(x, 0, 0, _("added takes no arguments"))
    return _statusfiles(mctx, 1)

def removed(mctx, x):
    """``removed()``
    File that is removed according to status.
    """
    # i18n: "removed" is a keyword
    getargs(x, 0, 0, _("removed takes no arguments"))
    return _statusfiles(mctx, 2)

def deleted(mctx, x):
    """``deleted()``
    File that is deleted according to status.
    """
    # i18n: "deleted" is a keyword
    getargs(x, 0, 0, _("deleted takes no arguments"))
    return _statusfiles(mctx, 3)

def unknown(mctx, x):
    """``unknown()``
    File that is unknown according to status. These files will only be
    considered if this predicate is used.
    """
    # i18n: "unknown" is a keyword
    getargs(x, 0, 0, _("unknown takes no arguments"))
    return _statusfiles(mctx, 4)

def ignored(mctx, x):
    """``ignored()``
    File that is ignored according to status. These files will only be
    considered if this predicate is used.
    """
    # i18n: "ignored" is a keyword
    getargs(x, 0, 0, _("ignored takes no arguments"))
    return _statusfiles(mctx, 5)

def clean(mctx, x):
    """``clean()``
    File that is clean according to status.
    """
    # i18n: "clean" is a keyword
    getargs(x, 0, 0, _("clean takes no arguments"))
    return _statusfiles(mctx, 6)

def func(mctx, a, b):
    """Call the predicate named by symbol node a with argument tree b."""
    if a[0] == 'symbol' and a[1] in symbols:
        return symbols[a[1]](mctx, b)
    raise error.ParseError(_("not a function: %s") % a[1])

def getlist(x):
    """Flatten a left-nested 'list' tree into a Python list of nodes."""
    if not x:
        return []
    if x[0] == 'list':
        return getlist(x[1]) + [x[2]]
    return [x]

def getargs(x, min, max, err):
    """Return the arguments in x as a list.

    Raises error.ParseError(err) unless min <= number of arguments <= max.
    """
    l = getlist(x)
    if len(l) < min or len(l) > max:
        raise error.ParseError(err)
    return l

def binary(mctx, x):
    """``binary()``
    File that appears to be binary (contains NUL bytes).
    """
    # i18n: "binary" is a keyword
    getargs(x, 0, 0, _("binary takes no arguments"))
    return [f for f in mctx.subset if util.binary(mctx.ctx[f].data())]

def exec_(mctx, x):
    """``exec()``
    File that is marked as executable.
    """
    # i18n: "exec" is a keyword
    getargs(x, 0, 0, _("exec takes no arguments"))
    return [f for f in mctx.subset if mctx.ctx.flags(f) == 'x']

def symlink(mctx, x):
    """``symlink()``
    File that is marked as a symlink.
    """
    # i18n: "symlink" is a keyword
    getargs(x, 0, 0, _("symlink takes no arguments"))
    return [f for f in mctx.subset if mctx.ctx.flags(f) == 'l']

def resolved(mctx, x):
    """``resolved()``
    File that is marked resolved according to the resolve state.
    """
    # i18n: "resolved" is a keyword
    getargs(x, 0, 0, _("resolved takes no arguments"))
    # contexts with a revision number never select anything here
    if mctx.ctx.rev() is not None:
        return []
    ms = merge.mergestate(mctx.ctx._repo)
    return [f for f in mctx.subset if f in ms and ms[f] == 'r']

def unresolved(mctx, x):
    """``unresolved()``
    File that is marked unresolved according to the resolve state.
    """
    # i18n: "unresolved" is a keyword
    getargs(x, 0, 0, _("unresolved takes no arguments"))
    # contexts with a revision number never select anything here
    if mctx.ctx.rev() is not None:
        return []
    ms = merge.mergestate(mctx.ctx._repo)
    return [f for f in mctx.subset if f in ms and ms[f] == 'u']

def hgignore(mctx, x):
    """``hgignore()``
    File that matches the active .hgignore pattern.
    """
    # i18n: "hgignore" is a keyword
    getargs(x, 0, 0, _("hgignore takes no arguments"))
    ignore = mctx.ctx._repo.dirstate._ignore
    return [f for f in mctx.subset if ignore(f)]

def grep(mctx, x):
    """``grep(regex)``
    File contains the given regular expression.
    """
    # i18n: "grep" is a keyword
    pat = getstring(x, _("grep requires a pattern"))
    try:
        r = re.compile(pat)
    except re.error:
        # report a bad pattern as a parse error, like every other
        # malformed predicate argument, instead of leaking re.error
        raise error.ParseError(_("invalid match pattern: %s") % pat)
    return [f for f in mctx.subset if r.search(mctx.ctx[f].data())]

# multipliers for the size suffixes accepted by size()
_units = dict(k=2**10, K=2**10, kB=2**10, KB=2**10,
              M=2**20, MB=2**20, G=2**30, GB=2**30)

def _sizetoint(s):
    """Convert a size such as '4k' or '.5MB' to a number of bytes.

    Raises error.ParseError when s is not a number with an optional
    suffix from _units.
    """
    try:
        s = s.strip()
        for k, v in _units.items():
            if s.endswith(k):
                return int(float(s[:-len(k)]) * v)
        return int(s)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)

def _sizetomax(s):
    """Return the largest byte count that still counts as size s.

    Raises error.ParseError when s cannot be parsed.
    """
    try:
        s = s.strip()
        for k, v in _units.items():
            if s.endswith(k):
                # max(4k) = 5k - 1, max(4.5k) = 4.6k - 1
                n = s[:-len(k)]
                inc = 1.0
                if "." in n:
                    inc /= 10 ** len(n.split(".")[1])
                return int((float(n) + inc) * v) - 1
        # no extension, this is a precise value
        return int(s)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)

def size(mctx, x):
    """``size(expression)``
    File size matches the given expression. Examples:

    - 1k (files from 1024 to 2047 bytes)
    - < 20k (files less than 20480 bytes)
    - >= .5MB (files at least 524288 bytes)
    - 4k - 1MB (files from 4096 bytes to 1048576 bytes)
    """

    # i18n: "size" is a keyword
    expr = getstring(x, _("size requires an expression")).strip()
    if '-' in expr: # do we have a range?
        a, b = expr.split('-', 1)
        a = _sizetoint(a)
        b = _sizetoint(b)
        m = lambda n: n >= a and n <= b
    elif expr.startswith("<="):
        a = _sizetoint(expr[2:])
        m = lambda n: n <= a
    elif expr.startswith("<"):
        a = _sizetoint(expr[1:])
        m = lambda n: n < a
    elif expr.startswith(">="):
        a = _sizetoint(expr[2:])
        m = lambda n: n >= a
    elif expr.startswith(">"):
        a = _sizetoint(expr[1:])
        m = lambda n: n > a
    elif expr[:1].isdigit() or expr.startswith('.'):
        # slicing instead of indexing keeps an empty expression from
        # raising IndexError; it falls through to the parse error below
        a = _sizetoint(expr)
        b = _sizetomax(expr)
        m = lambda n: n >= a and n <= b
    else:
        raise error.ParseError(_("couldn't parse size: %s") % expr)

    return [f for f in mctx.subset if m(mctx.ctx[f].size())]

def encoding(mctx, x):
    """``encoding(name)``
    File can be successfully decoded with the given character
    encoding. May not be useful for encodings other than ASCII and
    UTF-8.
    """

    # i18n: "encoding" is a keyword
    enc = getstring(x, _("encoding requires an encoding name"))

    s = []
    for f in mctx.subset:
        d = mctx.ctx[f].data()
        try:
            d.decode(enc)
        except LookupError:
            raise util.Abort(_("unknown encoding '%s'") % enc)
        except UnicodeDecodeError:
            # file content is not valid in this encoding: leave it out
            continue
        s.append(f)

    return s

def copied(mctx, x):
    """``copied()``
    File that is recorded as being copied.
    """
    # i18n: "copied" is a keyword
    getargs(x, 0, 0, _("copied takes no arguments"))
    s = []
    for f in mctx.subset:
        p = mctx.ctx[f].parents()
        # selected when the first parent file lives at a different path
        if p and p[0].path() != f:
            s.append(f)
    return s

# predicate name -> implementation, looked up by func()
symbols = {
    'added': added,
    'binary': binary,
    'clean': clean,
    'copied': copied,
    'deleted': deleted,
    'encoding': encoding,
    'exec': exec_,
    'grep': grep,
    'ignored': ignored,
    'hgignore': hgignore,
    'modified': modified,
    'removed': removed,
    'resolved': resolved,
    'size': size,
    'symlink': symlink,
    'unknown': unknown,
    'unresolved': unresolved,
}

# parse tree node type -> evaluator, looked up by getset()
methods = {
    'string': stringset,
    'symbol': stringset,
    'and': andset,
    'or': orset,
    'minus': minusset,
    'list': listset,
    'group': getset,
    'not': notset,
    'func': func,
}

class matchctx(object):
    """Evaluation context: a change context, the candidate files
    (subset) and, when it was computed, the status lists."""
    def __init__(self, ctx, subset=None, status=None):
        self.ctx = ctx
        self.subset = subset
        self._status = status
    def status(self):
        """Return the status passed to the constructor (may be None)."""
        return self._status
    def matcher(self, patterns):
        """Build a matcher for patterns through the change context."""
        return self.ctx.match(patterns)
    def filter(self, files):
        """Return the entries of files that are in the current subset."""
        known = set(self.subset)
        return [f for f in files if f in known]
    def narrow(self, files):
        """Return a new context restricted to files within the subset."""
        return matchctx(self.ctx, self.filter(files), self._status)

def _intree(funcs, tree):
    """Tell whether tree calls any of the predicates named in funcs."""
    if isinstance(tree, tuple):
        if tree[0] == 'func' and tree[1][0] == 'symbol':
            if tree[1][1] in funcs:
                return True
        for s in tree[1:]:
            if _intree(funcs, s):
                return True
    return False

def getfileset(ctx, expr):
    """Parse the fileset expression expr and evaluate it against ctx.

    Returns the list of selected files. Raises error.ParseError when
    the expression is not consumed entirely by the parser.
    """
    tree, pos = parse(expr)
    if (pos != len(expr)):
        raise error.ParseError(_("invalid token"), pos)

    # do we need status info?
    if _intree(['modified', 'added', 'removed', 'deleted',
                'unknown', 'ignored', 'clean'], tree):
        # local names differ from the predicates unknown() and ignored()
        # so that they are not shadowed inside this function
        wantunknown = _intree(['unknown'], tree)
        wantignored = _intree(['ignored'], tree)

        r = ctx._repo
        status = r.status(ctx.p1(), ctx,
                          unknown=wantunknown, ignored=wantignored,
                          clean=True)
        subset = []
        for c in status:
            subset.extend(c)
    else:
        status = None
        # the subset is traversed more than once during evaluation (for
        # instance by notset and orset), so it must not be a one-shot
        # iterator
        subset = list(ctx.walk(ctx.match([])))

    return getset(matchctx(ctx, subset, status), tree)

# tell hggettext to extract docstrings from these functions:
i18nfunctions = symbols.values()