diff options
Diffstat (limited to 'buildbot/buildbot/changes/svnpoller.py')
-rw-r--r-- | buildbot/buildbot/changes/svnpoller.py | 463 |
1 files changed, 463 insertions, 0 deletions
diff --git a/buildbot/buildbot/changes/svnpoller.py b/buildbot/buildbot/changes/svnpoller.py new file mode 100644 index 0000000..223c8b5 --- /dev/null +++ b/buildbot/buildbot/changes/svnpoller.py @@ -0,0 +1,463 @@ +# -*- test-case-name: buildbot.test.test_svnpoller -*- + +# Based on the work of Dave Peticolas for the P4poll +# Changed to svn (using xml.dom.minidom) by Niklaus Giger +# Hacked beyond recognition by Brian Warner + +from twisted.python import log +from twisted.internet import defer, reactor, utils +from twisted.internet.task import LoopingCall + +from buildbot import util +from buildbot.changes import base +from buildbot.changes.changes import Change + +import xml.dom.minidom + +def _assert(condition, msg): + if condition: + return True + raise AssertionError(msg) + +def dbgMsg(myString): + log.msg(myString) + return 1 + +# these split_file_* functions are available for use as values to the +# split_file= argument. +def split_file_alwaystrunk(path): + return (None, path) + +def split_file_branches(path): + # turn trunk/subdir/file.c into (None, "subdir/file.c") + # and branches/1.5.x/subdir/file.c into ("branches/1.5.x", "subdir/file.c") + pieces = path.split('/') + if pieces[0] == 'trunk': + return (None, '/'.join(pieces[1:])) + elif pieces[0] == 'branches': + return ('/'.join(pieces[0:2]), '/'.join(pieces[2:])) + else: + return None + + +class SVNPoller(base.ChangeSource, util.ComparableMixin): + """This source will poll a Subversion repository for changes and submit + them to the change master.""" + + compare_attrs = ["svnurl", "split_file_function", + "svnuser", "svnpasswd", + "pollinterval", "histmax", + "svnbin"] + + parent = None # filled in when we're added + last_change = None + loop = None + working = False + + def __init__(self, svnurl, split_file=None, + svnuser=None, svnpasswd=None, + pollinterval=10*60, histmax=100, + svnbin='svn'): + """ + @type svnurl: string + @param svnurl: the SVN URL that describes the repository and + subdirectory to watch. If this ChangeSource should + only pay attention to a single branch, this should + point at the repository for that branch, like + svn://svn.twistedmatrix.com/svn/Twisted/trunk . If it + should follow multiple branches, point it at the + repository directory that contains all the branches + like svn://svn.twistedmatrix.com/svn/Twisted and also + provide a branch-determining function. + + Each file in the repository has a SVN URL in the form + (SVNURL)/(BRANCH)/(FILEPATH), where (BRANCH) could be + empty or not, depending upon your branch-determining + function. Only files that start with (SVNURL)/(BRANCH) + will be monitored. The Change objects that are sent to + the Schedulers will see (FILEPATH) for each modified + file. + + @type split_file: callable or None + @param split_file: a function that is called with a string of the + form (BRANCH)/(FILEPATH) and should return a tuple + (BRANCH, FILEPATH). This function should match + your repository's branch-naming policy. Each + changed file has a fully-qualified URL that can be + split into a prefix (which equals the value of the + 'svnurl' argument) and a suffix; it is this suffix + which is passed to the split_file function. + + If the function returns None, the file is ignored. + Use this to indicate that the file is not a part + of this project. + + For example, if your repository puts the trunk in + trunk/... and branches are in places like + branches/1.5/..., your split_file function could + look like the following (this function is + available as svnpoller.split_file_branches):: + + pieces = path.split('/') + if pieces[0] == 'trunk': + return (None, '/'.join(pieces[1:])) + elif pieces[0] == 'branches': + return ('/'.join(pieces[0:2]), + '/'.join(pieces[2:])) + else: + return None + + If instead your repository layout puts the trunk + for ProjectA in trunk/ProjectA/... and the 1.5 + branch in branches/1.5/ProjectA/..., your + split_file function could look like:: + + pieces = path.split('/') + if pieces[0] == 'trunk': + branch = None + pieces.pop(0) # remove 'trunk' + elif pieces[0] == 'branches': + pieces.pop(0) # remove 'branches' + # grab branch name + branch = 'branches/' + pieces.pop(0) + else: + return None # something weird + projectname = pieces.pop(0) + if projectname != 'ProjectA': + return None # wrong project + return (branch, '/'.join(pieces)) + + The default of split_file= is None, which + indicates that no splitting should be done. This + is equivalent to the following function:: + + return (None, path) + + If you wish, you can override the split_file + method with the same sort of function instead of + passing in a split_file= argument. + + + @type svnuser: string + @param svnuser: If set, the --username option will be added to + the 'svn log' command. You may need this to get + access to a private repository. + @type svnpasswd: string + @param svnpasswd: If set, the --password option will be added. + + @type pollinterval: int + @param pollinterval: interval in seconds between polls. The default + is 600 seconds (10 minutes). Smaller values + decrease the latency between the time a change + is recorded and the time the buildbot notices + it, but it also increases the system load. + + @type histmax: int + @param histmax: maximum number of changes to look back through. + The default is 100. Smaller values decrease + system load, but if more than histmax changes + are recorded between polls, the extra ones will + be silently lost. + + @type svnbin: string + @param svnbin: path to svn binary, defaults to just 'svn'. Use + this if your subversion command lives in an + unusual location. + """ + + if svnurl.endswith("/"): + svnurl = svnurl[:-1] # strip the trailing slash + self.svnurl = svnurl + self.split_file_function = split_file or split_file_alwaystrunk + self.svnuser = svnuser + self.svnpasswd = svnpasswd + + self.svnbin = svnbin + self.pollinterval = pollinterval + self.histmax = histmax + self._prefix = None + self.overrun_counter = 0 + self.loop = LoopingCall(self.checksvn) + + def split_file(self, path): + # use getattr() to avoid turning this function into a bound method, + # which would require it to have an extra 'self' argument + f = getattr(self, "split_file_function") + return f(path) + + def startService(self): + log.msg("SVNPoller(%s) starting" % self.svnurl) + base.ChangeSource.startService(self) + # Don't start the loop just yet because the reactor isn't running. + # Give it a chance to go and install our SIGCHLD handler before + # spawning processes. + reactor.callLater(0, self.loop.start, self.pollinterval) + + def stopService(self): + log.msg("SVNPoller(%s) shutting down" % self.svnurl) + self.loop.stop() + return base.ChangeSource.stopService(self) + + def describe(self): + return "SVNPoller watching %s" % self.svnurl + + def checksvn(self): + # Our return value is only used for unit testing. + + # we need to figure out the repository root, so we can figure out + # repository-relative pathnames later. Each SVNURL is in the form + # (ROOT)/(PROJECT)/(BRANCH)/(FILEPATH), where (ROOT) is something + # like svn://svn.twistedmatrix.com/svn/Twisted (i.e. there is a + # physical repository at /svn/Twisted on that host), (PROJECT) is + # something like Projects/Twisted (i.e. within the repository's + # internal namespace, everything under Projects/Twisted/ has + # something to do with Twisted, but these directory names do not + # actually appear on the repository host), (BRANCH) is something like + # "trunk" or "branches/2.0.x", and (FILEPATH) is a tree-relative + # filename like "twisted/internet/defer.py". + + # our self.svnurl attribute contains (ROOT)/(PROJECT) combined + # together in a way that we can't separate without svn's help. If the + # user is not using the split_file= argument, then self.svnurl might + # be (ROOT)/(PROJECT)/(BRANCH) . In any case, the filenames we will + # get back from 'svn log' will be of the form + # (PROJECT)/(BRANCH)/(FILEPATH), but we want to be able to remove + # that (PROJECT) prefix from them. To do this without requiring the + # user to tell us how svnurl is split into ROOT and PROJECT, we do an + # 'svn info --xml' command at startup. This command will include a + # <root> element that tells us ROOT. We then strip this prefix from + # self.svnurl to determine PROJECT, and then later we strip the + # PROJECT prefix from the filenames reported by 'svn log --xml' to + # get a (BRANCH)/(FILEPATH) that can be passed to split_file() to + # turn into separate BRANCH and FILEPATH values. + + # whew. + + if self.working: + log.msg("SVNPoller(%s) overrun: timer fired but the previous " + "poll had not yet finished." % self.svnurl) + self.overrun_counter += 1 + return defer.succeed(None) + self.working = True + + log.msg("SVNPoller polling") + if not self._prefix: + # this sets self._prefix when it finishes. It fires with + # self._prefix as well, because that makes the unit tests easier + # to write. + d = self.get_root() + d.addCallback(self.determine_prefix) + else: + d = defer.succeed(self._prefix) + + d.addCallback(self.get_logs) + d.addCallback(self.parse_logs) + d.addCallback(self.get_new_logentries) + d.addCallback(self.create_changes) + d.addCallback(self.submit_changes) + d.addCallbacks(self.finished_ok, self.finished_failure) + return d + + def getProcessOutput(self, args): + # this exists so we can override it during the unit tests + d = utils.getProcessOutput(self.svnbin, args, {}) + return d + + def get_root(self): + args = ["info", "--xml", "--non-interactive", self.svnurl] + if self.svnuser: + args.extend(["--username=%s" % self.svnuser]) + if self.svnpasswd: + args.extend(["--password=%s" % self.svnpasswd]) + d = self.getProcessOutput(args) + return d + + def determine_prefix(self, output): + try: + doc = xml.dom.minidom.parseString(output) + except xml.parsers.expat.ExpatError: + dbgMsg("_process_changes: ExpatError in %s" % output) + log.msg("SVNPoller._determine_prefix_2: ExpatError in '%s'" + % output) + raise + rootnodes = doc.getElementsByTagName("root") + if not rootnodes: + # this happens if the URL we gave was already the root. In this + # case, our prefix is empty. + self._prefix = "" + return self._prefix + rootnode = rootnodes[0] + root = "".join([c.data for c in rootnode.childNodes]) + # root will be a unicode string + _assert(self.svnurl.startswith(root), + "svnurl='%s' doesn't start with <root>='%s'" % + (self.svnurl, root)) + self._prefix = self.svnurl[len(root):] + if self._prefix.startswith("/"): + self._prefix = self._prefix[1:] + log.msg("SVNPoller: svnurl=%s, root=%s, so prefix=%s" % + (self.svnurl, root, self._prefix)) + return self._prefix + + def get_logs(self, ignored_prefix=None): + args = [] + args.extend(["log", "--xml", "--verbose", "--non-interactive"]) + if self.svnuser: + args.extend(["--username=%s" % self.svnuser]) + if self.svnpasswd: + args.extend(["--password=%s" % self.svnpasswd]) + args.extend(["--limit=%d" % (self.histmax), self.svnurl]) + d = self.getProcessOutput(args) + return d + + def parse_logs(self, output): + # parse the XML output, return a list of <logentry> nodes + try: + doc = xml.dom.minidom.parseString(output) + except xml.parsers.expat.ExpatError: + dbgMsg("_process_changes: ExpatError in %s" % output) + log.msg("SVNPoller._parse_changes: ExpatError in '%s'" % output) + raise + logentries = doc.getElementsByTagName("logentry") + return logentries + + + def _filter_new_logentries(self, logentries, last_change): + # given a list of logentries, return a tuple of (new_last_change, + # new_logentries), where new_logentries contains only the ones after + # last_change + if not logentries: + # no entries, so last_change must stay at None + return (None, []) + + mostRecent = int(logentries[0].getAttribute("revision")) + + if last_change is None: + # if this is the first time we've been run, ignore any changes + # that occurred before now. This prevents a build at every + # startup. + log.msg('svnPoller: starting at change %s' % mostRecent) + return (mostRecent, []) + + if last_change == mostRecent: + # an unmodified repository will hit this case + log.msg('svnPoller: _process_changes last %s mostRecent %s' % ( + last_change, mostRecent)) + return (mostRecent, []) + + new_logentries = [] + for el in logentries: + if last_change == int(el.getAttribute("revision")): + break + new_logentries.append(el) + new_logentries.reverse() # return oldest first + return (mostRecent, new_logentries) + + def get_new_logentries(self, logentries): + last_change = self.last_change + (new_last_change, + new_logentries) = self._filter_new_logentries(logentries, + self.last_change) + self.last_change = new_last_change + log.msg('svnPoller: _process_changes %s .. %s' % + (last_change, new_last_change)) + return new_logentries + + + def _get_text(self, element, tag_name): + try: + child_nodes = element.getElementsByTagName(tag_name)[0].childNodes + text = "".join([t.data for t in child_nodes]) + except: + text = "<unknown>" + return text + + def _transform_path(self, path): + _assert(path.startswith(self._prefix), + "filepath '%s' should start with prefix '%s'" % + (path, self._prefix)) + relative_path = path[len(self._prefix):] + if relative_path.startswith("/"): + relative_path = relative_path[1:] + where = self.split_file(relative_path) + # 'where' is either None or (branch, final_path) + return where + + def create_changes(self, new_logentries): + changes = [] + + for el in new_logentries: + branch_files = [] # get oldest change first + revision = str(el.getAttribute("revision")) + dbgMsg("Adding change revision %s" % (revision,)) + # TODO: the rest of buildbot may not be ready for unicode 'who' + # values + author = self._get_text(el, "author") + comments = self._get_text(el, "msg") + # there is a "date" field, but it provides localtime in the + # repository's timezone, whereas we care about buildmaster's + # localtime (since this will get used to position the boxes on + # the Waterfall display, etc). So ignore the date field and use + # our local clock instead. + #when = self._get_text(el, "date") + #when = time.mktime(time.strptime("%.19s" % when, + # "%Y-%m-%dT%H:%M:%S")) + branches = {} + pathlist = el.getElementsByTagName("paths")[0] + for p in pathlist.getElementsByTagName("path"): + action = p.getAttribute("action") + path = "".join([t.data for t in p.childNodes]) + # the rest of buildbot is certaily not yet ready to handle + # unicode filenames, because they get put in RemoteCommands + # which get sent via PB to the buildslave, and PB doesn't + # handle unicode. + path = path.encode("ascii") + if path.startswith("/"): + path = path[1:] + where = self._transform_path(path) + + # if 'where' is None, the file was outside any project that + # we care about and we should ignore it + if where: + branch, filename = where + if not branch in branches: + branches[branch] = { 'files': []} + branches[branch]['files'].append(filename) + + if not branches[branch].has_key('action'): + branches[branch]['action'] = action + + for branch in branches.keys(): + action = branches[branch]['action'] + files = branches[branch]['files'] + number_of_files_changed = len(files) + + if action == u'D' and number_of_files_changed == 1 and files[0] == '': + log.msg("Ignoring deletion of branch '%s'" % branch) + else: + c = Change(who=author, + files=files, + comments=comments, + revision=revision, + branch=branch) + changes.append(c) + + return changes + + def submit_changes(self, changes): + for c in changes: + self.parent.addChange(c) + + def finished_ok(self, res): + log.msg("SVNPoller finished polling") + dbgMsg('_finished : %s' % res) + assert self.working + self.working = False + return res + + def finished_failure(self, f): + log.msg("SVNPoller failed") + dbgMsg('_finished : %s' % f) + assert self.working + self.working = False + return None # eat the failure |