1#!/usr/bin/env python
2# ***** BEGIN LICENSE BLOCK *****
3# This Source Code Form is subject to the terms of the Mozilla Public
4# License, v. 2.0. If a copy of the MPL was not distributed with this file,
5# You can obtain one at http://mozilla.org/MPL/2.0/.
6# ***** END LICENSE BLOCK *****
7"""Mercurial VCS support.
8"""
9
10from __future__ import absolute_import
11import hashlib
12import os
13import re
14import subprocess
15import sys
16from collections import namedtuple
17
18try:
19    from urlparse import urlsplit
20except ImportError:
21    from urllib.parse import urlsplit
22
23import mozharness
24from mozharness.base.errors import HgErrorList, VCSException
25from mozharness.base.log import LogMixin, OutputParser
26from mozharness.base.script import ScriptMixin
27from mozharness.base.transfer import TransferMixin
28
# Insert the directory three levels above sys.path[0] into the module search
# path, right after its first entry.
sys.path.insert(1, os.path.dirname(os.path.dirname(os.path.dirname(sys.path[0]))))


# Absolute path of the "external_tools" directory that sits next to the
# mozharness package directory; MercurialVCS.robustcheckout_path looks for
# robustcheckout.py in it.
external_tools_path = os.path.join(
    os.path.abspath(os.path.dirname(os.path.dirname(mozharness.__file__))),
    "external_tools",
)


# Global options appended to the hg executable when MercurialVCS builds its
# base command (self.hg), so they are part of every hg invocation it makes.
HG_OPTIONS = ["--config", "ui.merge=internal:merge"]

# MercurialVCS {{{1
# TODO Make the remaining functions more mozharness-friendly.
# TODO Add the various tag functionality that are currently in
# build/tools/scripts to MercurialVCS -- generic tagging logic belongs here.
# Indexes into the (revision, branch) tuples returned by MercurialVCS.out().
REVISION, BRANCH = 0, 1
45
46
class RepositoryUpdateRevisionParser(OutputParser):
    """Output parser that remembers the revision a checkout was updated to.

    Every line is checked against ``RE_UPDATED``; when a line reads exactly
    ``updated to <40 hex characters>`` the changeset id is stored in
    ``revision``.  All lines are still handed to the base parser afterwards.
    """

    # Matches a whole line announcing the full changeset id.
    RE_UPDATED = re.compile("^updated to ([a-f0-9]{40})$")
    # Changeset id from the most recent matching line; None until one is seen.
    revision = None

    def parse_single_line(self, line):
        """Record the revision if `line` announces one, then delegate."""
        found = self.RE_UPDATED.match(line)
        if found is not None:
            self.revision = found.group(1)

        return super(RepositoryUpdateRevisionParser, self).parse_single_line(line)
59
60
def make_hg_url(hg_host, repo_path, protocol="http", revision=None, filename=None):
    """Build an hg web URL for a repository, a revision or a raw file.

    * no `revision` and no `filename`: ``<protocol>://<hg_host>/<repo_path>``
    * `revision` only: ``.../rev/<revision>``
    * `filename` (requires `revision`): ``.../raw-file/<revision>/<filename>``

    Leading and trailing slashes of every component are stripped before the
    components are joined with a single "/".
    """

    def _join(*parts):
        return "/".join(part.strip("/") for part in parts)

    repo_url = _join("%s://%s" % (protocol, hg_host), repo_path)
    if filename:
        # A raw-file URL cannot be built without a revision.
        assert revision
        return _join(repo_url, "raw-file", revision, filename)
    if revision:
        return _join(repo_url, "rev", revision)
    return repo_url
77
78
79class MercurialVCS(ScriptMixin, LogMixin, TransferMixin):
80    # For the most part, scripts import mercurial, update
81    # tag-release.py imports
82    #  apply_and_push, update, get_revision, out, BRANCH, REVISION,
83    #  get_branches, cleanOutgoingRevs
84
85    def __init__(self, log_obj=None, config=None, vcs_config=None, script_obj=None):
86        super(MercurialVCS, self).__init__()
87        self.can_share = None
88        self.log_obj = log_obj
89        self.script_obj = script_obj
90        if config:
91            self.config = config
92        else:
93            self.config = {}
94        # vcs_config = {
95        #  hg_host: hg_host,
96        #  repo: repository,
97        #  branch: branch,
98        #  revision: revision,
99        #  ssh_username: ssh_username,
100        #  ssh_key: ssh_key,
101        # }
102        self.vcs_config = vcs_config or {}
103        self.hg = self.query_exe("hg", return_type="list") + HG_OPTIONS
104
105    def _make_absolute(self, repo):
106        if repo.startswith("file://"):
107            path = repo[len("file://") :]
108            repo = "file://%s" % os.path.abspath(path)
109        elif "://" not in repo:
110            repo = os.path.abspath(repo)
111        return repo
112
113    def get_repo_name(self, repo):
114        return repo.rstrip("/").split("/")[-1]
115
116    def get_repo_path(self, repo):
117        repo = self._make_absolute(repo)
118        if repo.startswith("/"):
119            return repo.lstrip("/")
120        else:
121            return urlsplit(repo).path.lstrip("/")
122
123    def get_revision_from_path(self, path):
124        """Returns which revision directory `path` currently has checked out."""
125        return self.get_output_from_command(
126            self.hg + ["parent", "--template", "{node}"], cwd=path
127        )
128
129    def get_branch_from_path(self, path):
130        branch = self.get_output_from_command(self.hg + ["branch"], cwd=path)
131        return str(branch).strip()
132
133    def get_branches_from_path(self, path):
134        branches = []
135        for line in self.get_output_from_command(
136            self.hg + ["branches", "-c"], cwd=path
137        ).splitlines():
138            branches.append(line.split()[0])
139        return branches
140
141    def hg_ver(self):
142        """Returns the current version of hg, as a tuple of
143        (major, minor, build)"""
144        ver_string = self.get_output_from_command(self.hg + ["-q", "version"])
145        match = re.search(r"\(version ([0-9.]+)\)", ver_string)
146        if match:
147            bits = match.group(1).split(".")
148            if len(bits) < 3:
149                bits += (0,)
150            ver = tuple(int(b) for b in bits)
151        else:
152            ver = (0, 0, 0)
153        self.debug("Running hg version %s" % str(ver))
154        return ver
155
156    def update(self, dest, branch=None, revision=None):
157        """Updates working copy `dest` to `branch` or `revision`.
158        If revision is set, branch will be ignored.
159        If neither is set then the working copy will be updated to the
160        latest revision on the current branch.  Local changes will be
161        discarded.
162        """
163        # If we have a revision, switch to that
164        msg = "Updating %s" % dest
165        if branch:
166            msg += " to branch %s" % branch
167        if revision:
168            msg += " revision %s" % revision
169        self.info("%s." % msg)
170        if revision is not None:
171            cmd = self.hg + ["update", "-C", "-r", revision]
172            if self.run_command(cmd, cwd=dest, error_list=HgErrorList):
173                raise VCSException("Unable to update %s to %s!" % (dest, revision))
174        else:
175            # Check & switch branch
176            local_branch = self.get_branch_from_path(dest)
177
178            cmd = self.hg + ["update", "-C"]
179
180            # If this is different, checkout the other branch
181            if branch and branch != local_branch:
182                cmd.append(branch)
183
184            if self.run_command(cmd, cwd=dest, error_list=HgErrorList):
185                raise VCSException("Unable to update %s!" % dest)
186        return self.get_revision_from_path(dest)
187
188    def clone(self, repo, dest, branch=None, revision=None, update_dest=True):
189        """Clones hg repo and places it at `dest`, replacing whatever else
190        is there.  The working copy will be empty.
191
192        If `revision` is set, only the specified revision and its ancestors
193        will be cloned.  If revision is set, branch is ignored.
194
195        If `update_dest` is set, then `dest` will be updated to `revision`
196        if set, otherwise to `branch`, otherwise to the head of default.
197        """
198        msg = "Cloning %s to %s" % (repo, dest)
199        if branch:
200            msg += " on branch %s" % branch
201        if revision:
202            msg += " to revision %s" % revision
203        self.info("%s." % msg)
204        parent_dest = os.path.dirname(dest)
205        if parent_dest and not os.path.exists(parent_dest):
206            self.mkdir_p(parent_dest)
207        if os.path.exists(dest):
208            self.info("Removing %s before clone." % dest)
209            self.rmtree(dest)
210
211        cmd = self.hg + ["clone"]
212        if not update_dest:
213            cmd.append("-U")
214
215        if revision:
216            cmd.extend(["-r", revision])
217        elif branch:
218            # hg >= 1.6 supports -b branch for cloning
219            ver = self.hg_ver()
220            if ver >= (1, 6, 0):
221                cmd.extend(["-b", branch])
222
223        cmd.extend([repo, dest])
224        output_timeout = self.config.get(
225            "vcs_output_timeout", self.vcs_config.get("output_timeout")
226        )
227        if (
228            self.run_command(cmd, error_list=HgErrorList, output_timeout=output_timeout)
229            != 0
230        ):
231            raise VCSException("Unable to clone %s to %s!" % (repo, dest))
232
233        if update_dest:
234            return self.update(dest, branch, revision)
235
236    def common_args(self, revision=None, branch=None, ssh_username=None, ssh_key=None):
237        """Fill in common hg arguments, encapsulating logic checks that
238        depend on mercurial versions and provided arguments
239        """
240        args = []
241        if ssh_username or ssh_key:
242            opt = ["-e", "ssh"]
243            if ssh_username:
244                opt[1] += " -l %s" % ssh_username
245            if ssh_key:
246                opt[1] += " -i %s" % ssh_key
247            args.extend(opt)
248        if revision:
249            args.extend(["-r", revision])
250        elif branch:
251            if self.hg_ver() >= (1, 6, 0):
252                args.extend(["-b", branch])
253        return args
254
255    def pull(self, repo, dest, update_dest=True, **kwargs):
256        """Pulls changes from hg repo and places it in `dest`.
257
258        If `revision` is set, only the specified revision and its ancestors
259        will be pulled.
260
261        If `update_dest` is set, then `dest` will be updated to `revision`
262        if set, otherwise to `branch`, otherwise to the head of default.
263        """
264        msg = "Pulling %s to %s" % (repo, dest)
265        if update_dest:
266            msg += " and updating"
267        self.info("%s." % msg)
268        if not os.path.exists(dest):
269            # Error or clone?
270            # If error, should we have a halt_on_error=False above?
271            self.error("Can't hg pull in  nonexistent directory %s." % dest)
272            return -1
273        # Convert repo to an absolute path if it's a local repository
274        repo = self._make_absolute(repo)
275        cmd = self.hg + ["pull"]
276        cmd.extend(self.common_args(**kwargs))
277        cmd.append(repo)
278        output_timeout = self.config.get(
279            "vcs_output_timeout", self.vcs_config.get("output_timeout")
280        )
281        if (
282            self.run_command(
283                cmd, cwd=dest, error_list=HgErrorList, output_timeout=output_timeout
284            )
285            != 0
286        ):
287            raise VCSException("Can't pull in %s!" % dest)
288
289        if update_dest:
290            branch = self.vcs_config.get("branch")
291            revision = self.vcs_config.get("revision")
292            return self.update(dest, branch=branch, revision=revision)
293
    # The module-level REVISION and BRANCH constants give the positions of the
    # attributes in the tuples returned by `out`.
295
296    def out(self, src, remote, **kwargs):
297        """Check for outgoing changesets present in a repo"""
298        self.info("Checking for outgoing changesets from %s to %s." % (src, remote))
299        cmd = self.hg + ["-q", "out", "--template", "{node} {branches}\n"]
300        cmd.extend(self.common_args(**kwargs))
301        cmd.append(remote)
302        if os.path.exists(src):
303            try:
304                revs = []
305                for line in (
306                    self.get_output_from_command(cmd, cwd=src, throw_exception=True)
307                    .rstrip()
308                    .split("\n")
309                ):
310                    try:
311                        rev, branch = line.split()
312                    # Mercurial displays no branch at all if the revision
313                    # is on "default"
314                    except ValueError:
315                        rev = line.rstrip()
316                        branch = "default"
317                    revs.append((rev, branch))
318                return revs
319            except subprocess.CalledProcessError as inst:
320                # In some situations, some versions of Mercurial return "1"
321                # if no changes are found, so we need to ignore this return
322                # code
323                if inst.returncode == 1:
324                    return []
325                raise
326
327    def push(self, src, remote, push_new_branches=True, **kwargs):
328        # This doesn't appear to work with hg_ver < (1, 6, 0).
329        # Error out, or let you try?
330        self.info("Pushing new changes from %s to %s." % (src, remote))
331        cmd = self.hg + ["push"]
332        cmd.extend(self.common_args(**kwargs))
333        if push_new_branches and self.hg_ver() >= (1, 6, 0):
334            cmd.append("--new-branch")
335        cmd.append(remote)
336        status = self.run_command(
337            cmd,
338            cwd=src,
339            error_list=HgErrorList,
340            success_codes=(0, 1),
341            return_type="num_errors",
342        )
343        if status:
344            raise VCSException("Can't push %s to %s!" % (src, remote))
345        return status
346
347    @property
348    def robustcheckout_path(self):
349        """Path to the robustcheckout extension."""
350        ext = os.path.join(external_tools_path, "robustcheckout.py")
351        if os.path.exists(ext):
352            return ext
353
354    def ensure_repo_and_revision(self):
355        """Makes sure that `dest` is has `revision` or `branch` checked out
356        from `repo`.
357
358        Do what it takes to make that happen, including possibly clobbering
359        dest.
360        """
361        c = self.vcs_config
362        dest = c["dest"]
363        repo_url = c["repo"]
364        rev = c.get("revision")
365        branch = c.get("branch")
366        purge = c.get("clone_with_purge", False)
367        upstream = c.get("clone_upstream_url")
368
369        # The API here is kind of bad because we're relying on state in
370        # self.vcs_config instead of passing arguments. This confuses
371        # scripts that have multiple repos. This includes the clone_tools()
372        # step :(
373
374        if not rev and not branch:
375            self.warning('did not specify revision or branch; assuming "default"')
376            branch = "default"
377
378        share_base = c.get("vcs_share_base") or os.environ.get("HG_SHARE_BASE_DIR")
379        if share_base and c.get("use_vcs_unique_share"):
380            # Bug 1277041 - update migration scripts to support robustcheckout
381            # fake a share but don't really share
382            share_base = os.path.join(share_base, hashlib.md5(dest).hexdigest())
383
384        # We require shared storage is configured because it guarantees we
385        # only have 1 local copy of logical repo stores.
386        if not share_base:
387            raise VCSException(
388                "vcs share base not defined; " "refusing to operate sub-optimally"
389            )
390
391        if not self.robustcheckout_path:
392            raise VCSException("could not find the robustcheckout Mercurial extension")
393
394        # Log HG version and install info to aid debugging.
395        self.run_command(self.hg + ["--version"])
396        self.run_command(self.hg + ["debuginstall", "--config=ui.username=worker"])
397
398        args = self.hg + [
399            "--config",
400            "extensions.robustcheckout=%s" % self.robustcheckout_path,
401            "robustcheckout",
402            repo_url,
403            dest,
404            "--sharebase",
405            share_base,
406        ]
407        if purge:
408            args.append("--purge")
409        if upstream:
410            args.extend(["--upstream", upstream])
411
412        if rev:
413            args.extend(["--revision", rev])
414        if branch:
415            args.extend(["--branch", branch])
416
417        parser = RepositoryUpdateRevisionParser(
418            config=self.config, log_obj=self.log_obj
419        )
420        if self.run_command(args, output_parser=parser):
421            raise VCSException("repo checkout failed!")
422
423        if not parser.revision:
424            raise VCSException("could not identify revision updated to")
425
426        return parser.revision
427
428    def cleanOutgoingRevs(self, reponame, remote, username, sshKey):
429        # TODO retry
430        self.info("Wiping outgoing local changes from %s to %s." % (reponame, remote))
431        outgoingRevs = self.out(
432            src=reponame, remote=remote, ssh_username=username, ssh_key=sshKey
433        )
434        for r in reversed(outgoingRevs):
435            self.run_command(
436                self.hg + ["strip", "-n", r[REVISION]],
437                cwd=reponame,
438                error_list=HgErrorList,
439            )
440
441    def query_pushinfo(self, repository, revision):
442        """Query the pushdate and pushid of a repository/revision.
443        This is intended to be used on hg.mozilla.org/mozilla-central and
444        similar. It may or may not work for other hg repositories.
445        """
446        PushInfo = namedtuple("PushInfo", ["pushid", "pushdate"])
447
448        try:
449            url = "%s/json-pushes?changeset=%s" % (repository, revision)
450            self.info("Pushdate URL is: %s" % url)
451            contents = self.retry(self.load_json_from_url, args=(url,))
452
453            # The contents should be something like:
454            # {
455            #   "28537": {
456            #    "changesets": [
457            #     "1d0a914ae676cc5ed203cdc05c16d8e0c22af7e5",
458            #    ],
459            #    "date": 1428072488,
460            #    "user": "user@mozilla.com"
461            #   }
462            # }
463            #
464            # So we grab the first element ("28537" in this case) and then pull
465            # out the 'date' field.
466            pushid = next(contents.keys())
467            self.info("Pushid is: %s" % pushid)
468            pushdate = contents[pushid]["date"]
469            self.info("Pushdate is: %s" % pushdate)
470            return PushInfo(pushid, pushdate)
471
472        except Exception:
473            self.exception("Failed to get push info from hg.mozilla.org")
474            raise
475
476
# __main__ {{{1
# Running this file directly does nothing; the guard body is a bare `pass`.
if __name__ == "__main__":
    pass
480