1#!/usr/bin/env python
2"""Simple tools to query github.com and gather stats about issues.
3
4Taken from ipython
5
6"""
7# -----------------------------------------------------------------------------
8# Imports
9# -----------------------------------------------------------------------------
10
11import json
12import re
13import sys
14
15from datetime import datetime, timedelta
16from subprocess import check_output
17from urllib.request import urlopen
18
19
20# -----------------------------------------------------------------------------
21# Globals
22# -----------------------------------------------------------------------------
23
# strptime/strftime format of the timestamps exchanged with the GitHub API
# (note the literal trailing "Z").
ISO8601 = "%Y-%m-%dT%H:%M:%SZ"
# Value sent as the ``per_page`` query parameter of every API request.
PER_PAGE = 100

# Patterns applied to an HTTP ``Link`` header: ``element_pat`` captures the
# text between ``<`` and ``>`` (the URL), ``rel_pat`` captures the quoted
# name following ``rel=``.
element_pat = re.compile(r'<(.+?)>')
rel_pat = re.compile(r'rel=[\'"](\w+)[\'"]')

# Default starting point used by issues_closed_since().
LAST_RELEASE = datetime(2015, 3, 18)
31
32# -----------------------------------------------------------------------------
33# Functions
34# -----------------------------------------------------------------------------
35
36
def parse_link_header(headers):
    """Map each ``rel`` name found in the ``link`` header to its URL.

    ``headers`` only needs a dict-like ``get`` method.  URLs and rel names
    are paired in the order in which they appear in the header; when a rel
    name occurs more than once the last URL wins.  A missing header yields
    an empty dict.
    """
    link_value = headers.get('link', '')
    targets = element_pat.findall(link_value)
    relations = rel_pat.findall(link_value)
    return dict(zip(relations, targets))
45
46
def get_paged_request(url):
    """Get a full list, handling APIv3's paging.

    Starting at ``url``, each page is fetched and decoded as JSON, its items
    are appended to the result, and the ``next`` relation of the response's
    ``Link`` header (if any) is followed until no further page is announced.

    Parameters
    ----------
    url : str
        URL of the first page.  A falsy value results in no request at all.

    Returns
    -------
    list
        The concatenated items of all pages, in the order received.

    Raises
    ------
    Whatever ``urlopen`` or ``json.load`` raise (network errors, HTTP error
    statuses, malformed JSON) is propagated unchanged.
    """
    results = []
    while url:
        # Progress goes to stderr so that stdout only carries the report.
        print("fetching %s" % url, file=sys.stderr)
        # Close every response once it has been consumed instead of leaving
        # one open connection per page behind.
        with urlopen(url) as f:
            results.extend(json.load(f))
            links = parse_link_header(f.headers)
        url = links.get('next')
    return results
57
58
def get_issues(project="dipy/dipy", state="closed", pulls=False):
    """Fetch every issue (or pull request) of ``project`` in ``state``.

    When ``pulls`` is true the ``pulls`` endpoint is queried, otherwise the
    ``issues`` endpoint.  All result pages are collected and returned as a
    single list.
    """
    if pulls:
        endpoint = 'pulls'
    else:
        endpoint = 'issues'
    template = "https://api.github.com/repos/%s/%s?state=%s&per_page=%i"
    return get_paged_request(template % (project, endpoint, state, PER_PAGE))
65
66
def _parse_datetime(s):
    """Parse dates in the format returned by the Github API.

    Parameters
    ----------
    s : str or None
        Timestamp formatted according to ``ISO8601``, or a falsy value
        (``None``, ``''``) when the API reported no date.

    Returns
    -------
    datetime
        A naive datetime.  For a falsy ``s`` the Unix epoch
        (1970-01-01 00:00:00) is returned as a sentinel.

    Raises
    ------
    ValueError
        If ``s`` is non-empty but does not match ``ISO8601``.
    """
    if not s:
        # The timestamps parsed below carry a "Z" (UTC) suffix, so the
        # sentinel is the UTC epoch.  ``datetime.fromtimestamp(0)`` would
        # give the epoch in *local* time and thus vary with the machine's
        # timezone.
        return datetime(1970, 1, 1)
    return datetime.strptime(s, ISO8601)
73
74
def issues2dict(issues):
    """Index a list of issues by their ``'number'`` entry.

    If several issues share a number, the one appearing last is kept.
    """
    return {issue['number']: issue for issue in issues}
81
82
def is_pull_request(issue):
    """Return True if the given issue is a pull request.

    Parameters
    ----------
    issue : dict
        An issue object as decoded from the GitHub API.

    Returns
    -------
    bool
        True when the issue carries pull request information.

    Notes
    -----
    Issue objects of the v3 API describe an attached pull request through a
    nested ``'pull_request'`` object holding its URLs.  That object is only
    trusted when it actually contains a URL, so an entry that is ``None`` or
    whose URLs are all ``None`` counts as a plain issue.  The flat
    ``'pull_request_url'`` key checked by earlier versions of this function
    is still honoured.
    """
    if 'pull_request_url' in issue:
        # Legacy key, kept so existing callers/data keep working.
        return True
    pr_info = issue.get('pull_request') or {}
    return bool(pr_info.get('html_url') or pr_info.get('url'))
86
87
def issues_closed_since(period=LAST_RELEASE, project="dipy/dipy", pulls=False):
    """Get all issues closed since a particular point in time.

    Period can either be a datetime object, or a timedelta object. In the
    latter case, it is used as a time before the present.

    Parameters
    ----------
    period : datetime or timedelta
        Naive datetime compared against the API's ``closed_at`` values, or
        a timedelta counted back from the current UTC time.
    project : str
        Repository in ``owner/name`` form.
    pulls : bool
        If true, query the ``pulls`` endpoint and return merged pull
        requests only; otherwise query the ``issues`` endpoint and return
        plain issues only.

    Returns
    -------
    list of dict
        The matching issue or pull request objects.
    """
    # Local import: only needed for the timedelta branch below.
    from datetime import timezone

    which = 'pulls' if pulls else 'issues'

    if isinstance(period, timedelta):
        # ``period`` is formatted with a trailing "Z" and compared with the
        # API's timestamps, so "now" has to be taken in UTC rather than in
        # local time.  It is kept naive like every other datetime here.
        period = datetime.now(timezone.utc).replace(tzinfo=None) - period
    url = "https://api.github.com/repos/%s/%s?state=closed&sort=updated&" \
          "since=%s&per_page=%i" % (project, which, period.strftime(ISO8601),
                                    PER_PAGE)

    allclosed = get_paged_request(url)
    # ``since`` selects on the update time, so filter on the closing time.
    filtered = [i for i in allclosed
                if _parse_datetime(i['closed_at']) > period]

    if pulls:
        # exclude rejected PRs
        filtered = [pr for pr in filtered if pr['merged_at']]
    else:
        # The issues endpoint also lists pull requests, marked by a
        # 'pull_request' object holding their URLs.  Drop them so that they
        # are not reported a second time as regular issues.
        filtered = [i for i in filtered
                    if not (i.get('pull_request') or {}).get('html_url')]

    return filtered
114
115
def sorted_by_field(issues, field='closed_at', reverse=False):
    """Return a new list of issues ordered by ``field``.

    The default field is the closing date.  The input is left untouched;
    ``reverse=True`` gives descending order.
    """
    def field_value(issue):
        return issue[field]

    ordered = list(issues)
    ordered.sort(key=field_value, reverse=reverse)
    return ordered
119
120
def report(issues, show_urls=False):
    """Print one bullet line per issue, giving its number and title.

    With ``show_urls`` the number is wrapped in a reST role: ``ghpull`` for
    entries that have a ``'merged_at'`` key, ``ghissue`` for all others.
    """
    for entry in issues:
        number, title = entry['number'], entry['title']
        if not show_urls:
            print('* %d: %s' % (number, title))
            continue
        role = 'ghissue'
        if 'merged_at' in entry:
            role = 'ghpull'
        print('* :%s:`%d`: %s' % (role, number, title))
132
133# -----------------------------------------------------------------------------
134# Main script
135# -----------------------------------------------------------------------------
136
if __name__ == "__main__":
    # Whether to add reST urls for all issues in printout.
    show_urls = True

    # The optional command-line argument is either a number of days to look
    # back or the name of a git tag to start from.  Without an argument the
    # most recent tag reachable from HEAD is used.
    tag = None
    if len(sys.argv) > 1:
        try:
            days = int(sys.argv[1])
        except ValueError:
            # Not a number of days: interpret the argument as a tag name.
            tag = sys.argv[1]
    else:
        tag = check_output(['git', 'describe', '--abbrev=0'],
                           universal_newlines=True).strip()

    if tag:
        cmd = ['git', 'log', '-1', '--format=%ai', tag]
        # The output ends with a UTC offset field, which is split off so
        # that only the date and time are parsed.
        tagday = check_output(cmd,
                              universal_newlines=True).strip().rsplit(' ',
                                                                      1)[0]
        since = datetime.strptime(tagday, "%Y-%m-%d %H:%M:%S")
    else:
        since = datetime.now() - timedelta(days=days)

    print("fetching GitHub stats since %s (tag: %s)" % (since, tag),
          file=sys.stderr)
    # turn off to play interactively without redownloading, use %run -i
    if 1:
        issues = issues_closed_since(since, pulls=False)
        pulls = issues_closed_since(since, pulls=True)

    # For regular reports, it's nice to show them in reverse
    # chronological order
    issues = sorted_by_field(issues, reverse=True)
    pulls = sorted_by_field(pulls, reverse=True)

    n_issues, n_pulls = map(len, (issues, pulls))
    n_total = n_issues + n_pulls

    # Print summary report we can directly include into release notes.
    print()
    since_day = since.strftime("%Y/%m/%d")
    today = datetime.today().strftime("%Y/%m/%d")
    print("GitHub stats for %s - %s (tag: %s)" % (since_day, today, tag))
    print()
    print("These lists are automatically generated, and may be incomplete or"
          " contain duplicates.")
    print()
    if tag:
        # print git info, in addition to GitHub info:
        since_tag = tag + '..'
        cmd = ['git', 'log', '--oneline', since_tag]
        ncommits = len(check_output(cmd, universal_newlines=True).splitlines())

        author_cmd = ['git', 'log', '--format=* %aN', since_tag]
        all_authors = check_output(author_cmd, universal_newlines=True) \
            .splitlines()
        unique_authors = sorted(set(all_authors))

        if not unique_authors:
            print("No commits during this period.")
        else:
            print("The following %i authors contributed %i commits." %
                  (len(unique_authors), ncommits))
            print()
            print('\n'.join(unique_authors))
            print()

    # The GitHub part of the report does not depend on the git history, so
    # it is printed in every mode (tag or number of days) and also when no
    # commits were found.
    print()
    print("We closed a total of %d issues, %d pull requests and %d"
          " regular issues;\n"
          "this is the full list (generated with the script \n"
          ":file:`tools/github_stats.py`):" % (n_total, n_pulls,
                                               n_issues))
    print()
    print('Pull Requests (%d):\n' % n_pulls)
    report(pulls, show_urls)
    print()
    print('Issues (%d):\n' % n_issues)
    report(issues, show_urls)
217