1# This Source Code Form is subject to the terms of the Mozilla Public
2# License, v. 2.0. If a copy of the MPL was not distributed with this
3# file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
5
# There seem to be sometimes identical events recorded twice by telemetry
def sanitize(rows):
    """Drop consecutive duplicate rows.

    Two adjacent rows are considered duplicates when they share the same
    (client_id, session_id, seq) triple; the first occurrence is kept.
    """
    deduped = []
    prev_key = ("unset", "unset", "unset")
    for row in rows:
        key = (row["client_id"], row["session_id"], row["seq"])
        if key != prev_key:
            deduped.append(row)
        prev_key = key

    return deduped
23
24
# Given a set of rows, find all distinct build ids
def extractBuildIDs(rows):
    """Return a dict mapping each distinct build_id to its occurrence count.

    Fixes: the original shadowed the builtin `id` and used a two-lookup
    if/else where `dict.get` with a default does the same in one expression.
    """
    buildids = {}
    for row in rows:
        build_id = row["build_id"]
        buildids[build_id] = buildids.get(build_id, 0) + 1
    return buildids
35
36
# Given a set of build ids and rows, enrich each row by an hg link.
# Relies on the result of utils.fetchBuildRevisions in buildids.
def constructHGLinks(buildids, rows):
    """Set row["location"] to "<prefix>/<source_file>#l<source_line>" in place.

    The prefix is the hg revision link from `buildids` when the row's
    build_id is known there, otherwise the raw build_id itself.
    """
    for row in rows:
        build_id = row["build_id"]
        prefix = buildids.get(build_id, build_id)
        row["location"] = prefix + "/" + row["source_file"] + "#l" + row["source_line"]
48
49
# Registry of (location, result) tuples considered to start a new raw stack;
# filled by addTopmostFrame, queried by isTopmostFrame.
topmost_stackframes = set()
# Accumulated event_timestamp deltas between adjacent frame pairs, keyed by
# a "loc2:res2-loc1:res1" string; filled by addFrameDelta, consumed by
# checkAverageFrameTimeDeltas.
delta_frames = {}
52
53
def isTopmostFrame(frame):
    """Check whether this frame's (location, result) pair was registered
    as a topmost frame."""
    return (frame["location"], frame["result"]) in topmost_stackframes
57
58
def addTopmostFrame(frame):
    """Register the frame's (location, result) pair as topmost.

    Only on first registration the row itself is tagged with
    frame["topmost"] = True; repeated calls are no-ops.
    """
    key = (frame["location"], frame["result"])
    if key not in topmost_stackframes:
        topmost_stackframes.add(key)
        frame["topmost"] = True
65
66
def addFrameDelta(frame1, frame2):
    """Accumulate the event_timestamp delta between two adjacent frames.

    Only pairs from the same client and session are considered; results are
    stored in the module-level delta_frames registry keyed by the pair's
    locations and results. Deltas are only summed for integer timestamps
    with frame2 strictly later than frame1.
    """
    # Deltas are only meaningful within one client's session.
    if (frame1["client_id"] != frame2["client_id"]
            or frame1["session_id"] != frame2["session_id"]):
        return

    fkey = "{}:{}-{}:{}".format(
        frame2["location"], frame2["result"], frame1["location"], frame1["result"]
    )
    fdelta = delta_frames.setdefault(
        fkey,
        {"delta_sum": 0, "delta_cnt": 0, "prev_row": frame1, "candidate": frame2},
    )

    t1 = frame1["event_timestamp"]
    t2 = frame2["event_timestamp"]
    if isinstance(t1, int) and isinstance(t2, int) and t2 > t1:
        fdelta["delta_sum"] += t2 - t1
        fdelta["delta_cnt"] += 1
89
90
# There can be outliers in terms of time distance between two stack frames
# that belong to the same propagation stack. In order to not increase the
# risk that one outlier breaks thousands of stacks, we check for the average
# time distance.
def checkAverageFrameTimeDeltas(rows, max_delta):
    """Promote frames whose average time distance exceeds max_delta.

    Feeds consecutive row pairs of complete sessions into addFrameDelta
    (filling the module-level delta_frames registry), then registers the
    "candidate" frame of every pair whose average delta is above max_delta
    via addTopmostFrame.

    Fixes: the original shadowed the builtin `sum` and iterated the dict's
    keys only to re-index it for every access.
    """
    prev_row = None
    for row in rows:
        # A topmost frame or an incomplete session breaks the pairing chain.
        if "topmost" in row or not row["session_complete"]:
            prev_row = None
            continue

        if prev_row:
            addFrameDelta(prev_row, row)
        prev_row = row

    for fdelta in delta_frames.values():
        total = fdelta["delta_sum"]
        cnt = fdelta["delta_cnt"]
        if cnt > 0 and (total / cnt) > max_delta:
            # print(fdelta)
            addTopmostFrame(fdelta["candidate"])
113
114
# A topmost frame is considered to initiate a new raw stack. We collect all
# candidates before we actually apply them. This implies, that we should run
# this function on a "large enough" sample of rows to be more accurate.
# As a side effect, we mark all rows that are part of a "complete" session
# (a session, that started within our data scope).
def collectTopmostFrames(rows):
    """Tag the frames that start a new raw stack and flag complete sessions.

    Assumes rows are ordered by client_id/session_id/seq — TODO confirm
    against the query that produces them. Side effects:
      - sets row["session_complete"] on every row,
      - registers topmost frames via addTopmostFrame (which also sets
        row["topmost"] on first registration),
      - finally runs checkAverageFrameTimeDeltas over the rows to split
        stacks with a too-large average inter-frame time distance.
    """
    prev_cid = "unset"
    prev_sid = "unset"
    prev_ctx = "unset"
    # Seed with "ERROR" so the very first row never looks like an upgrade.
    prev_sev = "ERROR"
    session_complete = False
    after_severity_downgrade = False
    for row in rows:
        cid = row["client_id"]
        sid = row["session_id"]
        ctx = row["context"]
        seq = row["seq"]
        sev = row["severity"]

        # If we have a new session, ensure it is complete from start,
        # otherwise we will ignore it entirely.
        if cid != prev_cid or sid != prev_sid:
            if seq == 1:
                session_complete = True
            else:
                session_complete = False
        row["session_complete"] = session_complete
        if session_complete:
            # If we change client, session or context, we can be sure to have
            # a new topmost frame.
            if seq == 1 or cid != prev_cid or sid != prev_sid or ctx != prev_ctx:
                addTopmostFrame(row)
                after_severity_downgrade = False
            # We do not expect a non-error to be ever upgraded to an error
            elif sev == "ERROR" and prev_sev != "ERROR":
                addTopmostFrame(row)
                after_severity_downgrade = False
            # If we just had a severity downgrade, we assume that we wanted
            # to break the error propagation after this point and split, too
            elif after_severity_downgrade:
                addTopmostFrame(row)
                after_severity_downgrade = False
            elif prev_sev == "ERROR" and sev != "ERROR":
                after_severity_downgrade = True

        prev_cid = cid
        prev_sid = sid
        prev_ctx = ctx
        prev_sev = sev

    # Should be ms. We've seen quite some runtime between stackframes in the
    # wild. We might want to consider to make this configurable. In general
    # we prefer local context over letting slip through some topmost frame
    # unrecognized, assuming that fixing the issues one by one they will
    # uncover them successively. This is achieved by a rather high delta value.
    max_avg_delta = 200
    checkAverageFrameTimeDeltas(rows, max_avg_delta)
172
173
def getFrameKey(frame):
    """Serialize a frame's identity as "<location>.<result>|" for hashing."""
    location = frame["location"]
    result = frame["result"]
    return "{}.{}|".format(location, result)
176
177
def getStackKey(stack):
    """Return a hash identifying the stack by its ordered frame keys."""
    return hash("".join(getFrameKey(frame) for frame in stack["frames"]))
183
184
# A "raw stack" is a list of frames, that:
# - share the same build_id (implicitely through location)
# - share the same client_id
# - share the same session_id
# - has a growing sequence number
# - stops at the first downgrade of severity from ERROR to else
# - XXX: contains each location at most once (no recursion)
# - appears to be in a reasonable short timeframe
# Calculates also a hash key to identify identical stacks
def collectRawStacks(rows):
    """Cut the ordered rows into raw stacks, one per topmost frame.

    Returns the list of flushed stacks; each carries a "stack_key" hash.
    NOTE(review): the stack still open when the rows run out is never
    flushed — confirm dropping the trailing stack is intended.
    """
    collectTopmostFrames(rows)

    raw_stacks = []
    # Sentinel that absorbs rows seen before the first topmost frame. It is
    # only skipped at flush time when no row at all has been consumed yet.
    current = {
        "stack_id": "unset",
        "client_id": "unset",
        "session_id": "unset",
        "submit_timeabs": "unset",
        "frames": [{"location": "unset"}],
    }
    next_stack_id = 1
    seen_any = False

    for row in rows:
        if isTopmostFrame(row):
            if seen_any:
                current["stack_key"] = getStackKey(current)
                raw_stacks.append(current)
            next_stack_id += 1
            current = {
                "stack_id": next_stack_id,
                "client_id": row["client_id"],
                "session_id": row["session_id"],
                "submit_timeabs": row["submit_timeabs"],
                "context": row["context"],
                "frames": [],
            }

        current["frames"].append(
            {
                "location": row["location"],
                "source_file": row["source_file"],
                "source_line": row["source_line"],
                "seq": row["seq"],
                "severity": row["severity"],
                "result": row["result"],
            }
        )
        seen_any = True

    return raw_stacks
234
235
# Merge all stacks that have the same hash key and count occurences.
# Relies on the ordering per client_id/session_id for correct counting.
def mergeEqualStacks(raw_stacks):
    """Fold stacks with an identical stack_key into one record.

    Each merged stack gains "hit_count", "client_count" and "session_count";
    client/session counters advance only when the id changes relative to the
    previously seen stack (hence the ordering requirement). Returns the
    merged stacks sorted by hit_count, highest first.
    """
    merged = {}
    prev_client = "none"
    prev_session = "none"
    for stack in raw_stacks:
        key = stack["stack_key"]
        existing = merged.get(key)
        if existing is None:
            stack["client_count"] = 1
            stack["session_count"] = 1
            stack["hit_count"] = 1
            prev_client = stack["client_id"]
            prev_session = stack["session_id"]
            merged[key] = stack
        else:
            existing["hit_count"] += 1
            if stack["client_id"] != prev_client:
                prev_client = stack["client_id"]
                existing["client_count"] += 1
            if stack["session_id"] != prev_session:
                prev_session = stack["session_id"]
                existing["session_count"] += 1

    return sorted(merged.values(), key=lambda s: s["hit_count"], reverse=True)
265
266
# Split the list of stacks into:
# - aborted (has at least one frame with NS_ERROR_ABORT)
# - info/warning (has at least one frame with that severity)
# - error (has only error frames)
def filterStacksForPropagation(
    all_stacks, error_stacks, warn_stacks, info_stacks, abort_stacks
):
    """Distribute each stack into exactly one of the output lists (in place).

    Priority order: abort beats info, info beats warning; a stack with none
    of those markers is treated as error-only.
    """
    for stack in all_stacks:
        frames = stack["frames"]
        if any(f["result"] == "NS_ERROR_ABORT" for f in frames):
            abort_stacks.append(stack)
        elif any(f["severity"] == "INFO" for f in frames):
            info_stacks.append(stack)
        elif any(f["severity"] == "WARNING" for f in frames):
            warn_stacks.append(stack)
        else:
            error_stacks.append(stack)
286
287
# Bugzilla comment markup
def printStacks(stacks):
    """Render the stacks as a Bugzilla-style markup table and return it.

    One row per stack: client/session/hit counts, the first frame's anchor,
    and the frame chain "[file#line:result](location)" joined by " <- ".
    """
    row_format = "{} | {} | {} | {} | {}\n"
    out = row_format.format("Clients", "Sessions", "Hits", "Anchor", "Stack")
    out += row_format.format("-------", "-------", "--------", "--------", "--------")
    for stack in stacks:
        frame_parts = [
            "[{}#{}:{}]({})".format(
                frame["source_file"],
                frame["source_line"],
                frame["result"],
                frame["location"],
            )
            for frame in stack["frames"]
        ]
        out += row_format.format(
            stack["client_count"],
            stack["session_count"],
            stack["hit_count"],
            stack["frames"][0]["anchor"],
            " <- ".join(frame_parts),
        )

    return out
316
317
def groupStacksForAnchors(stacks):
    """Group stacks by the anchor of their first frame.

    Returns a dict mapping anchor name to {"anchor": name, "stacks": [...]}.
    """
    anchors = {}
    for stack in stacks:
        anchor_name = stack["frames"][0]["anchor"]
        entry = anchors.setdefault(anchor_name, {"anchor": anchor_name, "stacks": []})
        entry["stacks"].append(stack)
    return anchors
328
329
330"""
331def getSummaryForAnchor(anchor):
332    return "[QM_TRY] Errors in function {}".format(anchor)
333
334
335def searchBugForAnchor(bugzilla_key, anchor):
336    summary = getSummaryForAnchor(anchor)
337    bug_url = "https://bugzilla.mozilla.org/rest/bug?" \
338              "summary={}&api_key={}".format(summary, bugzilla_key)
339    return requests.get(url=bug_url).json()["bugs"]
340
341
342def createBugForAnchor(bugzilla_key, anchor):
343    summary = getSummaryForAnchor(anchor)
344    bug_url = "https://bugzilla.mozilla.org/rest/bug?" \
345              "Bugzilla_api_key={}".format(bugzilla_key)
346    body = {
347        "product" : "Core",
348        "component" : "Storage: Quota Manager",
349        "version" : "unspecified",
350        "summary" : summary,
351        "description" : "This bug collects errors reported by QM_TRY"
352                        "macros for function {}.".format(anchor),
353    }
354    resp = requests.post(url=bug_url, json=body)
355    if resp.status_code != 200:
356        print(resp)
357        return 0
358    id = resp.json()["id"]
359    print("Added new bug {}:".format(id))
360    return id
361
362
363def ensureBugForAnchor(bugzilla_key, anchor):
364    buglist = searchBugForAnchor(bugzilla_key, anchor)
365    if (len(buglist) > 0):
366        id = buglist[0]["id"]
367        print("Found existing bug {}:".format(id))
368        return id
369    return createBugForAnchor(bugzilla_key, anchor)
370
371
372def addCommentForAnchor(bugzilla_key, anchor, stacks):
373    id = ensureBugForAnchor(bugzilla_key, anchor)
374    if (id <= 0):
375        print("Unable to create a bug for {}.".format(anchor))
376        return
377    comment = printStacks(stacks)
378    print("")
379    print("Add comment to bug {}:".format(id))
380    print(comment)
381
382
383def addCommentsForStacks(bugzilla_key, stacks):
384    anchors = groupStacksForAnchors(stacks)
385    for anchor in anchors:
386        addCommentForAnchor(bugzilla_key, anchors[anchor]["anchor"], anchors[anchor]["stacks"])
387"""
388