#!/usr/bin/env python
# -*- coding: utf-8 -*-

# This file is part of Cockpit.
#
# Copyright (C) 2013 Red Hat, Inc.
#
# Cockpit is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or
# (at your option) any later version.
#
# Cockpit is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Cockpit; If not, see <http://www.gnu.org/licenses/>.

import argparse
import datetime
import fnmatch
import re
import os
import sys

sys.dont_write_bytecode = True

from task import github

BOTS = os.path.join(os.path.dirname(__file__))


def main():
    """Read a traceback from stdin and look it up among the known issues.

    When the traceback matches a known issue for the given image, the
    occurrence is handed to post_github() and the issue number is written
    to stdout. Nothing is written to stdout when there is no match.

    Returns the process exit status: 0 normally, 1 when the lookup raised
    a RuntimeError (which is reported on stderr).
    """
    parser = argparse.ArgumentParser(description='Check a traceback for a known issue')
    parser.add_argument('-o', "--offline", action='store_true',
                        help="Work offline, don't fetch new data from origin for rebase")
    parser.add_argument('image', help="The image to check against")
    opts = parser.parse_args()

    api = None if opts.offline else github.GitHub()

    trace = sys.stdin.read()
    number = 0
    try:
        if trace:
            number = check_known_issue(api, trace, opts.image)
    except RuntimeError as ex:
        # Report the exception itself; the handler previously formatted an
        # undefined name and so died with a NameError instead.
        sys.stderr.write("image-naughty: {0}\n".format(ex))
        return 1

    if number:
        post_github(api, number, trace, opts.image)
        sys.stdout.write("{0}\n".format(number))
    return 0


def normalize_traceback(trace):
    """Return *trace* stripped of surrounding whitespace, with every
    ``File "dir/name"`` entry reduced to ``File "name"``."""
    # All file paths converted to basename
    return re.sub(r'File "[^"]*/([^/"]+)"', 'File "\\1"', trace.strip())


def list_directories(dirs):
    """Return the paths (directory joined with entry name) of everything
    listed directly inside each directory of *dirs*."""
    return [os.path.join(d, f) for d in dirs for f in os.listdir(d)]


def check_known_issue(api, trace, image):
    """Return the number of the known issue matching *trace*, or 0.

    Patterns are read from the files in the ``naughty/<image>`` directory
    next to this script. Only files whose name starts with an integer
    followed by ``-`` are considered; that integer is the issue number.
    Both the pattern and the trace are normalized with
    normalize_traceback() and then compared as a glob, so the pattern may
    match anywhere inside the trace.

    If several patterns match, the one listed last wins. *api* is not used
    by this function.
    """
    directories = []
    image_naughty = os.path.join(BOTS, "naughty", image)
    if os.path.exists(image_naughty):
        directories.append(image_naughty)

    trace = normalize_traceback(trace)
    number = 0
    for naughty in list_directories(directories):
        prefix = os.path.basename(naughty).partition("-")[0]
        try:
            n = int(prefix)
        except ValueError:
            # Not named after an issue number, so not a pattern file
            continue
        with open(naughty, "r") as fp:
            match = "*" + normalize_traceback(fp.read()) + "*"
        # Match as in a file name glob, albeit multi line, and account for literal pastes with '[]'
        if fnmatch.fnmatchcase(trace, match) or fnmatch.fnmatchcase(trace, match.replace("[", "?")):
            number = n
    return number


def redact_audit_variables(message):
    r"""Reformat audit events so that the same error recorded at different
    times will match when using string comparison

    Match lines like
    Error: audit: type=1400 audit(1458739098.632:268): avc: denied { read } for pid=1290 comm="ssh-transport-c" \
        name="unix" dev="proc" ino=4026532021 scontext=system_u:system_r:cockpit_ws_t:s0 \
        tcontext=system_u:object_r:proc_net_t:s0 tclass=file permissive=0

    Error: audit: type=1401 audit(1461925292.392:293): op=security_compute_av reason=bounds \
        scontext=system_u:system_r:init_t:s0 tcontext=system_u:system_r:docker_t:s0 tclass=process perms=siginh

    It will ignore changed timestamp, pid and ino entries: on every line
    that starts with "Error: audit:" they are replaced by the placeholders
    "[timestamp]", "[pid]" and "[ino]". All other lines are returned
    unchanged.
    """
    # Applied in this order. The leading groups are greedy, so when a line
    # holds several candidates it is the last one that gets replaced.
    redactions = (
        (re.compile(r"""(^\s*Error: audit:.+audit\()([0-9\.\:]+)(.*)"""), "[timestamp]"),
        (re.compile(r"""(.*pid=)([0-9]+)(.*)"""), "[pid]"),
        (re.compile(r"""(.*ino=)([0-9]+)(.*)"""), "[ino]"),
    )

    lines = message.split("\n")
    for line_idx, line in enumerate(lines):
        if not line.strip().startswith("Error: audit:"):
            continue
        for pattern, placeholder in redactions:
            m = pattern.match(line)
            if m:
                line = m.group(1) + placeholder + m.group(3)
        lines[line_idx] = line
    return "\n".join(lines)


# Update a known issue thread on GitHub
#
# The idea is to combine repeated errors into fewer comments by
# editing them and keeping all relevant information.
#
# For this we keep one comment per context (e.g. 'verify/fedora-24')
# and divide that into sections, one each per error description / trace.
# In each section, we keep the error description / trace as well as
# the number of recorded events, the first occurrence and the last 10
# occurrences.
# For each (listed) occurrence we display the timestamp and some details
# provided by the caller, such as a revision or link to log files.
# The details can't contain newline characters and should be brief
def update_known_issue(api, number, err, details, context, timestamp=None):
    """Record one more occurrence of *err* in the comments of issue *number*.

    api: client object; its ``patch`` and ``post`` methods are called here
        and its ``get`` method through issue_comments()
    number: number of the issue to update
    err: error description / trace
    details: brief single-line text shown next to the timestamp, or None
    context: the context the comment is kept for (e.g. 'verify/fedora-24')
    timestamp: text identifying the time of the occurrence; defaults to
        the current local time in ISO format

    The newest comment starting with *context* is edited: the section for
    *err* gets its counter incremented and the occurrence listed, or a new
    section is appended. If no such comment exists, or the edited body
    would reach 65536 characters, a new comment is posted instead.

    Returns the result of the api.patch() or api.post() call.
    """
    # NOTE(review): the separator literal and the line breaks of the
    # multi-line templates were garbled in the reviewed copy of this file.
    # "<hr>" and the layout below are a reconstruction -- confirm against
    # comments this tool has already posted.
    separator = "<hr>"
    latest_occurrences = "Latest occurrences:\n\n"

    timestamp = timestamp or datetime.datetime.now().isoformat()

    link = timestamp
    if details:
        link = "{0} | {1}".format(timestamp, details)

    comments = issue_comments(api, number)

    # try to find an existing comment to update
    comment_key = "{0}\n".format(context)
    err_key = "\n```\n{0}\n```".format(err.strip())
    redacted_err_key = redact_audit_variables(err_key)

    # A section as it looks when an error is recorded for the first time
    new_section = "{0}\nFirst occurrence: {1}\nTimes recorded: 1\n{2}- {1}\n".format(
        err_key, link, latest_occurrences)

    for comment in reversed(comments):
        if 'body' not in comment or not comment['body'].startswith(comment_key):
            continue

        parts = comment['body'].split(separator)
        updated = False
        for part_idx, part in enumerate(parts):
            if not redact_audit_variables(part).startswith(redacted_err_key):
                continue

            latest = part.split(latest_occurrences)
            if len(latest) < 2:
                sys.stderr.write("Error while parsing latest occurrences\n")
            else:
                # number of times this error was recorded
                header = latest[0].split("\n")
                for header_idx, entry in enumerate(header):
                    if entry.startswith("Times recorded: "):
                        rec_entries = entry.split(" ")
                        rec_entries[-1] = str(int(rec_entries[-1]) + 1)
                        header[header_idx] = " ".join(rec_entries)
                latest[0] = "\n".join(header)

                # list of recent occurrences; built as a real list so that
                # it can be appended to on Python 3 as well, where filter()
                # returns an iterator
                occurrences = [entry for entry in latest[1].split("\n") if entry]
                occurrences.append("- {0}\n".format(link))
                # only keep the last 10
                occurrences = occurrences[-10:]
                parts[part_idx] = "{0}{1}{2}".format(latest[0], latest_occurrences,
                                                     "\n".join(occurrences))
                updated = True
            # only the first section for this error is considered
            break

        if not updated:
            parts.append(new_section)

        # This comment is already too long
        body = separator.join(parts)
        if len(body) >= 65536:
            break

        # update comment, no need to check others
        return api.patch("issues/comments/{0}".format(comment['id']), {"body": body})

    # create a new comment, since we didn't find one to update
    data = {
        "body": "{0}\nOoops, it happened again{1}{2}".format(context, separator, new_section)
    }
    return api.post("issues/{0}/comments".format(number), data)


def issue_comments(api, number):
    """Return the comments of issue *number* as one list.

    Pages of 100 comments are requested through api.get() until a page
    comes back empty or with a different number of entries.
    """
    per_page = 100
    result = []
    page = 1
    while True:
        comments = api.get("issues/{0}/comments?page={1}&per_page={2}".format(number, page, per_page))
        if comments:
            result += comments
        if not comments or len(comments) != per_page:
            return result
        page += 1


def post_github(api, number, trace, image):
    """Record an occurrence of *trace* on the known issue *number*.

    Does nothing when *api* is missing or reports itself as not available.
    The occurrence is filed under the context ``verify/<image>``. When the
    TEST_REVISION environment variable is set, the revision is included in
    the details, together with a link to the logs if a status with this
    context exists for that revision.
    """
    # Ignore this if we were not given a token
    if not api or not api.available:
        return

    context = "verify/{0}".format(image)

    # Lookup the link being logged to
    link = None
    revision = os.environ.get("TEST_REVISION", None)
    if revision:
        link = "revision {0}".format(revision)
        statuses = api.get("commits/{0}/statuses".format(revision))
        for status in statuses or []:
            if status["context"] == context:
                link = "revision {0}, [logs]({1})".format(revision, status["target_url"])
                break
    update_known_issue(api, number, trace, link, context)


if __name__ == '__main__':
    sys.exit(main())