#!/usr/bin/env python
# -*- coding: utf-8 -*-
# This file is part of Cockpit.
#
# Copyright (C) 2013 Red Hat, Inc.
#
# Cockpit is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or
# (at your option) any later version.
#
# Cockpit is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Cockpit; If not, see <http://www.gnu.org/licenses/>.
import argparse
import datetime
import fnmatch
import re
import os
import sys
sys.dont_write_bytecode = True
from task import github
# Directory containing this script; the "naughty" pattern tree is looked up beneath it.
BOTS = os.path.dirname(__file__)
def main():
    """Check a traceback read from stdin against the known issues of an image.

    Parses the command line (``--offline`` and the positional ``image``),
    reads the whole of stdin as the traceback and looks it up with
    check_known_issue(). When a known issue number is found, the occurrence
    is recorded via post_github() and the number is printed on stdout.

    Returns:
        0 on success (whether or not an issue matched), 1 when a
        RuntimeError was raised during the lookup.
    """
    parser = argparse.ArgumentParser(description='Check a traceback for a known issue')
    parser.add_argument('-o', "--offline", action='store_true',
                        help="Work offline, don't fetch new data from origin for rebase")
    parser.add_argument('image', help="The image to check against")
    opts = parser.parse_args()

    # No API object at all when offline; post_github() copes with None
    api = None if opts.offline else github.GitHub()

    trace = sys.stdin.read()
    number = 0
    try:
        if trace:
            number = check_known_issue(api, trace, opts.image)
    except RuntimeError as ex:
        # Report the exception itself (the original formatted an undefined name)
        sys.stderr.write("image-naughty: {0}\n".format(ex))
        return 1

    if number:
        post_github(api, number, trace, opts.image)
        sys.stdout.write("{0}\n".format(number))
    return 0
def normalize_traceback(trace):
    """Return *trace* stripped of surrounding whitespace, with every quoted
    ``File "dir/.../name"`` path reduced to ``File "name"``.
    """
    file_path_re = re.compile(r'File "[^"]*/([^/"]+)"')
    stripped = trace.strip()
    # Keep only the captured basename of each path
    return file_path_re.sub('File "\\1"', stripped)
def list_directories(dirs):
    """Return a flat list with the joined path of every entry that
    os.listdir() reports for each directory in *dirs*, in iteration order.
    """
    return [
        os.path.join(parent, entry)
        for parent in dirs
        for entry in os.listdir(parent)
    ]
def check_known_issue(api, trace, image):
    """Look up *trace* among the known-issue patterns stored for *image*.

    Pattern files live in ``<BOTS>/naughty/<image>/`` and are named
    ``<number>-<anything>``; entries whose name does not start with an
    integer prefix are skipped. Both the trace and each pattern are passed
    through normalize_traceback() and then compared as a case-sensitive glob.

    Args:
        api: not used by this function.
        trace: the traceback text to check.
        image: name of the image whose patterns should be consulted.

    Returns:
        The integer prefix of the last matching pattern file in listing
        order, or 0 when nothing matches or the directory does not exist.
    """
    directories = []
    image_naughty = os.path.join(BOTS, "naughty", image)
    if os.path.exists(image_naughty):
        directories.append(image_naughty)

    trace = normalize_traceback(trace)
    number = 0
    for naughty in list_directories(directories):
        prefix = os.path.basename(naughty).partition("-")[0]
        try:
            n = int(prefix)
        except ValueError:
            # Not a "<number>-..." file name, so not a pattern file
            continue
        with open(naughty, "r") as fp:
            match = "*" + normalize_traceback(fp.read()) + "*"
        # Match as in a file name glob, albeit multi line, and account for literal pastes with '[]'
        if fnmatch.fnmatchcase(trace, match) or fnmatch.fnmatchcase(trace, match.replace("[", "?")):
            number = n
    return number
def redact_audit_variables(message):
    """Replace the variable fields of audit events with fixed placeholders,
    so that the same error recorded at different times compares equal as a
    string.

    Only lines that start (after stripping whitespace) with "Error: audit:"
    are touched, for example:

      Error: audit: type=1400 audit(1458739098.632:268): avc: denied ... pid=1290 ... ino=4026532021 ...

    In such a line the value inside ``audit(...)`` becomes "[timestamp]",
    and the digits after the last ``pid=`` and the last ``ino=`` become
    "[pid]" and "[ino]". All other lines are returned unchanged.
    """
    # Applied in this order; each pattern has exactly three groups and the
    # middle one is what gets replaced.
    substitutions = (
        (re.compile(r"""(^\s*Error: audit:.+audit\()([0-9\.\:]+)(.*)"""), "[timestamp]"),
        (re.compile(r"""(.*pid=)([0-9]+)(.*)"""), "[pid]"),
        (re.compile(r"""(.*ino=)([0-9]+)(.*)"""), "[ino]"),
    )

    def redact_line(text):
        if not text.strip().startswith("Error: audit:"):
            return text
        for pattern, placeholder in substitutions:
            found = pattern.match(text)
            if found:
                text = found.group(1) + placeholder + found.group(3)
        return text

    return "\n".join(redact_line(text) for text in message.split("\n"))
# Update a known issue thread on GitHub
#
# The idea is to combine repeated errors into fewer comments by
# editing them and keeping all relevant information.
#
# For this we keep one comment per context (e.g. 'verify/fedora-24')
# and divide that into sections, one each per error description / trace.
# In each section, we keep the error description / trace as well as
# the number of recorded events, the first occurrence and the last 10
# occurrences.
# For each (listed) occurrence we display the timestamp and some details
# provided by the caller, such as a revision or link to log files.
# The details can't contain newline characters and should be brief
def update_known_issue(api, number, err, details, context, timestamp=None):
    """Record one more occurrence of *err* on the comment thread of issue *number*.

    The newest comment whose body starts with ``context`` followed by a
    newline is split into sections. If a section for this error exists
    (compared after redact_audit_variables()), its "Times recorded" counter
    is incremented and the occurrence is appended to its "Latest
    occurrences" list, which is trimmed to the last 10 entries. Otherwise a
    new section is appended. If the resulting comment body would reach
    65536 characters, or no comment for the context exists, a new comment
    is posted instead.

    Args:
        api: object providing ``get``, ``patch`` and ``post``.
        number: the issue number.
        err: error description / trace; surrounding whitespace is stripped.
        details: optional single-line text shown next to the timestamp.
        context: context name the comment is keyed on (e.g. 'verify/fedora-24').
        timestamp: occurrence time as a string; defaults to the current
            local time in ISO format.

    Returns:
        Whatever ``api.patch`` (comment updated) or ``api.post`` (comment
        created) returns.
    """
    # NOTE(review): the delimiter literal was not readable in the reviewed
    # source (the string spanned a line break, which does not compile).
    # Sections are multi-line, so a newline cannot be the delimiter; "<hr>"
    # is assumed here - confirm that existing comments use the same one.
    section_separator = "<hr>"

    timestamp = timestamp or datetime.datetime.now().isoformat()

    link = timestamp
    if details:
        link = "{0} | {1}".format(timestamp, details)

    comments = issue_comments(api, number)

    # try to find an existing comment to update
    comment_key = "{0}\n".format(context)
    err_key = "\n```\n{0}\n```".format(err.strip())
    redacted_err_key = redact_audit_variables(err_key)
    latest_occurrences = "Latest occurrences:\n\n"
    new_section = "{0}\nFirst occurrence: {1}\nTimes recorded: 1\n{2}- {1}\n".format(
        err_key, link, latest_occurrences)

    for comment in reversed(comments):
        if 'body' not in comment or not comment['body'].startswith(comment_key):
            continue

        parts = comment['body'].split(section_separator)
        for part_idx, part in enumerate(parts):
            if not redact_audit_variables(part).startswith(redacted_err_key):
                continue

            latest = part.split(latest_occurrences)
            if len(latest) < 2:
                # Malformed section: keep looking, a new section is added if nothing else matches
                sys.stderr.write("Error while parsing latest occurrences\n")
                continue

            # number of times this error was recorded
            header = latest[0].split("\n")
            for header_idx, entry in enumerate(header):
                if entry.startswith("Times recorded: "):
                    rec_entries = entry.split(" ")
                    rec_entries[-1] = str(int(rec_entries[-1]) + 1)
                    header[header_idx] = " ".join(rec_entries)
            latest[0] = "\n".join(header)

            # list of recent occurrences; a real list so append works on Python 3 too
            occurrences = [line for line in latest[1].split("\n") if line]
            occurrences.append("- {0}\n".format(link))
            # only keep the last 10
            del occurrences[:-10]

            parts[part_idx] = "{0}{1}{2}".format(latest[0], latest_occurrences, "\n".join(occurrences))
            break
        else:
            # No section for this error yet
            parts.append(new_section)

        # This comment is already too long
        body = section_separator.join(parts)
        if len(body) >= 65536:
            break

        # update comment, no need to check others
        return api.patch("issues/comments/{0}".format(comment['id']), {"body": body})

    # create a new comment, since we didn't find one to update
    data = {"body": "{0}\nOoops, it happened again{1}{2}".format(context, section_separator, new_section)}
    return api.post("issues/{0}/comments".format(number), data)
def issue_comments(api, number):
    """Return all comments of issue *number*, fetched page by page via ``api.get``.

    Pages of 100 are requested starting at page 1; fetching stops after the
    first page that is empty or holds a number of entries other than 100.
    """
    per_page = 100
    collected = []
    page = 1
    while True:
        batch = api.get("issues/{0}/comments?page={1}&per_page={2}".format(number, page, per_page))
        page += 1
        if not batch:
            break
        collected.extend(batch)
        if len(batch) != per_page:
            break
    return collected
def post_github(api, number, trace, image):
    """Record an occurrence of *trace* on known issue *number* for *image*.

    Does nothing when *api* is missing or its ``available`` attribute is
    false. The context is ``verify/<image>``. When the TEST_REVISION
    environment variable is set, the occurrence details name that revision
    and, if a commit status with the same context exists, link to its
    ``target_url``.
    """
    # Ignore this if we were not given a token
    if not (api and api.available):
        return

    context = "verify/{0}".format(image)

    # Lookup the link being logged to
    details = None
    revision = os.environ.get("TEST_REVISION")
    if revision:
        details = "revision {0}".format(revision)
        statuses = api.get("commits/{0}/statuses".format(revision))
        for status in statuses or []:
            if status["context"] != context:
                continue
            details = "revision {0}, [logs]({1})".format(revision, status["target_url"])
            break

    update_known_issue(api, number, trace, details, context)
# Script entry point: the return value of main() becomes the process exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)