#!/usr/bin/env python
# -*- coding: utf-8 -*-

# This file is part of Cockpit.
#
# Copyright (C) 2013 Red Hat, Inc.
#
# Cockpit is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or
# (at your option) any later version.
#
# Cockpit is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Cockpit; If not, see <http://www.gnu.org/licenses/>.

import argparse
import datetime
import fnmatch
import re
import os
import sys

# Do not write .pyc files; this is set before the project import below
sys.dont_write_bytecode = True

from task import github

# Directory containing this script; check_known_issue() looks for the
# per-image "naughty" pattern directories beneath it
BOTS = os.path.join(os.path.dirname(__file__))

def main():
    """Check a traceback read from stdin against the known issues of an image.

    The image name is taken from the command line. When the traceback matches
    a known issue, the occurrence is reported through post_github() and the
    issue number is printed on stdout. With --offline no GitHub API object is
    created, so nothing is posted.

    Returns:
        0 on success (whether or not an issue matched), 1 when the check
        raised a RuntimeError.
    """
    parser = argparse.ArgumentParser(description='Check a traceback for a known issue')
    parser.add_argument('-o', "--offline", action='store_true',
            help="Work offline, don't fetch new data from origin for rebase")
    parser.add_argument('image', help="The image to check against")
    opts = parser.parse_args()

    api = None if opts.offline else github.GitHub()
    trace = sys.stdin.read()
    number = 0

    try:
        # An empty trace can never match, so skip the lookup entirely
        if trace:
            number = check_known_issue(api, trace, opts.image)
    except RuntimeError as ex:
        # Report the exception that was actually caught
        sys.stderr.write("image-naughty: {0}\n".format(ex))
        return 1

    if number:
        post_github(api, number, trace, opts.image)
        sys.stdout.write("{0}\n".format(number))
    return 0

def normalize_traceback(trace):
    """Return trace without surrounding whitespace and with the quoted path
    of every 'File "..."' entry reduced to its last path component."""
    stripped = trace.strip()
    path_pattern = re.compile(r'File "[^"]*/([^/"]+)"')
    return path_pattern.sub('File "\\1"', stripped)

def list_directories(dirs):
    """Return the entries of every directory in dirs, each joined with the
    directory it was found in. Directories are visited in the given order."""
    return [
        os.path.join(parent, entry)
        for parent in dirs
        for entry in os.listdir(parent)
    ]

def check_known_issue(api, trace, image):
    """Look up the known issue whose recorded pattern matches trace.

    Pattern files are read from the "naughty/<image>" directory below BOTS.
    Only files named "<number>-<anything>" are considered; the leading number
    is the issue number. Both the trace and each pattern are normalized with
    normalize_traceback() before they are compared.

    Args:
        api: not used by the lookup itself; kept for the callers' signature.
        trace: the traceback text to check.
        image: name of the image, selecting the pattern directory.

    Returns:
        The issue number of the last matching pattern file, or 0 when none
        matches (or the image has no pattern directory).
    """
    directories = []
    image_naughty = os.path.join(BOTS, "naughty", image)
    if os.path.exists(image_naughty):
        directories.append(image_naughty)
    trace = normalize_traceback(trace)
    number = 0
    for naughty in list_directories(directories):
        prefix = os.path.basename(naughty).partition("-")[0]
        try:
            n = int(prefix)
        except ValueError:
            # File name does not start with an issue number: not a pattern
            continue
        with open(naughty, "r") as fp:
            match = "*" + normalize_traceback(fp.read()) + "*"
        # Match as in a file name glob, albeit multi line, and account for literal pastes with '[]'
        if fnmatch.fnmatchcase(trace, match) or fnmatch.fnmatchcase(trace, match.replace("[", "?")):
            number = n
    return number

def redact_audit_variables(message):
    """Mask the volatile fields of audit events so that the same error
    recorded at different times compares equal as a string.

    Only lines that start (ignoring leading whitespace) with "Error: audit:"
    are touched, for example:

        Error: audit: type=1400 audit(1458739098.632:268): avc:  denied ...
            pid=1290 comm="ssh-transport-c" ... ino=4026532021 ...

    In such a line the value inside "audit(...)" is replaced by "[timestamp]",
    the digits after the last "pid=" by "[pid]" and the digits after the last
    "ino=" by "[ino]". All other lines are returned unchanged.
    """
    # Applied in this order; group 2 of each pattern is the value to mask
    substitutions = (
        (re.compile(r"""(^\s*Error: audit:.+audit\()([0-9\.\:]+)(.*)"""), "[timestamp]"),
        (re.compile(r"""(.*pid=)([0-9]+)(.*)"""), "[pid]"),
        (re.compile(r"""(.*ino=)([0-9]+)(.*)"""), "[ino]"),
    )

    def redact(line):
        if not line.strip().startswith("Error: audit:"):
            return line
        for pattern, placeholder in substitutions:
            found = pattern.match(line)
            if found:
                line = found.group(1) + placeholder + found.group(3)
        return line

    return "\n".join(redact(line) for line in message.split("\n"))

# Update a known issue thread on GitHub
#
# The idea is to combine repeated errors into fewer comments by
# editing them and keeping all relevant information.
#
# For this we keep one comment per context (e.g. 'verify/fedora-24')
# and divide that into sections, one each per error description / trace.
# In each section, we keep the error description / trace as well as
# the number of recorded events, the first occurrence and the last 10
# occurrences.
# For each (listed) occurrence we display the timestamp and some details
# provided by the caller, such as a revision or link to log files.
# The details can't contain newline characters and should be brief
def update_known_issue(api, number, err, details, context, timestamp=None):
    """Record one more occurrence of an error in a known issue's thread.

    The newest comment whose body starts with the context line is searched
    for a "<hr>"-separated section that starts with this error (compared
    after redact_audit_variables()). If such a section exists, its
    "Times recorded" counter is incremented and the occurrence is appended to
    its "Latest occurrences" list, which is capped at 10 entries. Otherwise a
    new section is appended to that comment. If no comment for the context
    exists, or the updated body would reach 65536 characters, a new comment
    is posted instead.

    Args:
        api: object offering get/patch/post for GitHub API resources.
        number: number of the issue to update.
        err: error description / trace; surrounding whitespace is stripped.
        details: brief single-line text shown next to the timestamp, or a
            false value for none.
        context: context the occurrence belongs to, e.g. 'verify/fedora-24'.
        timestamp: text identifying the time of the occurrence; defaults to
            the current local time in ISO format.

    Returns:
        Whatever api.patch() or api.post() returns for the request made.
    """
    timestamp = timestamp or datetime.datetime.now().isoformat()

    link = timestamp
    if details:
        link = "{0} | {1}".format(timestamp, details)

    comments = issue_comments(api, number)

    # try to find an existing comment to update
    comment_key = "{0}\n".format(context)
    err_key = """
```
{0}
```""".format(err.strip())
    redacted_err_key = redact_audit_variables(err_key)
    latest_occurrences = "Latest occurrences:\n\n"
    for comment in reversed(comments):
        if 'body' in comment and comment['body'].startswith(comment_key):
            parts = comment['body'].split("<hr>")
            updated = False
            for part_idx, part in enumerate(parts):
                if redact_audit_variables(part).startswith(redacted_err_key):
                    latest = part.split(latest_occurrences)
                    if len(latest) < 2:
                        sys.stderr.write("Error while parsing latest occurrences\n")
                    else:
                        # number of times this error was recorded
                        header = latest[0].split("\n")
                        for header_idx, entry in enumerate(header):
                            if entry.startswith("Times recorded: "):
                                rec_entries = entry.split(" ")
                                rec_entries[-1] = str(int(rec_entries[-1]) + 1)
                                header[header_idx] = " ".join(rec_entries)
                        latest[0] = "\n".join(header)
                        # list of recent occurrences; built as a real list
                        # because it is appended to and popped from below
                        occurrences = [entry for entry in latest[1].split("\n") if entry]
                        occurrences.append("- {0}\n".format(link))
                        # only keep the last 10
                        if len(occurrences) > 10:
                            occurrences.pop(0)
                        parts[part_idx] = "{0}{1}{2}".format(latest[0], latest_occurrences, "\n".join(occurrences))
                        updated = True
                    break
            if not updated:
                parts.append("""{0}
First occurrence: {1}
Times recorded: 1
{2}- {1}
""".format(err_key, link, latest_occurrences))
                updated = True

            # This comment is already too long
            body = "<hr>".join(parts)
            if len(body) >= 65536:
                break

            # update comment, no need to check others
            return api.patch("issues/comments/{0}".format(comment['id']), { "body": body })

    # create a new comment, since we didn't find one to update

    data = { "body": """{0}\nOoops, it happened again<hr>{1}
First occurrence: {2}
Times recorded: 1
{3}- {2}
""".format(context, err_key, link, latest_occurrences) }
    return api.post("issues/{0}/comments".format(number), data)

def issue_comments(api, number):
    """Return all comments of issue `number`, fetched page by page via api.get().

    Pages of 100 comments are requested until a page comes back empty or
    with fewer than 100 entries.
    """
    per_page = 100
    collected = []
    page = 1
    while True:
        batch = api.get("issues/{0}/comments?page={1}&per_page={2}".format(number, page, per_page))
        page += 1
        if batch:
            collected += batch
        # A missing or short page means there is nothing further to fetch
        if not batch or len(batch) != per_page:
            break
    return collected


def post_github(api, number, trace, image):
    """Report an occurrence of known issue `number` for `image` on GitHub.

    Does nothing when there is no api object or it is not available (the
    original note: "we were not given a token"). The occurrence is filed
    under the context "verify/<image>". If the TEST_REVISION environment
    variable is set, the revision is included in the details, together with a
    link to the logs when a commit status for the same context is found.
    """
    if not (api and api.available):
        return

    context = "verify/{0}".format(image)
    revision = os.environ.get("TEST_REVISION", None)

    # Work out the details shown next to the occurrence
    details = None
    if revision:
        details = "revision {0}".format(revision)
        for status in api.get("commits/{0}/statuses".format(revision)) or ():
            if status["context"] == context:
                details = "revision {0}, [logs]({1})".format(revision, status["target_url"])
                break

    update_known_issue(api, number, trace, details, context)

# When run as a script, use main()'s return value as the process exit status
if __name__ == '__main__':
    sys.exit(main())