#!/usr/bin/env python
# -*- coding: utf-8 -*-

# This file is part of Cockpit.
#
# Copyright (C) 2013 Red Hat, Inc.
#
# Cockpit is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or
# (at your option) any later version.
#
# Cockpit is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Cockpit; If not, see <http://www.gnu.org/licenses/>.

# Optional per-user file listing additional image stores, one URL per line.
# Stores read from this file are tried before the built-in DEFAULT ones.
CONFIG = "~/.config/image-stores"
# Built-in image stores, appended after any stores read from CONFIG and
# probed in the order listed here.
DEFAULT = [
    "https://209.132.184.69:8493/",
    "https://209.132.184.41:8493/",
    "http://cockpit-images.verify.svc.cluster.local",
    "https://fedorapeople.org/groups/cockpit/images/"
]

# Days after which images expire if not in use; prune_images() compares this
# age against each file's modification time
IMAGE_EXPIRE = 14

import argparse
import os
import shutil
import socket
import stat
import subprocess
import sys
import tempfile
import time
import urlparse

from contextlib import contextmanager

from task import github

# Directory containing this script (symlinks in the script path are resolved)
BOTS = os.path.dirname(os.path.realpath(__file__))
# Directory that is scanned for image symlinks
IMAGES = os.path.join(BOTS, "..", "bots", "images")
# Directory where downloaded image files are stored: "images" below $TEST_DATA
# when that variable is set, otherwise below BOTS
DATA = os.path.join(os.environ.get("TEST_DATA", BOTS), "images")

# Free space (in units of 1024^3 bytes) on the DATA filesystem below which
# unreferenced images will be pruned, even if they aren't old; can be
# overridden through the PRUNE_THRESHOLD_G environment variable
PRUNE_THRESHOLD_G = float(os.environ.get("PRUNE_THRESHOLD_G", 15))
# Read-write handle on /dev/null, used to discard the output of curl probes
DEVNULL = open("/dev/null", "r+")

def _configured_stores():
    """Return the stores listed in the CONFIG file (if it exists) followed by DEFAULT."""
    stores = []
    config = os.path.expanduser(CONFIG)
    if os.path.exists(config):
        with open(config, 'r') as fp:
            # Skip blank lines so that an empty string is never probed as a store
            stores = [line.strip() for line in fp if line.strip()]
    return stores + DEFAULT


def _probe_store(store, name, ca):
    """Look for the file `name` in `store` using "curl --head".

    Every IPv4 address the store's host resolves to is tried, first pinned
    via --resolve and verified against the `ca` certificate, then with a
    plain request.

    Returns a tuple (cmd, message): cmd is the curl argument list that
    succeeded, or None if the file was not found; message describes the
    location that was tried last and is meant for progress output.
    """
    url = urlparse.urlparse(store)
    defport = 80 if url.scheme == 'http' else 443
    message = store

    try:
        ai = socket.getaddrinfo(url.hostname, url.port or defport, socket.AF_INET, 0, socket.IPPROTO_TCP)
    except socket.gaierror:
        ai = []

    for (family, socktype, proto, canonname, sockaddr) in ai:
        resolve = "cockpit-tests:{1}:{0}".format(*sockaddr)
        source = urlparse.urljoin("{0}://cockpit-tests:{1}{2}".format(url.scheme, sockaddr[1], url.path), name)
        message = "{scheme}://{0}:{1}{path}".format(*sockaddr, scheme=url.scheme, path=url.path)

        candidates = [
            ["curl", "--head", "--silent", "--resolve", resolve, "--fail", "--cacert", ca, source],
            ["curl", "--head", "--silent", "--fail", source],
        ]
        for cmd in candidates:
            if subprocess.call(cmd, stdout=DEVNULL) == 0:
                return (cmd, message)

    return (None, message)


def _link_image(dest, image_file):
    """Make sure `image_file` is a usable path to the downloaded file `dest`."""
    if os.path.exists(image_file):
        return
    # A dangling symlink (for example one pointing into a previous TEST_DATA)
    # has to be removed first, otherwise os.symlink() fails with EEXIST
    if os.path.islink(image_file):
        os.unlink(image_file)
    os.symlink(os.path.abspath(dest), image_file)


def download(link, force, quiet, stores):
    """Download the image that the symlink `link` refers to into DATA.

    link:   path of an image symlink; chains of symlinks are followed until
            a target containing ".qcow2" is reached
    force:  download even if the image file is already present in DATA
    quiet:  suppress progress output on stderr
    stores: list of store URLs to search; when empty or None the stores from
            the CONFIG file followed by DEFAULT are used

    After a successful download a symlink to the file is created in IMAGES
    unless a valid entry of that name already exists there.

    Raises RuntimeError if no store provides the image or if the download
    itself fails.
    """
    if not os.path.exists(DATA):
        os.makedirs(DATA)

    # Follow the chain of links until we arrive at the actual image name
    dest = os.readlink(link)
    full_dest = os.path.join(os.path.dirname(os.path.abspath(link)), dest)
    while ".qcow2" not in dest and os.path.islink(full_dest):
        link = full_dest
        dest = os.readlink(link)
        full_dest = os.path.join(os.path.dirname(os.path.abspath(link)), dest)

    # The image file in the images directory, may be same as dest
    image_file = os.path.join(IMAGES, dest)
    dest = os.path.join(DATA, dest)

    # We already have the file: make sure the link is in place, and only
    # carry on with a download when it was explicitly forced
    if os.path.exists(dest):
        _link_image(dest, image_file)
        if not force:
            return

    if not stores:
        stores = _configured_stores()

    name = os.path.basename(dest)
    ca = os.path.join(BOTS, "images", "files", "ca.pem")
    for store in stores:
        (cmd, message) = _probe_store(store, name, ca)
        if cmd is None:
            if not quiet:
                sys.stderr.write(" x {0}\n".format(message))
            continue
        if not quiet:
            sys.stderr.write(" > {0}\n".format(urlparse.urljoin(message, name)))
        break
    else:
        # No store had the image; without this there is no working command to run
        raise RuntimeError("unable to find {0} in any image store".format(name))

    (fd, temp) = tempfile.mkstemp(suffix=".partial", prefix=os.path.basename(dest), dir=DATA)

    # Adjust the command above that worked to make it visible and download real stuff
    cmd.remove("--head")
    if not quiet:
        cmd.remove("--silent")
        cmd.insert(1, "--progress-bar")

    try:
        try:
            ret = subprocess.call(cmd, stdout=fd)
        finally:
            # Close the descriptor whether or not curl succeeded
            os.close(fd)
        if ret != 0:
            raise RuntimeError("curl: unable to download image (returned: %s)" % ret)

        # Downloaded images are read-only
        os.chmod(temp, stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
        shutil.move(temp, dest)
    finally:
        # if we had an error and the temp file is left over, delete it
        if os.path.exists(temp):
            os.unlink(temp)

    # Handle alternate TEST_DATA
    _link_image(dest, image_file)

def enough_disk_space():
    """Check if available disk space in our data store is sufficient

    Returns True when the space available to unprivileged users on the
    filesystem holding DATA is at least PRUNE_THRESHOLD_G (in units of
    1024^3 bytes).
    """
    st = os.statvfs(DATA)
    # Divide by a float: under Python 2 an integer division would truncate
    # the result and misjudge fractional thresholds
    free = st.f_bavail * st.f_frsize / float(1024 ** 3)
    return free >= PRUNE_THRESHOLD_G

def get_refs(open_pull_requests=True, offline=False):
    """Return dictionary for available refs of the format {'rhel-7.4': 'ad50328990e44c22501bd5e454746d4b5e561b7c'}

       Expects to be called from the top level of the git checkout
       If offline is true, git show-ref is used instead of listing the remote

       If open_pull_requests is true, the head commit of every pull request
       returned by GitHub is included as well, keyed by a description of the
       pull request. This cannot be combined with offline and raises an
       Exception in that case.
    """
    refs = { }

    if open_pull_requests:
        if offline:
            raise Exception("Unable to consider open pull requests when in offline mode")
        for p in github.GitHub().pulls():
            refs["pull request #{} ({})".format(p["number"], p["title"])] = p["head"]["sha"]

    # get all heads; the output of ls-remote and show-ref has the format
    #
    # d864d3792db442e3de3d1811fa4bc371793a8f4f	refs/heads/master
    # ad50328990e44c22501bd5e454746d4b5e561b7c	refs/heads/rhel-7.4
    git_cmd = "show-ref" if offline else "ls-remote"
    ref_output = subprocess.check_output(["git", git_cmd]).splitlines()

    # filter out the "refs/heads/" prefix and generate a dictionary; the
    # trailing slash is part of the prefix so that keys are plain branch names
    prefix = "refs/heads/"
    for ln in ref_output:
        fields = ln.split()
        # skip empty lines and anything that isn't a "<sha> <ref>" pair
        if len(fields) != 2:
            continue
        (sha, name) = fields
        if name.startswith(prefix):
            refs[name[len(prefix):]] = sha

    return refs

def get_image_links(ref, git_path):
    """Return all image links for the given git ref

       Expects to be called from the top level of the git checkout

       ref:      commit or ref to inspect
       git_path: directory inside the tree whose entries are examined

       Returns the content of every entry that ends in ".qcow2" (for a
       symlink, "git show" prints the link target). Returns an empty list if
       "git ls-tree" fails with exit code 128; other git failures are raised
       as subprocess.CalledProcessError.
    """
    # get all the links we have first
    # trailing slash on path is important
    if not git_path.endswith("/"):
        git_path = "{0}/".format(git_path)

    try:
        entries = subprocess.check_output(["git", "ls-tree", "--name-only", ref, git_path]).splitlines()
    except subprocess.CalledProcessError as e:
        if e.returncode == 128:
            sys.stderr.write("Skipping {0} due to tree error.\n".format(ref))
            return []
        raise

    links = [subprocess.check_output(["git", "show", "{0}:{1}".format(ref, entry)]) for entry in entries]
    return [link for link in links if link.endswith(".qcow2")]

@contextmanager
def remember_cwd():
    """Context manager that restores the working directory on exit.

    The directory that is current when the block is entered becomes current
    again when the block is left, whether normally or through an exception.
    """
    saved_dir = os.getcwd()
    try:
        yield
    finally:
        # Always go back, even if the body raised
        os.chdir(saved_dir)

def get_image_names(quiet=False, open_pull_requests=True, offline=False):
    """Collect the image names referenced by every ref that get_refs() reports

    quiet suppresses the per-ref progress line on stderr; open_pull_requests
    and offline are passed on to get_refs(). Returns a set of link targets.
    """
    found = set()
    # Both get_refs() and get_image_links() must run from the top level
    # directory of the git checkout; the previous directory is restored after
    with remember_cwd():
        os.chdir(os.path.join(BOTS, ".."))
        for name, ref in get_refs(open_pull_requests, offline).items():
            if not quiet:
                sys.stderr.write("Considering images from {0} ({1})\n".format(name, ref))
            # DEPRECATED old branches may still have images in test/images,
            # so that location is looked at in addition to bots/images
            for git_path in ("test/images", "bots/images"):
                found.update(get_image_links(ref, git_path))

    return found

def prune_images(force, dryrun, quiet=False, open_pull_requests=True, offline=False):
    """Prune images

    Deletes image files (.xz, .qcow2, .partial) from DATA that are not
    referenced by any ref reported by get_image_names() nor by a link in
    IMAGES, and afterwards removes broken .qcow2 symlinks from IMAGES.

    force:  prune unreferenced files even if they are recent and there is
            enough disk space
    dryrun: only report what would be removed
    quiet:  suppress progress output (removals are still reported on a dry run)
    open_pull_requests, offline: passed on to get_image_names()
    """
    now = time.time()

    # everything we want to keep
    targets = get_image_names(quiet, open_pull_requests, offline)

    # what we have in the current checkout might already have been added by its branch, but check anyway
    for filename in os.listdir(IMAGES):
        path = os.path.join(IMAGES, filename)

        # only consider original image entries as trustworthy sources and ignore non-links
        if path.endswith((".qcow2", ".partial")) or not os.path.islink(path):
            continue

        target = os.readlink(path)
        if os.path.isabs(target):
            targets.add(target)
        else:
            targets.add(os.path.join(IMAGES, target))
            targets.add(os.path.join(DATA, target))

    expiry_threshold = now - IMAGE_EXPIRE * 86400
    for filename in os.listdir(DATA):
        path = os.path.join(DATA, filename)
        # unless forced, recent files are kept as long as there is enough space
        if not force and (enough_disk_space() and os.lstat(path).st_mtime > expiry_threshold):
            continue
        # get_image_names() yields the link targets as stored in git, which
        # are bare file names, so compare the file name as well as the path
        referenced = path in targets or filename in targets
        if os.path.isfile(path) and path.endswith((".xz", ".qcow2", ".partial")) and not referenced:
            if not quiet or dryrun:
                sys.stderr.write("Pruning {0}\n".format(filename))
            if not dryrun:
                os.unlink(path)

    # now prune broken links
    for filename in os.listdir(IMAGES):
        path = os.path.join(IMAGES, filename)

        # don't prune original image entries and ignore non-links
        if not path.endswith(".qcow2") or not os.path.islink(path):
            continue

        # if the link isn't valid, prune
        if not os.path.isfile(path):
            if not quiet or dryrun:
                sys.stderr.write("Pruning link {0}\n".format(path))
            if not dryrun:
                os.unlink(path)

def every_image():
    """Return the names of all entries in IMAGES that are symlinks."""
    return [entry for entry in os.listdir(IMAGES)
            if os.path.islink(os.path.join(IMAGES, entry))]

def download_images(image_list, force, quiet, store):
    """Download each image named in image_list, in order

    Every name must be a symlink in IMAGES; a RuntimeError is raised at the
    first name that is not. force, quiet and store are handed to download().
    """
    for image_name in image_list:
        image_link = os.path.join(IMAGES, image_name)
        if os.path.islink(image_link):
            download(image_link, force, quiet, store)
        else:
            raise RuntimeError("image link does not exist: " + image_name)

def main():
    """Parse the command line, then prune and/or download images

    Without image arguments and without --prune, every image link in IMAGES
    is downloaded. Returns 0 on success, or 1 after reporting a RuntimeError
    on stderr.
    """
    parser = argparse.ArgumentParser(description='Download a virtual machine')
    parser.add_argument("--force", action="store_true", help="Force unnecessary downloads, delete images even if they aren't old")
    parser.add_argument("--store", action="append", help="Where to find images")
    parser.add_argument("--prune", action="store_true", help="Remove unused images and links")
    parser.add_argument("--quiet", action="store_true", help="Make downloading quieter")
    parser.add_argument("-d", "--dry-run-prune", dest="dryrun", action="store_true", help="Don't actually delete images and links")
    parser.add_argument("-b", "--branches-only", dest="branches_only", action="store_true", help="Don't consider pull requests on GitHub, only look at branches")
    parser.add_argument("-o", "--offline", dest="offline", action="store_true", help="Don't access external sources such as GitHub")
    parser.add_argument('image', nargs='*')
    args = parser.parse_args()

    # By default download all links
    if not args.image and not args.prune:
        args.image = every_image()

    try:
        if args.prune:
            prune_images(args.force, args.dryrun, quiet=args.quiet, open_pull_requests=(not args.branches_only), offline=args.offline)
        download_images(args.image, args.force, args.quiet, args.store)
    # "as" is valid on Python 2.6+ as well as Python 3, unlike the comma form
    except RuntimeError as ex:
        sys.stderr.write("image-download: {0}\n".format(str(ex)))
        return 1

    return 0

# When run as a script, use the return value of main() as the exit status
if __name__ == '__main__':
    sys.exit(main())