#!/usr/bin/env python
# -*- coding: utf-8 -*-

# This file is part of Cockpit.
#
# Copyright (C) 2013 Red Hat, Inc.
#
# Cockpit is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or
# (at your option) any later version.
#
# Cockpit is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Cockpit; If not, see <http://www.gnu.org/licenses/>.

# Optional per-user file listing additional image stores, one URL per line
CONFIG = "~/.config/image-stores"

# Stores that are always tried after any explicitly configured ones
DEFAULT = [
    "https://209.132.184.69:8493/",
    "https://209.132.184.41:8493/",
    "http://cockpit-images.verify.svc.cluster.local",
    "https://fedorapeople.org/groups/cockpit/images/"
]

# Days after which images expire if not in use
IMAGE_EXPIRE = 14

import argparse
import os
import shutil
import socket
import stat
import subprocess
import sys
import tempfile
import time
import urlparse

from contextlib import contextmanager

from task import github

BOTS = os.path.dirname(os.path.realpath(__file__))
IMAGES = os.path.join(BOTS, "..", "bots", "images")
DATA = os.path.join(os.environ.get("TEST_DATA", BOTS), "images")

# threshold in G below which unreferenced qcow2 images will be pruned, even if they aren't old
PRUNE_THRESHOLD_G = float(os.environ.get("PRUNE_THRESHOLD_G", 15))

DEVNULL = open("/dev/null", "r+")


def _probe_store(store, name, ca, quiet):
    """Look for image `name` in `store` with curl HEAD requests.

    Every IPv4 address the store host resolves to is tried, first pinned via
    --resolve against the "cockpit-tests" host name with our CA file, then
    with a plain request.

    Returns the curl command (a list) that succeeded, or None when the store
    does not resolve or no request succeeded.  Unless `quiet`, a " > url" or
    " x url" line is written to stderr describing the outcome.
    """
    url = urlparse.urlparse(store)
    defport = 80 if url.scheme == 'http' else 443

    # Reported when the host does not resolve to any address
    message = store
    try:
        ai = socket.getaddrinfo(url.hostname, url.port or defport,
                                socket.AF_INET, 0, socket.IPPROTO_TCP)
    except socket.gaierror:
        ai = []

    for (family, socktype, proto, canonname, sockaddr) in ai:
        resolve = "cockpit-tests:{1}:{0}".format(*sockaddr)
        source = urlparse.urljoin("{0}://cockpit-tests:{1}{2}".format(url.scheme, sockaddr[1], url.path), name)
        message = "{scheme}://{0}:{1}{path}".format(*sockaddr, scheme=url.scheme, path=url.path)

        candidates = [
            ["curl", "--head", "--silent", "--resolve", resolve, "--fail", "--cacert", ca, source],
            ["curl", "--head", "--silent", "--fail", source],
        ]
        for cmd in candidates:
            try:
                subprocess.check_call(cmd, stdout=DEVNULL)
            except subprocess.CalledProcessError:
                continue
            if not quiet:
                sys.stderr.write(" > {0}\n".format(urlparse.urljoin(message, name)))
            return cmd

    if not quiet:
        sys.stderr.write(" x {0}\n".format(message))
    return None


def download(link, force, quiet, stores):
    """Download the image that the symlink `link` refers to into DATA.

    link:   path of a symlink whose (possibly chained) target names the image
    force:  download even when the image file is already present
    quiet:  suppress progress output on stderr
    stores: list of store URLs to try; when empty or None the stores from
            CONFIG (if that file exists) followed by DEFAULT are used

    Raises RuntimeError when no store offers the image, and Exception when
    curl fails to download it.
    """
    if not os.path.exists(DATA):
        os.makedirs(DATA)

    # Follow the chain of links until one names a .qcow2 file
    dest = os.readlink(link)
    relative_dir = os.path.dirname(os.path.abspath(link))
    full_dest = os.path.join(relative_dir, dest)
    while ".qcow2" not in dest and os.path.islink(full_dest):
        link = full_dest
        dest = os.readlink(link)
        relative_dir = os.path.dirname(os.path.abspath(link))
        full_dest = os.path.join(relative_dir, dest)

    dest = os.path.join(DATA, dest)

    # we have the file but there is not valid link
    if os.path.exists(dest) and not os.path.exists(link):
        os.symlink(dest, os.path.join(IMAGES, os.readlink(link)))

    # The image file in the images directory, may be same as dest
    image_file = os.path.join(IMAGES, os.readlink(link))

    # file already exists, double check that symlink in place
    if not force and os.path.exists(dest):
        if not os.path.exists(image_file):
            os.symlink(os.path.abspath(dest), image_file)
        return

    if not stores:
        config = os.path.expanduser(CONFIG)
        stores = []
        if os.path.exists(config):
            with open(config, 'r') as fp:
                # Blank lines would otherwise be treated as a store named ""
                stores = [line.strip() for line in fp.read().splitlines() if line.strip()]
        stores = stores + DEFAULT

    name = os.path.basename(dest)
    ca = os.path.join(BOTS, "images", "files", "ca.pem")

    # Use the first store that answers the HEAD request
    cmd = None
    for store in stores:
        cmd = _probe_store(store, name, ca, quiet)
        if cmd is not None:
            break
    if cmd is None:
        raise RuntimeError("unable to find image {0} in any store".format(name))

    # Adjust the command above that worked to make it visible and download real stuff
    cmd.remove("--head")
    if not quiet:
        cmd.remove("--silent")
        cmd.insert(1, "--progress-bar")

    (fd, temp) = tempfile.mkstemp(suffix=".partial", prefix=os.path.basename(dest), dir=DATA)
    try:
        try:
            ret = subprocess.call(cmd, stdout=fd)
        finally:
            # mkstemp hands us an open descriptor; close it on failure too
            os.close(fd)
        if ret != 0:
            raise Exception("curl: unable to download image (returned: %s)" % ret)
        os.chmod(temp, stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
        shutil.move(temp, dest)
    finally:
        # if we had an error and the temp file is left over, delete it
        if os.path.exists(temp):
            os.unlink(temp)

    # Handle alternate TEST_DATA
    if not os.path.exists(image_file):
        os.symlink(os.path.abspath(dest), image_file)


def enough_disk_space():
    """Check if available disk space in our data store is sufficient

    Returns True when the space available on the file system holding DATA
    is at least PRUNE_THRESHOLD_G gigabytes.
    """
    st = os.statvfs(DATA)
    # float division: the threshold may be fractional
    free = st.f_bavail * st.f_frsize / float(1024 * 1024 * 1024)
    return free >= PRUNE_THRESHOLD_G


def get_refs(open_pull_requests=True, offline=False):
    """Return dictionary for available refs of the format {'rhel-7.4': 'ad50328990e44c22501bd5e454746d4b5e561b7c'}

       Expects to be called from the top level of the git checkout
       If offline is true, git show-ref is used instead of listing the remote

       Raises Exception when open pull requests are requested in offline mode.
    """
    # get all remote heads
    # output of ls-remote has the format
    #
    # d864d3792db442e3de3d1811fa4bc371793a8f4f	refs/heads/master
    # ad50328990e44c22501bd5e454746d4b5e561b7c	refs/heads/rhel-7.4

    refs = {}

    if open_pull_requests:
        if offline:
            raise Exception("Unable to consider open pull requests when in offline mode")
        for p in github.GitHub().pulls():
            refs["pull request #{} ({})".format(p["number"], p["title"])] = p["head"]["sha"]

    git_cmd = "show-ref" if offline else "ls-remote"
    ref_output = subprocess.check_output(["git", git_cmd]).splitlines()

    # filter out the "refs/heads/" prefix and generate a dictionary
    # (the trailing slash matters: without it names come out as "/master")
    prefix = "refs/heads/"
    for ln in ref_output:
        fields = ln.split()
        if len(fields) != 2:
            # empty or otherwise unexpected line
            continue
        ref, name = fields
        if name.startswith(prefix):
            refs[name[len(prefix):]] = ref

    return refs


def get_image_links(ref, git_path):
    """Return all image links for the given git ref

       Expects to be called from the top level of the git checkout

       The result is the content of every entry below git_path in ref
       (as printed by "git show") that ends in ".qcow2".  An empty list is
       returned when git ls-tree exits with status 128.
    """
    # get all the links we have first
    # trailing slash on path is important
    if not git_path.endswith("/"):
        git_path = "{0}/".format(git_path)

    try:
        entries = subprocess.check_output(["git", "ls-tree", "--name-only", ref, git_path]).splitlines()
    except subprocess.CalledProcessError as e:
        if e.returncode == 128:
            sys.stderr.write("Skipping {0} due to tree error.\n".format(ref))
            return []
        raise

    links = [subprocess.check_output(["git", "show", "{0}:{1}".format(ref, entry)])
             for entry in entries]
    return [link for link in links if link.endswith(".qcow2")]


@contextmanager
def remember_cwd():
    """Context manager that restores the current working directory on exit"""
    curdir = os.getcwd()
    try:
        yield
    finally:
        os.chdir(curdir)


def get_image_names(quiet=False, open_pull_requests=True, offline=False):
    """Return all image names used by all branches and optionally in open pull requests

       The result is a set of link targets as stored in git.  Unless quiet,
       each ref being inspected is announced on stderr.
    """
    images = set()

    # iterate over visible refs (mostly branches)
    # this hinges on being in the top level directory of the git checkout
    with remember_cwd():
        os.chdir(os.path.join(BOTS, ".."))
        refs = get_refs(open_pull_requests, offline)

        # list images present in each branch / pull request
        for name, ref in refs.items():
            if not quiet:
                sys.stderr.write("Considering images from {0} ({1})\n".format(name, ref))

            # DEPRECATED old branches may still have images in test/images
            for link in get_image_links(ref, "test/images"):
                images.add(link)
            for link in get_image_links(ref, "bots/images"):
                images.add(link)

    return images


def prune_images(force, dryrun, quiet=False, open_pull_requests=True, offline=False):
    """Prune images

       Removes image files from DATA that no branch, pull request or link
       in the current checkout refers to, then removes broken ".qcow2"
       links from IMAGES.

       force:  prune unreferenced images even if they are recent and there
               is enough disk space
       dryrun: only report what would be removed
    """
    now = time.time()

    # everything we want to keep, by file name: the refs give us bare link
    # targets, while the files in DATA are all in one flat directory
    keep = set(os.path.basename(link)
               for link in get_image_names(quiet, open_pull_requests, offline))

    # what we have in the current checkout might already have been added by its branch, but check anyway
    for filename in os.listdir(IMAGES):
        path = os.path.join(IMAGES, filename)

        # only consider original image entries as trustworthy sources and ignore non-links
        if path.endswith((".qcow2", ".partial")) or not os.path.islink(path):
            continue

        keep.add(os.path.basename(os.readlink(path)))

    expiry_threshold = now - IMAGE_EXPIRE * 86400
    for filename in os.listdir(DATA):
        path = os.path.join(DATA, filename)
        # disk space is checked for every file since pruning frees space
        if not force and (enough_disk_space() and os.lstat(path).st_mtime > expiry_threshold):
            continue
        if os.path.isfile(path) and path.endswith((".xz", ".qcow2", ".partial")) and filename not in keep:
            if not quiet or dryrun:
                sys.stderr.write("Pruning {0}\n".format(filename))
            if not dryrun:
                os.unlink(path)

    # now prune broken links
    for filename in os.listdir(IMAGES):
        path = os.path.join(IMAGES, filename)

        # don't prune original image entries and ignore non-links
        if not path.endswith(".qcow2") or not os.path.islink(path):
            continue

        # if the link isn't valid, prune
        if not os.path.isfile(path):
            if not quiet or dryrun:
                sys.stderr.write("Pruning link {0}\n".format(path))
            if not dryrun:
                os.unlink(path)


def every_image():
    """Return the names of all symlinks in the IMAGES directory"""
    return [filename for filename in os.listdir(IMAGES)
            if os.path.islink(os.path.join(IMAGES, filename))]


def download_images(image_list, force, quiet, store):
    """Download every image named in image_list

       Raises RuntimeError when a name has no symlink in IMAGES.
    """
    for image in image_list:
        link = os.path.join(IMAGES, image)
        if not os.path.islink(link):
            raise RuntimeError("image link does not exist: " + image)
        download(link, force, quiet, store)


def main():
    """Parse the command line, then prune and/or download images

       Returns the process exit code: 1 when a RuntimeError was reported,
       0 otherwise.
    """
    parser = argparse.ArgumentParser(description='Download a virtual machine')
    parser.add_argument("--force", action="store_true",
                        help="Force unnecessary downloads, delete images even if they aren't old")
    parser.add_argument("--store", action="append", help="Where to find images")
    parser.add_argument("--prune", action="store_true", help="Remove unused images and links")
    parser.add_argument("--quiet", action="store_true", help="Make downloading quieter")
    parser.add_argument("-d", "--dry-run-prune", dest="dryrun", action="store_true",
                        help="Don't actually delete images and links")
    parser.add_argument("-b", "--branches-only", dest="branches_only", action="store_true",
                        help="Don't consider pull requests on GitHub, only look at branches")
    parser.add_argument("-o", "--offline", dest="offline", action="store_true",
                        help="Don't access external sources such as GitHub")
    parser.add_argument('image', nargs='*')
    args = parser.parse_args()

    # By default download all links
    if not args.image and not args.prune:
        args.image = every_image()

    try:
        if args.prune:
            prune_images(args.force, args.dryrun, quiet=args.quiet,
                         open_pull_requests=(not args.branches_only), offline=args.offline)
        download_images(args.image, args.force, args.quiet, args.store)
    except RuntimeError as ex:
        sys.stderr.write("image-download: {0}\n".format(str(ex)))
        return 1

    return 0


if __name__ == '__main__':
    sys.exit(main())