From 052bf7a7dee421830b09f28a570fc2472e30571c Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Tue, 2 Jul 2019 17:59:54 +0300 Subject: Change: vcsworker.py now provides an HTTP API (also controller) --- vcsworker.py | 321 +++++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 266 insertions(+), 55 deletions(-) diff --git a/vcsworker.py b/vcsworker.py index 2cac2cd..0c7ff9e 100755 --- a/vcsworker.py +++ b/vcsworker.py @@ -1,18 +1,246 @@ #!/usr/bin/env python3 +# +# This implements the various APIs for the various CI components we +# need. Very simplistic and prototype-y. All in one file for +# simplicity - production version will need to be done more carefully. +# +# To use: run this program with one of the following command lines: +# +# ./this controller this.log key.pub +# ./this vcsworker this.log key.pub gitlab.token +# +# where +# +# - this.log is the log file to use +# - key.pub is a public key in ssh format for checking incoming access tokens +# - gitlab.token is the name of a file containing a GitLab access token +# +# NOTE: The GitLab instance and other configuration details are +# hardcoded in this version. This will be fixed, as well as the 418 +# HTTP status code as a generic error code. +# +# NOTE: You should run this behind haproxy or similar TLS provider, +# which forwards requests to localhost. + import logging import os import shutil import subprocess import sys +import tempfile import urllib.parse +import Crypto.PublicKey.RSA +import bottle +import jwt + + +HOST = 'localhost' +PORT = 2222 + + +class TokenParser: + + '''Parse an incoming access token (signed JWT)''' + + # Note that if we need to, for performance, we can cache the parse + # results here. But there's no point in doing that unless it + # becomes necessary.
+ + def __init__(self, pubkey): + self._pubkey = pubkey + + def parse_token(self, token_text): + return jwt.decode( + token_text, + key=self._pubkey.exportKey('OpenSSH'), + audience=None, + options={'verify_aud': False}) + + +class AccessChecker: + + '''Given request headers and required scopes, is a request allowed?''' + + def __init__(self, pubkey): + self._parser = TokenParser(pubkey) + + def access_is_allowed(self, headers, required_scopes): + token = self._get_token(headers) + logging.debug('Access token %r', token) + + if token is None and len(required_scopes) != 0: + logging.error('No valid access token') + return False + + if token: + scopes = token.get('scope', '').split() + missing = set(required_scopes).difference(scopes) + if missing: + logging.error( + 'Required scopes that are missing from token: %r', missing) + return False + + return True + + def _get_token(self, headers): + '''Parse an access token or return None if it's bad''' + token_text = self._get_token_text(headers) + if token_text is None: + return None + return self._parser.parse_token(token_text) + + def _get_token_text(self, headers): + '''Extract access token from request headers or None if not there''' + v = headers.get('Authorization', '') + words = v.split() + if len(words) == 2: + keyword, token_text = words + if keyword.lower() == 'bearer': + return token_text + + +class API: + + '''Base class for simple HTTP APIs + + Override the get_routes method to use this. Call setup() method to + set up routes and such, before actually running. 
+ + ''' + + def __init__(self): + self._checker = None + + def setup(self, app, token_pubkey): + self._checker = AccessChecker(token_pubkey) + self._add_routes(app, self.get_routes()) + + def get_routes(self): + raise NotImplementedError() -MAX_CLONE_TIME = 1 -MAX_REMOVE_TIME = 60 -MAX_PUSH_TIME = 60 -GITLAB_DOMAIN = 'wmf-gitlab3.vm.liw.fi' -GITLAB_PROJECT = 'liw' + def _add_routes(self, app, routes): + for route in routes: + func = route.pop('func') + scopes = route.pop('scopes') + assert isinstance(scopes, list) + callback = lambda **kwargs: self.check(func, scopes, kwargs) + route = dict(route) + route['callback'] = callback + app.route(**route) + + def check(self, func, required_scopes, kwargs): + '''Call a callback function, if it's OK to do so''' + r = bottle.request + logging.debug('Checking access for request %s %s', r.method, r.path) + + if self._checker.access_is_allowed(bottle.request.headers, required_scopes): + logging.info('Serving request %s %s', r.method, r.path) + ret = func(**kwargs) + logging.info('Result: %r', ret) + return ret + + logging.error('Request denied %s %s', r.method, r.path) + return bottle.HTTPError(400) + + +class Controller(API): + + '''A dummy controller API''' + + def get_routes(self): + return [ + { + 'method': 'GET', + 'path': '/status', + 'func': self._status, + 'scopes': ['status'], + }, + { + 'method': 'GET', + 'path': '/hello/', + 'func': self._hello, + 'scopes': ['hello'], + }, + ] + + def _status(self): + return { + 'queue': [], + 'running': [], + 'finished': [], + } + + def _hello(self, name=None): + return 'hello {}\n'.format(name) + + +class VCSWorker(API): + + '''A VCSWorker API''' + + MAX_CLONE_TIME = 1 + MAX_REMOVE_TIME = 60 + MAX_PUSH_TIME = 60 + GITLAB_DOMAIN = 'wmf-gitlab3.vm.liw.fi' + GITLAB_PROJECT = 'liw' + + def __init__(self, gitlab_token): + self._token = gitlab_token + self._tmpdir = tempfile.mkdtemp() + logging.info('Workspace: %s', self._tmpdir) + + def get_routes(self): + return [ + { + 'method': 
'POST', + 'path': '/updaterepo', + 'func': self._update_repo, + 'scopes': ['update-repo'], + }, + ] + + def _update_repo(self): + spec = bottle.request.json + url = spec['git'] + ref = spec['ref'] + name = spec['gitlab'] + + dirname = os.path.join(self._tmpdir, name) + + if self._clone(url, ref, dirname): + ok = self._remove( + self._token, self.GITLAB_DOMAIN, self.GITLAB_PROJECT, name) + if ok: + ok = self._push( + dirname, self.GITLAB_DOMAIN, self.GITLAB_PROJECT, ref, + name) + if ok: + logging.info('Repository copied successfully') + return 'Repository copied successfully' + + logging.error('Something went wrong when copying repository') + return bottle.HTTPError(418) + + def _clone(self, url, ref, dirname): + if os.path.exists(dirname): + logging.debug('Removing %s', dirname) + shutil.rmtree(dirname) + argv = ['git', 'clone', '-q', '-b', ref, url, dirname] + return runcmd('.', argv, self.MAX_CLONE_TIME) + + def _remove(self, token, gitlab_domain, gitlab_project, name): + snippet = urllib.parse.quote('%s/%s' % (gitlab_project, name), safe='') + url = 'https://%s/api/v4/projects/%s' % (gitlab_domain, snippet) + argv = ['curl', '-HPRIVATE-TOKEN: %s' % token, '-X', 'DELETE', url] + return runcmd('.', argv, self.MAX_REMOVE_TIME) + + def _push(self, dirname, gitlab_domain, gitlab_project, ref, name): + logging.info('Pushing %s to %s as %s', dirname, gitlab_domain, name) + url = 'ssh://git@%s/%s/%s.git' % (gitlab_domain, gitlab_project, name) + argv = ['git', 'push', url, '%s:master' % ref] + return runcmd(dirname, argv, self.MAX_PUSH_TIME) def runcmd(cwd, argv, timeout): @@ -39,63 +267,46 @@ def runcmd(cwd, argv, timeout): return True -def clone(url, ref, dirname): - if os.path.exists(dirname): - logging.debug('Removing %s', dirname) - shutil.rmtree(dirname) - - argv = ['git', 'clone', '-q', '-b', ref, url, dirname] - return runcmd('.', argv, MAX_CLONE_TIME) +def setup_logging(log_filename): + logging.basicConfig( + filename=log_filename, + level=logging.DEBUG, + 
format='%(asctime)s %(levelname)s %(message)s') + logging.info('API server starting') -def remove(token, gitlab_domain, gitlab_project, name): - snippet = urllib.parse.quote('%s/%s' % (gitlab_project, name), safe='') - url = 'https://%s/api/v4/projects/%s' % (gitlab_domain, snippet) - - argv = ['curl', '-HPRIVATE-TOKEN: %s' % token, '-X', 'DELETE', url] - return runcmd('.', argv, MAX_REMOVE_TIME) - +def get_key_from_file(filename): + with open(filename) as f: + key_text = f.read() + return Crypto.PublicKey.RSA.importKey(key_text) -def push(dirname, gitlab_domain, gitlab_project, ref, name): - logging.info('Pushing %s to %s as %s', dirname, gitlab_domain, name) - url = 'ssh://git@%s/%s/%s.git' % (gitlab_domain, gitlab_project, name) - argv = ['git', 'push', url, '%s:master' % ref] - return runcmd(dirname, argv, MAX_PUSH_TIME) +def get_token_from_file(filename): + with open(filename) as f: + return f.read().strip() def main(): - token = sys.argv[1] + args = sys.argv[1:] + cmd = args.pop(0) + log_filename = args.pop(0) + pubkey_filename = args.pop(0) + + setup_logging(log_filename) + + if cmd == 'controller': + api = Controller() + elif cmd == 'vcsworker': + gitlab_token_filename = args.pop(0) + gitlab_token = get_token_from_file(gitlab_token_filename) + api = VCSWorker(gitlab_token) + else: + sys.exit('Unknown command %s' % cmd) + + app = bottle.Bottle() + pubkey = get_key_from_file(pubkey_filename) + api.setup(app, pubkey) + app.run(host=HOST, port=PORT) - logging.basicConfig( - filename='vcsworker.log', level=logging.DEBUG, - format='%(levelname)s %(message)s') - logging.info('VCS worker starts') - - spec = { - 'git': 'git://git.liw.fi/heippa', - 'ref': 'master', - 'gitlab': 'hithere2', - } - - tmpdir = 'vcstmp' - logging.info('Workspace: %s', tmpdir) - - if not os.path.exists(tmpdir): - os.makedirs(tmpdir) - - url = spec['git'] - ref = spec['ref'] - name = spec['gitlab'] - - dirname = os.path.join(tmpdir, name) - - if clone(url, ref, dirname): - if remove(token, 
GITLAB_DOMAIN, GITLAB_PROJECT, name): - if push(dirname, GITLAB_DOMAIN, GITLAB_PROJECT, ref, name): - logging.info('Repository copied successfully') - return - - logging.error('Something went wrong when copying repository') main() -- cgit v1.2.1