From b086f7fadac9200d45e1cb4bc42046f090dc9fc3 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Wed, 3 Jul 2019 17:34:12 +0300 Subject: Rename: vcsworker.py to api.py --- api.py | 199 ++++++++++++++++++++++++++++++++++--- vcsworker.py | 312 ----------------------------------------------------------- 2 files changed, 186 insertions(+), 325 deletions(-) mode change 100644 => 100755 api.py delete mode 100755 vcsworker.py diff --git a/api.py b/api.py old mode 100644 new mode 100755 index d593260..5983268 --- a/api.py +++ b/api.py @@ -1,13 +1,53 @@ +#!/usr/bin/env python3 +# +# This implements the various APIs for the various CI components we +# need. Very simplistic and prototype-y. All in one file for +# simplicity - production version will need to be done more carefully. +# +# To use: run this program with one of the following command lines: +# +# ./this controller this.log key.pub +# ./this vcsworker this.log key.pub gitlab.token +# +# where +# +# - this.log is the log file to use +# - key.pub is a public key in ssh format for checking incoming access tokens +# - gitlab.token is the name of a file containing a GitLab access token +# +# NOTE: The GitLab instance and other configuration details are +# hardcoded in this version. This will be fixed, as well as the 418 +# HTTP status code as a generic error code. +# +# NOTE: You should run this behind haproxy or smilar TLS provider, +# which forwards requests to localhost. + + import logging +import os +import shutil +import subprocess import sys +import tempfile +import urllib.parse import Crypto.PublicKey.RSA import bottle import jwt +HOST = 'localhost' +PORT = 2222 + + class TokenParser: + '''Parse an incoming access token (signed JWT)''' + + # Note that if we need to, for performance, we can cache the parse + # results here. But there's no point in doing that unless it + # becomes necessary. + def __init__(self, pubkey): self._pubkey = pubkey @@ -21,12 +61,15 @@ class TokenParser: class AccessChecker: + '''Given request headers and required scopes, is a request allowed?''' + def __init__(self, pubkey): self._parser = TokenParser(pubkey) def access_is_allowed(self, headers, required_scopes): token = self._get_token(headers) logging.debug('Access token %r', token) + if token is None and len(required_scopes) != 0: logging.error('No valid access token') return False @@ -42,12 +85,14 @@ class AccessChecker: return True def _get_token(self, headers): + '''Parse an access token or return None if it's bad''' token_text = self._get_token_text(headers) if token_text is None: return None return self._parser.parse_token(token_text) def _get_token_text(self, headers): + '''Extract access token from request headers or None if not there''' v = headers.get('Authorization', '') words = v.split() if len(words) == 2: @@ -58,7 +103,17 @@ class AccessChecker: class API: - def __init__(self, app, token_pubkey): + '''Base class for simple HTTP APIs + + Override the get_routes method to use this. Call setup() method to + set up routes and such, before actually running. + + ''' + + def __init__(self): + self._checker = None + + def setup(self, app, token_pubkey): self._checker = AccessChecker(token_pubkey) self._add_routes(app, self.get_routes()) @@ -69,17 +124,19 @@ class API: for route in routes: func = route.pop('func') scopes = route.pop('scopes') + assert isinstance(scopes, list) callback = lambda **kwargs: self.check(func, scopes, kwargs) route = dict(route) route['callback'] = callback app.route(**route) def check(self, func, required_scopes, kwargs): + '''Call a callback function, if it's OK to do so''' r = bottle.request - logging.debug('Checking access for request %s %s', r.method, r.path) + logging.debug('New request, checking access: %s %s', r.method, r.path) - if self._checker.access_is_allowed(bottle.request.headers, required_scopes): - logging.info('Serving request %s %s', r.method, r.path) + if self._checker.access_is_allowed(r.headers, required_scopes): + logging.info('Access is allowed: %s %s', r.method, r.path) ret = func(**kwargs) logging.info('Result: %r', ret) return ret @@ -90,6 +147,8 @@ class API: class Controller(API): + '''A dummy controller API''' + def get_routes(self): return [ { @@ -117,23 +176,137 @@ class Controller(API): return 'hello {}\n'.format(name) +class VCSWorker(API): + + '''A VCSWorker API''' + + MAX_CLONE_TIME = 1 + MAX_REMOVE_TIME = 60 + MAX_PUSH_TIME = 60 + GITLAB_DOMAIN = 'wmf-gitlab3.vm.liw.fi' + GITLAB_PROJECT = 'liw' + + def __init__(self, gitlab_token): + self._token = gitlab_token + self._tmpdir = tempfile.mkdtemp() + logging.info('Workspace: %s', self._tmpdir) + + def get_routes(self): + return [ + { + 'method': 'POST', + 'path': '/updaterepo', + 'func': self._update_repo, + 'scopes': ['update-repo'], + }, + ] + + def _update_repo(self): + spec = bottle.request.json + logging.info('Updating repository: %r', spec) + + url = spec['git'] + ref = spec['ref'] + name = spec['gitlab'] + dirname = os.path.join(self._tmpdir, name) + + if self._clone(url, ref, dirname): + ok = self._remove( + self._token, self.GITLAB_DOMAIN, self.GITLAB_PROJECT, name) + if ok: + ok = self._push( + dirname, self.GITLAB_DOMAIN, self.GITLAB_PROJECT, ref, + name) + if ok: + return 'Repository copied successfully' + + logging.error('Something went wrong when copying repository') + return bottle.HTTPError(418) + + def _clone(self, url, ref, dirname): + if os.path.exists(dirname): + logging.debug('Removing %s', dirname) + shutil.rmtree(dirname) + argv = ['git', 'clone', '-q', '-b', ref, url, dirname] + return runcmd('.', argv, self.MAX_CLONE_TIME) + + def _remove(self, token, gitlab_domain, gitlab_project, name): + snippet = urllib.parse.quote('%s/%s' % (gitlab_project, name), safe='') + url = 'https://%s/api/v4/projects/%s' % (gitlab_domain, snippet) + argv = ['curl', '-HPRIVATE-TOKEN: %s' % token, '-X', 'DELETE', url] + return runcmd('.', argv, self.MAX_REMOVE_TIME) + + def _push(self, dirname, gitlab_domain, gitlab_project, ref, name): + logging.info('Pushing %s to %s as %s', dirname, gitlab_domain, name) + url = 'ssh://git@%s/%s/%s.git' % (gitlab_domain, gitlab_project, name) + argv = ['git', 'push', url, '%s:master' % ref] + return runcmd(dirname, argv, self.MAX_PUSH_TIME) + + +def runcmd(cwd, argv, timeout): + logging.info('Running command: %r', argv) + try: + p = subprocess.run( + argv, cwd=cwd, timeout=timeout, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + except subprocess.TimeoutExpired: + logging.error('Command took too long (timeout %r)', timeout) + return False + except Exception as e: + logging.error('Error while running command: %s', str(e)) + return False + + if p.returncode != 0: + logging.error('Command failed: %r', argv) + logging.error('exit code: %d', p.returncode) + logging.error('stdout: %r', p.stdout) + logging.error('stderr: %r', p.stderr) + return False + + logging.info('Command succeeded') + return True + + +def setup_logging(log_filename): + logging.basicConfig( + filename=log_filename, + level=logging.DEBUG, + format='%(asctime)s %(levelname)s %(message)s') + logging.info('API server starting') + + def get_key_from_file(filename): with open(filename) as f: key_text = f.read() return Crypto.PublicKey.RSA.importKey(key_text) - + +def get_token_from_file(filename): + with open(filename) as f: + return f.read().strip() + + def main(): - logging.basicConfig( - filename='api.log', - level=logging.DEBUG, - format='%(levelname)s %(message)s') + args = sys.argv[1:] + cmd = args.pop(0) + log_filename = args.pop(0) + pubkey_filename = args.pop(0) + + setup_logging(log_filename) - filename = sys.argv[1] - key = get_key_from_file(filename) + if cmd == 'controller': + api = Controller() + elif cmd == 'vcsworker': + gitlab_token_filename = args.pop(0) + gitlab_token = get_token_from_file(gitlab_token_filename) + api = VCSWorker(gitlab_token) + else: + sys.exit('Unknown command %s' % cmd) app = bottle.Bottle() - api = Controller(app, key) - app.run(host='localhost', port=2222) + pubkey = get_key_from_file(pubkey_filename) + api.setup(app, pubkey) + app.run(host=HOST, port=PORT) + main() diff --git a/vcsworker.py b/vcsworker.py deleted file mode 100755 index 5983268..0000000 --- a/vcsworker.py +++ /dev/null @@ -1,312 +0,0 @@ -#!/usr/bin/env python3 -# -# This implements the various APIs for the various CI components we -# need. Very simplistic and prototype-y. All in one file for -# simplicity - production version will need to be done more carefully. -# -# To use: run this program with one of the following command lines: -# -# ./this controller this.log key.pub -# ./this vcsworker this.log key.pub gitlab.token -# -# where -# -# - this.log is the log file to use -# - key.pub is a public key in ssh format for checking incoming access tokens -# - gitlab.token is the name of a file containing a GitLab access token -# -# NOTE: The GitLab instance and other configuration details are -# hardcoded in this version. This will be fixed, as well as the 418 -# HTTP status code as a generic error code. -# -# NOTE: You should run this behind haproxy or smilar TLS provider, -# which forwards requests to localhost. - - -import logging -import os -import shutil -import subprocess -import sys -import tempfile -import urllib.parse - -import Crypto.PublicKey.RSA -import bottle -import jwt - - -HOST = 'localhost' -PORT = 2222 - - -class TokenParser: - - '''Parse an incoming access token (signed JWT)''' - - # Note that if we need to, for performance, we can cache the parse - # results here. But there's no point in doing that unless it - # becomes necessary. - - def __init__(self, pubkey): - self._pubkey = pubkey - - def parse_token(self, token_text): - return jwt.decode( - token_text, - key=self._pubkey.exportKey('OpenSSH'), - audience=None, - options={'verify_aud': False}) - - -class AccessChecker: - - '''Given request headers and required scopes, is a request allowed?''' - - def __init__(self, pubkey): - self._parser = TokenParser(pubkey) - - def access_is_allowed(self, headers, required_scopes): - token = self._get_token(headers) - logging.debug('Access token %r', token) - - if token is None and len(required_scopes) != 0: - logging.error('No valid access token') - return False - - if token: - scopes = token.get('scope', '').split() - missing = set(required_scopes).difference(scopes) - if missing: - logging.error( - 'Required scopes that are missing from token: %r', missing) - return False - - return True - - def _get_token(self, headers): - '''Parse an access token or return None if it's bad''' - token_text = self._get_token_text(headers) - if token_text is None: - return None - return self._parser.parse_token(token_text) - - def _get_token_text(self, headers): - '''Extract access token from request headers or None if not there''' - v = headers.get('Authorization', '') - words = v.split() - if len(words) == 2: - keyword, token_text = words - if keyword.lower() == 'bearer': - return token_text - - -class API: - - '''Base class for simple HTTP APIs - - Override the get_routes method to use this. Call setup() method to - set up routes and such, before actually running. - - ''' - - def __init__(self): - self._checker = None - - def setup(self, app, token_pubkey): - self._checker = AccessChecker(token_pubkey) - self._add_routes(app, self.get_routes()) - - def get_routes(self): - raise NotImplementedError() - - def _add_routes(self, app, routes): - for route in routes: - func = route.pop('func') - scopes = route.pop('scopes') - assert isinstance(scopes, list) - callback = lambda **kwargs: self.check(func, scopes, kwargs) - route = dict(route) - route['callback'] = callback - app.route(**route) - - def check(self, func, required_scopes, kwargs): - '''Call a callback function, if it's OK to do so''' - r = bottle.request - logging.debug('New request, checking access: %s %s', r.method, r.path) - - if self._checker.access_is_allowed(r.headers, required_scopes): - logging.info('Access is allowed: %s %s', r.method, r.path) - ret = func(**kwargs) - logging.info('Result: %r', ret) - return ret - - logging.error('Request denied %s %s', r.method, r.path) - return bottle.HTTPError(400) - - -class Controller(API): - - '''A dummy controller API''' - - def get_routes(self): - return [ - { - 'method': 'GET', - 'path': '/status', - 'func': self._status, - 'scopes': ['status'], - }, - { - 'method': 'GET', - 'path': '/hello/', - 'func': self._hello, - 'scopes': ['hello'], - }, - ] - - def _status(self): - return { - 'queue': [], - 'running': [], - 'finished': [], - } - - def _hello(self, name=None): - return 'hello {}\n'.format(name) - - -class VCSWorker(API): - - '''A VCSWorker API''' - - MAX_CLONE_TIME = 1 - MAX_REMOVE_TIME = 60 - MAX_PUSH_TIME = 60 - GITLAB_DOMAIN = 'wmf-gitlab3.vm.liw.fi' - GITLAB_PROJECT = 'liw' - - def __init__(self, gitlab_token): - self._token = gitlab_token - self._tmpdir = tempfile.mkdtemp() - logging.info('Workspace: %s', self._tmpdir) - - def get_routes(self): - return [ - { - 'method': 'POST', - 'path': '/updaterepo', - 'func': self._update_repo, - 'scopes': ['update-repo'], - }, - ] - - def _update_repo(self): - spec = bottle.request.json - logging.info('Updating repository: %r', spec) - - url = spec['git'] - ref = spec['ref'] - name = spec['gitlab'] - dirname = os.path.join(self._tmpdir, name) - - if self._clone(url, ref, dirname): - ok = self._remove( - self._token, self.GITLAB_DOMAIN, self.GITLAB_PROJECT, name) - if ok: - ok = self._push( - dirname, self.GITLAB_DOMAIN, self.GITLAB_PROJECT, ref, - name) - if ok: - return 'Repository copied successfully' - - logging.error('Something went wrong when copying repository') - return bottle.HTTPError(418) - - def _clone(self, url, ref, dirname): - if os.path.exists(dirname): - logging.debug('Removing %s', dirname) - shutil.rmtree(dirname) - argv = ['git', 'clone', '-q', '-b', ref, url, dirname] - return runcmd('.', argv, self.MAX_CLONE_TIME) - - def _remove(self, token, gitlab_domain, gitlab_project, name): - snippet = urllib.parse.quote('%s/%s' % (gitlab_project, name), safe='') - url = 'https://%s/api/v4/projects/%s' % (gitlab_domain, snippet) - argv = ['curl', '-HPRIVATE-TOKEN: %s' % token, '-X', 'DELETE', url] - return runcmd('.', argv, self.MAX_REMOVE_TIME) - - def _push(self, dirname, gitlab_domain, gitlab_project, ref, name): - logging.info('Pushing %s to %s as %s', dirname, gitlab_domain, name) - url = 'ssh://git@%s/%s/%s.git' % (gitlab_domain, gitlab_project, name) - argv = ['git', 'push', url, '%s:master' % ref] - return runcmd(dirname, argv, self.MAX_PUSH_TIME) - - -def runcmd(cwd, argv, timeout): - logging.info('Running command: %r', argv) - try: - p = subprocess.run( - argv, cwd=cwd, timeout=timeout, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - except subprocess.TimeoutExpired: - logging.error('Command took too long (timeout %r)', timeout) - return False - except Exception as e: - logging.error('Error while running command: %s', str(e)) - return False - - if p.returncode != 0: - logging.error('Command failed: %r', argv) - logging.error('exit code: %d', p.returncode) - logging.error('stdout: %r', p.stdout) - logging.error('stderr: %r', p.stderr) - return False - - logging.info('Command succeeded') - return True - - -def setup_logging(log_filename): - logging.basicConfig( - filename=log_filename, - level=logging.DEBUG, - format='%(asctime)s %(levelname)s %(message)s') - logging.info('API server starting') - - -def get_key_from_file(filename): - with open(filename) as f: - key_text = f.read() - return Crypto.PublicKey.RSA.importKey(key_text) - - -def get_token_from_file(filename): - with open(filename) as f: - return f.read().strip() - - -def main(): - args = sys.argv[1:] - cmd = args.pop(0) - log_filename = args.pop(0) - pubkey_filename = args.pop(0) - - setup_logging(log_filename) - - if cmd == 'controller': - api = Controller() - elif cmd == 'vcsworker': - gitlab_token_filename = args.pop(0) - gitlab_token = get_token_from_file(gitlab_token_filename) - api = VCSWorker(gitlab_token) - else: - sys.exit('Unknown command %s' % cmd) - - app = bottle.Bottle() - pubkey = get_key_from_file(pubkey_filename) - api.setup(app, pubkey) - app.run(host=HOST, port=PORT) - - -main() -- cgit v1.2.1