summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLars Wirzenius <lwirzenius@wikimedia.org>2019-07-02 17:59:54 +0300
committerLars Wirzenius <lwirzenius@wikimedia.org>2019-07-02 17:59:54 +0300
commit052bf7a7dee421830b09f28a570fc2472e30571c (patch)
treecfaa31b242b137e9a4450b7760c87b5c09e08819
parent9042c33e17812eedeafe6a75a88c361b2d2ac916 (diff)
downloadwmf-ci-arch-052bf7a7dee421830b09f28a570fc2472e30571c.tar.gz
Change: vcsworker.py now provides an HTTP API (also controller)
-rwxr-xr-xvcsworker.py321
1 files changed, 266 insertions, 55 deletions
diff --git a/vcsworker.py b/vcsworker.py
index 2cac2cd..0c7ff9e 100755
--- a/vcsworker.py
+++ b/vcsworker.py
@@ -1,18 +1,246 @@
#!/usr/bin/env python3
+#
+# This implements the various APIs for the various CI components we
+# need. Very simplistic and prototype-y. All in one file for
+# simplicity - production version will need to be done more carefully.
+#
+# To use: run this program with one of the following command lines:
+#
+# ./this controller this.log key.pub
+# ./this vcsworker this.log key.pub gitlab.token
+#
+# where
+#
+# - this.log is the log file to use
+# - key.pub is a public key in ssh format for checking incoming access tokens
+# - gitlab.token is the name of a file containing a GitLab access token
+#
+# NOTE: The GitLab instance and other configuration details are
+# hardcoded in this version. This will be fixed, as will the use of
+# the 418 HTTP status code as a generic error code.
+#
+# NOTE: You should run this behind haproxy or a similar TLS provider,
+# which forwards requests to localhost.
+
import logging
import os
import shutil
import subprocess
import sys
+import tempfile
import urllib.parse
+import Crypto.PublicKey.RSA
+import bottle
+import jwt
+
+
+HOST = 'localhost'
+PORT = 2222
+
+
+class TokenParser:
+
+    '''Parse an incoming access token (signed JWT)
+
+    The token signature is verified against the public key given to
+    the constructor. The audience claim is deliberately not checked.
+
+    '''
+
+    # Note that if we need to, for performance, we can cache the parse
+    # results here. But there's no point in doing that unless it
+    # becomes necessary.
+
+    # Signature algorithms accepted from incoming tokens. The
+    # verification key is an RSA public key (see get_key_from_file),
+    # so only the RSA based JWT algorithms make sense. Being explicit
+    # keeps a token from choosing its own algorithm, and PyJWT 2.x
+    # refuses to verify anything unless an allow-list is given.
+    ALGORITHMS = ['RS256', 'RS384', 'RS512', 'PS256', 'PS384', 'PS512']
+
+    def __init__(self, pubkey):
+        '''Remember the public key used to verify token signatures
+
+        pubkey must provide an exportKey('OpenSSH') method.
+
+        '''
+        self._pubkey = pubkey
+
+    def parse_token(self, token_text):
+        '''Verify token_text and return the claims it carries
+
+        Whatever exception jwt.decode raises for a token it can't
+        verify is passed on to the caller.
+
+        '''
+        return jwt.decode(
+            token_text,
+            key=self._pubkey.exportKey('OpenSSH'),
+            algorithms=self.ALGORITHMS,
+            audience=None,
+            options={'verify_aud': False})
+
+
+class AccessChecker:
+
+    '''Given request headers and required scopes, is a request allowed?'''
+
+    def __init__(self, pubkey):
+        '''Create a checker that verifies access tokens against pubkey'''
+        self._parser = TokenParser(pubkey)
+
+    def access_is_allowed(self, headers, required_scopes):
+        '''Return True if the request may proceed, False otherwise
+
+        headers is the mapping of request headers and required_scopes
+        the list of scopes the route needs. A request without a valid
+        token is allowed only when no scopes are required. A request
+        with a valid token needs every required scope to be listed in
+        the token's whitespace separated "scope" claim.
+
+        '''
+        token = self._get_token(headers)
+        logging.debug('Access token %r', token)
+
+        # Compare against None instead of relying on truthiness: a
+        # validly signed token without any claims is an empty, falsy
+        # dict, and must still go through the scope check below.
+        if token is None:
+            if required_scopes:
+                logging.error('No valid access token')
+                return False
+            return True
+
+        scopes = token.get('scope', '').split()
+        missing = set(required_scopes).difference(scopes)
+        if missing:
+            logging.error(
+                'Required scopes that are missing from token: %r', missing)
+            return False
+
+        return True
+
+    def _get_token(self, headers):
+        '''Parse an access token or return None if it's bad'''
+        token_text = self._get_token_text(headers)
+        if token_text is None:
+            return None
+        try:
+            return self._parser.parse_token(token_text)
+        except jwt.InvalidTokenError as e:
+            # Treat a token that fails verification like a missing
+            # one, so the request is denied instead of crashing.
+            logging.error('Rejecting access token: %s', e)
+            return None
+
+    def _get_token_text(self, headers):
+        '''Extract access token from request headers or None if not there'''
+        v = headers.get('Authorization', '')
+        words = v.split()
+        if len(words) == 2:
+            keyword, token_text = words
+            if keyword.lower() == 'bearer':
+                return token_text
+        return None
+
+
+class API:
+
+    '''Base class for simple HTTP APIs
+
+    Override the get_routes method to use this. Call setup() method to
+    set up routes and such, before actually running.
+
+    '''
+
+    def __init__(self):
+        self._checker = None
+
+    def setup(self, app, token_pubkey):
+        '''Create the access checker and register all routes with app'''
+        self._checker = AccessChecker(token_pubkey)
+        self._add_routes(app, self.get_routes())
+
+    def get_routes(self):
+        '''Return a list of route dicts; subclasses must override this
+
+        Each dict has the keys 'func' (the function serving the route)
+        and 'scopes' (list of scopes a caller must have). All other
+        keys are passed to app.route() as keyword arguments.
+
+        '''
+        raise NotImplementedError()
-MAX_CLONE_TIME = 1
-MAX_REMOVE_TIME = 60
-MAX_PUSH_TIME = 60
-GITLAB_DOMAIN = 'wmf-gitlab3.vm.liw.fi'
-GITLAB_PROJECT = 'liw'
+    def _add_routes(self, app, routes):
+        '''Register every route with app, wrapped in an access check'''
+        for route in routes:
+            # Work on a copy so the caller's dicts are left intact.
+            route = dict(route)
+            func = route.pop('func')
+            scopes = route.pop('scopes')
+            assert isinstance(scopes, list)
+            route['callback'] = self._make_callback(func, scopes)
+            app.route(**route)
+
+    def _make_callback(self, func, scopes):
+        '''Return a route callback bound to this func and these scopes
+
+        The callback is built in its own function call, because a
+        lambda created directly in the loop of _add_routes would look
+        up func and scopes when called, and every route would then
+        end up serving the last route's function.
+
+        '''
+        def callback(**kwargs):
+            return self.check(func, scopes, kwargs)
+        return callback
+
+    def check(self, func, required_scopes, kwargs):
+        '''Call a callback function, if it's OK to do so
+
+        Returns what func(**kwargs) returns if the access checker
+        allows the current request, and an HTTP 400 error otherwise.
+
+        '''
+        r = bottle.request
+        logging.debug('Checking access for request %s %s', r.method, r.path)
+
+        if self._checker.access_is_allowed(r.headers, required_scopes):
+            logging.info('Serving request %s %s', r.method, r.path)
+            ret = func(**kwargs)
+            logging.info('Result: %r', ret)
+            return ret
+
+        # NOTE(review): 401 or 403 would describe a denied request
+        # better than 400, but clients may rely on the current code.
+        logging.error('Request denied %s %s', r.method, r.path)
+        return bottle.HTTPError(400)
+
+
+class Controller(API):
+
+    '''A dummy controller API'''
+
+    def get_routes(self):
+        '''Describe the two routes served: GET /status and GET /hello/<name>'''
+        status_route = dict(
+            method='GET',
+            path='/status',
+            func=self._status,
+            scopes=['status'])
+        hello_route = dict(
+            method='GET',
+            path='/hello/<name>',
+            func=self._hello,
+            scopes=['hello'])
+        return [status_route, hello_route]
+
+    def _status(self):
+        '''Return a status report in which every job list is empty'''
+        sections = ('queue', 'running', 'finished')
+        return {section: [] for section in sections}
+
+    def _hello(self, name=None):
+        '''Return a one-line greeting for name'''
+        template = 'hello {}\n'
+        return template.format(name)
+
+
+class VCSWorker(API):
+
+    '''A VCSWorker API
+
+    Serves POST /updaterepo, which clones a git repository, deletes
+    the GitLab project of the requested name, and pushes one ref of
+    the clone to that project as its master branch.
+
+    '''
+
+    # Timeouts handed to runcmd for each external command.
+    # NOTE(review): presumably seconds - confirm against runcmd. If
+    # so, 1 looks very short for a clone.
+    MAX_CLONE_TIME = 1
+    MAX_REMOVE_TIME = 60
+    MAX_PUSH_TIME = 60
+    GITLAB_DOMAIN = 'wmf-gitlab3.vm.liw.fi'
+    GITLAB_PROJECT = 'liw'
+
+    def __init__(self, gitlab_token):
+        '''Remember the GitLab token and create a temporary workspace'''
+        super().__init__()
+        self._token = gitlab_token
+        self._tmpdir = tempfile.mkdtemp()
+        logging.info('Workspace: %s', self._tmpdir)
+
+    def get_routes(self):
+        '''Describe the single route served: POST /updaterepo'''
+        return [
+            {
+                'method': 'POST',
+                'path': '/updaterepo',
+                'func': self._update_repo,
+                'scopes': ['update-repo'],
+            },
+        ]
+
+    def _update_repo(self):
+        '''Copy the repository described by the JSON request body
+
+        The body must be an object with the string fields 'git' (URL
+        to clone from), 'ref' (branch or tag to copy) and 'gitlab'
+        (name of the GitLab project to push to). Returns a success
+        message, an HTTP 400 error if the body is unacceptable, or an
+        HTTP 418 error if any of the steps fails.
+
+        '''
+        spec = bottle.request.json
+        try:
+            url = spec['git']
+            ref = spec['ref']
+            name = spec['gitlab']
+        except (TypeError, KeyError):
+            # No JSON body at all, not an object, or a field missing.
+            logging.error('Request body lacks git, ref, or gitlab field')
+            return bottle.HTTPError(400)
+
+        if not self._is_safe_spec(url, ref, name):
+            logging.error('Unacceptable values in request: %r', spec)
+            return bottle.HTTPError(400)
+
+        dirname = os.path.join(self._tmpdir, name)
+
+        if self._clone(url, ref, dirname):
+            ok = self._remove(
+                self._token, self.GITLAB_DOMAIN, self.GITLAB_PROJECT, name)
+            if ok:
+                ok = self._push(
+                    dirname, self.GITLAB_DOMAIN, self.GITLAB_PROJECT, ref,
+                    name)
+                if ok:
+                    logging.info('Repository copied successfully')
+                    return 'Repository copied successfully'
+
+        logging.error('Something went wrong when copying repository')
+        return bottle.HTTPError(418)
+
+    @staticmethod
+    def _is_safe_spec(url, ref, name):
+        '''Is it safe to build paths and command lines from these?
+
+        All three values come straight from the request body.
+
+        '''
+        values = (url, ref, name)
+        if not all(isinstance(v, str) and v for v in values):
+            return False
+        # A leading dash would make git treat the value as an option.
+        if any(v.startswith('-') for v in values):
+            return False
+        # name becomes a directory in the workspace, and _clone removes
+        # that directory first, so it must not point anywhere else.
+        return name not in ('.', '..') and os.path.basename(name) == name
+
+    def _clone(self, url, ref, dirname):
+        '''Clone ref of url into dirname, replacing any old dirname'''
+        if os.path.exists(dirname):
+            logging.debug('Removing %s', dirname)
+            shutil.rmtree(dirname)
+        # The -- ends option parsing, so url and dirname can't be
+        # taken for options.
+        argv = ['git', 'clone', '-q', '-b', ref, '--', url, dirname]
+        return runcmd('.', argv, self.MAX_CLONE_TIME)
+
+    def _remove(self, token, gitlab_domain, gitlab_project, name):
+        '''Ask GitLab, via its HTTP API, to delete the named project'''
+        # The project is identified by its URL-quoted path; safe=''
+        # makes sure the slash gets quoted as well.
+        snippet = urllib.parse.quote('%s/%s' % (gitlab_project, name), safe='')
+        url = 'https://%s/api/v4/projects/%s' % (gitlab_domain, snippet)
+        argv = ['curl', '-HPRIVATE-TOKEN: %s' % token, '-X', 'DELETE', url]
+        return runcmd('.', argv, self.MAX_REMOVE_TIME)
+
+    def _push(self, dirname, gitlab_domain, gitlab_project, ref, name):
+        '''Push ref from the clone in dirname to GitLab as master'''
+        logging.info('Pushing %s to %s as %s', dirname, gitlab_domain, name)
+        url = 'ssh://git@%s/%s/%s.git' % (gitlab_domain, gitlab_project, name)
+        argv = ['git', 'push', url, '%s:master' % ref]
+        return runcmd(dirname, argv, self.MAX_PUSH_TIME)
def runcmd(cwd, argv, timeout):
@@ -39,63 +267,46 @@ def runcmd(cwd, argv, timeout):
return True
-def clone(url, ref, dirname):
- if os.path.exists(dirname):
- logging.debug('Removing %s', dirname)
- shutil.rmtree(dirname)
-
- argv = ['git', 'clone', '-q', '-b', ref, url, dirname]
- return runcmd('.', argv, MAX_CLONE_TIME)
+def setup_logging(log_filename):
+    '''Configure logging to log_filename at DEBUG level, and say hello'''
+    config = {
+        'filename': log_filename,
+        'level': logging.DEBUG,
+        'format': '%(asctime)s %(levelname)s %(message)s',
+    }
+    logging.basicConfig(**config)
+    logging.info('API server starting')
-def remove(token, gitlab_domain, gitlab_project, name):
- snippet = urllib.parse.quote('%s/%s' % (gitlab_project, name), safe='')
- url = 'https://%s/api/v4/projects/%s' % (gitlab_domain, snippet)
-
- argv = ['curl', '-HPRIVATE-TOKEN: %s' % token, '-X', 'DELETE', url]
- return runcmd('.', argv, MAX_REMOVE_TIME)
-
+def get_key_from_file(filename):
+    '''Read the named file and import its contents as an RSA key'''
+    with open(filename) as key_file:
+        return Crypto.PublicKey.RSA.importKey(key_file.read())
-def push(dirname, gitlab_domain, gitlab_project, ref, name):
- logging.info('Pushing %s to %s as %s', dirname, gitlab_domain, name)
- url = 'ssh://git@%s/%s/%s.git' % (gitlab_domain, gitlab_project, name)
- argv = ['git', 'push', url, '%s:master' % ref]
- return runcmd(dirname, argv, MAX_PUSH_TIME)
+def get_token_from_file(filename):
+    '''Return the contents of the named file, minus surrounding whitespace'''
+    with open(filename) as token_file:
+        contents = token_file.read()
+    return contents.strip()
def main():
- token = sys.argv[1]
+    '''Parse the command line and serve the chosen API over HTTP
+
+    The arguments are the command (controller or vcsworker), the log
+    file name, and the public key file name. The vcsworker command
+    also needs the name of a file holding a GitLab access token.
+
+    '''
+    args = sys.argv[1:]
+    if len(args) < 3:
+        # Say what is wrong, rather than dying of an IndexError.
+        sys.exit(
+            'Usage: %s controller|vcsworker LOGFILE PUBKEY [GITLABTOKEN]'
+            % sys.argv[0])
+    cmd, log_filename, pubkey_filename = args[:3]
+    extra = args[3:]
+
+    setup_logging(log_filename)
+
+    if cmd == 'controller':
+        api = Controller()
+    elif cmd == 'vcsworker':
+        if not extra:
+            sys.exit('vcsworker needs the name of a GitLab token file')
+        gitlab_token = get_token_from_file(extra[0])
+        api = VCSWorker(gitlab_token)
+    else:
+        sys.exit('Unknown command %s' % cmd)
+
+    app = bottle.Bottle()
+    pubkey = get_key_from_file(pubkey_filename)
+    api.setup(app, pubkey)
+    app.run(host=HOST, port=PORT)
- logging.basicConfig(
- filename='vcsworker.log', level=logging.DEBUG,
- format='%(levelname)s %(message)s')
- logging.info('VCS worker starts')
-
- spec = {
- 'git': 'git://git.liw.fi/heippa',
- 'ref': 'master',
- 'gitlab': 'hithere2',
- }
-
- tmpdir = 'vcstmp'
- logging.info('Workspace: %s', tmpdir)
-
- if not os.path.exists(tmpdir):
- os.makedirs(tmpdir)
-
- url = spec['git']
- ref = spec['ref']
- name = spec['gitlab']
-
- dirname = os.path.join(tmpdir, name)
-
- if clone(url, ref, dirname):
- if remove(token, GITLAB_DOMAIN, GITLAB_PROJECT, name):
- if push(dirname, GITLAB_DOMAIN, GITLAB_PROJECT, ref, name):
- logging.info('Repository copied successfully')
- return
-
- logging.error('Something went wrong when copying repository')
main()