summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLars Wirzenius <lwirzenius@wikimedia.org>2019-07-03 17:34:12 +0300
committerLars Wirzenius <lwirzenius@wikimedia.org>2019-07-03 17:34:12 +0300
commitb086f7fadac9200d45e1cb4bc42046f090dc9fc3 (patch)
tree37c717eedfcc289c6ba85f43e082328586f49561
parent22713f24e1c97792882a34f036d754e89eb5d0cc (diff)
downloadwmf-ci-arch-b086f7fadac9200d45e1cb4bc42046f090dc9fc3.tar.gz
Rename: vcsworker.py to api.py
-rwxr-xr-x[-rw-r--r--]api.py199
-rwxr-xr-xvcsworker.py312
2 files changed, 186 insertions, 325 deletions
diff --git a/api.py b/api.py
index d593260..5983268 100644..100755
--- a/api.py
+++ b/api.py
@@ -1,13 +1,53 @@
+#!/usr/bin/env python3
+#
+# This implements the various APIs for the various CI components we
+# need. Very simplistic and prototype-y. All in one file for
+# simplicity - production version will need to be done more carefully.
+#
+# To use: run this program with one of the following command lines:
+#
+# ./this controller this.log key.pub
+# ./this vcsworker this.log key.pub gitlab.token
+#
+# where
+#
+# - this.log is the log file to use
+# - key.pub is a public key in ssh format for checking incoming access tokens
+# - gitlab.token is the name of a file containing a GitLab access token
+#
+# NOTE: The GitLab instance and other configuration details are
+# hardcoded in this version. This will be fixed, as will the use of
+# the 418 HTTP status code as a generic error code.
+#
+# NOTE: You should run this behind haproxy or a similar TLS provider,
+# which forwards requests to localhost.
+
+
import logging
+import os
+import shutil
+import subprocess
import sys
+import tempfile
+import urllib.parse
import Crypto.PublicKey.RSA
import bottle
import jwt
+HOST = 'localhost'
+PORT = 2222
+
+
class TokenParser:
+ '''Parse an incoming access token (signed JWT)'''
+
+ # Note that if we need to, for performance, we can cache the parse
+ # results here. But there's no point in doing that unless it
+ # becomes necessary.
+
def __init__(self, pubkey):
self._pubkey = pubkey
@@ -21,12 +61,15 @@ class TokenParser:
class AccessChecker:
+ '''Given request headers and required scopes, is a request allowed?'''
+
def __init__(self, pubkey):
self._parser = TokenParser(pubkey)
def access_is_allowed(self, headers, required_scopes):
token = self._get_token(headers)
logging.debug('Access token %r', token)
+
if token is None and len(required_scopes) != 0:
logging.error('No valid access token')
return False
@@ -42,12 +85,14 @@ class AccessChecker:
return True
def _get_token(self, headers):
+ '''Parse an access token or return None if it's bad'''
token_text = self._get_token_text(headers)
if token_text is None:
return None
return self._parser.parse_token(token_text)
def _get_token_text(self, headers):
+ '''Extract access token from request headers or None if not there'''
v = headers.get('Authorization', '')
words = v.split()
if len(words) == 2:
@@ -58,7 +103,17 @@ class AccessChecker:
class API:
- def __init__(self, app, token_pubkey):
+ '''Base class for simple HTTP APIs
+
+ Override the get_routes method to use this. Call setup() method to
+ set up routes and such, before actually running.
+
+ '''
+
+ def __init__(self):
+ self._checker = None
+
+ def setup(self, app, token_pubkey):
self._checker = AccessChecker(token_pubkey)
self._add_routes(app, self.get_routes())
@@ -69,17 +124,19 @@ class API:
for route in routes:
func = route.pop('func')
scopes = route.pop('scopes')
+ assert isinstance(scopes, list)
callback = lambda **kwargs: self.check(func, scopes, kwargs)
route = dict(route)
route['callback'] = callback
app.route(**route)
def check(self, func, required_scopes, kwargs):
+ '''Call a callback function, if it's OK to do so'''
r = bottle.request
- logging.debug('Checking access for request %s %s', r.method, r.path)
+ logging.debug('New request, checking access: %s %s', r.method, r.path)
- if self._checker.access_is_allowed(bottle.request.headers, required_scopes):
- logging.info('Serving request %s %s', r.method, r.path)
+ if self._checker.access_is_allowed(r.headers, required_scopes):
+ logging.info('Access is allowed: %s %s', r.method, r.path)
ret = func(**kwargs)
logging.info('Result: %r', ret)
return ret
@@ -90,6 +147,8 @@ class API:
class Controller(API):
+ '''A dummy controller API'''
+
def get_routes(self):
return [
{
@@ -117,23 +176,137 @@ class Controller(API):
return 'hello {}\n'.format(name)
+class VCSWorker(API):
+
+ '''A VCSWorker API'''
+
+ MAX_CLONE_TIME = 1
+ MAX_REMOVE_TIME = 60
+ MAX_PUSH_TIME = 60
+ GITLAB_DOMAIN = 'wmf-gitlab3.vm.liw.fi'
+ GITLAB_PROJECT = 'liw'
+
+ def __init__(self, gitlab_token):
+ self._token = gitlab_token
+ self._tmpdir = tempfile.mkdtemp()
+ logging.info('Workspace: %s', self._tmpdir)
+
+ def get_routes(self):
+ return [
+ {
+ 'method': 'POST',
+ 'path': '/updaterepo',
+ 'func': self._update_repo,
+ 'scopes': ['update-repo'],
+ },
+ ]
+
+ def _update_repo(self):
+ spec = bottle.request.json
+ logging.info('Updating repository: %r', spec)
+
+ url = spec['git']
+ ref = spec['ref']
+ name = spec['gitlab']
+ dirname = os.path.join(self._tmpdir, name)
+
+ if self._clone(url, ref, dirname):
+ ok = self._remove(
+ self._token, self.GITLAB_DOMAIN, self.GITLAB_PROJECT, name)
+ if ok:
+ ok = self._push(
+ dirname, self.GITLAB_DOMAIN, self.GITLAB_PROJECT, ref,
+ name)
+ if ok:
+ return 'Repository copied successfully'
+
+ logging.error('Something went wrong when copying repository')
+ return bottle.HTTPError(418)
+
+ def _clone(self, url, ref, dirname):
+ if os.path.exists(dirname):
+ logging.debug('Removing %s', dirname)
+ shutil.rmtree(dirname)
+ argv = ['git', 'clone', '-q', '-b', ref, url, dirname]
+ return runcmd('.', argv, self.MAX_CLONE_TIME)
+
+ def _remove(self, token, gitlab_domain, gitlab_project, name):
+ snippet = urllib.parse.quote('%s/%s' % (gitlab_project, name), safe='')
+ url = 'https://%s/api/v4/projects/%s' % (gitlab_domain, snippet)
+ argv = ['curl', '-HPRIVATE-TOKEN: %s' % token, '-X', 'DELETE', url]
+ return runcmd('.', argv, self.MAX_REMOVE_TIME)
+
+ def _push(self, dirname, gitlab_domain, gitlab_project, ref, name):
+ logging.info('Pushing %s to %s as %s', dirname, gitlab_domain, name)
+ url = 'ssh://git@%s/%s/%s.git' % (gitlab_domain, gitlab_project, name)
+ argv = ['git', 'push', url, '%s:master' % ref]
+ return runcmd(dirname, argv, self.MAX_PUSH_TIME)
+
+
+def runcmd(cwd, argv, timeout):
+ logging.info('Running command: %r', argv)
+ try:
+ p = subprocess.run(
+ argv, cwd=cwd, timeout=timeout, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ except subprocess.TimeoutExpired:
+ logging.error('Command took too long (timeout %r)', timeout)
+ return False
+ except Exception as e:
+ logging.error('Error while running command: %s', str(e))
+ return False
+
+ if p.returncode != 0:
+ logging.error('Command failed: %r', argv)
+ logging.error('exit code: %d', p.returncode)
+ logging.error('stdout: %r', p.stdout)
+ logging.error('stderr: %r', p.stderr)
+ return False
+
+ logging.info('Command succeeded')
+ return True
+
+
+def setup_logging(log_filename):
+ logging.basicConfig(
+ filename=log_filename,
+ level=logging.DEBUG,
+ format='%(asctime)s %(levelname)s %(message)s')
+ logging.info('API server starting')
+
+
def get_key_from_file(filename):
with open(filename) as f:
key_text = f.read()
return Crypto.PublicKey.RSA.importKey(key_text)
-
+
+def get_token_from_file(filename):
+ with open(filename) as f:
+ return f.read().strip()
+
+
def main():
- logging.basicConfig(
- filename='api.log',
- level=logging.DEBUG,
- format='%(levelname)s %(message)s')
+ args = sys.argv[1:]
+ cmd = args.pop(0)
+ log_filename = args.pop(0)
+ pubkey_filename = args.pop(0)
+
+ setup_logging(log_filename)
- filename = sys.argv[1]
- key = get_key_from_file(filename)
+ if cmd == 'controller':
+ api = Controller()
+ elif cmd == 'vcsworker':
+ gitlab_token_filename = args.pop(0)
+ gitlab_token = get_token_from_file(gitlab_token_filename)
+ api = VCSWorker(gitlab_token)
+ else:
+ sys.exit('Unknown command %s' % cmd)
app = bottle.Bottle()
- api = Controller(app, key)
- app.run(host='localhost', port=2222)
+ pubkey = get_key_from_file(pubkey_filename)
+ api.setup(app, pubkey)
+ app.run(host=HOST, port=PORT)
+
main()
diff --git a/vcsworker.py b/vcsworker.py
deleted file mode 100755
index 5983268..0000000
--- a/vcsworker.py
+++ /dev/null
@@ -1,312 +0,0 @@
-#!/usr/bin/env python3
-#
-# This implements the various APIs for the various CI components we
-# need. Very simplistic and prototype-y. All in one file for
-# simplicity - production version will need to be done more carefully.
-#
-# To use: run this program with one of the following command lines:
-#
-# ./this controller this.log key.pub
-# ./this vcsworker this.log key.pub gitlab.token
-#
-# where
-#
-# - this.log is the log file to use
-# - key.pub is a public key in ssh format for checking incoming access tokens
-# - gitlab.token is the name of a file containing a GitLab access token
-#
-# NOTE: The GitLab instance and other configuration details are
-# hardcoded in this version. This will be fixed, as will the use of
-# the 418 HTTP status code as a generic error code.
-#
-# NOTE: You should run this behind haproxy or a similar TLS provider,
-# which forwards requests to localhost.
-
-
-import logging
-import os
-import shutil
-import subprocess
-import sys
-import tempfile
-import urllib.parse
-
-import Crypto.PublicKey.RSA
-import bottle
-import jwt
-
-
-HOST = 'localhost'
-PORT = 2222
-
-
-class TokenParser:
-
- '''Parse an incoming access token (signed JWT)'''
-
- # Note that if we need to, for performance, we can cache the parse
- # results here. But there's no point in doing that unless it
- # becomes necessary.
-
- def __init__(self, pubkey):
- self._pubkey = pubkey
-
- def parse_token(self, token_text):
- return jwt.decode(
- token_text,
- key=self._pubkey.exportKey('OpenSSH'),
- audience=None,
- options={'verify_aud': False})
-
-
-class AccessChecker:
-
- '''Given request headers and required scopes, is a request allowed?'''
-
- def __init__(self, pubkey):
- self._parser = TokenParser(pubkey)
-
- def access_is_allowed(self, headers, required_scopes):
- token = self._get_token(headers)
- logging.debug('Access token %r', token)
-
- if token is None and len(required_scopes) != 0:
- logging.error('No valid access token')
- return False
-
- if token:
- scopes = token.get('scope', '').split()
- missing = set(required_scopes).difference(scopes)
- if missing:
- logging.error(
- 'Required scopes that are missing from token: %r', missing)
- return False
-
- return True
-
- def _get_token(self, headers):
- '''Parse an access token or return None if it's bad'''
- token_text = self._get_token_text(headers)
- if token_text is None:
- return None
- return self._parser.parse_token(token_text)
-
- def _get_token_text(self, headers):
- '''Extract access token from request headers or None if not there'''
- v = headers.get('Authorization', '')
- words = v.split()
- if len(words) == 2:
- keyword, token_text = words
- if keyword.lower() == 'bearer':
- return token_text
-
-
-class API:
-
- '''Base class for simple HTTP APIs
-
- Override the get_routes method to use this. Call setup() method to
- set up routes and such, before actually running.
-
- '''
-
- def __init__(self):
- self._checker = None
-
- def setup(self, app, token_pubkey):
- self._checker = AccessChecker(token_pubkey)
- self._add_routes(app, self.get_routes())
-
- def get_routes(self):
- raise NotImplementedError()
-
- def _add_routes(self, app, routes):
- for route in routes:
- func = route.pop('func')
- scopes = route.pop('scopes')
- assert isinstance(scopes, list)
- callback = lambda **kwargs: self.check(func, scopes, kwargs)
- route = dict(route)
- route['callback'] = callback
- app.route(**route)
-
- def check(self, func, required_scopes, kwargs):
- '''Call a callback function, if it's OK to do so'''
- r = bottle.request
- logging.debug('New request, checking access: %s %s', r.method, r.path)
-
- if self._checker.access_is_allowed(r.headers, required_scopes):
- logging.info('Access is allowed: %s %s', r.method, r.path)
- ret = func(**kwargs)
- logging.info('Result: %r', ret)
- return ret
-
- logging.error('Request denied %s %s', r.method, r.path)
- return bottle.HTTPError(400)
-
-
-class Controller(API):
-
- '''A dummy controller API'''
-
- def get_routes(self):
- return [
- {
- 'method': 'GET',
- 'path': '/status',
- 'func': self._status,
- 'scopes': ['status'],
- },
- {
- 'method': 'GET',
- 'path': '/hello/<name>',
- 'func': self._hello,
- 'scopes': ['hello'],
- },
- ]
-
- def _status(self):
- return {
- 'queue': [],
- 'running': [],
- 'finished': [],
- }
-
- def _hello(self, name=None):
- return 'hello {}\n'.format(name)
-
-
-class VCSWorker(API):
-
- '''A VCSWorker API'''
-
- MAX_CLONE_TIME = 1
- MAX_REMOVE_TIME = 60
- MAX_PUSH_TIME = 60
- GITLAB_DOMAIN = 'wmf-gitlab3.vm.liw.fi'
- GITLAB_PROJECT = 'liw'
-
- def __init__(self, gitlab_token):
- self._token = gitlab_token
- self._tmpdir = tempfile.mkdtemp()
- logging.info('Workspace: %s', self._tmpdir)
-
- def get_routes(self):
- return [
- {
- 'method': 'POST',
- 'path': '/updaterepo',
- 'func': self._update_repo,
- 'scopes': ['update-repo'],
- },
- ]
-
- def _update_repo(self):
- spec = bottle.request.json
- logging.info('Updating repository: %r', spec)
-
- url = spec['git']
- ref = spec['ref']
- name = spec['gitlab']
- dirname = os.path.join(self._tmpdir, name)
-
- if self._clone(url, ref, dirname):
- ok = self._remove(
- self._token, self.GITLAB_DOMAIN, self.GITLAB_PROJECT, name)
- if ok:
- ok = self._push(
- dirname, self.GITLAB_DOMAIN, self.GITLAB_PROJECT, ref,
- name)
- if ok:
- return 'Repository copied successfully'
-
- logging.error('Something went wrong when copying repository')
- return bottle.HTTPError(418)
-
- def _clone(self, url, ref, dirname):
- if os.path.exists(dirname):
- logging.debug('Removing %s', dirname)
- shutil.rmtree(dirname)
- argv = ['git', 'clone', '-q', '-b', ref, url, dirname]
- return runcmd('.', argv, self.MAX_CLONE_TIME)
-
- def _remove(self, token, gitlab_domain, gitlab_project, name):
- snippet = urllib.parse.quote('%s/%s' % (gitlab_project, name), safe='')
- url = 'https://%s/api/v4/projects/%s' % (gitlab_domain, snippet)
- argv = ['curl', '-HPRIVATE-TOKEN: %s' % token, '-X', 'DELETE', url]
- return runcmd('.', argv, self.MAX_REMOVE_TIME)
-
- def _push(self, dirname, gitlab_domain, gitlab_project, ref, name):
- logging.info('Pushing %s to %s as %s', dirname, gitlab_domain, name)
- url = 'ssh://git@%s/%s/%s.git' % (gitlab_domain, gitlab_project, name)
- argv = ['git', 'push', url, '%s:master' % ref]
- return runcmd(dirname, argv, self.MAX_PUSH_TIME)
-
-
-def runcmd(cwd, argv, timeout):
- logging.info('Running command: %r', argv)
- try:
- p = subprocess.run(
- argv, cwd=cwd, timeout=timeout, stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
- except subprocess.TimeoutExpired:
- logging.error('Command took too long (timeout %r)', timeout)
- return False
- except Exception as e:
- logging.error('Error while running command: %s', str(e))
- return False
-
- if p.returncode != 0:
- logging.error('Command failed: %r', argv)
- logging.error('exit code: %d', p.returncode)
- logging.error('stdout: %r', p.stdout)
- logging.error('stderr: %r', p.stderr)
- return False
-
- logging.info('Command succeeded')
- return True
-
-
-def setup_logging(log_filename):
- logging.basicConfig(
- filename=log_filename,
- level=logging.DEBUG,
- format='%(asctime)s %(levelname)s %(message)s')
- logging.info('API server starting')
-
-
-def get_key_from_file(filename):
- with open(filename) as f:
- key_text = f.read()
- return Crypto.PublicKey.RSA.importKey(key_text)
-
-
-def get_token_from_file(filename):
- with open(filename) as f:
- return f.read().strip()
-
-
-def main():
- args = sys.argv[1:]
- cmd = args.pop(0)
- log_filename = args.pop(0)
- pubkey_filename = args.pop(0)
-
- setup_logging(log_filename)
-
- if cmd == 'controller':
- api = Controller()
- elif cmd == 'vcsworker':
- gitlab_token_filename = args.pop(0)
- gitlab_token = get_token_from_file(gitlab_token_filename)
- api = VCSWorker(gitlab_token)
- else:
- sys.exit('Unknown command %s' % cmd)
-
- app = bottle.Bottle()
- pubkey = get_key_from_file(pubkey_filename)
- api.setup(app, pubkey)
- app.run(host=HOST, port=PORT)
-
-
-main()