summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLars Wirzenius <lwirzenius@wikimedia.org>2019-06-30 19:27:34 +0300
committerLars Wirzenius <lwirzenius@wikimedia.org>2019-06-30 19:27:34 +0300
commitc639e5b36d688953393d7e801204193e11515cde (patch)
tree1e3f708a8d19206759dafd5f38d089bfb37f23ad
parent008943cd6dd10ef1d5dd83a8f5d724658f6a00ad (diff)
downloadwmf-ci-arch-c639e5b36d688953393d7e801204193e11515cde.tar.gz
Add: document CI-around-GitLab, first iteration
-rw-r--r--Makefile22
-rw-r--r--buildseq.uml42
-rw-r--r--component.dot51
-rw-r--r--gitlab.md157
4 files changed, 269 insertions, 3 deletions
diff --git a/Makefile b/Makefile
index a35b869..3bb09b4 100644
--- a/Makefile
+++ b/Makefile
@@ -1,17 +1,25 @@
version = $(shell git describe --dirty --tags)
-.SUFFIXES: .html .mdwn .pdf .dot .svg
+.SUFFIXES: .html .mdwn .md .pdf .dot .svg .uml
-all: ci-arch.html ci-arch.pdf
+all: ci-arch.html ci-arch.pdf gitlab.pdf
ci-arch.pdf ci-arch.html: ci-arch.mdwn ecosystem.svg pipeline.svg arch.svg
+gitlab.pdf: gitlab.md component.svg buildseq.svg
+
.dot.svg:
dot -Tsvg -o$@ $<
+.uml.svg:
+ plantuml -tsvg $<
+
.mdwn.html:
pandoc --standalone --toc -o $@ $<
+.md.html:
+ pandoc --standalone --toc -o $@ $<
+
.mdwn.pdf:
pandoc --toc -f markdown+implicit_figures -Vdocumentclass:report \
"-Vdate:$(version)" \
@@ -22,4 +30,12 @@ ci-arch.pdf ci-arch.html: ci-arch.mdwn ecosystem.svg pipeline.svg arch.svg
-Vgeometry:top=2cm,bottom=2.5cm,left=5cm,right=2cm \
-o $@ $<
-
+.md.pdf:
+ pandoc --toc -f markdown+implicit_figures \
+ "-Vdate:$(version)" \
+ -Vfontsize:12pt \
+ -Vmainfont:FreeSerif \
+ -Vsansfont:FreeSans \
+ -Vmonofont:FreeMonoBold \
+ -Vgeometry:top=2cm,bottom=2.5cm,left=5cm,right=2cm \
+ -o $@ $<
diff --git a/buildseq.uml b/buildseq.uml
new file mode 100644
index 0000000..c6d905d
--- /dev/null
+++ b/buildseq.uml
@@ -0,0 +1,42 @@
+@startuml
+
+participant gerrit as "Gerrit\n(git server)"
+participant controller as "Controller"
+participant vcsworker as "VCS\nworker"
+participant gitlab as "GitLab"
+participant runner as "Gitlab\nRunner"
+participant artifacts as "Artifact\nstore"
+participant deployer as "Deployment\nworker"
+participant env as "Test\nenvironment"
+
+gerrit -> controller : notify of change
+activate controller
+
+controller -> vcsworker : copy repo to gitlab
+activate vcsworker
+vcsworker -> gerrit : git clone
+gerrit -> vcsworker
+vcsworker -> gitlab : git push
+activate gitlab
+vcsworker -> controller
+deactivate vcsworker
+
+gitlab -> runner : build .gitlab-ci.yml
+activate runner
+runner -> artifacts : upload artifacts
+runner -> gitlab : build finished
+deactivate runner
+gitlab -> controller : webhook: build finished
+deactivate gitlab
+
+controller -> deployer : deploy artifacts
+activate deployer
+deployer -> artifacts : request artifacts
+artifacts -> deployer
+deployer -> env : copy artifacts to test env
+deployer -> controller : deployment done
+deactivate deployer
+
+controller -> gerrit : notify build result
+deactivate controller
+@enduml
diff --git a/component.dot b/component.dot
new file mode 100644
index 0000000..ef09706
--- /dev/null
+++ b/component.dot
@@ -0,0 +1,51 @@
+digraph components {
+ gerrit [label="gerrit"];
+ gerrit [shape="folder"];
+ gerrit [fillcolor="white" style="filled"];
+
+ controller [label="CI controller"];
+ controller [shape="box"];
+ controller [fillcolor="pink" style="filled"];
+
+ vcs_worker [label="VCS worker"];
+ vcs_worker [shape="ellipse"];
+ vcs_worker [fillcolor="grey" style="filled"];
+
+ gitlab [label="GitLab"];
+ gitlab [shape="box"];
+ gitlab [fillcolor="pink" style="filled"];
+
+ runner [label="GitLab runner"];
+ runner [shape="ellipse"];
+ runner [fillcolor="grey" style="filled"];
+
+ deployment_worker [label="deployment worker"];
+ deployment_worker [shape="ellipse"];
+ deployment_worker [fillcolor="grey" style="filled"];
+
+ artifacts [label="artifact store"];
+ artifacts [shape="cylinder"];
+ artifacts [fillcolor="pink" style="filled"];
+
+ env [label="test\nenvironment"];
+ env [shape="octagon"];
+ env [fillcolor="white" style="filled"];
+
+ gerrit -> controller [label="1."];
+ controller -> vcs_worker [label="2."];
+ gerrit -> vcs_worker [label="3."];
+ vcs_worker -> gitlab [label="4."];
+ gitlab -> runner [label="5."];
+ runner -> artifacts [label="6."];
+ gitlab -> controller [label="7."];
+
+ controller -> deployment_worker [label="8."];
+ deployment_worker -> artifacts [label="9."];
+ deployment_worker -> env [label="10."];
+ deployment_worker -> controller [label="11."];
+
+ controller -> gitlab [label="12."];
+ gitlab -> runner [label="13."];
+ runner -> env [label="14."];
+ gitlab -> controller [label="15."];
+}
diff --git a/gitlab.md b/gitlab.md
new file mode 100644
index 0000000..c596b78
--- /dev/null
+++ b/gitlab.md
@@ -0,0 +1,157 @@
+---
+title: Implementing CI/CD around GitLab
+author: Lars Wirzenius
+documentclass: article
+...
+
+# Introduction
+
+![components](component.svg)\
+
+This is the plan for the **first iteration** of implementing a CI/CD
+system around GitLab for WMF. This iteration tries to do the least
+amount of work to prove that the planned architecture is workable.
+There are some small differences to the architecture in the planning
+document.
+
+The components are:
+
+* **Gerrit** (or any git server): this is the canonical location for
+ the source code. It emits *events* that the controller reacts to.
+ Also, the controller sends messages to it. This will be mocked in the
+ first iteration by using any dumb git server.
+* **controller**: this orchestrates builds and deployments; in the
+ first iteration, it won't be listening to Gerrit events, and will
+ have an HTTP API that will be used instead
+* **GitLab**: this is used only for its CI/CD functionality; a
+ secondary copy of the git repository is kept here, because GitLab
+ requires it
+* **Runner**: this is used by GitLab to run builds and tests; it
+ uploads built binaries to the artifact store; this corresponds to
+ "build worker" in the planning document
+* **artifact store**: this stores binaries or other build artifacts so
+ they persist when the Runner goes away, given the Runner is a Docker
+ container and has no persistency
+* **VCS worker**: this retrieves source code from Gerrit (or other git
+ server) and pushes it to GitLab; it's a separate system so it can be
+ given credentials to access non-public git repositories
+* **deployment worker**: this gets binaries from the artifact store
+ and deploys them to the test environment
+* **test environment**: this mocks a production-like environment for
+ running sites and services
+
+Differences from the planning document:
+
+* There's GitLab to run builds, rather than the controller commanding
+ build workers directly.
+* There's no log store. This isn't necessary for the first iteration.
+* There's only one environment, the test environment, and it won't be
+ running sites or services. Deployment is simulated by merely
+ publishing the build artifacts in the test environment.
+
+# New components
+
+There will need to be some new components. We'll keep them as simple
+as possible. Most will have a simple HTTP API. We'll use signed JWT
+access tokens, which will be generated statically and installed when
+the components are set up. (This is highly inadequate for production,
+but this is just the first iteration.)
+
+All HTTP APIs will be served over HTTPS using a TLS certificate from
+Let's Encrypt, for the first iteration.
+
+None of the components will aim to be fast or to serve many clients,
+in the first iteration. They'll be implemented using haproxy (TLS),
+bottle.py, and some custom Python code. All of this may change after
+the first iteration, but these tools are familiar to me so I can just
+use them, and don't have to learn stuff to get started.
+
+## Controller
+
+* Simple HTTP API
+* Endpoint: POST /cd, body specifies which repo and ref to build and
+ deploy; queues the build
+* Endpoint: GET /status, which lists what jobs (posts to /cd) are
+ queued, or running, or finished
+
+## VCS worker
+
+* Simple HTTP API
+* Endpoint: POST /repo, body specifies repo and ref to fetch; git
+ clones (or pulls) that, and pushes to GitLab repo with same name
+ (creates it if necessary), but using only the master branch; returns
+ info of how things went
+
+## Artifact store
+
+* Simple HTTP API
+* We can probably use the Ick artifact store for this, at least
+ initially
+* Endpoint: PUT /blobs/NAME, stores body as blob named NAME,
+ overwriting it if it existed already
+* Endpoint: GET /blobs/NAME, returns blob, with a generic content
+ type, or 404 if not found
+
+## Deployment worker
+
+* Simple HTTP API
+* Endpoint: POST /deploy, body specifies name in artifact store,
+ copies that to the test environment using SSH
+
+## Test environment
+
+* SSH server with HTTP server serving a directory
+* Deployment is mocked by copying blob from artifact store to the
+ directory being served by the HTTP server
+
+# How a build works
+
+![Build sequence](buildseq.svg)\
+
+
+The build has the following steps:
+
+1. Gerrit notifies the controller of a change in a git repo.
+
+2. The controller tells the VCS worker to copy the repo to GitLab, by
+ doing a "POST /repo" HTTP request.
+
+3. The VCS worker git clones the repo from Gerrit. It may do a git
+ pull instead to update an existing clone, but that's an
+ optimisation, which we'll implement when it's needed. The git
+ operation may require credentials (e.g., security embargoed
+ repositories), which the VCS worker has: they're installed when the
+ host is deployed. No other CI host has those credentials.
+
+4. The VCS worker pushes the repository to GitLab. This may again
+ require credentials. The push triggers GitLab CI to run the commit
+ stage build and test command specified in `.gitlab-ci.yml`.
+
+5. The VCS worker responds to the POST request from the controller
+ with results.
+
+6. GitLab tells the Runner host to run the build and test commands.
+ The runner does that.
+
+7. The Runner uploads any binaries it builds to the artifact store.
+
+8. The Runner tells GitLab it's finished.
+
+9. GitLab tells the controller via a webhook that a build has
+ finished.
+
+0. The controller tells the deployer to start a deployment, via a
+ "POST /deploy" HTTP API call.
+
+0. The deployer fetches the artifacts it has been told to deploy
+ from the artifact store.
+
+0. The deployer copies the artifacts to the test environment. (In a
+ future iteration this will be a more sophisticated deployment
+ process.)
+
+0. The deployer responds to the HTTP request from the controller with
+ the results.
+
+0. The controller notifies Gerrit of a build and deployment having
+ been finished. (Except not in the first iteration.)