From f685d78774da6c8d9c376fcf3478860ade87aa3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20B=C3=BChler?= Date: Wed, 19 Jun 2024 13:18:49 +0200 Subject: [PATCH] initial --- README.md | 58 +++++++ example.yaml | 14 ++ git-build-triggers.py | 358 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 430 insertions(+) create mode 100644 README.md create mode 100644 example.yaml create mode 100755 git-build-triggers.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..59b3863 --- /dev/null +++ b/README.md @@ -0,0 +1,58 @@ +# git-build-triggers + +Provides webhooks to deploy from git repositories. + +* Setup `git-build-triggers.py` with config, providing an HTTP backend +* Put some reverse proxy in front (apache, nginx) +* Configure webhook URL with bearer token in your favorite git hosting to trigger when a certain branch is pushed + +Only one build per configured repository will run at a time; when more builds are triggered while a build is already running, it will only trigger a single further build after the current one. + +A rebuild will first update the repository: + +``` +git clean --force -d -x +git remote update origin +git reset --hard origin/$(git rev-parse --abbrev-ref HEAD) +``` + +If the top commit (`git rev-parse HEAD`) is the same as for the last build it won't trigger the build script. + +Otherwise it will run the build script and store its output. + +For consistency `git clean --force -d -x` is run after the build again (a "recheck" would do that too without necessarily building anything, so this must not break your build results). + +You can visit the webhook URL in a browser to get the output of the last finished run (this also triggers a check for new commits); it won't wait for the current build to finish. + +`git-build-triggers.py` uses `http.server.HTTPServer` (https://docs.python.org/3/library/http.server.html), which isn't recommended for production, but should be good enough for this. 
But you might want to restrict access to somewhat trusted IP ranges. + +## Install + +Dependencies: + +* python3 (probably >= 3.11) +* pyyaml (https://github.com/yaml/pyyaml) +* trio (https://github.com/python-trio/trio) +* git + +Put the script `git-build-triggers.py` where you want (e.g. `~/bin` or `/usr/local/bin`). + +## Config + +See `example.yaml` in this repo for basic structure. + +The configured repositories correspond to (different!) git checkouts on the local disk. +The repository name is used with the `base-path` to build the full URL. +A `base-path` of "/trigger-it", a repository name of "example" and `port: 8000` would provide `http://127.0.0.1:8000/trigger-it/example` as webhook. + +Tokens must be at least 16 characters long. + +A reverse proxy should be configured to add https. + +## mypy linting + +``` +virtualenv --system-site-packages venv +./venv/bin/pip install trio-typing # and perhaps other dependencies +./venv/bin/python3 -m mypy git-build-triggers.py +``` diff --git a/example.yaml b/example.yaml new file mode 100644 index 0000000..37d9c0e --- /dev/null +++ b/example.yaml @@ -0,0 +1,14 @@ +--- +address: "127.0.0.1" # defaults to 127.0.0.1 +port: 8000 +parallel-jobs: 1 # defaults to 1 +repositories: # list of repositories + test: # appended to base-path to build webhook URL + workdir: /nonlocal/foo # path on disk with repository + # token: accepted as `Authorization: Bearer $token` or as password with any username in a + # `Authorization: Basic ...` header for use in browsers + token: xxE8E3e2fK7FJhVhH6H7XV9SupfXBsJH88FjN3vQ7ggEvS4nPuJ7jBnVB3aeV8PvX3Us5mu95q4EJGWTXd3mr5rDts8txx + command: echo hello world # build command, split with `shlex.split` into arguments +base-path: /trigger-foo # defaults to / +# optional admin-token: accepted for all repositories as if it'd be the token of the repository +admin-token: E8E3e2fK7FJhVhH6H7XV9SupfXBsJH88FjN3vQ7ggEvS4nPuJ7jBnVB3aeV8PvX3Us5mu95q4EJGWTXd3mr5rDts8t diff --git a/git-build-triggers.py 
b/git-build-triggers.py
new file mode 100755
index 0000000..5257966
--- /dev/null
+++ b/git-build-triggers.py
@@ -0,0 +1,493 @@
+#!/usr/bin/env python3
+
+"""Webhook server that updates and rebuilds local git checkouts on request.
+
+See README.md for setup and example.yaml for the config file format.
+"""
+
+from __future__ import annotations
+
+import base64
+import dataclasses
+import fcntl
+import hmac
+import http
+import http.server
+import logging
+import math
+import os
+import os.path
+import shutil
+import shlex
+import signal
+import subprocess
+import sys
+import traceback
+import urllib.parse
+
+import trio
+import yaml
+
+
+_log = logging.getLogger('git-build-triggers')
+logging.basicConfig(
+    format='%(asctime)s: %(levelname)s: %(message)s',
+    level=logging.INFO,
+)
+
+# minimum length accepted for repository tokens and the admin token
+MIN_TOKEN_LENGTH = 16
+
+
+class ConfigError(ValueError):
+    """Raised by `load_config` when the config file content is invalid."""
+
+
+class UnixFileLock:
+    """Non-blocking exclusive flock() lock on the file at `path`.
+
+    Not re-entrant: `acquire` raises while this object already holds the lock.
+    """
+
+    __slots__ = ('_path', '_fd')
+
+    def __init__(self, path: str) -> None:
+        self._path = path
+        self._fd: int | None = None
+
+    def acquire(self) -> bool:
+        """Try to take the lock; return False if it could not be taken."""
+        if self._fd is not None:
+            raise RuntimeError(f"UnixFileLock({self._path!r}) already locked; re-entry not allowed")
+
+        # explicit mode: the os.open() default of 0o777 would create an executable lock file
+        fd = os.open(self._path, os.O_RDWR | os.O_CREAT | os.O_TRUNC, 0o666)
+        try:
+            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+        except OSError:
+            os.close(fd)
+            return False
+        self._fd = fd
+        return True
+
+    def release(self) -> None:
+        """Release the lock; does nothing if it is not held."""
+        fd, self._fd = self._fd, None
+        if fd is None:
+            return
+        try:
+            fcntl.flock(fd, fcntl.LOCK_UN)
+        finally:
+            # close the descriptor even if unlocking failed, so it cannot leak
+            os.close(fd)
+
+
+@dataclasses.dataclass(slots=True, kw_only=True)
+class Job:
+    """A scheduled update of `repository`; carries the held build `lock`."""
+
+    repository: Repository
+    lock: UnixFileLock
+
+
+@dataclasses.dataclass(slots=True)
+class JobQueue:
+    """Runs queued jobs in the trio event loop, at most `parallel` at a time."""
+
+    parallel: int
+    _queue: trio.MemorySendChannel[Job]
+    _rx: trio.MemoryReceiveChannel[Job]
+
+    def __init__(self, *, parallel: int = 1) -> None:
+        self.parallel = parallel
+        self._queue, self._rx = trio.open_memory_channel(math.inf)
+
+    async def run(self) -> None:
+        """Process jobs until `stop` was called and all queued jobs are done."""
+        limit = trio.CapacityLimiter(self.parallel)
+
+        async def work(job: Job) -> None:
+            try:
+                while True:
+                    async with limit:
+                        await job.repository.update()
+                    try:
+                        os.remove(job.repository._path_rebuild)
+                    except FileNotFoundError:
+                        return
+                    # A rebuild was requested while we were busy: build again
+                    # while still holding the lock.  Sending the job back to
+                    # the queue would release the lock (see `finally`) before
+                    # the rebuild runs, so a second job for the same workdir
+                    # could start next to it.
+            finally:
+                job.lock.release()
+
+        async with trio.open_nursery() as nursery:
+            job: Job
+            async for job in self._rx:
+                nursery.start_soon(work, job)
+
+    def queue(self, job: Job) -> None:
+        """Hand `job` to the event loop; must be called from a trio worker thread.
+
+        If queueing fails the job's lock is released before re-raising, so the
+        repository does not stay locked without a job.
+        """
+        try:
+            # the channel is unbounded, so send_nowait() never has to wait
+            trio.from_thread.run_sync(self._queue.send_nowait, job)
+        except BaseException:
+            job.lock.release()
+            raise
+
+    def stop(self) -> None:
+        """Stop accepting jobs; `run` returns once the queued jobs are done."""
+        self._queue.close()
+
+
+JOB_QUEUE: JobQueue
+
+
+@dataclasses.dataclass(slots=True, kw_only=True)
+class Repository:
+    """A git checkout in `workdir` that is built by running `command`."""
+
+    name: str
+    workdir: str
+    token: str = dataclasses.field(repr=False)
+    command: str
+    config: Config = dataclasses.field(repr=False)
+    # state files, all located in the checkout's .git directory:
+    _path_gitdir: str = dataclasses.field(init=False)
+    _path_lockfile: str = dataclasses.field(init=False)
+    _path_lastbuild: str = dataclasses.field(init=False)
+    _path_rebuild: str = dataclasses.field(init=False)
+
+    def __post_init__(self) -> None:
+        self._path_gitdir = gitdir = os.path.join(self.workdir, ".git")
+        self._path_lockfile = os.path.join(gitdir, "build.lock")
+        self._path_lastbuild = os.path.join(gitdir, "build.status")
+        self._path_rebuild = os.path.join(gitdir, "rebuild_flag")
+
+    def _writestatus(self, commit: str, message: str | bytes) -> None:
+        """Store `commit` (first line) and `message` as the last build status.
+
+        Failures to write are logged, not raised.
+        """
+        if isinstance(message, str):
+            message = message.encode()
+        tmpname = self._path_lastbuild + ".tmp"
+        try:
+            # write to a temporary file and rename, so readers never see a partial status
+            with open(tmpname, "wb") as lastbuild:
+                lastbuild.write(commit.encode() + b"\n" + message)
+            os.rename(tmpname, self._path_lastbuild)
+        except OSError as e:
+            _log.error(f"{self.name}: Failed to update {self._path_lastbuild}: {e}")
+
+    async def _run_git(self, cmd: list[str]) -> bytes:
+        """Run git with arguments `cmd` in the workdir and return its stdout.
+
+        Raises `subprocess.CalledProcessError` if git exits with an error.
+        """
+        result: subprocess.CompletedProcess[bytes] = await trio.run_process(
+            [self.config.git_path] + cmd,
+            cwd=self.workdir,
+            capture_stdout=True,
+        )
+        return result.stdout
+
+    async def _update(self, last_commit: str) -> None:
+        """Reset the checkout to its upstream branch; build if HEAD != `last_commit`."""
+        _log.info(f"{self.name}: Updating git")
+        # remove all ignored/untracked files and directories:
+        await self._run_git(["clean", "--force", "-d", "-x"])
+        await self._run_git(["remote", "update", "origin"])
+        branch_name = (await self._run_git(["rev-parse", "--abbrev-ref", "HEAD"])).decode().strip()
+        await self._run_git(["reset", "--hard", f"origin/{branch_name}"])
+        commit_id = (await self._run_git(["rev-parse", "HEAD"])).decode().strip()
+        if last_commit == commit_id:
+            _log.info(f"{self.name}: No changes (still {last_commit})")
+            return  # no changes
+
+        # check=False: a failing build is a result to record, not an error
+        build: subprocess.CompletedProcess[bytes] = await trio.run_process(
+            shlex.split(self.command),
+            cwd=self.workdir,
+            capture_stdout=True,
+            stderr=subprocess.STDOUT,
+            check=False,
+        )
+        self._writestatus(commit_id, f"Exit code: {build.returncode}\n".encode() + build.stdout)
+        # again: remove all ignored/untracked files and directories:
+        await self._run_git(["clean", "--force", "-d", "-x"])
+        _log.info(f"{self.name}: Built {commit_id} with exit status {build.returncode}")
+
+    async def update(self) -> None:
+        """Update the checkout and build if needed; errors are logged and stored.
+
+        Should only be called while holding the lock.
+        """
+        try:
+            with open(self._path_lastbuild, "rb") as lastbuild:
+                last_commit = lastbuild.readline().strip().decode()
+        except FileNotFoundError:
+            last_commit = "none"
+        try:
+            await self._update(last_commit)
+        except Exception as e:
+            _log.error(f"{self.name}: Failed {last_commit}: {e}")
+            self._writestatus(last_commit, str(e))
+
+    def check(self) -> tuple[int, bytes | str]:
+        """Schedule an update and return `(status, output of the last build)`.
+
+        Does not wait for the scheduled update to finish.
+        """
+        if not os.path.isdir(self._path_gitdir):
+            return (500, "Missing .git directory")
+
+        lock = UnixFileLock(self._path_lockfile)
+        if lock.acquire():
+            JOB_QUEUE.queue(Job(repository=self, lock=lock))
+        else:
+            # tell current job to restart when finished:
+            with open(self._path_rebuild, "w"):
+                pass
+
+            # if current job finished before seeing our trigger,
+            # remove the trigger and just run it ourselves
+            if lock.acquire():
+                try:
+                    os.remove(self._path_rebuild)
+                except OSError as e:
+                    # we won't queue a job, so give the lock back - otherwise
+                    # it stays held and no build can ever start again
+                    lock.release()
+                    if not isinstance(e, FileNotFoundError):
+                        raise
+                    # FileNotFoundError: something saw the trigger and
+                    # handled it - no need to build again
+                else:
+                    JOB_QUEUE.queue(Job(repository=self, lock=lock))
+
+        try:
+            # read raw bytes: build output is not guaranteed to be valid text
+            with open(self._path_lastbuild, "rb") as f:
+                return (200, f.read())
+        except FileNotFoundError:
+            return (200, "never built yet")
+
+
+@dataclasses.dataclass(slots=True, kw_only=True)
+class Config:
+    """Server settings plus the configured repositories (by name)."""
+
+    repositories: dict[str, Repository]
+    address: str
+    port: int
+    basepath: str
+    admin_token: str
+    git_path: str
+    parallel_jobs: int
+
+    def handle(self, req_path: str, auth: str) -> tuple[int, bytes | str]:
+        """Authenticate a request and trigger the repository it addresses.
+
+        `req_path` is the request target, `auth` the `Authorization` header
+        value ("" if missing).  Returns the HTTP status and response body.
+        """
+        token: bytes
+        if auth.startswith("Basic "):
+            try:
+                creds = base64.decodebytes(auth.removeprefix("Basic ").strip().encode())
+            except ValueError:
+                # malformed base64 (binascii.Error is a ValueError) is a
+                # client error, not a server error
+                return (401, "Invalid authentication")
+            # any username is accepted; the password is the token
+            _user, sep, token = creds.partition(b":")
+            if not sep:
+                return (401, "Invalid authentication")
+        elif auth.startswith("Bearer "):
+            token = auth.removeprefix("Bearer ").strip().encode()
+        else:
+            return (401, "Missing authentication")
+
+        path = urllib.parse.urlparse(req_path).path.strip("/")
+        base = self.basepath.strip("/")
+        if not base:
+            name = path
+        elif path.startswith(base + "/"):
+            name = path[len(base):].strip("/")
+        else:
+            # not below the base path (a plain prefix match would also accept
+            # e.g. "/basefoo" for base path "/base")
+            return (404, "Not found")
+        repo = self.repositories.get(name)
+        if repo is None:
+            return (404, "Not found")
+
+        # compare bytes: compare_digest() raises TypeError for non-ASCII str
+        authorized = hmac.compare_digest(token, repo.token.encode())
+        if not authorized and self.admin_token:
+            authorized = hmac.compare_digest(token, self.admin_token.encode())
+        if not authorized:
+            return (401, "Invalid token")
+        return repo.check()
+
+
+def _config_require(condition: object, message: str) -> None:
+    """Raise `ConfigError(message)` unless `condition` is true."""
+    if not condition:
+        raise ConfigError(message)
+
+
+def _valid_token(token: object) -> bool:
+    """Return whether `token` is a string of at least `MIN_TOKEN_LENGTH` characters."""
+    return isinstance(token, str) and len(token) >= MIN_TOKEN_LENGTH
+
+
+def load_config(path: str) -> Config:
+    """Load and validate the YAML config file at `path`.
+
+    Raises `ConfigError` for invalid content and `RuntimeError` if no git
+    binary is found.  Validation raises explicitly instead of using `assert`,
+    which is skipped when python runs with `-O`.
+    """
+    with open(path) as f:
+        data = yaml.safe_load(f)
+
+    _config_require(isinstance(data, dict), "config must be a mapping")
+    data_repositories = data.pop("repositories", None)
+    _config_require(isinstance(data_repositories, dict), "'repositories' must be a mapping")
+
+    address = data.pop("address", "127.0.0.1")
+    _config_require(isinstance(address, str), "'address' must be a string")
+
+    port = data.pop("port", None)
+    _config_require(isinstance(port, int), "'port' is required and must be an integer")
+
+    parallel_jobs = data.pop("parallel-jobs", 1)
+    _config_require(isinstance(parallel_jobs, int), "'parallel-jobs' must be an integer")
+
+    basepath = data.pop("base-path", "/")
+    _config_require(isinstance(basepath, str), "'base-path' must be a string")
+    _config_require(not basepath or basepath.startswith("/"), "'base-path' must start with '/'")
+
+    admin_token = data.pop("admin-token", "") or ""
+    _config_require(
+        not admin_token or _valid_token(admin_token),
+        f"'admin-token' must be a string of at least {MIN_TOKEN_LENGTH} characters",
+    )
+
+    git_path = shutil.which("git")
+    if not git_path:
+        raise RuntimeError("Missing git binary")
+
+    config = Config(
+        repositories={},
+        address=address,
+        port=port,
+        basepath=basepath,
+        git_path=git_path,
+        admin_token=admin_token,
+        parallel_jobs=parallel_jobs,
+    )
+    for repo_name, repo_data in data_repositories.items():
+        where = f"repository {repo_name!r}"
+        _config_require(isinstance(repo_name, str), f"{where}: name must be a string")
+        _config_require(isinstance(repo_data, dict), f"{where}: must be a mapping")
+        workdir = repo_data.pop("workdir", None)
+        _config_require(isinstance(workdir, str), f"{where}: 'workdir' is required and must be a string")
+        token = repo_data.pop("token", None)
+        _config_require(
+            _valid_token(token),
+            f"{where}: 'token' must be a string of at least {MIN_TOKEN_LENGTH} characters",
+        )
+        command = repo_data.pop("command", None)
+        _config_require(isinstance(command, str) and command, f"{where}: 'command' must be a non-empty string")
+        config.repositories[repo_name] = Repository(
+            name=repo_name, config=config, workdir=workdir, token=token, command=command,
+        )
+
+    return config
+
+
+CONFIG: Config
+
+
+class RequestHandler(http.server.BaseHTTPRequestHandler):
+    """Answers GET and POST requests with the result of `CONFIG.handle`."""
+
+    server_version = "BuildTrigger"
+
+    def do_POST(self) -> None:
+        status: int
+        body: bytes | str
+
+        auth = self.headers.get("Authorization", "")
+        try:
+            status, body = CONFIG.handle(self.path, auth)
+        except Exception as e:
+            status = 500
+            body = str(e)
+            traceback.print_exception(e)
+        raw_body = body.encode() if isinstance(body, str) else body
+        self.send_response(status)
+        self.send_header("Cache-Control", "no-store")
+        self.send_header("Content-Type", "text/plain; charset=utf-8")
+        self.send_header("Content-Length", str(len(raw_body)))
+        if status == 401:
+            # makes browsers ask for credentials
+            self.send_header("WWW-Authenticate", "Basic realm=\"trigger\"")
+        self.end_headers()
+        self.wfile.write(raw_body)
+
+    do_GET = do_POST
+
+
+def run() -> None:
+    """Serve HTTP requests and run queued jobs until SIGINT is received."""
+    server = http.server.HTTPServer((CONFIG.address, CONFIG.port), RequestHandler)
+
+    async def wait_for_shutdown() -> None:
+        with trio.open_signal_receiver(signal.SIGINT) as signals:
+            async for _signum in signals:
+                _log.info("Shutdown")
+                # server.shutdown() blocks until serve_forever() has returned.
+                # Run it in a thread: a request in progress may need the event
+                # loop (JobQueue.queue) to finish, so blocking the loop here
+                # (as a plain signal handler would) can deadlock.
+                await trio.to_thread.run_sync(server.shutdown)
+                JOB_QUEUE.stop()
+                return
+
+    async def go() -> None:
+        async with trio.open_nursery() as nursery:
+            nursery.start_soon(trio.to_thread.run_sync, server.serve_forever)
+            nursery.start_soon(JOB_QUEUE.run)
+            nursery.start_soon(wait_for_shutdown)
+
+    try:
+        trio.run(go)
+    finally:
+        server.server_close()
+
+
+def main() -> None:
+    """Parse the command line, load the config and run the server."""
+    global CONFIG, JOB_QUEUE
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--config', required=True, help="Path to YAML config file")
+    args = parser.parse_args()
+    CONFIG = load_config(args.config)
+    JOB_QUEUE = JobQueue(parallel=CONFIG.parallel_jobs)
+
+    run()
+
+
+if __name__ == "__main__":
+    main()