2022-04-13 15:24:55 +02:00
|
|
|
module daemon
|
|
|
|
|
|
|
|
import git
|
|
|
|
import time
|
|
|
|
import log
|
2022-04-13 16:12:22 +02:00
|
|
|
import datatypes { MinHeap }
|
|
|
|
import cron.expression { CronExpression, parse_expression }
|
2022-04-30 10:40:29 +02:00
|
|
|
import math
|
2022-04-30 17:56:35 +02:00
|
|
|
import build
|
2022-04-30 18:38:24 +02:00
|
|
|
import docker
|
2022-04-13 15:24:55 +02:00
|
|
|
|
2022-05-01 09:14:33 +02:00
|
|
|
// How many seconds to wait before retrying to update API if failed
const api_update_retry_timeout = 5

// How many seconds to wait before retrying to rebuild image if failed
// NOTE(review): name has a typo ("timout" instead of "timeout"); it's kept
// as-is because other functions in this module reference it by this name.
const rebuild_base_image_retry_timout = 30
|
|
|
|
|
2022-04-13 15:24:55 +02:00
|
|
|
// ScheduledBuild couples a repo with the timestamp at which its next build
// should start. Instances are ordered by timestamp (see the overloaded `<`
// operator) inside the daemon's MinHeap queue.
struct ScheduledBuild {
pub:
	// Identifier the API uses for this repo (also used in log messages)
	repo_id string
	// Repo information as fetched from the API
	repo git.GitRepo
	// When this build is scheduled to start
	timestamp time.Time
}
|
|
|
|
|
2022-04-30 20:22:03 +02:00
|
|
|
// Overloaded operator for comparing ScheduledBuild objects; builds are
// ordered by their scheduled timestamp so the MinHeap pops the earliest
// build first.
fn (r1 ScheduledBuild) < (r2 ScheduledBuild) bool {
	return r1.timestamp < r2.timestamp
}
|
|
|
|
|
|
|
|
// Daemon holds the cron daemon's central mutable state. A single instance is
// created by init_daemon & then driven by run().
pub struct Daemon {
mut:
	// Address of the Vieter API server
	address string
	// API key used to authenticate against the server
	api_key string
	// Docker image that builder images are derived from
	base_image string
	// Builder images created so far; rebuild_base_image appends to this, and
	// clean_old_base_images removes all but the last (most recent) entry.
	builder_images []string
	// Fallback cron schedule for repos that don't define their own
	global_schedule CronExpression
	// How often (in minutes) to refresh the repo list from the API
	// (multiplied by 60 when computing api_update_timestamp)
	api_update_frequency int
	// How often (in minutes) to rebuild the builder image
	image_rebuild_frequency int
	// Repos currently loaded from API.
	repos_map map[string]git.GitRepo
	// At what point to update the list of repositories.
	api_update_timestamp time.Time
	// At what point to rebuild the builder image
	image_build_timestamp time.Time
	// Upcoming builds, ordered by scheduled timestamp (earliest on top)
	queue MinHeap<ScheduledBuild>
	// Which builds are currently running
	builds []ScheduledBuild
	// Atomic variables used to detect when a build has finished; length is the
	// same as builds
	atomics []u64
	// Shared logger; `shared` because builds run concurrently
	logger shared log.Log
}
|
|
|
|
|
2022-04-14 20:38:14 +02:00
|
|
|
// init_daemon initializes a new Daemon object. It renews the repositories &
// populates the build queue for the first time. Returns an error if the
// initial builder image can't be built, because the daemon can't run any
// build without it.
pub fn init_daemon(logger log.Log, address string, api_key string, base_image string, global_schedule CronExpression, max_concurrent_builds int, api_update_frequency int, image_rebuild_frequency int) ?Daemon {
	mut d := Daemon{
		address: address
		api_key: api_key
		base_image: base_image
		global_schedule: global_schedule
		api_update_frequency: api_update_frequency
		image_rebuild_frequency: image_rebuild_frequency
		// builds & atomics act as fixed-size slots: one entry per possible
		// concurrently running build.
		atomics: []u64{len: max_concurrent_builds}
		builds: []ScheduledBuild{len: max_concurrent_builds}
		logger: logger
	}

	// Initialize the repos & queue
	d.renew_repos()
	d.renew_queue()
	if !d.rebuild_base_image() {
		return error('The base image failed to build. The Vieter cron daemon cannot run without an initial builder image.')
	}

	return d
}
|
|
|
|
|
2022-04-14 20:38:14 +02:00
|
|
|
// run starts the actual daemon process. It runs builds when possible &
// periodically refreshes the list of repositories to ensure we stay in sync.
// This function loops forever & never returns.
pub fn (mut d Daemon) run() {
	for {
		finished_builds := d.clean_finished_builds()

		// Update the API's contents if needed & renew the queue
		if time.now() >= d.api_update_timestamp {
			d.renew_repos()
			d.renew_queue()
		}
		// The finished builds should only be rescheduled if the API contents
		// haven't been renewed; renew_queue already re-adds every repo.
		else {
			for sb in finished_builds {
				d.schedule_build(sb.repo_id, sb.repo)
			}
		}

		// TODO remove old builder images.
		// This issue is less trivial than it sounds, because a build could
		// still be running when the image has to be rebuilt. That would
		// prevent the image from being removed. Therefore, we will need to
		// keep track of a list or something & remove an image once we have
		// made sure it isn't being used anymore.
		if time.now() >= d.image_build_timestamp {
			d.rebuild_base_image()
			// In theory, executing this function here allows an old builder
			// image to exist for at most image_rebuild_frequency minutes.
			d.clean_old_base_images()
		}

		// Schedules new builds when possible
		d.start_new_builds()

		// If there are builds currently running, the daemon should refresh
		// every second to clean up any finished builds & start new ones.
		mut delay := time.Duration(1 * time.second)

		// Sleep either until we have to refresh the repos or when the next
		// build has to start, with a minimum of 1 second.
		if d.current_build_count() == 0 {
			now := time.now()
			delay = d.api_update_timestamp - now

			if d.queue.len() > 0 {
				elem := d.queue.peek() or {
					d.lerror("queue.peek() unexpectedly returned an error. This shouldn't happen.")

					// This is just a fallback option. In theory, queue.peek()
					// should *never* return an error or none, because we check
					// its len beforehand.
					// Fix: time.sleep takes a Duration in nanoseconds, so the
					// previous `time.sleep(1)` slept for 1ns, effectively
					// busy-looping in this fallback path.
					time.sleep(1 * time.second)
					continue
				}

				time_until_next_job := elem.timestamp - now

				delay = math.min(delay, time_until_next_job)
			}
		}

		// We sleep for at least one second. This is to prevent the program
		// from looping aggressively when a cronjob can be scheduled, but
		// there's no spots free for it to be started.
		delay = math.max(delay, 1 * time.second)

		d.ldebug('Sleeping for ${delay}...')

		time.sleep(delay)
	}
}
|
|
|
|
|
|
|
|
// schedule_build adds the next occurrence of the given repo build to the
// queue, using the repo's own cron schedule when present & the daemon-wide
// default otherwise.
fn (mut d Daemon) schedule_build(repo_id string, repo git.GitRepo) {
	// Start from the global schedule & only override it when the repo
	// provides a parseable expression of its own.
	mut ce := d.global_schedule

	if repo.schedule != '' {
		ce = parse_expression(repo.schedule) or {
			// TODO This shouldn't return an error if the expression is empty.
			d.lerror("Error while parsing cron expression '$repo.schedule' ($repo_id): $err.msg()")

			d.global_schedule
		}
	}

	// A repo that can't be scheduled will just be skipped for now
	timestamp := ce.next_from_now() or {
		d.lerror("Couldn't calculate next timestamp from '$repo.schedule'; skipping")

		return
	}

	scheduled := ScheduledBuild{
		repo_id: repo_id
		repo: repo
		timestamp: timestamp
	}
	d.queue.insert(scheduled)
}
|
|
|
|
|
2022-04-30 20:22:03 +02:00
|
|
|
// renew_repos requests the newest list of Git repos from the server &
// replaces the old one. On failure the old list is kept & a retry is
// scheduled sooner than the regular update interval.
fn (mut d Daemon) renew_repos() {
	d.linfo('Renewing repos...')

	mut fetched := git.get_repos(d.address, d.api_key) or {
		d.lerror('Failed to renew repos. Retrying in ${daemon.api_update_retry_timeout}s...')
		d.api_update_timestamp = time.now().add_seconds(daemon.api_update_retry_timeout)

		return
	}
	d.repos_map = fetched.move()

	// Schedule the next regular refresh.
	d.api_update_timestamp = time.now().add_seconds(60 * d.api_update_frequency)
}
|
|
|
|
|
2022-04-13 16:12:22 +02:00
|
|
|
// renew_queue replaces the old queue with a new one that reflects the newest
// values in repos_map. Overdue jobs (timestamp already in the past) are
// carried over from the old queue so their pending run isn't lost; everything
// else is rescheduled from scratch via schedule_build.
fn (mut d Daemon) renew_queue() {
	d.linfo('Renewing queue...')
	mut new_queue := MinHeap<ScheduledBuild>{}

	// Move any jobs that should have already started from the old queue onto
	// the new one
	now := time.now()

	// For some reason, using
	// ```v
	// for d.queue.len() > 0 && d.queue.peek() ?.timestamp < now {
	//```
	// here causes the function to prematurely just exit, without any errors or anything, very weird
	// https://github.com/vlang/v/issues/14042
	for d.queue.len() > 0 {
		elem := d.queue.pop() or {
			// Should be unreachable: len() > 0 was just checked.
			d.lerror("queue.pop() returned an error. This shouldn't happen.")
			continue
		}

		if elem.timestamp < now {
			new_queue.insert(elem)
		} else {
			// The heap pops in timestamp order, so the first non-overdue
			// element means all remaining ones are in the future too.
			break
		}
	}

	d.queue = new_queue

	// For each repository in repos_map, parse their cron expression (or use
	// the default one if not present) & add them to the queue
	for id, repo in d.repos_map {
		d.schedule_build(id, repo)
	}
}
|
2022-04-30 17:56:35 +02:00
|
|
|
|
2022-04-30 20:22:03 +02:00
|
|
|
// rebuild_base_image recreates the builder image. It returns true on success;
// on failure it schedules a retry & returns false, leaving the previous
// builder image (if any) in use.
fn (mut d Daemon) rebuild_base_image() bool {
	d.linfo('Rebuilding builder image....')

	new_image := build.create_build_image(d.base_image) or {
		d.lerror('Failed to rebuild base image. Retrying in ${daemon.rebuild_base_image_retry_timout}s...')
		d.image_build_timestamp = time.now().add_seconds(daemon.rebuild_base_image_retry_timout)

		return false
	}
	// Append the fresh image; the last entry is the one builds will use.
	d.builder_images << new_image

	d.image_build_timestamp = time.now().add_seconds(60 * d.image_rebuild_frequency)

	return true
}
|
2022-04-30 18:38:24 +02:00
|
|
|
|
2022-04-30 20:22:03 +02:00
|
|
|
// clean_old_base_images tries to remove any old but still present builder
// images. The last entry in builder_images is the currently active image &
// is never considered for removal.
fn (mut d Daemon) clean_old_base_images() {
	mut i := 0

	for i < d.builder_images.len - 1 {
		// For each builder image, we try to remove it by calling the Docker
		// API. If the call returns an error or false, the image wasn't
		// deleted (e.g. a build is still using it), so we move the index
		// over & retry it on a later invocation. On success we remove the
		// entry from the list without advancing the index, since the next
		// entry has shifted into the current position.
		// Fix: the original never deleted the entry from builder_images, so
		// successfully removed images left stale names in the list & caused
		// a redundant (failing) API call before the index advanced.
		if !docker.remove_image(d.builder_images[i]) or { false } {
			i += 1
		} else {
			d.builder_images.delete(i)
		}
	}
}
|