vieter/src/agent/daemon.v

198 lines
5.7 KiB
V

module agent
import log
import sync.stdatomic
import build
import models { BuildConfig }
import client
import time
import os
// States a build slot (one entry of AgentDaemon.atomics) can be in.
const (
// The slot holds nothing to report; update_atomics counts it as empty
build_empty = 0
// Intended to mark a slot whose build is still in progress
build_running = 1
// The build in this slot has completed; update_atomics counts it as finished
// and resets the slot to build_empty
build_done = 2
)
// AgentDaemon bundles everything the agent needs while running: its
// configuration, the API client, the builder image manager and the state
// shared between the main loop and the builder threads.
struct AgentDaemon {
// Logger handed to agent_init
logger shared log.Log
// Agent configuration handed to agent_init
conf Config
// Client created from conf.address and conf.api_key; used to poll for jobs
// and to upload build logs
client client.Client
mut:
// Keeps track of builder images: checks whether they are up to date,
// refreshes them and cleans up old ones
images ImageManager
// Atomic variables used to detect when a build has finished; length is
// conf.max_concurrent_builds. This approach is used because the difference
// between a recently finished build and an empty build slot is important
// for knowing whether the agent is currently "active".
atomics []u64
// Channel used to send builds to the builder threads; its capacity is
// conf.max_concurrent_builds
build_channel chan BuildConfig
}
// agent_init builds a fresh AgentDaemon from the given logger and
// configuration. One atomic slot is allocated per allowed concurrent build and
// the build channel gets the same capacity. The image rebuild frequency from
// the configuration is multiplied by 60 before being handed to the image
// manager.
fn agent_init(logger log.Log, conf Config) AgentDaemon {
	slot_count := conf.max_concurrent_builds

	return AgentDaemon{
		logger: logger
		client: client.new(conf.address, conf.api_key)
		conf: conf
		images: new_image_manager(conf.image_rebuild_frequency * 60)
		atomics: []u64{len: slot_count}
		build_channel: chan BuildConfig{cap: slot_count}
	}
}
// run is the agent's main loop and never returns. It spawns one builder thread
// per build slot, then repeatedly inspects the slots, polls the server for new
// jobs when appropriate, makes sure the required builder images exist and
// hands the received build configs to the builder threads.
pub fn (mut d AgentDaemon) run() {
	// One builder thread per slot, all reading from the same channel
	for slot in 0 .. d.conf.max_concurrent_builds {
		spawn d.builder_thread(d.build_channel, slot)
	}

	// Backdate the last poll by one full interval so the first pass through
	// the loop always polls for jobs
	mut last_poll_time := time.now().add_seconds(-d.conf.polling_frequency)
	mut sleep_time := 0 * time.second

	for {
		if sleep_time > 0 {
			d.ldebug('Sleeping for ${sleep_time}')
			time.sleep(sleep_time)
		}

		finished, empty := d.update_atomics()
		free_slots := finished + empty
		mut running := d.conf.max_concurrent_builds - free_slots

		// Nothing finished and no slot is free, so there is nothing to do
		// right now
		if free_slots == 0 {
			sleep_time = 1 * time.second
			continue
		}

		// Finished builds may have released old builder images.
		// TODO this might query the docker daemon too frequently.
		if finished > 0 {
			d.images.clean_old_images()
		}

		// Jobs are polled at least every `polling_frequency` seconds, and
		// additionally whenever builds have just finished: jobs tend to be
		// clustered together (especially with the global cron schedule), so
		// more are likely to be waiting at that moment.
		if finished > 0 || time.now() >= last_poll_time.add_seconds(d.conf.polling_frequency) {
			d.ldebug('Polling for new jobs')

			new_configs := d.client.poll_jobs(d.conf.arch, free_slots) or {
				d.lerror('Failed to poll jobs: ${err.msg()}')

				// TODO pick a better delay here
				sleep_time = 5 * time.second
				continue
			}

			d.ldebug('Received ${new_configs.len} jobs')
			last_poll_time = time.now()

			for job in new_configs {
				// A sufficiently recent builder image must exist before the
				// config can be built
				if !d.images.up_to_date(job.base_image) {
					d.linfo('Building builder image from base image ${job.base_image}')

					// TODO handle this better than to just skip the config
					d.images.refresh_image(job.base_image) or {
						d.lerror(err.msg())
						continue
					}
				}

				// The builder image could in theory still be removed in the
				// short window between building it and starting the build
				// container. If that happens, the build simply fails.
				d.build_channel <- job
				running++
			}
		}

		if running != 0 {
			// Builds are in flight; check on them again shortly
			sleep_time = 1 * time.second
		} else {
			// Idle: wait until the next scheduled poll
			sleep_time = last_poll_time.add_seconds(d.conf.polling_frequency) - time.now()
		}
	}
}
// update_atomics inspects every build slot. Slots whose build has completed
// are reset to empty. The return value is a tuple `(finished, empty)` where
// `finished` is how many builds were just finished and thus set to empty, and
// `empty` is how many build slots were already empty. The amount of running
// builds can then be calculated by subtracting these two values from the
// total allowed concurrent builds.
fn (mut d AgentDaemon) update_atomics() (int, int) {
	mut finished := 0
	mut empty := 0

	for i in 0 .. d.atomics.len {
		// Load the slot exactly once so that both comparisons below look at
		// the same snapshot, even if a builder thread writes to the slot in
		// the meantime.
		state := stdatomic.load_u64(&d.atomics[i])

		if state == agent.build_done {
			stdatomic.store_u64(&d.atomics[i], agent.build_empty)
			finished++
		} else if state == agent.build_empty {
			empty++
		}
	}

	return finished, empty
}
// run_build performs the build described by `config` in the build slot
// `build_index`. The slot is marked as running for the duration of the build
// and as done once the build has been handled, whether it succeeded or not.
// When the build could be run, its logs are uploaded to the server; a failure
// to upload them is logged but not treated as fatal.
fn (mut d AgentDaemon) run_build(build_index int, config BuildConfig) {
	// Mark the slot as occupied. Without this the slot keeps reading as empty
	// while the build is in progress, so the main loop would consider it free
	// and request more jobs than there are idle builder threads.
	stdatomic.store_u64(&d.atomics[build_index], agent.build_running)

	d.linfo('started build: ${config}')

	// 0 means success, 1 means failure
	mut status := 0

	// Same config, but with the base image replaced by whatever the image
	// manager returns for it
	new_config := BuildConfig{
		...config
		base_image: d.images.get(config.base_image)
	}

	res := build.build_config(d.client.address, d.client.api_key, new_config) or {
		d.ldebug('build_config error: ${err.msg()}')
		status = 1

		// Placeholder result; it is not used when status is 1
		build.BuildResult{}
	}

	if status == 0 {
		d.linfo('Uploading build logs for ${config}')

		// TODO use the arch value here
		build_arch := os.uname().machine
		d.client.add_build_log(config.target_id, res.start_time, res.end_time, build_arch,
			res.exit_code, res.logs) or { d.lerror('Failed to upload logs for ${config}') }
	} else {
		d.lwarn('an error occurred during build: ${config}')
	}

	// Signal the main loop that this slot has a finished build to account for
	stdatomic.store_u64(&d.atomics[build_index], agent.build_done)
}
// builder_thread is the body of a single builder thread. It keeps receiving
// build configs from `ch` and runs each of them in the slot `builder_index`.
// The thread ends as soon as receiving from the channel fails.
fn (mut d AgentDaemon) builder_thread(ch chan BuildConfig, builder_index int) {
	for {
		job := <-ch or { return }

		d.run_build(builder_index, job)
	}
}