2022-12-06 13:50:25 +01:00
|
|
|
module agent
|
|
|
|
|
|
|
|
import log
|
|
|
|
import sync.stdatomic
|
|
|
|
import build { BuildConfig }
|
|
|
|
import client
|
2022-12-12 22:09:57 +01:00
|
|
|
import time
|
|
|
|
import os
|
2022-12-06 13:50:25 +01:00
|
|
|
|
|
|
|
// States a build slot can be in; stored in `AgentDaemon.atomics`.
// Transitions: build_empty -> build_running (main thread claims a slot),
// build_running -> build_done (builder thread finishes),
// build_done -> build_empty (main thread reclaims the slot).
const (
	// The slot is free and can accept a new build
	build_empty = 0
	// A build is currently running in this slot
	build_running = 1
	// The build has finished, but the slot hasn't been reclaimed yet
	build_done = 2
)
|
|
|
|
|
|
|
|
// AgentDaemon is the state of the agent: it polls the server for new build
// jobs and runs them concurrently, up to conf.max_concurrent_builds at once.
struct AgentDaemon {
	// Shared logger; written to from both the main loop and builder threads
	logger shared log.Log
	// Agent configuration (server address, API key, polling frequency, ...)
	conf Config
	// Client used to talk to the server API (polling jobs, uploading logs)
	client client.Client
mut:
	// Manages the builder images used for running builds
	images ImageManager
	// Which builds are currently running; length is conf.max_concurrent_builds
	builds []BuildConfig
	// Atomic variables used to detect when a build has finished; length is
	// conf.max_concurrent_builds
	atomics []u64
}
|
|
|
|
|
2022-12-13 17:42:49 +01:00
|
|
|
// agent_init initializes a new agent
fn agent_init(logger log.Log, conf Config) AgentDaemon {
	// Both slot-tracking arrays are sized once to the maximum number of
	// concurrent builds and never resized afterwards.
	return AgentDaemon{
		logger: logger
		client: client.new(conf.address, conf.api_key)
		conf: conf
		images: new_image_manager(conf.image_rebuild_frequency)
		builds: []BuildConfig{len: conf.max_concurrent_builds}
		atomics: []u64{len: conf.max_concurrent_builds}
	}
}
|
|
|
|
|
2022-12-13 17:42:49 +01:00
|
|
|
// run starts the actual agent daemon. This function will run forever.
pub fn (mut d AgentDaemon) run() {
	// This is just so that the very first time the loop is ran, the jobs are
	// always polled
	mut last_poll_time := time.now().add_seconds(-d.conf.polling_frequency)
	mut sleep_time := 1 * time.second
	mut finished, mut empty := 0, 0

	for {
		finished, empty = d.update_atomics()

		// No new finished builds and no free slots, so there's nothing to be
		// done
		if finished + empty == 0 {
			time.sleep(1 * time.second)
			continue
		}

		// Builds have finished, so old builder images might have freed up.
		// TODO this might query the docker daemon too frequently.
		if finished > 0 {
			d.images.clean_old_images()
		}

		// The agent will always poll for new jobs after at most
		// `polling_frequency` seconds. However, when jobs have finished, the
		// agent will also poll for new jobs. This is because jobs are often
		// clustered together (especially when mostly using the global cron
		// schedule), so there's a much higher chance jobs are available.
		if finished > 0 || time.now() >= last_poll_time.add_seconds(d.conf.polling_frequency) {
			// Ask for at most as many jobs as there are slots available
			new_configs := d.client.poll_jobs(d.conf.arch, finished + empty) or {
				d.lerror('Failed to poll jobs: $err.msg()')

				// TODO pick a better delay here
				time.sleep(5 * time.second)
				continue
			}
			last_poll_time = time.now()

			for config in new_configs {
				// TODO handle this better than to just skip the config
				// Make sure a recent build base image is available for
				// building the config
				d.images.refresh_image(config.base_image) or {
					d.lerror(err.msg())
					continue
				}
				d.start_build(config)
			}

			// No new jobs were scheduled and the agent isn't doing anything,
			// so we just wait until the next polling period.
			if new_configs.len == 0 && finished + empty == d.conf.max_concurrent_builds {
				// BUGFIX: sleep for the time *remaining until* the next poll,
				// not the time already elapsed since the previous poll.
				// last_poll_time was just updated, so this is always positive.
				sleep_time = last_poll_time.add_seconds(d.conf.polling_frequency) - time.now()
			}
		}
		// The agent is not doing anything, so we just wait until the next poll
		// time
		else if finished + empty == d.conf.max_concurrent_builds {
			// This branch only runs when the poll deadline hasn't passed yet
			// (the `if` above was false), so the result is always positive.
			sleep_time = last_poll_time.add_seconds(d.conf.polling_frequency) - time.now()
		}

		time.sleep(sleep_time)
	}
}
|
|
|
|
|
2022-12-12 21:21:58 +01:00
|
|
|
// update_atomics checks for each build whether it's completed, and sets it to
// empty again if so. The return value is a tuple `(finished, empty)` where
// `finished` is how many builds were just finished and thus set to empty, and
// `empty` is how many build slots were already empty. The amount of running
// builds can then be calculated by subtracting these two values from the
// total allowed concurrent builds.
fn (mut d AgentDaemon) update_atomics() (int, int) {
	mut finished := 0
	mut empty := 0

	for i in 0 .. d.atomics.len {
		// Load the slot state once per iteration; the original loaded it
		// twice, meaning a builder thread could change it between the two
		// loads of the same branch check.
		state := stdatomic.load_u64(&d.atomics[i])

		if state == agent.build_done {
			stdatomic.store_u64(&d.atomics[i], agent.build_empty)
			finished++
		} else if state == agent.build_empty {
			empty++
		}
	}

	return finished, empty
}
|
2022-12-12 22:09:57 +01:00
|
|
|
|
2022-12-13 17:42:49 +01:00
|
|
|
// start_build starts a build for the given BuildConfig.
fn (mut d AgentDaemon) start_build(config BuildConfig) bool {
	// Scan for the first free slot; skip any that are occupied.
	for slot in 0 .. d.atomics.len {
		if stdatomic.load_u64(&d.atomics[slot]) != agent.build_empty {
			continue
		}

		// Claim the slot before handing the build off to a new thread
		stdatomic.store_u64(&d.atomics[slot], agent.build_running)
		d.builds[slot] = config

		go d.run_build(slot, config)

		return true
	}

	// Every slot was occupied, so the build could not be started
	return false
}
|
|
|
|
|
|
|
|
// run_build actually starts the build process for a given target.
fn (mut d AgentDaemon) run_build(build_index int, config BuildConfig) {
	d.linfo('started build: $config.url -> $config.repo')

	// Tracks whether build_config returned without an error
	mut success := true

	// Build with whatever image the image manager currently provides for
	// this base image (presumably a recently refreshed one — see
	// ImageManager.get).
	new_config := BuildConfig{
		...config
		base_image: d.images.get(config.base_image)
	}

	res := build.build_config(d.client.address, d.client.api_key, new_config) or {
		d.ldebug('build_config error: $err.msg()')
		success = false

		// Placeholder result; never read because success is now false
		build.BuildResult{}
	}

	if success {
		d.linfo('finished build: $config.url -> $config.repo; uploading logs...')

		// TODO use the arch value here
		build_arch := os.uname().machine
		d.client.add_build_log(config.target_id, res.start_time, res.end_time, build_arch,
			res.exit_code, res.logs) or {
			d.lerror('Failed to upload logs for build: $config.url -> $config.repo')
		}
	} else {
		d.linfo('an error occured during build: $config.url -> $config.repo')
	}

	// Mark the slot as done so the main loop can reclaim it
	stdatomic.store_u64(&d.atomics[build_index], agent.build_done)
}
|