forked from vieter-v/vieter
feat(agent): clean up code a bit; add frequent polling when active
parent
6342789921
commit
5cbfc0ebcb
|
@ -2,12 +2,12 @@ module agent
|
||||||
|
|
||||||
import log
|
import log
|
||||||
import os
|
import os
|
||||||
|
import util
|
||||||
|
|
||||||
const log_file_name = 'vieter.agent.log'
|
const log_file_name = 'vieter.agent.log'
|
||||||
|
|
||||||
// agent start an agent service
|
// agent starts an agent service
|
||||||
pub fn agent(conf Config) ! {
|
pub fn agent(conf Config) ! {
|
||||||
// Configure logger
|
|
||||||
log_level := log.level_from_tag(conf.log_level) or {
|
log_level := log.level_from_tag(conf.log_level) or {
|
||||||
return error('Invalid log level. The allowed values are FATAL, ERROR, WARN, INFO & DEBUG.')
|
return error('Invalid log level. The allowed values are FATAL, ERROR, WARN, INFO & DEBUG.')
|
||||||
}
|
}
|
||||||
|
@ -16,6 +16,8 @@ pub fn agent(conf Config) ! {
|
||||||
level: log_level
|
level: log_level
|
||||||
}
|
}
|
||||||
|
|
||||||
|
os.mkdir_all(conf.data_dir) or { util.exit_with_message(1, 'Failed to create data directory.') }
|
||||||
|
|
||||||
log_file := os.join_path_single(conf.data_dir, agent.log_file_name)
|
log_file := os.join_path_single(conf.data_dir, agent.log_file_name)
|
||||||
logger.set_full_logpath(log_file)
|
logger.set_full_logpath(log_file)
|
||||||
logger.log_to_console_too()
|
logger.log_to_console_too()
|
||||||
|
|
|
@ -13,8 +13,6 @@ pub:
|
||||||
data_dir string
|
data_dir string
|
||||||
max_concurrent_builds int = 1
|
max_concurrent_builds int = 1
|
||||||
polling_frequency int = 30
|
polling_frequency int = 30
|
||||||
// Architecture of agent
|
|
||||||
// arch string
|
|
||||||
image_rebuild_frequency int = 1440
|
image_rebuild_frequency int = 1440
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -22,7 +20,7 @@ pub:
|
||||||
pub fn cmd() cli.Command {
|
pub fn cmd() cli.Command {
|
||||||
return cli.Command{
|
return cli.Command{
|
||||||
name: 'agent'
|
name: 'agent'
|
||||||
description: 'Start an agent service & start polling for new builds.'
|
description: 'Start an agent daemon.'
|
||||||
execute: fn (cmd cli.Command) ! {
|
execute: fn (cmd cli.Command) ! {
|
||||||
config_file := cmd.flags.get_string('config-file')!
|
config_file := cmd.flags.get_string('config-file')!
|
||||||
conf := vconf.load<Config>(prefix: 'VIETER_', default_path: config_file)!
|
conf := vconf.load<Config>(prefix: 'VIETER_', default_path: config_file)!
|
||||||
|
|
|
@ -16,17 +16,17 @@ const (
|
||||||
struct AgentDaemon {
|
struct AgentDaemon {
|
||||||
logger shared log.Log
|
logger shared log.Log
|
||||||
conf Config
|
conf Config
|
||||||
|
client client.Client
|
||||||
mut:
|
mut:
|
||||||
images ImageManager
|
images ImageManager
|
||||||
// Which builds are currently running; length is same as
|
// Which builds are currently running; length is conf.max_concurrent_builds
|
||||||
// conf.max_concurrent_builds
|
|
||||||
builds []BuildConfig
|
builds []BuildConfig
|
||||||
// Atomic variables used to detect when a build has finished; length is the
|
// Atomic variables used to detect when a build has finished; length is
|
||||||
// same as conf.max_concurrent_builds
|
// conf.max_concurrent_builds
|
||||||
client client.Client
|
|
||||||
atomics []u64
|
atomics []u64
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// agent_init initializes a new agent
|
||||||
fn agent_init(logger log.Log, conf Config) AgentDaemon {
|
fn agent_init(logger log.Log, conf Config) AgentDaemon {
|
||||||
mut d := AgentDaemon{
|
mut d := AgentDaemon{
|
||||||
logger: logger
|
logger: logger
|
||||||
|
@ -40,37 +40,49 @@ fn agent_init(logger log.Log, conf Config) AgentDaemon {
|
||||||
return d
|
return d
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// run starts the actual agent daemon. This function will run forever.
|
||||||
pub fn (mut d AgentDaemon) run() {
|
pub fn (mut d AgentDaemon) run() {
|
||||||
// This is just so that the very first time the loop is ran, the jobs are
|
// This is just so that the very first time the loop is ran, the jobs are
|
||||||
// always polled
|
// always polled
|
||||||
mut last_poll_time := time.now().add_seconds(-d.conf.polling_frequency)
|
mut last_poll_time := time.now().add_seconds(-d.conf.polling_frequency)
|
||||||
|
mut sleep_time := 1 * time.second
|
||||||
|
mut finished, mut empty := 0, 0
|
||||||
|
|
||||||
for {
|
for {
|
||||||
free_builds := d.update_atomics()
|
finished, empty = d.update_atomics()
|
||||||
|
|
||||||
// All build slots are taken, so there's nothing to be done
|
// No new finished builds and no free slots, so there's nothing to be
|
||||||
if free_builds == 0 {
|
// done
|
||||||
|
if finished + empty == 0 {
|
||||||
time.sleep(1 * time.second)
|
time.sleep(1 * time.second)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// Builds have finished, so old builder images might have freed up.
|
// Builds have finished, so old builder images might have freed up.
|
||||||
|
// TODO this might query the docker daemon too frequently.
|
||||||
|
if finished > 0 {
|
||||||
d.images.clean_old_images()
|
d.images.clean_old_images()
|
||||||
|
}
|
||||||
|
|
||||||
// Poll for new jobs
|
// The agent will always poll for new jobs after at most
|
||||||
if time.now() >= last_poll_time.add_seconds(d.conf.polling_frequency) {
|
// `polling_frequency` seconds. However, when jobs have finished, the
|
||||||
new_configs := d.client.poll_jobs(d.conf.arch, free_builds) or {
|
// agent will also poll for new jobs. This is because jobs are often
|
||||||
|
// clustered together (especially when mostly using the global cron
|
||||||
|
// schedule), so there's a much higher chance jobs are available.
|
||||||
|
if finished > 0 || time.now() >= last_poll_time.add_seconds(d.conf.polling_frequency) {
|
||||||
|
new_configs := d.client.poll_jobs(d.conf.arch, finished + empty) or {
|
||||||
d.lerror('Failed to poll jobs: $err.msg()')
|
d.lerror('Failed to poll jobs: $err.msg()')
|
||||||
|
|
||||||
|
// TODO pick a better delay here
|
||||||
time.sleep(5 * time.second)
|
time.sleep(5 * time.second)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
last_poll_time = time.now()
|
last_poll_time = time.now()
|
||||||
|
|
||||||
// Schedule new jobs
|
|
||||||
for config in new_configs {
|
for config in new_configs {
|
||||||
// TODO handle this better than to just skip the config
|
// TODO handle this better than to just skip the config
|
||||||
// Make sure a recent build base image is available for building the config
|
// Make sure a recent build base image is available for
|
||||||
|
// building the config
|
||||||
d.images.refresh_image(config.base_image) or {
|
d.images.refresh_image(config.base_image) or {
|
||||||
d.lerror(err.msg())
|
d.lerror(err.msg())
|
||||||
continue
|
continue
|
||||||
|
@ -78,40 +90,45 @@ pub fn (mut d AgentDaemon) run() {
|
||||||
d.start_build(config)
|
d.start_build(config)
|
||||||
}
|
}
|
||||||
|
|
||||||
time.sleep(1 * time.second)
|
// No new jobs were scheduled and the agent isn't doing anything,
|
||||||
|
// so we just wait until the next polling period.
|
||||||
|
if new_configs.len == 0 && finished + empty == d.conf.max_concurrent_builds {
|
||||||
|
sleep_time = time.now() - last_poll_time
|
||||||
}
|
}
|
||||||
// Builds are running, so check again after one second
|
|
||||||
else if free_builds < d.conf.max_concurrent_builds {
|
|
||||||
time.sleep(1 * time.second)
|
|
||||||
}
|
}
|
||||||
// The agent is not doing anything, so we just wait until the next poll
|
// The agent is not doing anything, so we just wait until the next poll
|
||||||
// time
|
// time
|
||||||
else {
|
else if finished + empty == d.conf.max_concurrent_builds {
|
||||||
time_until_next_poll := time.now() - last_poll_time
|
sleep_time = time.now() - last_poll_time
|
||||||
time.sleep(time_until_next_poll)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
time.sleep(sleep_time)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// update_atomics checks for each build whether it's completed, and sets it to
|
// update_atomics checks for each build whether it's completed, and sets it to
|
||||||
// free again if so. The return value is how many build slots are currently
|
// empty again if so. The return value is a tuple `(finished, empty)` where
|
||||||
// free.
|
// `finished` is how many builds were just finished and thus set to empty, and
|
||||||
fn (mut d AgentDaemon) update_atomics() int {
|
// `empty` is how many build slots were already empty. The amount of running
|
||||||
mut count := 0
|
// builds can then be calculated by substracting these two values from the
|
||||||
|
// total allowed concurrent builds.
|
||||||
|
fn (mut d AgentDaemon) update_atomics() (int, int) {
|
||||||
|
mut finished := 0
|
||||||
|
mut empty := 0
|
||||||
|
|
||||||
for i in 0 .. d.atomics.len {
|
for i in 0 .. d.atomics.len {
|
||||||
if stdatomic.load_u64(&d.atomics[i]) == agent.build_done {
|
if stdatomic.load_u64(&d.atomics[i]) == agent.build_done {
|
||||||
stdatomic.store_u64(&d.atomics[i], agent.build_empty)
|
stdatomic.store_u64(&d.atomics[i], agent.build_empty)
|
||||||
count++
|
finished++
|
||||||
} else if stdatomic.load_u64(&d.atomics[i]) == agent.build_empty {
|
} else if stdatomic.load_u64(&d.atomics[i]) == agent.build_empty {
|
||||||
count++
|
empty++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return count
|
return finished, empty
|
||||||
}
|
}
|
||||||
|
|
||||||
// start_build starts a build for the given BuildConfig object.
|
// start_build starts a build for the given BuildConfig.
|
||||||
fn (mut d AgentDaemon) start_build(config BuildConfig) bool {
|
fn (mut d AgentDaemon) start_build(config BuildConfig) bool {
|
||||||
for i in 0 .. d.atomics.len {
|
for i in 0 .. d.atomics.len {
|
||||||
if stdatomic.load_u64(&d.atomics[i]) == agent.build_empty {
|
if stdatomic.load_u64(&d.atomics[i]) == agent.build_empty {
|
||||||
|
@ -149,6 +166,7 @@ fn (mut d AgentDaemon) run_build(build_index int, config BuildConfig) {
|
||||||
if status == 0 {
|
if status == 0 {
|
||||||
d.linfo('finished build: $config.url -> $config.repo; uploading logs...')
|
d.linfo('finished build: $config.url -> $config.repo; uploading logs...')
|
||||||
|
|
||||||
|
// TODO use the arch value here
|
||||||
build_arch := os.uname().machine
|
build_arch := os.uname().machine
|
||||||
d.client.add_build_log(config.target_id, res.start_time, res.end_time, build_arch,
|
d.client.add_build_log(config.target_id, res.start_time, res.end_time, build_arch,
|
||||||
res.exit_code, res.logs) or {
|
res.exit_code, res.logs) or {
|
||||||
|
|
|
@ -4,29 +4,42 @@ import time
|
||||||
import docker
|
import docker
|
||||||
import build
|
import build
|
||||||
|
|
||||||
|
// An ImageManager is a utility that creates builder images from given base
|
||||||
|
// images, updating these builder images if they've become too old. This
|
||||||
|
// structure can manage images from any number of base images, paving the way
|
||||||
|
// for configurable base images per target/repository.
|
||||||
struct ImageManager {
|
struct ImageManager {
|
||||||
mut:
|
mut:
|
||||||
refresh_frequency int
|
max_image_age int [required]
|
||||||
|
// For each base images, one or more builder images can exist at the same
|
||||||
|
// time
|
||||||
images map[string][]string [required]
|
images map[string][]string [required]
|
||||||
|
// For each base image, we track when its newest image was built
|
||||||
timestamps map[string]time.Time [required]
|
timestamps map[string]time.Time [required]
|
||||||
}
|
}
|
||||||
|
|
||||||
fn new_image_manager(refresh_frequency int) ImageManager {
|
// new_image_manager initializes a new image manager.
|
||||||
|
fn new_image_manager(max_image_age int) ImageManager {
|
||||||
return ImageManager{
|
return ImageManager{
|
||||||
refresh_frequency: refresh_frequency
|
max_image_age: max_image_age
|
||||||
images: map[string][]string{}
|
images: map[string][]string{}
|
||||||
timestamps: map[string]time.Time{}
|
timestamps: map[string]time.Time{}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// get returns the name of the newest image for the given base image. Note that
|
||||||
|
// this function should only be called *after* a first call to `refresh_image`.
|
||||||
pub fn (m &ImageManager) get(base_image string) string {
|
pub fn (m &ImageManager) get(base_image string) string {
|
||||||
return m.images[base_image].last()
|
return m.images[base_image].last()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// refresh_image builds a new builder image from the given base image if the
|
||||||
|
// previous builder image is too old or non-existent. This function will do
|
||||||
|
// nothing if these conditions aren't met, so it's safe to call it every time
|
||||||
|
// you want to ensure an image is up to date.
|
||||||
fn (mut m ImageManager) refresh_image(base_image string) ! {
|
fn (mut m ImageManager) refresh_image(base_image string) ! {
|
||||||
// No need to refresh the image if the previous one is still new enough
|
|
||||||
if base_image in m.timestamps
|
if base_image in m.timestamps
|
||||||
&& m.timestamps[base_image].add_seconds(m.refresh_frequency) > time.now() {
|
&& m.timestamps[base_image].add_seconds(m.max_image_age) > time.now() {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -39,7 +52,9 @@ fn (mut m ImageManager) refresh_image(base_image string) ! {
|
||||||
m.timestamps[base_image] = time.now()
|
m.timestamps[base_image] = time.now()
|
||||||
}
|
}
|
||||||
|
|
||||||
// clean_old_images tries to remove any old but still present builder images.
|
// clean_old_images removes all older builder images that are no longer in use.
|
||||||
|
// The function will always leave at least one builder image, namely the newest
|
||||||
|
// one.
|
||||||
fn (mut m ImageManager) clean_old_images() {
|
fn (mut m ImageManager) clean_old_images() {
|
||||||
mut dd := docker.new_conn() or { return }
|
mut dd := docker.new_conn() or { return }
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,7 @@ module agent
|
||||||
|
|
||||||
import log
|
import log
|
||||||
|
|
||||||
// log reate a log message with the given level
|
// log a message with the given level
|
||||||
pub fn (mut d AgentDaemon) log(msg string, level log.Level) {
|
pub fn (mut d AgentDaemon) log(msg string, level log.Level) {
|
||||||
lock d.logger {
|
lock d.logger {
|
||||||
d.logger.send_output(msg, level)
|
d.logger.send_output(msg, level)
|
||||||
|
|
Loading…
Reference in New Issue