vieter/src/agent/daemon.v

module agent

import log
import sync.stdatomic
import build { BuildConfig }
import client
import time
import os

// States of a single build slot, as stored in `AgentDaemon.atomics`.
const (
	// The slot is free; start_build may claim it.
	build_empty   = 0
	// Set by start_build right before the build thread is spawned.
	build_running = 1
	// Set by run_build once it is done with the slot; update_atomics resets
	// it to build_empty and counts it as finished.
	build_done    = 2
)

// AgentDaemon holds the state of the build agent: its configuration, the
// client used to talk to the server, the builder image manager and the
// per-slot bookkeeping for concurrently running builds.
struct AgentDaemon {
	// Logger used by the daemon; declared `shared`
	logger shared log.Log
	// Agent configuration (polling frequency, arch, max concurrent builds, ...)
	conf   Config
	// Client used to poll for jobs and upload build logs
	client client.Client
mut:
	// Manages the builder images used for builds
	images ImageManager
	// Which builds are currently running; length is conf.max_concurrent_builds
	builds []BuildConfig
	// Atomic variables used to detect when a build has finished; length is
	// conf.max_concurrent_builds. Each entry holds one of build_empty,
	// build_running or build_done.
	atomics []u64
}

// agent_init builds a new AgentDaemon from the given logger and
// configuration. The API client and image manager are created from the
// configuration, and one build slot (config entry + atomic state) is
// allocated per allowed concurrent build.
fn agent_init(logger log.Log, conf Config) AgentDaemon {
	return AgentDaemon{
		logger: logger
		client: client.new(conf.address, conf.api_key)
		conf: conf
		images: new_image_manager(conf.image_rebuild_frequency)
		builds: []BuildConfig{len: conf.max_concurrent_builds}
		atomics: []u64{len: conf.max_concurrent_builds}
	}
}

// run starts the actual agent daemon. This function will run forever.
//
// Every iteration of the loop:
// 1. collects finished builds and counts free slots (see update_atomics);
// 2. cleans up old builder images if at least one build just finished;
// 3. polls the server for new jobs if builds just finished or the polling
//    period has elapsed, and starts a build for every job received;
// 4. sleeps: until the next scheduled poll if no build is running, or for one
//    second otherwise so finished builds are noticed quickly.
pub fn (mut d AgentDaemon) run() {
	// This is just so that the very first time the loop is ran, the jobs are
	// always polled
	mut last_poll_time := time.now().add_seconds(-d.conf.polling_frequency)
	mut finished, mut empty := 0, 0

	for {
		// Default delay between iterations. It is reset every iteration so a
		// long idle sleep computed earlier is never reused while builds are
		// running.
		mut sleep_time := 1 * time.second

		finished, empty = d.update_atomics()

		// No new finished builds and no free slots, so there's nothing to be
		// done
		if finished + empty == 0 {
			time.sleep(sleep_time)
			continue
		}

		// Builds have finished, so old builder images might have freed up.
		// TODO this might query the docker daemon too frequently.
		if finished > 0 {
			d.images.clean_old_images()
		}

		// Amount of builds still running; incremented below for every build
		// that gets started during this iteration.
		mut running := d.conf.max_concurrent_builds - finished - empty

		// The agent will always poll for new jobs after at most
		// `polling_frequency` seconds. However, when jobs have finished, the
		// agent will also poll for new jobs. This is because jobs are often
		// clustered together (especially when mostly using the global cron
		// schedule), so there's a much higher chance jobs are available.
		if finished > 0 || time.now() >= last_poll_time.add_seconds(d.conf.polling_frequency) {
			new_configs := d.client.poll_jobs(d.conf.arch, finished + empty) or {
				d.lerror('Failed to poll jobs: $err.msg()')

				// TODO pick a better delay here
				time.sleep(5 * time.second)
				continue
			}
			last_poll_time = time.now()

			for config in new_configs {
				// TODO handle this better than to just skip the config
				// Make sure a recent build base image is available for
				// building the config
				d.images.refresh_image(config.base_image) or {
					d.lerror(err.msg())
					continue
				}

				// At most `finished + empty` jobs were requested, so a free
				// slot is expected; report it instead of silently dropping
				// the job if that ever turns out not to be the case.
				if d.start_build(config) {
					running++
				} else {
					d.lerror('No free build slot for: $config.url -> $config.repo')
				}
			}
		}

		// The agent is not doing anything, so we just wait until the next poll
		// time. This is the time remaining until the next poll, not the time
		// elapsed since the previous one.
		if running == 0 {
			until_next_poll := last_poll_time.add_seconds(d.conf.polling_frequency) - time.now()

			// Keep the default delay if the poll time has already passed
			if until_next_poll > 0 {
				sleep_time = until_next_poll
			}
		}

		time.sleep(sleep_time)
	}
}

// update_atomics walks over all build slots. A slot marked as done is reset
// to empty and counted as finished; a slot that is already empty is counted
// as empty. Returns the tuple `(finished, empty)`. The amount of running
// builds is the total amount of slots minus both of these values.
fn (mut d AgentDaemon) update_atomics() (int, int) {
	mut done_count := 0
	mut free_count := 0

	for slot in 0 .. d.atomics.len {
		state := stdatomic.load_u64(&d.atomics[slot])

		if state == agent.build_done {
			// Release the slot so it can be claimed by a new build
			stdatomic.store_u64(&d.atomics[slot], agent.build_empty)
			done_count++
		} else if state == agent.build_empty {
			free_count++
		}
	}

	return done_count, free_count
}

// start_build claims the first empty build slot for the given BuildConfig and
// spawns run_build for it in a separate thread. Returns true if a slot was
// claimed, or false if no slot was empty (in which case nothing is started).
fn (mut d AgentDaemon) start_build(config BuildConfig) bool {
	for slot in 0 .. d.atomics.len {
		// Skip slots that are running or waiting to be collected
		if stdatomic.load_u64(&d.atomics[slot]) != agent.build_empty {
			continue
		}

		stdatomic.store_u64(&d.atomics[slot], agent.build_running)
		d.builds[slot] = config

		go d.run_build(slot, config)

		return true
	}

	return false
}

// run_build runs the build for the given config and, if the build function
// returned without error, uploads the build logs to the server. Whatever the
// outcome, the slot at `build_index` is marked as done at the end so that
// update_atomics can release it.
fn (mut d AgentDaemon) run_build(build_index int, config BuildConfig) {
	d.linfo('started build: $config.url -> $config.repo')

	// Set when build_config returns an error
	mut failed := false

	// Same config, but with the base image replaced by whatever the image
	// manager returns for it
	build_conf := BuildConfig{
		...config
		base_image: d.images.get(config.base_image)
	}

	result := build.build_config(d.client.address, d.client.api_key, build_conf) or {
		d.ldebug('build_config error: $err.msg()')
		failed = true

		build.BuildResult{}
	}

	if failed {
		d.linfo('an error occured during build: $config.url -> $config.repo')
	} else {
		d.linfo('finished build: $config.url -> $config.repo; uploading logs...')

		// TODO use the arch value here
		machine := os.uname().machine
		d.client.add_build_log(config.target_id, result.start_time, result.end_time, machine,
			result.exit_code, result.logs) or {
			d.lerror('Failed to upload logs for build: $config.url -> $config.repo')
		}
	}

	// Signal the main loop that this slot can be collected
	stdatomic.store_u64(&d.atomics[build_index], agent.build_done)
}
feat: start of agent code 2022-12-06 13:50:25 +01:00			`module agent`

			`import log`
			`import sync.stdatomic`
			`import build { BuildConfig }`
			`import client`
feat(agent): partially wrote daemon code 2022-12-12 22:09:57 +01:00			`import time`
			`import os`
feat: start of agent code 2022-12-06 13:50:25 +01:00
			`const (`
			`build_empty = 0`
			`build_running = 1`
			`build_done = 2`
			`)`

			`struct AgentDaemon {`
			`logger shared log.Log`
feat(build): start of server-side job queue 2022-12-06 14:11:17 +01:00			`conf Config`
feat(agent): clean up code a bit; add frequent polling when active 2022-12-13 17:42:49 +01:00			`client client.Client`
feat(agent): partially wrote daemon code 2022-12-12 22:09:57 +01:00			`mut:`
feat(agent): wrote ImageManager 2022-12-12 21:50:34 +01:00			`images ImageManager`
feat(agent): clean up code a bit; add frequent polling when active 2022-12-13 17:42:49 +01:00			`// Which builds are currently running; length is conf.max_concurrent_builds`
feat: start of agent code 2022-12-06 13:50:25 +01:00			`builds []BuildConfig`
feat(agent): clean up code a bit; add frequent polling when active 2022-12-13 17:42:49 +01:00			`// Atomic variables used to detect when a build has finished; length is`
			`// conf.max_concurrent_builds`
feat: start of agent code 2022-12-06 13:50:25 +01:00			`atomics []u64`
			`}`

feat(agent): clean up code a bit; add frequent polling when active 2022-12-13 17:42:49 +01:00			`// agent_init initializes a new agent`
feat: start of agent code 2022-12-06 13:50:25 +01:00			`fn agent_init(logger log.Log, conf Config) AgentDaemon {`
			`mut d := AgentDaemon{`
			`logger: logger`
			`client: client.new(conf.address, conf.api_key)`
			`conf: conf`
feat(agent): wrote ImageManager 2022-12-12 21:50:34 +01:00			`images: new_image_manager(conf.image_rebuild_frequency)`
feat: start of agent code 2022-12-06 13:50:25 +01:00			`builds: []BuildConfig{len: conf.max_concurrent_builds}`
			`atomics: []u64{len: conf.max_concurrent_builds}`
			`}`

			`return d`
			`}`

feat(agent): clean up code a bit; add frequent polling when active 2022-12-13 17:42:49 +01:00			`// run starts the actual agent daemon. This function will run forever.`
feat: start of agent code 2022-12-06 13:50:25 +01:00			`pub fn (mut d AgentDaemon) run() {`
feat(agent): initial working version 2022-12-12 22:58:43 +01:00			`// This is just so that the very first time the loop is ran, the jobs are`
			`// always polled`
			`mut last_poll_time := time.now().add_seconds(-d.conf.polling_frequency)`
feat(agent): clean up code a bit; add frequent polling when active 2022-12-13 17:42:49 +01:00			`mut sleep_time := 1 * time.second`
			`mut finished, mut empty := 0, 0`
feat(agent): initial working version 2022-12-12 22:58:43 +01:00
feat: start of agent code 2022-12-06 13:50:25 +01:00			`for {`
feat(agent): clean up code a bit; add frequent polling when active 2022-12-13 17:42:49 +01:00			`finished, empty = d.update_atomics()`
feat: start of agent code 2022-12-06 13:50:25 +01:00
feat(agent): clean up code a bit; add frequent polling when active 2022-12-13 17:42:49 +01:00			`// No new finished builds and no free slots, so there's nothing to be`
			`// done`
			`if finished + empty == 0 {`
feat(agent): partially wrote daemon code 2022-12-12 22:09:57 +01:00			`time.sleep(1 * time.second)`
			`continue`
			`}`

			`// Builds have finished, so old builder images might have freed up.`
feat(agent): clean up code a bit; add frequent polling when active 2022-12-13 17:42:49 +01:00			`// TODO this might query the docker daemon too frequently.`
			`if finished > 0 {`
			`d.images.clean_old_images()`
			`}`
feat(agent): partially wrote daemon code 2022-12-12 22:09:57 +01:00
feat(agent): clean up code a bit; add frequent polling when active 2022-12-13 17:42:49 +01:00			`// The agent will always poll for new jobs after at most`
			// `polling_frequency` seconds. However, when jobs have finished, the
			`// agent will also poll for new jobs. This is because jobs are often`
			`// clustered together (especially when mostly using the global cron`
			`// schedule), so there's a much higher chance jobs are available.`
			`if finished > 0 \|\| time.now() >= last_poll_time.add_seconds(d.conf.polling_frequency) {`
			`new_configs := d.client.poll_jobs(d.conf.arch, finished + empty) or {`
feat(agent): initial working version 2022-12-12 22:58:43 +01:00			`d.lerror('Failed to poll jobs: $err.msg()')`

feat(agent): clean up code a bit; add frequent polling when active 2022-12-13 17:42:49 +01:00			`// TODO pick a better delay here`
feat(agent): initial working version 2022-12-12 22:58:43 +01:00			`time.sleep(5 * time.second)`
			`continue`
			`}`
			`last_poll_time = time.now()`

			`for config in new_configs {`
			`// TODO handle this better than to just skip the config`
feat(agent): clean up code a bit; add frequent polling when active 2022-12-13 17:42:49 +01:00			`// Make sure a recent build base image is available for`
			`// building the config`
feat(agent): initial working version 2022-12-12 22:58:43 +01:00			`d.images.refresh_image(config.base_image) or {`
			`d.lerror(err.msg())`
			`continue`
			`}`
			`d.start_build(config)`
			`}`
feat(agent): partially wrote daemon code 2022-12-12 22:09:57 +01:00
feat(agent): clean up code a bit; add frequent polling when active 2022-12-13 17:42:49 +01:00			`// No new jobs were scheduled and the agent isn't doing anything,`
			`// so we just wait until the next polling period.`
			`if new_configs.len == 0 && finished + empty == d.conf.max_concurrent_builds {`
			`sleep_time = time.now() - last_poll_time`
			`}`
feat(agent): initial working version 2022-12-12 22:58:43 +01:00			`}`
			`// The agent is not doing anything, so we just wait until the next poll`
			`// time`
feat(agent): clean up code a bit; add frequent polling when active 2022-12-13 17:42:49 +01:00			`else if finished + empty == d.conf.max_concurrent_builds {`
			`sleep_time = time.now() - last_poll_time`
feat(build): start of server-side job queue 2022-12-06 14:11:17 +01:00			`}`
feat(agent): clean up code a bit; add frequent polling when active 2022-12-13 17:42:49 +01:00
			`time.sleep(sleep_time)`
feat: start of agent code 2022-12-06 13:50:25 +01:00			`}`
			`}`

feat(agent): begin reforming for new api 2022-12-12 21:21:58 +01:00			`// update_atomics checks for each build whether it's completed, and sets it to`
feat(agent): clean up code a bit; add frequent polling when active 2022-12-13 17:42:49 +01:00			// empty again if so. The return value is a tuple `(finished, empty)` where
			// `finished` is how many builds were just finished and thus set to empty, and
			// `empty` is how many build slots were already empty. The amount of running
			`// builds can then be calculated by substracting these two values from the`
			`// total allowed concurrent builds.`
			`fn (mut d AgentDaemon) update_atomics() (int, int) {`
			`mut finished := 0`
			`mut empty := 0`
feat: start of agent code 2022-12-06 13:50:25 +01:00
			`for i in 0 .. d.atomics.len {`
			`if stdatomic.load_u64(&d.atomics[i]) == agent.build_done {`
			`stdatomic.store_u64(&d.atomics[i], agent.build_empty)`
feat(agent): clean up code a bit; add frequent polling when active 2022-12-13 17:42:49 +01:00			`finished++`
feat: start of agent code 2022-12-06 13:50:25 +01:00			`} else if stdatomic.load_u64(&d.atomics[i]) == agent.build_empty {`
feat(agent): clean up code a bit; add frequent polling when active 2022-12-13 17:42:49 +01:00			`empty++`
feat: start of agent code 2022-12-06 13:50:25 +01:00			`}`
			`}`

feat(agent): clean up code a bit; add frequent polling when active 2022-12-13 17:42:49 +01:00			`return finished, empty`
feat: start of agent code 2022-12-06 13:50:25 +01:00			`}`
feat(agent): partially wrote daemon code 2022-12-12 22:09:57 +01:00
feat(agent): clean up code a bit; add frequent polling when active 2022-12-13 17:42:49 +01:00			`// start_build starts a build for the given BuildConfig.`
feat(agent): partially wrote daemon code 2022-12-12 22:09:57 +01:00			`fn (mut d AgentDaemon) start_build(config BuildConfig) bool {`
			`for i in 0 .. d.atomics.len {`
			`if stdatomic.load_u64(&d.atomics[i]) == agent.build_empty {`
			`stdatomic.store_u64(&d.atomics[i], agent.build_running)`
			`d.builds[i] = config`

			`go d.run_build(i, config)`

			`return true`
			`}`
			`}`

			`return false`
			`}`

			`// run_build actually starts the build process for a given target.`
			`fn (mut d AgentDaemon) run_build(build_index int, config BuildConfig) {`
			`d.linfo('started build: $config.url -> $config.repo')`

			`// 0 means success, 1 means failure`
			`mut status := 0`

			`new_config := BuildConfig{`
			`...config`
			`base_image: d.images.get(config.base_image)`
			`}`

			`res := build.build_config(d.client.address, d.client.api_key, new_config) or {`
			`d.ldebug('build_config error: $err.msg()')`
			`status = 1`

			`build.BuildResult{}`
			`}`

			`if status == 0 {`
			`d.linfo('finished build: $config.url -> $config.repo; uploading logs...')`

feat(agent): clean up code a bit; add frequent polling when active 2022-12-13 17:42:49 +01:00			`// TODO use the arch value here`
feat(agent): partially wrote daemon code 2022-12-12 22:09:57 +01:00			`build_arch := os.uname().machine`
			`d.client.add_build_log(config.target_id, res.start_time, res.end_time, build_arch,`
			`res.exit_code, res.logs) or {`
			`d.lerror('Failed to upload logs for build: $config.url -> $config.repo')`
			`}`
			`} else {`
			`d.linfo('an error occured during build: $config.url -> $config.repo')`
			`}`

			`stdatomic.store_u64(&d.atomics[build_index], agent.build_done)`
			`}`