sync: implement pool.work_on_items to process a list of items in parallel

pull/3937/head
Alexander Medvednikov 2020-03-04 20:28:42 +01:00 committed by GitHub
parent 136aa763a3
commit b0ece3a9d8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 423 additions and 215 deletions

View File

@ -5,7 +5,6 @@ import (
term term
benchmark benchmark
filepath filepath
runtime
sync sync
v.pref v.pref
) )
@ -17,10 +16,6 @@ pub mut:
vargs string vargs string
failed bool failed bool
benchmark benchmark.Benchmark benchmark benchmark.Benchmark
ntask int // writing to this should be locked by mu.
ntask_mtx &sync.Mutex
waitgroup &sync.WaitGroup
show_ok_tests bool show_ok_tests bool
} }
@ -28,11 +23,6 @@ pub fn new_test_session(vargs string) TestSession {
return TestSession{ return TestSession{
vexe: pref.vexe_path() vexe: pref.vexe_path()
vargs: vargs vargs: vargs
ntask: 0
ntask_mtx: sync.new_mutex()
waitgroup: sync.new_waitgroup()
show_ok_tests: !vargs.contains('-silent') show_ok_tests: !vargs.contains('-silent')
} }
} }
@ -69,51 +59,38 @@ pub fn (ts mut TestSession) test() {
} }
remaining_files << dot_relative_file remaining_files << dot_relative_file
} }
ts.files = remaining_files ts.files = remaining_files
ts.benchmark.set_total_expected_steps(remaining_files.len) ts.benchmark.set_total_expected_steps(remaining_files.len)
mut pool_of_test_runners := sync.new_pool_processor({
mut njobs := runtime.nr_jobs() callback: worker_trunner
})
pool_of_test_runners.set_shared_context(ts)
$if msvc { $if msvc {
// NB: MSVC can not be launched in parallel, without giving it // NB: MSVC can not be launched in parallel, without giving it
// the option /FS because it uses a shared PDB file, which should // the option /FS because it uses a shared PDB file, which should
// be locked, but that makes writing slower... // be locked, but that makes writing slower...
// See: https://docs.microsoft.com/en-us/cpp/build/reference/fs-force-synchronous-pdb-writes?view=vs-2019 // See: https://docs.microsoft.com/en-us/cpp/build/reference/fs-force-synchronous-pdb-writes?view=vs-2019
// Instead, just run tests on 1 core for now. // Instead, just run tests on 1 core for now.
njobs = 1 pool_of_test_runners.set_max_jobs(1)
} }
ts.waitgroup.add( njobs ) pool_of_test_runners.work_on_pointers(remaining_files.pointers())
for i:=0; i < njobs; i++ {
go process_in_thread(ts)
}
ts.waitgroup.wait()
ts.benchmark.stop() ts.benchmark.stop()
eprintln(term.h_divider('-')) eprintln(term.h_divider('-'))
} }
fn worker_trunner(p mut sync.PoolProcessor, idx int, thread_id int) voidptr {
fn process_in_thread(ts mut TestSession){ mut ts := &TestSession(p.get_shared_context())
ts.process_files()
ts.waitgroup.done()
}
fn (ts mut TestSession) process_files() {
tmpd := os.tmpdir() tmpd := os.tmpdir()
show_stats := '-stats' in ts.vargs.split(' ') show_stats := '-stats' in ts.vargs.split(' ')
// tls_bench is used to format the step messages/timings
mut tls_bench := benchmark.new_benchmark() // tls_bench is used to format the step messages/timings mut tls_bench := &benchmark.Benchmark(p.get_thread_context(idx))
tls_bench.set_total_expected_steps( ts.benchmark.nexpected_steps ) if isnil(tls_bench) {
for { tls_bench = benchmark.new_benchmark_pointer()
tls_bench.set_total_expected_steps(ts.benchmark.nexpected_steps)
ts.ntask_mtx.lock() p.set_thread_context(idx, tls_bench)
ts.ntask++ }
idx := ts.ntask-1
ts.ntask_mtx.unlock()
if idx >= ts.files.len { break }
tls_bench.cstep = idx tls_bench.cstep = idx
dot_relative_file := p.get_string_item(idx)
dot_relative_file := ts.files[ idx ]
relative_file := dot_relative_file.replace('./', '') relative_file := dot_relative_file.replace('./', '')
file := os.realpath(relative_file) file := os.realpath(relative_file)
// Ensure that the generated binaries will be stored in the temporary folder. // Ensure that the generated binaries will be stored in the temporary folder.
@ -143,7 +120,7 @@ fn (ts mut TestSession) process_files() {
ts.failed = true ts.failed = true
ts.benchmark.fail() ts.benchmark.fail()
tls_bench.fail() tls_bench.fail()
continue return sync.no_result
} }
} }
else { else {
@ -152,7 +129,7 @@ fn (ts mut TestSession) process_files() {
ts.benchmark.fail() ts.benchmark.fail()
tls_bench.fail() tls_bench.fail()
eprintln(tls_bench.step_message_fail(relative_file)) eprintln(tls_bench.step_message_fail(relative_file))
continue return sync.no_result
} }
if r.exit_code != 0 { if r.exit_code != 0 {
ts.failed = true ts.failed = true
@ -171,7 +148,7 @@ fn (ts mut TestSession) process_files() {
if os.exists(generated_binary_fpath) { if os.exists(generated_binary_fpath) {
os.rm(generated_binary_fpath) os.rm(generated_binary_fpath)
} }
} return sync.no_result
} }
pub fn vlib_should_be_present(parent_dir string) { pub fn vlib_should_be_present(parent_dir string) {
@ -193,17 +170,17 @@ pub fn v_build_failing(zargs string, folder string) bool {
eprintln('v compiler args: "$vargs"') eprintln('v compiler args: "$vargs"')
mut session := new_test_session(vargs) mut session := new_test_session(vargs)
files := os.walk_ext(filepath.join(parent_dir,folder), '.v') files := os.walk_ext(filepath.join(parent_dir,folder), '.v')
mut mains := files.filter(!it.contains('modules') && !it.contains('preludes')) mut mains := []string
for f in files {
if !f.contains('modules') && !f.contains('preludes') {
$if windows { $if windows {
// skip pico example on windows // skip pico example on windows
// there was a bug using filter here if f.ends_with('examples\\pico\\pico.v') {
mut mains_filtered := []string continue
for file in mains {
if !file.ends_with('examples\\pico\\pico.v') {
mains_filtered << file
} }
} }
mains = mains_filtered mains << f
}
} }
session.files << mains session.files << mains
session.test() session.test()
@ -257,9 +234,9 @@ pub fn building_any_v_binaries_failed() bool {
} }
pub fn eheader(msg string) { pub fn eheader(msg string) {
eprintln(term.header(msg,'-')) eprintln(term.header(msg, '-'))
} }
pub fn header(msg string) { pub fn header(msg string) {
println(term.header(msg,'-')) println(term.header(msg, '-'))
} }

View File

@ -5,47 +5,26 @@ import net.http
import json import json
import sync import sync
const (
nr_threads = 4
)
struct Story { struct Story {
title string title string
url string url string
} }
struct Fetcher { fn worker_fetch(p &sync.PoolProcessor, cursor int, worker_id int) voidptr {
mut: id := p.get_item<int>(cursor)
mu &sync.Mutex
ids []int
cursor int
wg &sync.WaitGroup
}
fn (f mut Fetcher) fetch() {
for {
if f.cursor >= f.ids.len {
return
}
id := f.ids[f.cursor]
f.mu.lock()
f.cursor++
f.mu.unlock()
cursor := f.cursor
resp := http.get('https://hacker-news.firebaseio.com/v0/item/${id}.json') or { resp := http.get('https://hacker-news.firebaseio.com/v0/item/${id}.json') or {
println('failed to fetch data from /v0/item/${id}.json') println('failed to fetch data from /v0/item/${id}.json')
exit(1) return sync.no_result
} }
story := json.decode(Story,resp.text) or { story := json.decode(Story,resp.text) or {
println('failed to decode a story') println('failed to decode a story')
exit(1) return sync.no_result
}
println('#$cursor) $story.title | $story.url')
f.wg.done()
} }
println('# $cursor) $story.title | $story.url')
return sync.no_result
} }
// Fetches top HN stories in 4 coroutines // Fetches top HN stories in parallel, depending on how many cores you have
fn main() { fn main() {
resp := http.get('https://hacker-news.firebaseio.com/v0/topstories.json') or { resp := http.get('https://hacker-news.firebaseio.com/v0/topstories.json') or {
println('failed to fetch data from /v0/topstories.json') println('failed to fetch data from /v0/topstories.json')
@ -56,22 +35,15 @@ fn main() {
return return
} }
if ids.len > 10 { if ids.len > 10 {
// ids = ids[:10] ids = ids[0..10]
mut tmp := [0].repeat(10)
for i in 0..10 {
tmp[i] = ids[i]
} }
ids = tmp mut fetcher_pool := sync.new_pool_processor({
} callback: worker_fetch
mut fetcher := &Fetcher{ })
ids: ids // NB: if you do not call set_max_jobs, the pool will try to use an optimal
mu: sync.new_mutex() // number of threads, one per each core in your system, which in most
wg: sync.new_waitgroup() // cases is what you want anyway... You can override the automatic choice
} // by setting the VJOBS environment variable too.
fetcher.wg.add(ids.len) // fetcher_pool.set_max_jobs( 4 )
for i in 0..nr_threads { fetcher_pool.work_on_items<int>(ids)
go fetcher.fetch()
}
fetcher.wg.wait()
} }

View File

@ -74,6 +74,13 @@ pub fn new_benchmark() Benchmark {
} }
} }
// new_benchmark_pointer returns a reference to a new Benchmark, with its
// start time set to the current time and verbose output enabled.
// Returning a reference lets callers store the benchmark behind a voidptr.
pub fn new_benchmark_pointer() &Benchmark {
return &Benchmark{
bench_start_time: benchmark.now()
verbose: true
}
}
pub fn (b mut Benchmark) set_total_expected_steps(n int) { pub fn (b mut Benchmark) set_total_expected_steps(n int) {
b.nexpected_steps = n b.nexpected_steps = n
} }

View File

@ -577,3 +577,13 @@ pub fn compare_f32(a, b &f32) int {
} }
return 0 return 0
} }
// a.pointers() returns a new array, where each element
// is the address of the corresponding element in a.
// The addresses are computed from a.data, i.e. they point into the storage
// of `a` itself; no element is copied.
// NOTE(review): presumably the pointers are only usable while that storage
// stays alive and is not reallocated - confirm against the array internals.
pub fn (a array) pointers() []voidptr {
mut res := []voidptr
for i in 0..a.len {
// byte offset of the i-th element from the start of the data buffer
res << a.data + i * a.element_size
}
return res
}

View File

@ -3,7 +3,6 @@ module main
import os import os
import compiler.tests.repl.runner import compiler.tests.repl.runner
import benchmark import benchmark
import runtime
import sync import sync
import filepath import filepath
@ -30,75 +29,63 @@ struct Session {
mut: mut:
options runner.RunnerOptions options runner.RunnerOptions
bmark benchmark.Benchmark bmark benchmark.Benchmark
ntask int
ntask_mtx &sync.Mutex
waitgroup &sync.WaitGroup
} }
fn test_all_v_repl_files() { fn test_all_v_repl_files() {
mut session := &Session{ mut session := &Session{
options: runner.new_options() options: runner.new_options()
bmark: benchmark.new_benchmark() bmark: benchmark.new_benchmark()
ntask: 0
ntask_mtx: sync.new_mutex()
waitgroup: sync.new_waitgroup()
} }
// warmup, and ensure that the vrepl is compiled in single threaded mode if it does not exist // warmup, and ensure that the vrepl is compiled in single threaded mode if it does not exist
runner.run_repl_file(os.cachedir(), session.options.vexec, 'vlib/compiler/tests/repl/nothing.repl') or { runner.run_repl_file(os.cachedir(), session.options.vexec, 'vlib/compiler/tests/repl/nothing.repl') or {
panic(err) panic(err)
} }
session.bmark.set_total_expected_steps(session.options.files.len)
session.bmark.set_total_expected_steps( session.options.files.len ) mut pool_repl := sync.new_pool_processor({
mut ncpus := 0 callback: worker_repl
ncpus = runtime.nr_cpus() })
pool_repl.set_shared_context(session)
$if windows { $if windows {
// See: https://docs.microsoft.com/en-us/cpp/build/reference/fs-force-synchronous-pdb-writes?view=vs-2019 // See: https://docs.microsoft.com/en-us/cpp/build/reference/fs-force-synchronous-pdb-writes?view=vs-2019
ncpus = 1 pool_repl.set_max_jobs(1)
} }
session.waitgroup.add( ncpus ) pool_repl.work_on_items<string>(session.options.files)
for i:=0; i < ncpus; i++ {
go process_in_thread(session,i)
}
session.waitgroup.wait()
session.bmark.stop() session.bmark.stop()
println(session.bmark.total_message('total time spent running REPL files')) println(session.bmark.total_message('total time spent running REPL files'))
} }
fn process_in_thread( session mut Session, thread_id int ){ fn worker_repl(p mut sync.PoolProcessor, idx int, thread_id int) voidptr {
cdir := os.cachedir() cdir := os.cachedir()
mut tls_bench := benchmark.new_benchmark() mut session := &Session(p.get_shared_context())
tls_bench.set_total_expected_steps( session.bmark.nexpected_steps ) mut tls_bench := &benchmark.Benchmark(p.get_thread_context(idx))
for { if isnil(tls_bench) {
session.ntask_mtx.lock() tls_bench = benchmark.new_benchmark_pointer()
session.ntask++ tls_bench.set_total_expected_steps(session.bmark.nexpected_steps)
idx := session.ntask-1 p.set_thread_context(idx, tls_bench)
session.ntask_mtx.unlock()
if idx >= session.options.files.len { break }
tls_bench.cstep = idx
tfolder := filepath.join( cdir, 'vrepl_tests_$idx')
if os.is_dir( tfolder ) {
os.rmdir_all( tfolder )
} }
os.mkdir( tfolder ) or { panic(err) } tls_bench.cstep = idx
tfolder := filepath.join(cdir,'vrepl_tests_$idx')
file := session.options.files[ idx ] if os.is_dir(tfolder) {
os.rmdir_all(tfolder)
}
os.mkdir(tfolder) or {
panic(err)
}
file := p.get_string_item(idx)
session.bmark.step() session.bmark.step()
tls_bench.step() tls_bench.step()
fres := runner.run_repl_file(tfolder, session.options.vexec, file) or { fres := runner.run_repl_file(tfolder, session.options.vexec, file) or {
session.bmark.fail() session.bmark.fail()
tls_bench.fail() tls_bench.fail()
os.rmdir_all( tfolder ) os.rmdir_all(tfolder)
eprintln(tls_bench.step_message_fail(err)) eprintln(tls_bench.step_message_fail(err))
assert false assert false
continue return sync.no_result
} }
session.bmark.ok() session.bmark.ok()
tls_bench.ok() tls_bench.ok()
os.rmdir_all( tfolder ) os.rmdir_all(tfolder)
println(tls_bench.step_message_ok(fres)) println(tls_bench.step_message_ok(fres))
assert true assert true
} return sync.no_result
session.waitgroup.done()
} }

197
vlib/sync/pool.v 100644
View File

@ -0,0 +1,197 @@
module sync
// * Goal: this file provides a convenient way to run identical tasks over a list
// * of items in parallel, without worrying about waitgroups, mutexes and so on.
// *
// * Usage example:
// * pool := sync.new_pool_processor({ callback: worker_cb })
// * //pool.work_on_items<string>(['a','b','c']) // TODO: vfmt and generics
// * pool.work_on_pointers(['a','b','c'].pointers())
// * // optionally, you can iterate over the results too:
// * for x in pool.get_results<IResult>() {
// * // do stuff with x
// * }
// *
// * See https://github.com/vlang/v/blob/master/vlib/sync/pool_test.v for a
// * more detailed usage example.
// *
// * After all the work is done in parallel by the worker threads in the pool,
// * pool.work_on_items will return, and you can then call
// * pool.get_results<Result>() to retrieve a list of all the results,
// * that the worker callbacks returned for each item that you passed.
// * The parameters of new_pool_processor are:
// * context.maxjobs: when 0 (the default), the PoolProcessor will use a
// * number of threads that is optimal for your system to process your items
// * context.callback: this should be a callback function, that each worker
// * thread in the pool will run for each item.
// * The callback function will receive as parameters:
// * 1) the PoolProcessor instance, so it can call
// * p.get_item<int>(idx) to get the actual item at index idx
// * NB: for now, you are better off calling p.get_string_item(idx)
// * or p.get_int_item(idx) ; TODO: vfmt and generics
// * 2) idx - the index of the currently processed item
// * 3) task_id - the index of the worker thread in which the callback
// * function is running.
import runtime
// no_result is a null pointer. A worker callback can return it when it has
// nothing to report for the item that it processed.
// NOTE(review): get_result/get_results dereference every stored result
// pointer, so they look unsafe to call for items whose callback returned
// no_result - confirm before mixing the two.
pub const (
no_result = voidptr(0)
)
// PoolProcessor holds the state of one pool of worker threads: the items to
// process, the result slot of each item, and the bookkeeping that the
// worker threads share while they run.
pub struct PoolProcessor {
thread_cb voidptr // the worker callback; cast back to ThreadCB in process_in_thread
mut:
njobs int // requested number of worker threads; when not > 0, runtime.nr_jobs() is used
items []voidptr // addresses of the items to process
results []voidptr // results[idx] stores what the callback returned for items[idx]
ntask int // index of the next item to hand out; writing to this should be locked by ntask_mtx.
ntask_mtx &sync.Mutex // guards ntask
waitgroup &sync.WaitGroup // work_on_pointers waits on it; each worker calls done() when it exits
shared_context voidptr // set with set_shared_context, read with get_shared_context
thread_contexts []voidptr // slots for set_thread_context/get_thread_context; sized to items.len
}
// ThreadCB is the signature of a worker callback. It receives the pool, the
// index `idx` of the item to process, and `task_id`, the number of the worker
// that is running it. The returned pointer is stored as the result for `idx`.
pub type ThreadCB fn(p &PoolProcessor, idx int, task_id int)voidptr
// PoolProcessorConfig carries the settings accepted by new_pool_processor.
pub struct PoolProcessorConfig {
maxjobs int // copied to PoolProcessor.njobs; leave it at 0 for automatic selection
callback ThreadCB // required; new_pool_processor panics when it is nil
}
// new_pool_processor returns a new PoolProcessor instance.
// It panics when context.callback is nil. The new pool has no items, no
// results and no shared context yet; context.maxjobs becomes its job limit.
pub fn new_pool_processor(context PoolProcessorConfig) &PoolProcessor {
if isnil(context.callback) {
panic('You need to pass a valid callback to new_pool_processor.')
}
// TODO: remove this call.
// It prevents a V warning about unused module runtime.
runtime.nr_jobs()
pool := &PoolProcessor {
items: []
results: []
shared_context: voidptr(0)
thread_contexts: []
njobs: context.maxjobs
ntask: 0
ntask_mtx: sync.new_mutex()
waitgroup: sync.new_waitgroup()
thread_cb: context.callback
}
return pool
}
// set_max_jobs gives you the ability to override the number
// of jobs *after* the PoolProcessor had been created already.
// The value is only read when the work starts (in work_on_pointers), where
// any value that is not > 0 means: use runtime.nr_jobs() instead.
pub fn (pool mut PoolProcessor) set_max_jobs(njobs int) {
pool.njobs = njobs
}
// work_on_items receives a list of items of type T,
// then starts a work pool of pool.njobs threads, each running
// pool.thread_cb in a loop, until all items in the list
// are processed.
// When pool.njobs is 0, the number of jobs is determined
// by the number of available cores on the system.
// work_on_items returns *after* all threads finish.
// You can optionally call get_results after that.
// It is a thin generic wrapper: it takes the address of every item
// with items.pointers() and passes them to work_on_pointers.
pub fn (pool mut PoolProcessor) work_on_items<T>(items []T) {
pool.work_on_pointers( items.pointers() )
}
// work_on_pointers processes, in parallel, the items whose addresses are
// given in `items`. It starts pool.njobs worker threads (or
// runtime.nr_jobs() of them, when pool.njobs is not > 0), and returns only
// after all of them have finished. The result of the callback for
// items[idx] is then available in slot idx (see get_result/get_results).
// The pool can be reused: every call starts again from the first item and
// replaces the items, results and thread contexts of the previous call.
pub fn (pool mut PoolProcessor) work_on_pointers(items []voidptr) {
	mut njobs := runtime.nr_jobs()
	if pool.njobs > 0 {
		njobs = pool.njobs
	}
	// Restart the item counter. Without this, a pool that had already been
	// used once would start with ntask past the previous run's length, so it
	// would skip the first items of the new list (or all of them).
	pool.ntask = 0
	// Copy the pointers, so that the pool does not share the caller's array.
	pool.items = []
	pool.items << items
	// One result slot and one context slot per item, all initially null.
	pool.results = [voidptr(0)].repeat(pool.items.len)
	pool.thread_contexts = [voidptr(0)].repeat(pool.items.len)
	pool.waitgroup.add(njobs)
	for i := 0; i < njobs; i++ {
		go process_in_thread(pool,i)
	}
	pool.waitgroup.wait()
}
// process_in_thread does the actual work of worker thread.
// It is a workaround for the current inability to pass a
// method in a callback.
// Each worker repeatedly claims the next unprocessed item index, runs the
// pool callback on it, and stores the returned pointer in pool.results.
// When no items are left, it signals the pool's waitgroup and exits.
fn process_in_thread(pool mut PoolProcessor, task_id int) {
	cb := ThreadCB(pool.thread_cb)
	mut idx := 0
	ilen := pool.items.len
	for {
		// Claiming an index and checking that it is in range must happen
		// in the same critical section. Checking pool.ntask before taking
		// the lock lets several workers pass the check for the last item,
		// and all but one of them would then get an index >= ilen.
		pool.ntask_mtx.lock()
		idx = pool.ntask
		if idx < ilen {
			pool.ntask++
		}
		pool.ntask_mtx.unlock()
		if idx >= ilen {
			break
		}
		pool.results[idx] = cb(pool, idx, task_id)
	}
	pool.waitgroup.done()
}
// get_item - called by the worker callback.
// Retrieves a type safe instance of the currently processed item
// It reads the stored pointer for `idx` as a &T and returns the T value
// that it points to. T must be the element type that the items were
// passed with; nothing checks that here.
pub fn (pool &PoolProcessor) get_item<T>(idx int) T {
return *(&T(pool.items[idx]))
}
// get_string_item - called by the worker callback.
// It does not use generics so it does not mess up vfmt.
// TODO: remove the need for this when vfmt becomes smarter.
// It returns the item at index `idx`, read through its stored pointer as a
// string. Use it only when the pool is working on a list of strings.
pub fn (pool &PoolProcessor) get_string_item(idx int) string {
return *(&string(pool.items[idx]))
}
// get_int_item - called by the worker callback.
// It does not use generics so it does not mess up vfmt.
// TODO: remove the need for this when vfmt becomes smarter.
// It returns the item at index `idx`, read through its stored pointer as an
// int. Use it only when the pool is working on a list of ints.
pub fn (pool &PoolProcessor) get_int_item(idx int) int {
return *(&int(pool.items[idx]))
}
// get_result returns the result for the item at index `idx`, by reading the
// pointer that the worker callback returned for it as a &T and
// dereferencing it.
// NOTE(review): the pointer is dereferenced unconditionally, so this looks
// unsafe for items whose callback returned no_result (a null pointer) -
// confirm before relying on it.
pub fn (pool &PoolProcessor) get_result<T>(idx int) T {
return *(&T(pool.results[idx]))
}
// get_results - can be called to get a list of type safe results.
// The returned array has one element per item, in item order: element i is
// the value pointed to by the result that the callback returned for item i.
// NOTE(review): every result pointer is dereferenced, so this looks unsafe
// when any callback returned no_result (a null pointer) - confirm.
pub fn (pool &PoolProcessor) get_results<T>() []T {
mut res := []T
for i in 0 .. pool.results.len {
res << *(&T(pool.results[i]))
}
return res
}
// set_shared_context - can be called during the setup so that you can
// provide a context that is shared between all worker threads, like
// common options/settings.
// The pool only stores the pointer; it does not copy what it points to.
pub fn (pool mut PoolProcessor) set_shared_context(context voidptr) {
pool.shared_context = context
}
// get_shared_context - can be called in each worker callback, to get
// the context set by pool.set_shared_context
// The result is an untyped pointer (a null one, if no context was set);
// the caller casts it back to the type that it stored.
pub fn (pool &PoolProcessor) get_shared_context() voidptr {
return pool.shared_context
}
// set_thread_context - can be called during the setup at the start of
// each worker callback, so that the worker callback can have some thread
// local storage area where it can write/read information that is private
// to the given thread, without worrying that it will get overwritten by
// another thread
// NOTE(review): work_on_pointers allocates one slot per *item*
// (pool.items.len), not one per worker thread, so `idx` must be smaller
// than the number of items - confirm whether callers are expected to pass
// the item index or the worker id here.
pub fn (pool mut PoolProcessor) set_thread_context(idx int, context voidptr) {
pool.thread_contexts[idx] = context
}
// get_thread_context - returns a pointer, that was set with
// pool.set_thread_context . This pointer is private to each thread.
// A slot that was never set holds a null pointer, because work_on_pointers
// fills all the slots with voidptr(0) before starting the workers.
pub fn (pool &PoolProcessor) get_thread_context(idx int) voidptr {
return pool.thread_contexts[idx]
}

View File

@ -0,0 +1,58 @@
import sync
import time
import rand
// SResult is the result type that worker_s returns for each string item.
struct SResult {
s string
}
// worker_s is the pool callback for the string test: it prints which worker
// handles which item, sleeps for a short random time, and returns
// a new SResult that contains the item concatenated with itself.
fn worker_s(p &sync.PoolProcessor, idx int, worker_id int) voidptr {
// TODO: this works, but confuses vfmt. It should be used instead of
// p.get_string_item when vfmt becomes smarter.
// item := p.get_item<string>(idx)
item := p.get_string_item(idx)
println('worker_s worker_id: $worker_id | idx: $idx | item: ${item}')
time.sleep_ms(rand.next(3))
return &SResult{item + item}
}
// IResult is the result type that worker_i returns for each int item.
struct IResult {
i int
}
// worker_i is the pool callback for the int test: it prints which worker
// handles which item, sleeps for a short random time, and returns
// a new IResult that contains the item multiplied by 1000.
fn worker_i(p &sync.PoolProcessor, idx int, worker_id int) voidptr {
// TODO: this works, but confuses vfmt. See the comment above.
// item := p.get_item<int>(idx)
item := p.get_int_item(idx)
println('worker_i worker_id: $worker_id | idx: $idx | item: ${item}')
time.sleep_ms(rand.next(5))
return &IResult{item * 1000}
}
// test_work_on_strings runs worker_s over 10 one-letter strings, with the
// pool limited to 8 jobs, and checks that every result is longer than
// 1 character (each item is doubled by the worker).
fn test_work_on_strings() {
rand.seed(0)
mut pool_s := sync.new_pool_processor({
callback: worker_s
maxjobs: 8
})
pool_s.work_on_items<string>(['a','b','c','d','e','f','g','h','i','j'])
for x in pool_s.get_results<SResult>() {
println( x.s )
assert x.s.len > 1
}
}
// test_work_on_ints runs worker_i over the ints 1..8, with an automatically
// chosen number of jobs, and checks that every result is above 100
// (each item is multiplied by 1000 by the worker).
fn test_work_on_ints() {
rand.seed(0)
// NB: since maxjobs is left empty here,
// the pool processor will use njobs = runtime.nr_jobs so that
// it will work optimally without overloading the system
mut pool_i := sync.new_pool_processor({
callback: worker_i
})
pool_i.work_on_items<int>([1,2,3,4,5,6,7,8])
for x in pool_i.get_results<IResult>() {
println( x.i )
assert x.i > 100
}
}