From 949379616004e093e45fc506011b4862f57ec682 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Mon, 12 Sep 2022 13:59:52 +0200 Subject: [PATCH 1/3] refactor(package): split module into two files --- src/package/README.md | 5 ++ src/package/format.v | 103 ++++++++++++++++++++++++++++++++++++++++++ src/package/package.v | 101 ----------------------------------------- 3 files changed, 108 insertions(+), 101 deletions(-) create mode 100644 src/package/README.md create mode 100644 src/package/format.v diff --git a/src/package/README.md b/src/package/README.md new file mode 100644 index 0000000..b2bcbd7 --- /dev/null +++ b/src/package/README.md @@ -0,0 +1,5 @@ +# package + +This module handles both parsing the published Arch tarballs & the contents of +their `.PKGINFO` files, as well as generating the contents of the database +archives' `desc` & `files` files. diff --git a/src/package/format.v b/src/package/format.v new file mode 100644 index 0000000..a81d327 --- /dev/null +++ b/src/package/format.v @@ -0,0 +1,103 @@ +module package + +// format_entry returns a string properly formatted to be added to a desc file. +[inline] +fn format_entry(key string, value string) string { + return '\n%$key%\n$value\n' +} + +// full_name returns the properly formatted name for the package, including +// version & architecture +pub fn (pkg &Pkg) full_name() string { + p := pkg.info + return '$p.name-$p.version-$p.arch' +} + +// filename returns the correct filename of the package file +pub fn (pkg &Pkg) filename() string { + ext := match pkg.compression { + 0 { '.tar' } + 1 { '.tar.gz' } + 6 { '.tar.xz' } + 14 { '.tar.zst' } + else { panic("Another compression code shouldn't be possible. 
Faulty code: $pkg.compression") } + } + + return '${pkg.full_name()}.pkg$ext' +} + +// to_desc returns a desc file valid string representation +pub fn (pkg &Pkg) to_desc() !string { + p := pkg.info + + // filename + mut desc := '%FILENAME%\n$pkg.filename()\n' + + desc += format_entry('NAME', p.name) + desc += format_entry('BASE', p.base) + desc += format_entry('VERSION', p.version) + + if p.description.len > 0 { + desc += format_entry('DESC', p.description) + } + + if p.groups.len > 0 { + desc += format_entry('GROUPS', p.groups.join_lines()) + } + + desc += format_entry('CSIZE', p.csize.str()) + desc += format_entry('ISIZE', p.size.str()) + + sha256sum := pkg.checksum()! + + desc += format_entry('SHA256SUM', sha256sum) + + // TODO add pgpsig stuff + + if p.url.len > 0 { + desc += format_entry('URL', p.url) + } + + if p.licenses.len > 0 { + desc += format_entry('LICENSE', p.licenses.join_lines()) + } + + desc += format_entry('ARCH', p.arch) + desc += format_entry('BUILDDATE', p.build_date.str()) + desc += format_entry('PACKAGER', p.packager) + + if p.replaces.len > 0 { + desc += format_entry('REPLACES', p.replaces.join_lines()) + } + + if p.conflicts.len > 0 { + desc += format_entry('CONFLICTS', p.conflicts.join_lines()) + } + + if p.provides.len > 0 { + desc += format_entry('PROVIDES', p.provides.join_lines()) + } + + if p.depends.len > 0 { + desc += format_entry('DEPENDS', p.depends.join_lines()) + } + + if p.optdepends.len > 0 { + desc += format_entry('OPTDEPENDS', p.optdepends.join_lines()) + } + + if p.makedepends.len > 0 { + desc += format_entry('MAKEDEPENDS', p.makedepends.join_lines()) + } + + if p.checkdepends.len > 0 { + desc += format_entry('CHECKDEPENDS', p.checkdepends.join_lines()) + } + + return '$desc\n' +} + +// to_files returns a files file valid string representation +pub fn (pkg &Pkg) to_files() string { + return '%FILES%\n$pkg.files.join_lines()\n' +} diff --git a/src/package/package.v b/src/package/package.v index aadf6f2..4518ffd 100644 --- 
a/src/package/package.v +++ b/src/package/package.v @@ -174,104 +174,3 @@ pub fn read_pkg_archive(pkg_path string) !Pkg { compression: compression_code } } - -// format_entry returns a string properly formatted to be added to a desc file. -fn format_entry(key string, value string) string { - return '\n%$key%\n$value\n' -} - -// full_name returns the properly formatted name for the package, including -// version & architecture -pub fn (pkg &Pkg) full_name() string { - p := pkg.info - return '$p.name-$p.version-$p.arch' -} - -// filename returns the correct filename of the package file -pub fn (pkg &Pkg) filename() string { - ext := match pkg.compression { - 0 { '.tar' } - 1 { '.tar.gz' } - 6 { '.tar.xz' } - 14 { '.tar.zst' } - else { panic("Another compression code shouldn't be possible. Faulty code: $pkg.compression") } - } - - return '${pkg.full_name()}.pkg$ext' -} - -// to_desc returns a desc file valid string representation -pub fn (pkg &Pkg) to_desc() !string { - p := pkg.info - - // filename - mut desc := '%FILENAME%\n$pkg.filename()\n' - - desc += format_entry('NAME', p.name) - desc += format_entry('BASE', p.base) - desc += format_entry('VERSION', p.version) - - if p.description.len > 0 { - desc += format_entry('DESC', p.description) - } - - if p.groups.len > 0 { - desc += format_entry('GROUPS', p.groups.join_lines()) - } - - desc += format_entry('CSIZE', p.csize.str()) - desc += format_entry('ISIZE', p.size.str()) - - sha256sum := pkg.checksum()! 
- - desc += format_entry('SHA256SUM', sha256sum) - - // TODO add pgpsig stuff - - if p.url.len > 0 { - desc += format_entry('URL', p.url) - } - - if p.licenses.len > 0 { - desc += format_entry('LICENSE', p.licenses.join_lines()) - } - - desc += format_entry('ARCH', p.arch) - desc += format_entry('BUILDDATE', p.build_date.str()) - desc += format_entry('PACKAGER', p.packager) - - if p.replaces.len > 0 { - desc += format_entry('REPLACES', p.replaces.join_lines()) - } - - if p.conflicts.len > 0 { - desc += format_entry('CONFLICTS', p.conflicts.join_lines()) - } - - if p.provides.len > 0 { - desc += format_entry('PROVIDES', p.provides.join_lines()) - } - - if p.depends.len > 0 { - desc += format_entry('DEPENDS', p.depends.join_lines()) - } - - if p.optdepends.len > 0 { - desc += format_entry('OPTDEPENDS', p.optdepends.join_lines()) - } - - if p.makedepends.len > 0 { - desc += format_entry('MAKEDEPENDS', p.makedepends.join_lines()) - } - - if p.checkdepends.len > 0 { - desc += format_entry('CHECKDEPENDS', p.checkdepends.join_lines()) - } - - return '$desc\n' -} - -// to_files returns a files file valid string representation -pub fn (pkg &Pkg) to_files() string { - return '%FILES%\n$pkg.files.join_lines()\n' -} From 54f40b76385ad02e970114380f4d9a316420e7ba Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Mon, 12 Sep 2022 14:30:17 +0200 Subject: [PATCH 2/3] chore(repo): added readme --- src/repo/README.md | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 src/repo/README.md diff --git a/src/repo/README.md b/src/repo/README.md new file mode 100644 index 0000000..f06b1d3 --- /dev/null +++ b/src/repo/README.md @@ -0,0 +1,43 @@ +# repo + +This module manages the contents of the various repositories stored within a +Vieter instance. + +## Terminology + +* Arch-repository (arch-repo): specific architecture of a given repository. This is what + Pacman actually uses as a repository, and contains its own `.db` & `.files` + files. 
+* Repository (repo): a collection of arch-repositories. A single repository can + contain packages of different architectures, with each package being stored + in that specific architecture's arch-repository. +* Repository group (repo-group): a collection of repositories. Each Vieter + instance consists of a single repository group, which manages all underlying + repositories & arch-repositories. + +## Arch-repository layout + +An arch-repository (aka a regular Pacman repository) consists of a directory +with the following files (`{repo}` should be replaced with the name of the +repository): + +* One or more package directories. These directories follow the naming scheme + `${pkgname}-${pkgver}-${pkgrel}`. Each of these directories contains two + files, `desc` & `files`. The `desc` file is a list of the package's metadata, + while `files` contains a list of all files that the package contains. The + latter is used when using `pacman -F`. +* `{repo}.db` & `{repo}.db.tar.gz`: the database file of the repository. This + is just a compressed tarball of all package directories, but only their + `desc` files. Both these files should have the same content (`repo-add` + creates a symlink, but Vieter just serves the same file for both routes). +* `{repo}.files` & `{repo}.files.tar.gz`: the same as the `.db` file, but this + also contains the `files` files, instead of just the `desc` files. + +## Filesystem layout + +The repository part of Vieter consists of two directories. One is the `repos` +directory inside the configured `data_dir`, while the other is the configured +`pkg_dir`. `repos` contains only the repository group, while `pkg_dir` contains +the actual package archives. `pkg_dir` is the directory that can take up a +significant amount of disk space, while `repos` solely consists of small text +files. 
From 71c77e90bcfa2390ed5f92d19ee3d70d065f4dad Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Tue, 4 Oct 2022 16:45:28 +0200 Subject: [PATCH 3/3] refactor(cron): expression parser now uses bitfields (closes #148) --- CHANGELOG.md | 1 + src/cron/expression/expression.v | 139 ------------------- src/cron/expression/expression_parse.v | 146 ++++++++++++++++++++ src/cron/expression/expression_parse_test.v | 39 ++---- 4 files changed, 162 insertions(+), 163 deletions(-) create mode 100644 src/cron/expression/expression_parse.v diff --git a/CHANGELOG.md b/CHANGELOG.md index a550524..d2dd760 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed * Migrated codebase to V 0.3.2 +* Cron expression parser now uses bitfields instead of bool arrays ### Fixed diff --git a/src/cron/expression/expression.v b/src/cron/expression/expression.v index 438805d..c3ff8c5 100644 --- a/src/cron/expression/expression.v +++ b/src/cron/expression/expression.v @@ -134,142 +134,3 @@ pub fn (ce &CronExpression) next_n(ref time.Time, n int) ![]time.Time { return times } - -// parse_range parses a given string into a range of sorted integers, if -// possible. -fn parse_range(s string, min int, max int, mut bitv []bool) ! { - mut start := min - mut end := max - mut interval := 1 - - exps := s.split('/') - - if exps.len > 2 { - return error('Invalid expression.') - } - - if exps[0] != '*' { - dash_parts := exps[0].split('-') - - if dash_parts.len > 2 { - return error('Invalid expression.') - } - - start = dash_parts[0].int() - - // The builtin parsing functions return zero if the string can't be - // parsed into a number, so we have to explicitely check whether they - // actually entered zero or if it's an invalid number. 
- if start == 0 && dash_parts[0] != '0' { - return error('Invalid number.') - } - - // Check whether the start value is out of range - if start < min || start > max { - return error('Out of range.') - } - - if dash_parts.len == 2 { - end = dash_parts[1].int() - - if end == 0 && dash_parts[1] != '0' { - return error('Invalid number.') - } - - if end < start || end > max { - return error('Out of range.') - } - } - } - - if exps.len > 1 { - interval = exps[1].int() - - // interval being zero is always invalid, but we want to check why - // it's invalid for better error messages. - if interval == 0 { - if exps[1] != '0' { - return error('Invalid number.') - } else { - return error('Step size zero not allowed.') - } - } - - if interval > max - min { - return error('Step size too large.') - } - } - // Here, s solely consists of a number, so that's the only value we - // should return. - else if exps[0] != '*' && !exps[0].contains('-') { - bitv[start - min] = true - return - } - - for start <= end { - bitv[start - min] = true - start += interval - } -} - -// bitv_to_ints converts a bit vector into an array containing the -// corresponding values. -fn bitv_to_ints(bitv []bool, min int) []int { - mut out := []int{} - - for i in 0 .. bitv.len { - if bitv[i] { - out << min + i - } - } - - return out -} - -// parse_part parses a given part of a cron expression & returns the -// corresponding array of ints. -fn parse_part(s string, min int, max int) ![]int { - mut bitv := []bool{len: max - min + 1, init: false} - - for range in s.split(',') { - parse_range(range, min, max, mut bitv)! - } - - return bitv_to_ints(bitv, min) -} - -// parse_expression parses an entire cron expression string into a -// CronExpression object, if possible. 
-pub fn parse_expression(exp string) !CronExpression { - // The filter allows for multiple spaces between parts - mut parts := exp.split(' ').filter(it != '') - - if parts.len < 2 || parts.len > 4 { - return error('Expression must contain between 2 and 4 space-separated parts.') - } - - // For ease of use, we allow the user to only specify as many parts as they - // need. - for parts.len < 4 { - parts << '*' - } - - mut part_results := [][]int{} - - mins := [0, 0, 1, 1] - maxs := [59, 23, 31, 12] - - // This for loop allows us to more clearly propagate the error to the user. - for i, min in mins { - part_results << parse_part(parts[i], min, maxs[i]) or { - return error('An error occurred with part $i: $err.msg()') - } - } - - return CronExpression{ - minutes: part_results[0] - hours: part_results[1] - days: part_results[2] - months: part_results[3] - } -} diff --git a/src/cron/expression/expression_parse.v b/src/cron/expression/expression_parse.v new file mode 100644 index 0000000..4aaec5b --- /dev/null +++ b/src/cron/expression/expression_parse.v @@ -0,0 +1,146 @@ +module expression + +import bitfield + +// parse_range parses a given string into a range of sorted integers. Its +// result is a BitField with set bits for all numbers in the result. +fn parse_range(s string, min int, max int) !bitfield.BitField { + mut start := min + mut end := max + mut interval := 1 + mut bf := bitfield.new(max - min + 1) + + exps := s.split('/') + + if exps.len > 2 { + return error('Invalid expression.') + } + + if exps[0] != '*' { + dash_parts := exps[0].split('-') + + if dash_parts.len > 2 { + return error('Invalid expression.') + } + + start = dash_parts[0].int() + + // The builtin parsing functions return zero if the string can't be + // parsed into a number, so we have to explicitely check whether they + // actually entered zero or if it's an invalid number. 
+ if start == 0 && dash_parts[0] != '0' { + return error('Invalid number.') + } + + // Check whether the start value is out of range + if start < min || start > max { + return error('Out of range.') + } + + if dash_parts.len == 2 { + end = dash_parts[1].int() + + if end == 0 && dash_parts[1] != '0' { + return error('Invalid number.') + } + + if end < start || end > max { + return error('Out of range.') + } + } + } + + if exps.len > 1 { + interval = exps[1].int() + + // interval being zero is always invalid, but we want to check why + // it's invalid for better error messages. + if interval == 0 { + if exps[1] != '0' { + return error('Invalid number.') + } else { + return error('Step size zero not allowed.') + } + } + + if interval > max - min { + return error('Step size too large.') + } + } + // Here, s solely consists of a number, so that's the only value we + // should return. + else if exps[0] != '*' && !exps[0].contains('-') { + bf.set_bit(start - min) + return bf + } + + for start <= end { + bf.set_bit(start - min) + start += interval + } + + return bf +} + +// bf_to_ints takes a BitField and converts it into the expected list of actual +// integers. +fn bf_to_ints(bf bitfield.BitField, min int) []int { + mut out := []int{} + + for i in 0 .. bf.get_size() { + if bf.get_bit(i) == 1 { + out << min + i + } + } + + return out +} + +// parse_part parses a given part of a cron expression & returns the +// corresponding array of ints. +fn parse_part(s string, min int, max int) ![]int { + mut bf := bitfield.new(max - min + 1) + + for range in s.split(',') { + bf2 := parse_range(range, min, max)! + bf = bitfield.bf_or(bf, bf2) + } + + return bf_to_ints(bf, min) +} + +// parse_expression parses an entire cron expression string into a +// CronExpression object, if possible. 
+pub fn parse_expression(exp string) !CronExpression { + // The filter allows for multiple spaces between parts + mut parts := exp.split(' ').filter(it != '') + + if parts.len < 2 || parts.len > 4 { + return error('Expression must contain between 2 and 4 space-separated parts.') + } + + // For ease of use, we allow the user to only specify as many parts as they + // need. + for parts.len < 4 { + parts << '*' + } + + mut part_results := [][]int{} + + mins := [0, 0, 1, 1] + maxs := [59, 23, 31, 12] + + // This for loop allows us to more clearly propagate the error to the user. + for i, min in mins { + part_results << parse_part(parts[i], min, maxs[i]) or { + return error('An error occurred with part $i: $err.msg()') + } + } + + return CronExpression{ + minutes: part_results[0] + hours: part_results[1] + days: part_results[2] + months: part_results[3] + } +} diff --git a/src/cron/expression/expression_parse_test.v b/src/cron/expression/expression_parse_test.v index 5c12329..92e8291 100644 --- a/src/cron/expression/expression_parse_test.v +++ b/src/cron/expression/expression_parse_test.v @@ -3,26 +3,22 @@ module expression // parse_range_error returns the returned error message. If the result is '', // that means the function didn't error. fn parse_range_error(s string, min int, max int) string { - mut bitv := []bool{len: max - min + 1, init: false} - - parse_range(s, min, max, mut bitv) or { return err.msg } + parse_range(s, min, max) or { return err.msg } return '' } // =====parse_range===== fn test_range_star_range() ! { - mut bitv := []bool{len: 6, init: false} - parse_range('*', 0, 5, mut bitv)! + bf := parse_range('*', 0, 5)! - assert bitv == [true, true, true, true, true, true] + assert bf_to_ints(bf, 0) == [0, 1, 2, 3, 4, 5] } fn test_range_number() ! { - mut bitv := []bool{len: 6, init: false} - parse_range('4', 0, 5, mut bitv)! + bf := parse_range('4', 0, 5)! 
- assert bitv_to_ints(bitv, 0) == [4] + assert bf_to_ints(bf, 0) == [4] } fn test_range_number_too_large() ! { @@ -38,17 +34,15 @@ fn test_range_number_invalid() ! { } fn test_range_step_star_1() ! { - mut bitv := []bool{len: 21, init: false} - parse_range('*/4', 0, 20, mut bitv)! + bf := parse_range('*/4', 0, 20)! - assert bitv_to_ints(bitv, 0) == [0, 4, 8, 12, 16, 20] + assert bf_to_ints(bf, 0) == [0, 4, 8, 12, 16, 20] } fn test_range_step_star_2() ! { - mut bitv := []bool{len: 8, init: false} - parse_range('*/3', 1, 8, mut bitv)! + bf := parse_range('*/3', 1, 8)! - assert bitv_to_ints(bitv, 1) == [1, 4, 7] + assert bf_to_ints(bf, 1) == [1, 4, 7] } fn test_range_step_star_too_large() ! { @@ -60,10 +54,9 @@ fn test_range_step_zero() ! { } fn test_range_step_number() ! { - mut bitv := []bool{len: 21, init: false} - parse_range('5/4', 2, 22, mut bitv)! + bf := parse_range('5/4', 2, 22)! - assert bitv_to_ints(bitv, 2) == [5, 9, 13, 17, 21] + assert bf_to_ints(bf, 2) == [5, 9, 13, 17, 21] } fn test_range_step_number_too_large() ! { @@ -75,17 +68,15 @@ fn test_range_step_number_too_small() ! { } fn test_range_dash() ! { - mut bitv := []bool{len: 10, init: false} - parse_range('4-8', 0, 9, mut bitv)! + bf := parse_range('4-8', 0, 9)! - assert bitv_to_ints(bitv, 0) == [4, 5, 6, 7, 8] + assert bf_to_ints(bf, 0) == [4, 5, 6, 7, 8] } fn test_range_dash_step() ! { - mut bitv := []bool{len: 10, init: false} - parse_range('4-8/2', 0, 9, mut bitv)! + bf := parse_range('4-8/2', 0, 9)! - assert bitv_to_ints(bitv, 0) == [4, 6, 8] + assert bf_to_ints(bf, 0) == [4, 6, 8] } // =====parse_part=====