Merge pull request 'refactoring for fun' (#275) from Chewing_Bever/vieter:refactor-zen into dev

Reviewed-on: #275
2022-11-19 17:12:34 +01:00 · 2022-11-19 17:12:34 +01:00 · 9e11237ff9
parent 3636dd92db 71c77e90bc
commit 9e11237ff9
8 changed files with 313 additions and 264 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Changed

 * Migrated codebase to V 0.3.2
+* Cron expression parser now uses bitfields instead of bool arrays

 ### Fixed

--- a/src/cron/expression/expression.v
+++ b/src/cron/expression/expression.v
@ -134,142 +134,3 @@ pub fn (ce &CronExpression) next_n(ref time.Time, n int) ![]time.Time {

 	return times
 }
-
-// parse_range parses a given string into a range of sorted integers, if
-// possible.
-fn parse_range(s string, min int, max int, mut bitv []bool) ! {
-	mut start := min
-	mut end := max
-	mut interval := 1
-
-	exps := s.split('/')
-
-	if exps.len > 2 {
-		return error('Invalid expression.')
-	}
-
-	if exps[0] != '*' {
-		dash_parts := exps[0].split('-')
-
-		if dash_parts.len > 2 {
-			return error('Invalid expression.')
-		}
-
-		start = dash_parts[0].int()
-
-		// The builtin parsing functions return zero if the string can't be
-		// parsed into a number, so we have to explicitely check whether they
-		// actually entered zero or if it's an invalid number.
-		if start == 0 && dash_parts[0] != '0' {
-			return error('Invalid number.')
-		}
-
-		// Check whether the start value is out of range
-		if start < min || start > max {
-			return error('Out of range.')
-		}
-
-		if dash_parts.len == 2 {
-			end = dash_parts[1].int()
-
-			if end == 0 && dash_parts[1] != '0' {
-				return error('Invalid number.')
-			}
-
-			if end < start || end > max {
-				return error('Out of range.')
-			}
-		}
-	}
-
-	if exps.len > 1 {
-		interval = exps[1].int()
-
-		// interval being zero is always invalid, but we want to check why
-		// it's invalid for better error messages.
-		if interval == 0 {
-			if exps[1] != '0' {
-				return error('Invalid number.')
-			} else {
-				return error('Step size zero not allowed.')
-			}
-		}
-
-		if interval > max - min {
-			return error('Step size too large.')
-		}
-	}
-	// Here, s solely consists of a number, so that's the only value we
-	// should return.
-	else if exps[0] != '*' && !exps[0].contains('-') {
-		bitv[start - min] = true
-		return
-	}
-
-	for start <= end {
-		bitv[start - min] = true
-		start += interval
-	}
-}
-
-// bitv_to_ints converts a bit vector into an array containing the
-// corresponding values.
-fn bitv_to_ints(bitv []bool, min int) []int {
-	mut out := []int{}
-
-	for i in 0 .. bitv.len {
-		if bitv[i] {
-			out << min + i
-		}
-	}
-
-	return out
-}
-
-// parse_part parses a given part of a cron expression & returns the
-// corresponding array of ints.
-fn parse_part(s string, min int, max int) ![]int {
-	mut bitv := []bool{len: max - min + 1, init: false}
-
-	for range in s.split(',') {
-		parse_range(range, min, max, mut bitv)!
-	}
-
-	return bitv_to_ints(bitv, min)
-}
-
-// parse_expression parses an entire cron expression string into a
-// CronExpression object, if possible.
-pub fn parse_expression(exp string) !CronExpression {
-	// The filter allows for multiple spaces between parts
-	mut parts := exp.split(' ').filter(it != '')
-
-	if parts.len < 2 || parts.len > 4 {
-		return error('Expression must contain between 2 and 4 space-separated parts.')
-	}
-
-	// For ease of use, we allow the user to only specify as many parts as they
-	// need.
-	for parts.len < 4 {
-		parts << '*'
-	}
-
-	mut part_results := [][]int{}
-
-	mins := [0, 0, 1, 1]
-	maxs := [59, 23, 31, 12]
-
-	// This for loop allows us to more clearly propagate the error to the user.
-	for i, min in mins {
-		part_results << parse_part(parts[i], min, maxs[i]) or {
-			return error('An error occurred with part $i: $err.msg()')
-		}
-	}
-
-	return CronExpression{
-		minutes: part_results[0]
-		hours: part_results[1]
-		days: part_results[2]
-		months: part_results[3]
-	}
-}
--- a/src/cron/expression/expression_parse.v
+++ b/src/cron/expression/expression_parse.v
@ -0,0 +1,146 @@
+module expression
+
+import bitfield
+
+// parse_range parses a single range (`*`, `a` or `a-b`, optionally followed by
+// `/step`) into a BitField of size max - min + 1 where bit i stands for min + i.
+fn parse_range(s string, min int, max int) !bitfield.BitField {
+	mut start := min
+	mut end := max
+	mut interval := 1
+	mut bf := bitfield.new(max - min + 1)
+
+	exps := s.split('/')
+
+	if exps.len > 2 {
+		return error('Invalid expression.')
+	}
+
+	if exps[0] != '*' {
+		dash_parts := exps[0].split('-')
+
+		if dash_parts.len > 2 {
+			return error('Invalid expression.')
+		}
+
+		start = dash_parts[0].int()
+
+		// The builtin parsing functions return zero if the string can't be
+		// parsed into a number, so we have to explicitly check whether they
+		// actually entered zero or if it's an invalid number.
+		if start == 0 && dash_parts[0] != '0' {
+			return error('Invalid number.')
+		}
+
+		// Reject a start value that lies outside of [min, max]
+		if start < min || start > max {
+			return error('Out of range.')
+		}
+
+		if dash_parts.len == 2 {
+			end = dash_parts[1].int()
+
+			if end == 0 && dash_parts[1] != '0' {
+				return error('Invalid number.')
+			}
+
+			if end < start || end > max {
+				return error('Out of range.')
+			}
+		}
+	}
+
+	if exps.len > 1 {
+		interval = exps[1].int()
+
+		// An interval of zero is always invalid, but we distinguish between an
+		// unparsable step & a literal '0' for better error messages.
+		if interval == 0 {
+			if exps[1] != '0' {
+				return error('Invalid number.')
+			} else {
+				return error('Step size zero not allowed.')
+			}
+		}
+
+		if interval > max - min {
+			return error('Step size too large.')
+		}
+	}
+	// Here, s solely consists of a number (no step, no '*', no dash), so
+	// that's the only value whose bit we should set.
+	else if exps[0] != '*' && !exps[0].contains('-') {
+		bf.set_bit(start - min)
+		return bf
+	}
+
+	for start <= end {
+		bf.set_bit(start - min)
+		start += interval
+	}
+
+	return bf
+}
+
+// bf_to_ints converts a BitField into the integers it represents, in ascending
+// order: every set bit i yields the value min + i.
+fn bf_to_ints(bf bitfield.BitField, min int) []int {
+	mut out := []int{}
+
+	for i in 0 .. bf.get_size() {
+		if bf.get_bit(i) == 1 {
+			out << min + i
+		}
+	}
+
+	return out
+}
+
+// parse_part parses a comma-separated part of a cron expression & returns the
+// ascending array of all values selected by at least one of its ranges.
+fn parse_part(s string, min int, max int) ![]int {
+	mut bf := bitfield.new(max - min + 1)
+
+	for range in s.split(',') {
+		bf2 := parse_range(range, min, max)!
+		bf = bitfield.bf_or(bf, bf2)
+	}
+
+	return bf_to_ints(bf, min)
+}
+
+// parse_expression parses a cron expression string of 2 to 4 space-separated
+// parts (minutes, hours, days, months) into a CronExpression object, if possible.
+pub fn parse_expression(exp string) !CronExpression {
+	// Filtering out the empty strings allows for multiple spaces between parts
+	mut parts := exp.split(' ').filter(it != '')
+
+	if parts.len < 2 || parts.len > 4 {
+		return error('Expression must contain between 2 and 4 space-separated parts.')
+	}
+
+	// For ease of use, we allow the user to only specify as many parts as they
+	// need; the missing trailing parts are filled in with '*'.
+	for parts.len < 4 {
+		parts << '*'
+	}
+
+	mut part_results := [][]int{}
+
+	mins := [0, 0, 1, 1]
+	maxs := [59, 23, 31, 12]
+
+	// Parsing part by part lets the error message name the index of the failing part.
+	for i, min in mins {
+		part_results << parse_part(parts[i], min, maxs[i]) or {
+			return error('An error occurred with part $i: $err.msg()')
+		}
+	}
+
+	return CronExpression{
+		minutes: part_results[0]
+		hours: part_results[1]
+		days: part_results[2]
+		months: part_results[3]
+	}
+}
--- a/src/cron/expression/expression_parse_test.v
+++ b/src/cron/expression/expression_parse_test.v
@ -3,26 +3,22 @@ module expression
 // parse_range_error returns the returned error message. If the result is '',
 // that means the function didn't error.
 fn parse_range_error(s string, min int, max int) string {
-	mut bitv := []bool{len: max - min + 1, init: false}
-
-	parse_range(s, min, max, mut bitv) or { return err.msg }
+	parse_range(s, min, max) or { return err.msg }

 	return ''
 }

 // =====parse_range=====
 fn test_range_star_range() ! {
-	mut bitv := []bool{len: 6, init: false}
-	parse_range('*', 0, 5, mut bitv)!
+	bf := parse_range('*', 0, 5)!

-	assert bitv == [true, true, true, true, true, true]
+	assert bf_to_ints(bf, 0) == [0, 1, 2, 3, 4, 5]
 }

 fn test_range_number() ! {
-	mut bitv := []bool{len: 6, init: false}
-	parse_range('4', 0, 5, mut bitv)!
+	bf := parse_range('4', 0, 5)!

-	assert bitv_to_ints(bitv, 0) == [4]
+	assert bf_to_ints(bf, 0) == [4]
 }

 fn test_range_number_too_large() ! {
@ -38,17 +34,15 @@ fn test_range_number_invalid() ! {
 }

 fn test_range_step_star_1() ! {
-	mut bitv := []bool{len: 21, init: false}
-	parse_range('*/4', 0, 20, mut bitv)!
+	bf := parse_range('*/4', 0, 20)!

-	assert bitv_to_ints(bitv, 0) == [0, 4, 8, 12, 16, 20]
+	assert bf_to_ints(bf, 0) == [0, 4, 8, 12, 16, 20]
 }

 fn test_range_step_star_2() ! {
-	mut bitv := []bool{len: 8, init: false}
-	parse_range('*/3', 1, 8, mut bitv)!
+	bf := parse_range('*/3', 1, 8)!

-	assert bitv_to_ints(bitv, 1) == [1, 4, 7]
+	assert bf_to_ints(bf, 1) == [1, 4, 7]
 }

 fn test_range_step_star_too_large() ! {
@ -60,10 +54,9 @@ fn test_range_step_zero() ! {
 }

 fn test_range_step_number() ! {
-	mut bitv := []bool{len: 21, init: false}
-	parse_range('5/4', 2, 22, mut bitv)!
+	bf := parse_range('5/4', 2, 22)!

-	assert bitv_to_ints(bitv, 2) == [5, 9, 13, 17, 21]
+	assert bf_to_ints(bf, 2) == [5, 9, 13, 17, 21]
 }

 fn test_range_step_number_too_large() ! {
@ -75,17 +68,15 @@ fn test_range_step_number_too_small() ! {
 }

 fn test_range_dash() ! {
-	mut bitv := []bool{len: 10, init: false}
-	parse_range('4-8', 0, 9, mut bitv)!
+	bf := parse_range('4-8', 0, 9)!

-	assert bitv_to_ints(bitv, 0) == [4, 5, 6, 7, 8]
+	assert bf_to_ints(bf, 0) == [4, 5, 6, 7, 8]
 }

 fn test_range_dash_step() ! {
-	mut bitv := []bool{len: 10, init: false}
-	parse_range('4-8/2', 0, 9, mut bitv)!
+	bf := parse_range('4-8/2', 0, 9)!

-	assert bitv_to_ints(bitv, 0) == [4, 6, 8]
+	assert bf_to_ints(bf, 0) == [4, 6, 8]
 }

 // =====parse_part=====
--- a/src/package/README.md
+++ b/src/package/README.md
@ -0,0 +1,5 @@
+# package
+
+This module handles parsing the published Arch tarballs & the contents of
+their `.PKGINFO` files, as well as generating the contents of the database
+archives' `desc` & `files` files.
--- a/src/package/format.v
+++ b/src/package/format.v
@ -0,0 +1,103 @@
+module package
+
+// format_entry formats a desc file entry: a newline, `%key%`, a newline, the value & a final newline.
+[inline]
+fn format_entry(key string, value string) string {
+	return '\n%$key%\n$value\n'
+}
+
+// full_name returns the properly formatted name for the package, in the form
+// `name-version-arch`.
+pub fn (pkg &Pkg) full_name() string {
+	p := pkg.info
+	return '$p.name-$p.version-$p.arch'
+}
+
+// filename returns the package file's name, `{full_name}.pkg{ext}`; it panics on an unknown compression code.
+pub fn (pkg &Pkg) filename() string {
+	ext := match pkg.compression {
+		0 { '.tar' }
+		1 { '.tar.gz' }
+		6 { '.tar.xz' }
+		14 { '.tar.zst' }
+		else { panic("Another compression code shouldn't be possible. Faulty code: $pkg.compression") }
+	}
+
+	return '${pkg.full_name()}.pkg$ext'
+}
+
+// to_desc returns the contents of the package's desc file; it propagates any error returned by checksum().
+pub fn (pkg &Pkg) to_desc() !string {
+	p := pkg.info
+
+	// The FILENAME entry comes first, so it has no leading newline.
+	mut desc := '%FILENAME%\n$pkg.filename()\n'
+
+	desc += format_entry('NAME', p.name)
+	desc += format_entry('BASE', p.base)
+	desc += format_entry('VERSION', p.version)
+
+	if p.description.len > 0 {
+		desc += format_entry('DESC', p.description)
+	}
+
+	if p.groups.len > 0 {
+		desc += format_entry('GROUPS', p.groups.join_lines())
+	}
+
+	desc += format_entry('CSIZE', p.csize.str())
+	desc += format_entry('ISIZE', p.size.str())
+
+	sha256sum := pkg.checksum()!
+
+	desc += format_entry('SHA256SUM', sha256sum)
+
+	// TODO add pgpsig stuff
+
+	if p.url.len > 0 {
+		desc += format_entry('URL', p.url)
+	}
+
+	if p.licenses.len > 0 {
+		desc += format_entry('LICENSE', p.licenses.join_lines())
+	}
+
+	desc += format_entry('ARCH', p.arch)
+	desc += format_entry('BUILDDATE', p.build_date.str())
+	desc += format_entry('PACKAGER', p.packager)
+
+	if p.replaces.len > 0 {
+		desc += format_entry('REPLACES', p.replaces.join_lines())
+	}
+
+	if p.conflicts.len > 0 {
+		desc += format_entry('CONFLICTS', p.conflicts.join_lines())
+	}
+
+	if p.provides.len > 0 {
+		desc += format_entry('PROVIDES', p.provides.join_lines())
+	}
+
+	if p.depends.len > 0 {
+		desc += format_entry('DEPENDS', p.depends.join_lines())
+	}
+
+	if p.optdepends.len > 0 {
+		desc += format_entry('OPTDEPENDS', p.optdepends.join_lines())
+	}
+
+	if p.makedepends.len > 0 {
+		desc += format_entry('MAKEDEPENDS', p.makedepends.join_lines())
+	}
+
+	if p.checkdepends.len > 0 {
+		desc += format_entry('CHECKDEPENDS', p.checkdepends.join_lines())
+	}
+
+	return '$desc\n'
+}
+
+// to_files returns the contents of the package's files file: a `%FILES%` header followed by the joined file list.
+pub fn (pkg &Pkg) to_files() string {
+	return '%FILES%\n$pkg.files.join_lines()\n'
+}
--- a/src/package/package.v
+++ b/src/package/package.v
@ -174,104 +174,3 @@ pub fn read_pkg_archive(pkg_path string) !Pkg {
 		compression: compression_code
 	}
 }
-
-// format_entry returns a string properly formatted to be added to a desc file.
-fn format_entry(key string, value string) string {
-	return '\n%$key%\n$value\n'
-}
-
-// full_name returns the properly formatted name for the package, including
-// version & architecture
-pub fn (pkg &Pkg) full_name() string {
-	p := pkg.info
-	return '$p.name-$p.version-$p.arch'
-}
-
-// filename returns the correct filename of the package file
-pub fn (pkg &Pkg) filename() string {
-	ext := match pkg.compression {
-		0 { '.tar' }
-		1 { '.tar.gz' }
-		6 { '.tar.xz' }
-		14 { '.tar.zst' }
-		else { panic("Another compression code shouldn't be possible. Faulty code: $pkg.compression") }
-	}
-
-	return '${pkg.full_name()}.pkg$ext'
-}
-
-// to_desc returns a desc file valid string representation
-pub fn (pkg &Pkg) to_desc() !string {
-	p := pkg.info
-
-	// filename
-	mut desc := '%FILENAME%\n$pkg.filename()\n'
-
-	desc += format_entry('NAME', p.name)
-	desc += format_entry('BASE', p.base)
-	desc += format_entry('VERSION', p.version)
-
-	if p.description.len > 0 {
-		desc += format_entry('DESC', p.description)
-	}
-
-	if p.groups.len > 0 {
-		desc += format_entry('GROUPS', p.groups.join_lines())
-	}
-
-	desc += format_entry('CSIZE', p.csize.str())
-	desc += format_entry('ISIZE', p.size.str())
-
-	sha256sum := pkg.checksum()!
-
-	desc += format_entry('SHA256SUM', sha256sum)
-
-	// TODO add pgpsig stuff
-
-	if p.url.len > 0 {
-		desc += format_entry('URL', p.url)
-	}
-
-	if p.licenses.len > 0 {
-		desc += format_entry('LICENSE', p.licenses.join_lines())
-	}
-
-	desc += format_entry('ARCH', p.arch)
-	desc += format_entry('BUILDDATE', p.build_date.str())
-	desc += format_entry('PACKAGER', p.packager)
-
-	if p.replaces.len > 0 {
-		desc += format_entry('REPLACES', p.replaces.join_lines())
-	}
-
-	if p.conflicts.len > 0 {
-		desc += format_entry('CONFLICTS', p.conflicts.join_lines())
-	}
-
-	if p.provides.len > 0 {
-		desc += format_entry('PROVIDES', p.provides.join_lines())
-	}
-
-	if p.depends.len > 0 {
-		desc += format_entry('DEPENDS', p.depends.join_lines())
-	}
-
-	if p.optdepends.len > 0 {
-		desc += format_entry('OPTDEPENDS', p.optdepends.join_lines())
-	}
-
-	if p.makedepends.len > 0 {
-		desc += format_entry('MAKEDEPENDS', p.makedepends.join_lines())
-	}
-
-	if p.checkdepends.len > 0 {
-		desc += format_entry('CHECKDEPENDS', p.checkdepends.join_lines())
-	}
-
-	return '$desc\n'
-}
-
-// to_files returns a files file valid string representation
-pub fn (pkg &Pkg) to_files() string {
-	return '%FILES%\n$pkg.files.join_lines()\n'
-}
--- a/src/repo/README.md
+++ b/src/repo/README.md
@ -0,0 +1,43 @@
+# repo
+
+This module manages the contents of the various repositories stored within a
+Vieter instance.
+
+## Terminology
+
+* Arch-repository (arch-repo): specific architecture of a given repository. This is what
+  Pacman actually uses as a repository, and contains its own `.db` & `.files`
+  files.
+* Repository (repo): a collection of arch-repositories. A single repository can
+  contain packages of different architectures, with each package being stored
+  in that specific architecture's arch-repository.
+* Repository group (repo-group): a collection of repositories. Each Vieter
+  instance consists of a single repository group, which manages all underlying
+  repositories & arch-repositories.
+
+## Arch-repository layout
+
+An arch-repository (aka a regular Pacman repository) consists of a directory
+with the following files (`{repo}` should be replaced with the name of the
+repository):
+
+* One or more package directories. These directories follow the naming scheme
+  `${pkgname}-${pkgver}-${pkgrel}`. Each of these directories contains two
+  files, `desc` & `files`. The `desc` file is a list of the package's metadata,
+  while `files` contains a list of all files that the package contains. The
+  latter is used when using `pacman -F`.
+* `{repo}.db` & `{repo}.db.tar.gz`: the database file of the repository. This
+  is just a compressed tarball of all package directories, but only their
+  `desc` files. Both these files should have the same content (`repo-add`
+  creates a symlink, but Vieter just serves the same file for both routes)
+* `{repo}.files` & `{repo}.files.tar.gz`: the same as the `.db` file, but this
+  also contains the `files` files, instead of just the `desc` files.
+
+## Filesystem layout
+
+The repository part of Vieter consists of two directories. One is the `repos`
+directory inside the configured `data_dir`, while the other is the configured
+`pkg_dir`. `repos` contains only the repository group, while `pkg_dir` contains
+the actual package archives. `pkg_dir` is the directory that can take up a
+significant amount of disk space, while `repos` solely consists of small text
+files.