feat(cron): some bug fixes & formatting

2023-01-14 15:03:11 +01:00 · 2023-01-14 15:03:11 +01:00 · dce00bfab6
parent c2e6d168e5
commit dce00bfab6
5 changed files with 266 additions and 238 deletions
--- a/.editorconfig
+++ b/.editorconfig
@ -5,6 +5,5 @@ root = true
 end_of_line = lf
 insert_final_newline = true

-[*.v]
-# vfmt wants it :(
+[*.{v,c,h}]
 indent_style = tab
--- a/src/cron/expression/c/expression.c
+++ b/src/cron/expression/c/expression.c
@ -4,15 +4,15 @@
 const uint8_t month_days[] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};

 struct cron_expression *ce_init() {
-    return malloc(sizeof(struct cron_expression));
+	return malloc(sizeof(struct cron_expression));
 }

 void ce_free(struct cron_expression *ce) {
-    free(ce->months);
-    free(ce->days);
-    free(ce->hours);
-    free(ce->minutes);
-    free(ce);
+	free(ce->months);
+	free(ce->days);
+	free(ce->hours);
+	free(ce->minutes);
+	free(ce);
 }

 int ce_next(struct cron_simple_time *out, struct cron_expression *ce, struct cron_simple_time *ref) {
@ -21,100 +21,101 @@ int ce_next(struct cron_simple_time *out, struct cron_expression *ce, struct cro
 	// means we've looped back around. This means that the "bigger" value has
 	// to be incremented by one. For example, if the minutes have looped
 	// around, that means that the hour has to be incremented as well.
-    uint8_t month_index = 0;
-    uint8_t day_index = 0;
-    uint8_t hour_index = 0;
-    uint8_t minute_index = 0;
+	uint8_t month_index = 0;
+	uint8_t day_index = 0;
+	uint8_t hour_index = 0;
+	uint8_t minute_index = 0;

 	// This chain is the same logic multiple times, namely that if a "bigger"
 	// value loops around, then the smaller value will always reset as well.
 	// For example, if we're going to a new day, the hour & minute will always
 	// be their smallest value again.
-    while (month_index < ce->month_count && ref->month > ce->months[month_index]) {
-        month_index++;
-    }
+	while (month_index < ce->month_count && ref->month > ce->months[month_index]) {
+		month_index++;
+	}

-    if (month_index < ce->month_count && ref->month == ce->months[month_index]) {
-        while (day_index < ce->day_count && ref->day > ce->days[day_index]) {
-            day_index++;
-        }
+	if (month_index < ce->month_count && ref->month == ce->months[month_index]) {
+		while (day_index < ce->day_count && ref->day > ce->days[day_index]) {
+			day_index++;
+		}

-        if (day_index < ce->day_count && ref->day == ce->days[day_index]) {
-            while (hour_index < ce->hour_count && ref->hour > ce->hours[hour_index]) {
-                hour_index++;
-            }
+		if (day_index < ce->day_count && ref->day == ce->days[day_index]) {
+			while (hour_index < ce->hour_count && ref->hour > ce->hours[hour_index]) {
+				hour_index++;
+			}

-            if (hour_index < ce->hour_count && ref->hour == ce->hours[hour_index]) {
+			if (hour_index < ce->hour_count && ref->hour == ce->hours[hour_index]) {
 				// Minute is the only value where we explicitely make sure we
 				// can't match sref's value exactly. This is to ensure we only
 				// return values in the future.
-                while (minute_index < ce->minute_count && ref->minute >= ce->minutes[minute_index]) {
-                    minute_index++;
-                }
-            }
-        }
-    }
+				while (minute_index < ce->minute_count && ref->minute >= ce->minutes[minute_index]) {
+					minute_index++;
+				}
+			}
+		}
+	}

 	// Here, we increment the "bigger" values by one if the smaller ones loop
 	// around. The order is important, as it allows a sort-of waterfall effect
 	// to occur which updates all values if required.
-    if (minute_index == ce->minute_count && hour_index < ce->hour_count) {
-        hour_index++;
-    }
+	if (minute_index == ce->minute_count && hour_index < ce->hour_count) {
+		hour_index++;
+	}

-    if (hour_index == ce->hour_count && day_index < ce->day_count) {
-        day_index++;
-    }
+	if (hour_index == ce->hour_count && day_index < ce->day_count) {
+		day_index++;
+	}

-    if (day_index == ce->day_count && month_index < ce->month_count) {
-        month_index++;
-    }
+	if (day_index == ce->day_count && month_index < ce->month_count) {
+		month_index++;
+	}

-    out->minute = ce->minutes[minute_index % ce->minute_count];
-    out->hour = ce->hours[hour_index % ce->hour_count];
-    out->day = ce->days[day_index % ce->day_count];
+	out->minute = ce->minutes[minute_index % ce->minute_count];
+	out->hour = ce->hours[hour_index % ce->hour_count];
+	out->day = ce->days[day_index % ce->day_count];

 	// Sometimes, we end up with a day that does not exist within the selected
 	// month, e.g. day 30 in February. When this occurs, we reset day back to
 	// the smallest value & loop over to the next month that does have this
 	// day.
-    if (out->day > month_days[ce->months[month_index % ce->month_count] - 1]) {
-        out->day = ce->days[0];
-        month_index++;
+	if (out->day > month_days[ce->months[month_index % ce->month_count] - 1]) {
+		out->day = ce->days[0];
+		month_index++;

-        while (out->day > month_days[ce->months[month_index % ce->month_count] - 1]) {
-            month_index++;
-            
-            if (month_index == 2 * ce->month_count) {
-                return 1;
-            }
-        }
-    }
+		while (out->day > month_days[ce->months[month_index % ce->month_count] - 1]) {
+			month_index++;

-    out->month = ce->months[month_index % ce->month_count];
+			// TODO find out if this can happen
+			if (month_index == 2 * ce->month_count) {
+				return 1;
+			}
+		}
+	}

-    if (month_index >= ce->month_count) {
-        out->year = ref->year + 1;
-    } else {
-        out->year = ref->year;
-    }
+	out->month = ce->months[month_index % ce->month_count];

-    return 0;
+	if (month_index >= ce->month_count) {
+		out->year = ref->year + 1;
+	} else {
+		out->year = ref->year;
+	}
+
+	return 0;
 }

 int ce_next_from_now(struct cron_simple_time *out, struct cron_expression *ce) {
-    time_t t = time(NULL);
-    struct tm gm;
-    gmtime_r(&t, &gm);
+	time_t t = time(NULL);
+	struct tm gm;
+	gmtime_r(&t, &gm);

-    struct cron_simple_time ref = {
-        .year = gm.tm_year,
-        // tm_mon goes from 0 to 11
-        .month = gm.tm_mon + 1,
-        .day = gm.tm_mday,
-        .hour = gm.tm_hour,
-        .minute = gm.tm_min
-    };
+	struct cron_simple_time ref = {
+		.year = gm.tm_year,
+		// tm_mon goes from 0 to 11
+		.month = gm.tm_mon + 1,
+		.day = gm.tm_mday,
+		.hour = gm.tm_hour,
+		.minute = gm.tm_min
+	};

-    return ce_next(out, ce, &ref);
+	return ce_next(out, ce, &ref);
 }
--- a/src/cron/expression/c/expression.h
+++ b/src/cron/expression/c/expression.h
@ -4,29 +4,29 @@
 #include <string.h>

 enum cron_parse_error {
-    CPEParseOk = 0,
-    CPEParseInvalidExpression = 1,
-    CPEParseInvalidNumber = 2,
-    CPEParseOutOfRange = 3
+	CPEParseOk = 0,
+	CPEParseInvalidExpression = 1,
+	CPEParseInvalidNumber = 2,
+	CPEParseOutOfRange = 3
 };

 struct cron_expression {
-    uint8_t *minutes;
-    uint8_t *hours;
-    uint8_t *days;
-    uint8_t *months;
-    uint8_t minute_count;
-    uint8_t hour_count;
-    uint8_t day_count;
-    uint8_t month_count;
+	uint8_t *minutes;
+	uint8_t *hours;
+	uint8_t *days;
+	uint8_t *months;
+	uint8_t minute_count;
+	uint8_t hour_count;
+	uint8_t day_count;
+	uint8_t month_count;
 };

 struct cron_simple_time {
-    int year;
-    int month;
-    int day;
-    int hour;
-    int minute;
+	int year;
+	int month;
+	int day;
+	int hour;
+	int minute;
 };

 struct cron_expression *ce_init();
--- a/src/cron/expression/c/parse.c
+++ b/src/cron/expression/c/parse.c
@ -1,25 +1,28 @@
 #include "expression.h"

+// Allowed value ranges for the minute, hour, day and month fields
 const uint8_t min[4] = {0, 0, 1, 1};
 const uint8_t max[4] = {59, 23, 31, 12};

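+// Convert a string to a uint8_t value by parsing it using atoi and checking
+// whether it's contained within the given range
+// FIXME: the range check compares `v` (the destination's value before
+// assignment) rather than the freshly parsed `_##v`.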
 #define SAFE_ATOI(v,s,min,max) \
-    int _##v = atoi(s); \
-    if ((_##v) == 0 && strcmp((s), "0") != 0) { \
-        return CPEParseInvalidNumber; \
-    } \
-    if (v < (min) || v > (max)) { \
-        return CPEParseOutOfRange; \
-    } \
-    v = (uint8_t) (_##v);
+	int _##v = atoi(s); \
+	if ((_##v) == 0 && strcmp((s), "0") != 0) { \
+		return CPEParseInvalidNumber; \
+	} \
+	if (v < (min) || v > (max)) { \
+		return CPEParseOutOfRange; \
+	} \
+	v = (uint8_t) (_##v);

 /**
 * Given a range expression, produce a bit field defining what numbers in the
- * min-max range the expression represents. The first bit (starting from the
- * right) corresponds to min, the max - min + 1'th bit to max. All trailing bits
+ * min-max range the expression represents. Bit 0 (the least significant
+ * bit) corresponds to min, and bit (max - min) to max. All trailing bits
 * after this should be ignored. The given bitfield is modified in-place, so
 * multiple calls of this function can be performed on the same value to create
- * the effect of ORing their values:
+ * the effect of ORing their values.
 *
 * A range expression has one of the following forms:
 *
@ -30,185 +33,210 @@ const uint8_t max[4] = {59, 23, 31, 12};
 * - a-b/c
 */
 enum cron_parse_error ce_parse_range(uint64_t *out, char *s, uint8_t min, uint8_t max) {
-    // The * expression means "every possible value"
-    if (s[0] == '*') {
-        // A '*' is only valid on its own
-        if (s[1] != '\0') {
-            return CPEParseInvalidExpression;
-        }
+	// The * expression means "every possible value"
+	if (s[0] == '*') {
+		// A '*' is only valid on its own
+		if (s[1] != '\0') {
+			return CPEParseInvalidExpression;
+		}

-        *out = ~0;
+		*out = ~0;

-        return CPEParseOk;
-    }
+		return CPEParseOk;
+	}

-    size_t slash_index = 0;
-    size_t dash_index = 0;
-    size_t i = 0;
+	size_t slash_index = 0;
+	size_t dash_index = 0;
+	size_t i = 0;

-    // We first iterate over the string to determine whether it contains a slash
-    // and/or a dash. We know the dash can only be valid if it appears before
-    // the slash.
-    while (s[i] != '\0' && slash_index == 0) {
-        if (s[i] == '/') {
-            slash_index = i;
+	// We first iterate over the string to determine whether it contains a slash
+	// and/or a dash. We know the dash can only be valid if it appears before
+	// the slash.
+	while (s[i] != '\0' && slash_index == 0) {
+		if (s[i] == '/') {
+			slash_index = i;

-            s[i] = '\0';
-        } else if (s[i] == '-') {
-            dash_index = i;
+			s[i] = '\0';
+		} else if (s[i] == '-') {
+			dash_index = i;

-            s[i] = '\0';
-        }
+			s[i] = '\0';
+		}

-        i++;
-    }
+		i++;
+	}

-    // Parse the three possible numbers in the pattern
-    uint8_t start = 0;
-    uint8_t end = 0;
-    uint8_t interval = 1;
+	// Parse the three possible numbers in the pattern
+	uint8_t start = 0;
+	uint8_t end = max;
+	uint8_t interval = 1;

-    SAFE_ATOI(start, s, min, max);
+	SAFE_ATOI(start, s, min, max);

-    if (dash_index > 0) {
-        SAFE_ATOI(end, &s[dash_index + 1], min, max);
-    }
+	if (dash_index > 0) {
+		SAFE_ATOI(end, &s[dash_index + 1], min, max);
+	}

-    if (slash_index > 0) {
-        SAFE_ATOI(interval, &s[slash_index + 1], 1, max - min);
-    }
+	if (slash_index > 0) {
+		SAFE_ATOI(interval, &s[slash_index + 1], 1, max - min);
+	}

-    // Single number doesn't need to loop
-    if (end == 0 && slash_index == 0) {
-        *out |= ((uint64_t) 1) << (start - min);
-    } else {
-        for (;start <= end; start += interval) {
-            *out |= ((uint64_t) 1) << (start - min);
-            start += interval;
-        }
-    }
+	if (dash_index == 0 && slash_index == 0) {
+		*out |= ((uint64_t) 1) << (start - min);
+	} else {
+		while (start <= end) {
+			*out |= ((uint64_t) 1) << (start - min);
+			start += interval;
+		}
+	}

-    return CPEParseOk;
+	return CPEParseOk;
 }

+/*
+ * Given an expression part, produce a bitfield defining what numbers in the
+ * min-max range the part represents. A part consists of one or more range
+ * expressions, separated by commas.
+ */
 enum cron_parse_error ce_parse_part(uint64_t *out, char *s, uint8_t min, uint8_t max) {
-    *out = 0;
+	*out = 0;

-    char *next;
-    enum cron_parse_error res;
-    
-    while ((next = strchr(s, ',')) != NULL) {
-        next[0] = '\0';
-        res = ce_parse_range(out, s, min, max);
+	char *next;
+	enum cron_parse_error res;

-        if (res != CPEParseOk) {
-            return res;
-        }
+	while ((next = strchr(s, ',')) != NULL) {
+		next[0] = '\0';
+		res = ce_parse_range(out, s, min, max);

-        s = next + 1;
-    }
+		if (res != CPEParseOk) {
+			return res;
+		}

-    // Make sure to parse the final range as well
-    return ce_parse_range(out, s, min, max);
+		s = next + 1;
+	}
+
+	// Make sure to parse the final range as well
+	return ce_parse_range(out, s, min, max);
 }

+/*
+ * Return how many bits are set in the bitfield, better known as popcount. I
+ * added my own implementation (taken from my algorithms course) as I don't want
+ * to be dependent on GCC-specific extensions.
+ */
+uint8_t uint64_t_popcount(uint64_t n) {
+	uint8_t c = 0;
+
+	while (n != 0) {
+		// This clears the lowest set bit (very cool)
+		n &= n - 1;
+		c++;
+	}
+
+	return c;
+}
+
+/*
+ * Convert a bitfield into an array containing the numbers in the min-max range
+ * it represents.
+ */
 uint8_t bf_to_nums(uint8_t **out, uint64_t bf, uint8_t min, uint8_t max) {
-    uint8_t capacity = 8;
-    uint8_t size = 0;
+	// Each bit field only has `max - min + 1` meaningful bits. All other bits
+	// should be ignored, and can be any value. By shifting the bit field back and
+	// forth, we set these excessive bits to zero, ensuring popcount returns the
+	// correct value.
+	uint8_t excess_bits = 64 - (max - min + 1);
+	bf = (bf << excess_bits) >> excess_bits;
+	uint8_t size = uint64_t_popcount(bf);
+	uint8_t *buf = malloc(size * sizeof(uint8_t));

-    uint8_t *buf = malloc(capacity * sizeof(uint8_t));
+	uint8_t i = 0, j = 0;

-    for (uint8_t i = 0; i <= max - min; i++) {
-        if (((uint64_t) 1 << i) & bf) {
-            // Resize buffer if needed
-            if (size == capacity) {
-                capacity *= 2;
-                buf = realloc(buf, capacity * sizeof(uint8_t));
-            }
+	while (j < size && i <= max - min) {
+		if (((uint64_t)1 << i) & bf) {
+			// Bit i is set, so the value min + i is part of the result
+			buf[j] = min + i;
+			j++;
+		}

-            buf[size] = min + i;
-            size++;
-        }
-    }
+		i++;
+	}

-    // Resize buffer once more to remove any trailing unused bytes
-    if (size < capacity) {
-        buf = realloc(buf, size * sizeof(uint8_t));
-    }
+	*out = buf;

-    *out = buf;
-
-    return size;
+	return size;
 }

+/*
+ * Parse a cron expression string into a cron_expression struct.
+ */
 enum cron_parse_error ce_parse_expression(struct cron_expression *out, char *s) {
-    // The parsing functions modify the input string in-place
-    s = strdup(s);
-    char *orig_s = s;
+	// The parsing functions modify the input string in-place
+	s = strdup(s);
+	char *orig_s = s;

-    uint8_t part_count = 0;
+	uint8_t part_count = 0;

-    char *next;
-    enum cron_parse_error res = CPEParseOk;
-    uint64_t bfs[4];
+	char *next;
+	enum cron_parse_error res = CPEParseOk;
+	uint64_t bfs[4];

-    // Skip leading spaces
-    while (s[0] == ' ') {
-        s++;
-    }
-    
-    while (part_count < 4 && ((next = strchr(s, ' ')) != NULL)) {
-        next[0] = '\0';
-        res = ce_parse_part(&bfs[part_count], s, min[part_count], max[part_count]);
+	// Skip leading spaces
+	while (s[0] == ' ') {
+		s++;
+	}

-        if (res != CPEParseOk) {
-            goto end;
-        }
+	while (part_count < 4 && ((next = strchr(s, ' ')) != NULL)) {
+		next[0] = '\0';
+		res = ce_parse_part(&bfs[part_count], s, min[part_count], max[part_count]);

-        size_t offset = 1;
+		if (res != CPEParseOk) {
+			goto end;
+		}

-        // Skip multiple spaces
-        while (next[offset] == ' ') {
-            offset++;
-        }
-        s = next + offset;
+		size_t offset = 1;

-        part_count++;
-    }
+		// Skip multiple spaces
+		while (next[offset] == ' ') {
+			offset++;
+		}
+		s = next + offset;

-    // Parse final trailing part
-    if (part_count < 4 && s[0] != '\0') {
-        // Make sure to parse the final range as well
-        res = ce_parse_part(&bfs[part_count], s, min[part_count], max[part_count]);
+		part_count++;
+	}

-        if (res != CPEParseOk) {
-            goto end;
-        }
+	// Parse final trailing part
+	if (part_count < 4 && s[0] != '\0') {
+		res = ce_parse_part(&bfs[part_count], s, min[part_count], max[part_count]);

-        part_count++;
-    }
+		if (res != CPEParseOk) {
+			goto end;
+		}

-    // At least two parts need to be provided
-    if (part_count < 2) {
-        res = CPEParseInvalidExpression;
-        goto end;
-    }
+		part_count++;
+	}

-    // Ensure there's always 4 parts, as expressions can have between 2 and 4 parts
-    while (part_count < 4) {
-        // Expression is augmented with '*' expressions
-        bfs[part_count] = ~0;
-        part_count++;
-    }
+	// At least two parts need to be provided
+	if (part_count < 2) {
+		res = CPEParseInvalidExpression;
+		goto end;
+	}

-    out->minute_count = bf_to_nums(&out->minutes, bfs[0], min[0], max[0]);
-    out->hour_count = bf_to_nums(&out->hours, bfs[1], min[1], max[1]);
-    out->day_count = bf_to_nums(&out->days, bfs[2], min[2], max[2]);
-    out->month_count = bf_to_nums(&out->months, bfs[3], min[3], max[3]);
+	// Ensure there's always 4 parts, as expressions can have between 2 and 4 parts
+	while (part_count < 4) {
+		// Expression is augmented with '*' expressions
+		bfs[part_count] = ~0;
+		part_count++;
+	}
+
+	out->minute_count = bf_to_nums(&out->minutes, bfs[0], min[0], max[0]);
+	out->hour_count = bf_to_nums(&out->hours, bfs[1], min[1], max[1]);
+	out->day_count = bf_to_nums(&out->days, bfs[2], min[2], max[2]);
+	out->month_count = bf_to_nums(&out->months, bfs[3], min[3], max[3]);

 end:
-    // s is cloned
-    free(orig_s);
+	// s is cloned
+	free(orig_s);

-    return res;
+	return res;
 }
--- a/src/cron/expression/expression_test.v
+++ b/src/cron/expression/expression_test.v
@ -22,15 +22,15 @@ fn test_next_simple() ! {
 	/* util_test_time('0 3', '2002-01-01 00:00:00', '2002-01-01 03:00:00')! */

 	// Overlap to next day
-	mut exp := '0 3'
+	mut exp := '0    3        '
 	util_test_time(exp, '2002-01-01 03:00:00', '2002-01-02 03:00:00')!
 	util_test_time(exp, '2002-01-01 04:00:00', '2002-01-02 03:00:00')!

-	/* util_test_time('0 3/4', '2002-01-01 04:00:00', '2002-01-01 07:00:00')! */
+	util_test_time('0 3/4', '2002-01-01 04:00:00', '2002-01-01 07:00:00')!

 	/* // Overlap to next month */
-	/* util_test_time('0 3', '2002-11-31 04:00:00', '2002-12-01 03:00:00')! */
+	util_test_time('0 3', '2002-11-31 04:00:00', '2002-12-01 03:00:00')!

 	/* // Overlap to next year */
-	/* util_test_time('0 3', '2002-12-31 04:00:00', '2003-01-01 03:00:00')! */
+	util_test_time('0 3', '2002-12-31 04:00:00', '2003-01-01 03:00:00')!
 }