feat(lsm): write str tests; start trie tests

lsm
Jef Roosens 2023-10-14 14:33:22 +02:00
parent 87000e8f73
commit ef8129b8eb
Signed by: Jef Roosens
GPG Key ID: B75D4F293C7052DB
7 changed files with 441 additions and 52 deletions

View File

@ -1,6 +1,8 @@
#ifndef LSM_STR #ifndef LSM_STR
#define LSM_STR #define LSM_STR
#include <stdbool.h>
#include "lsm.h" #include "lsm.h"
/** /**
@ -10,22 +12,6 @@
*/ */
typedef struct lsm_str lsm_str; typedef struct lsm_str lsm_str;
/**
* Allocate a new string struct of length 0.
*
* @param ptr pointer to store newly allocated pointer in
*/
lsm_error lsm_str_init_zero(lsm_str **ptr);
/**
* Update an existing lsm_str so it now represents the new provided string. The
* string pointer of the original object is free'd if needed.
*
* @param str lsm_str object to modify
* @param s string to convert into lsm string; ownership is taken over
*/
void lsm_str_init_prealloc(lsm_str *str, char *s);
/** /**
* Allocate and initialize a new lsm_str object * Allocate and initialize a new lsm_str object
* *
@ -35,17 +21,45 @@ void lsm_str_init_prealloc(lsm_str *str, char *s);
lsm_error lsm_str_init(lsm_str **ptr, char *s); lsm_error lsm_str_init(lsm_str **ptr, char *s);
/** /**
* Same as lsm_str_init, except it copies the original string instead of taking * Allocate a new string struct of length 0.
* over ownership, leaving the original string untouched. *
* @param ptr pointer to store newly allocated pointer in
*/
lsm_error lsm_str_init_zero(lsm_str **ptr);
/**
* Allocate and initialize a new lsm_str object, but copy the original string
* instead of taking over ownership, leaving the original string untouched.
* *
* @param ptr pointer to store newly allocated pointer * @param ptr pointer to store newly allocated pointer
* @param s string to copy into lsm string * @param s string to copy into lsm string
*/ */
lsm_error lsm_str_init_copy(lsm_str **ptr, char *s); lsm_error lsm_str_init_copy(lsm_str **ptr, char *s);
/**
* Overwrite an existing lsm_str so it now represents the new provided string.
* The string pointer of the original object is free'd if needed. Ownership of
* the pointer is taken over.
*
* @param str lsm_str object to modify
* @param s string to convert into lsm string; ownership is taken over
*/
void lsm_str_overwrite(lsm_str *str, char *s);
/**
* Overwrite an existing lsm_str so it now represents the new provided string.
* The string pointer of the original object is free'd if needed. The provided
* string is copied, leaving the original untouched.
*
* @param str lsm_str object to modify
* @param s string to copy into the lsm string; the original is left untouched
*/
lsm_error lsm_str_overwrite_copy(lsm_str *str, char *s);
/** /**
* Deallocate the existing internal string if needed and replace the lsm_str * Deallocate the existing internal string if needed and replace the lsm_str
* with a string of length 0, wiping its contents. * with a string of length 0, wiping its contents. This function can be used as
* a substitute for lsm_str_free for stack-allocated structs.
* *
* @param str string to wipe * @param str string to wipe
*/ */
@ -85,7 +99,8 @@ char lsm_str_char(lsm_str *str, uint64_t index);
* Take a substring and copy it to a provided string object. * Take a substring and copy it to a provided string object.
* *
* @param out string to store new substring in. The contents of this string will * @param out string to store new substring in. The contents of this string will
* be replaced. * be replaced. This string is assumed to be uninitialized, so zero this string
* manually if you're overwriting an existing string.
* @param str string to take substring from * @param str string to take substring from
* @param start inclusive start index for the substring. If this is greater than * @param start inclusive start index for the substring. If this is greater than
* or equal to the string's length, out will be a zero-length string. * or equal to the string's length, out will be a zero-length string.
@ -109,7 +124,16 @@ uint64_t lsm_str_cmp(lsm_str *s1, uint64_t s1_offset, lsm_str *s2,
uint64_t s2_offset); uint64_t s2_offset);
/** /**
* Truncate a string in-place. * Checks whether the two strings are identical.
*
* @param s1 first string to compare
* @param s2 second string to compare
* @return true if their values are equal, false otherwise
*/
bool lsm_str_eq(lsm_str *s1, lsm_str *s2);
/**
* Truncate an already initialized string in-place.
* *
* @param s string to truncate * @param s string to truncate
* @param new_len new length of the string. If new_len is >= the original * @param new_len new length of the string. If new_len is >= the original

View File

@ -7,6 +7,20 @@
#define MIN(x, y) (((x) < (y)) ? (x) : (y)) #define MIN(x, y) (((x) < (y)) ? (x) : (y))
/**
 * Allocate a zeroed lsm_str on the heap and let it represent the given C
 * string. The string is handed to lsm_str_overwrite, which takes over
 * ownership of it.
 *
 * @param ptr where to store the pointer to the newly allocated lsm_str; only
 * written on success
 * @param s string to wrap
 * @return lsm_error_failed_alloc if the struct could not be allocated,
 * lsm_error_ok otherwise
 */
lsm_error lsm_str_init(lsm_str **ptr, char *s) {
  lsm_str *fresh = calloc(1, sizeof(*fresh));

  if (!fresh) {
    return lsm_error_failed_alloc;
  }

  lsm_str_overwrite(fresh, s);

  *ptr = fresh;
  return lsm_error_ok;
}
lsm_error lsm_str_init_zero(lsm_str **ptr) { lsm_error lsm_str_init_zero(lsm_str **ptr) {
lsm_str *str = calloc(1, sizeof(lsm_str)); lsm_str *str = calloc(1, sizeof(lsm_str));
@ -19,7 +33,21 @@ lsm_error lsm_str_init_zero(lsm_str **ptr) {
return lsm_error_ok; return lsm_error_ok;
} }
void lsm_str_init_prealloc(lsm_str *str, char *s) { lsm_error lsm_str_init_copy(lsm_str **ptr, char *s) {
lsm_str *str = calloc(1, sizeof(lsm_str));
if (str == NULL) {
return lsm_error_failed_alloc;
}
lsm_str_overwrite_copy(str, s);
*ptr = str;
return lsm_error_ok;
}
void lsm_str_overwrite(lsm_str *str, char *s) {
str->len = strlen(s); str->len = strlen(s);
if (str->len <= 8) { if (str->len <= 8) {
@ -30,27 +58,7 @@ void lsm_str_init_prealloc(lsm_str *str, char *s) {
} }
} }
lsm_error lsm_str_init(lsm_str **ptr, char *s) { lsm_error lsm_str_overwrite_copy(lsm_str *str, char *s) {
lsm_str *str = calloc(1, sizeof(lsm_str));
if (str == NULL) {
return lsm_error_failed_alloc;
}
lsm_str_init_prealloc(str, s);
*ptr = str;
return lsm_error_ok;
}
lsm_error lsm_str_init_copy(lsm_str **ptr, char *s) {
lsm_str *str = calloc(1, sizeof(lsm_str));
if (str == NULL) {
return lsm_error_failed_alloc;
}
str->len = strlen(s); str->len = strlen(s);
if (str->len <= 8) { if (str->len <= 8) {
@ -66,8 +74,6 @@ lsm_error lsm_str_init_copy(lsm_str **ptr, char *s) {
str->data.ptr = buf; str->data.ptr = buf;
} }
*ptr = str;
return lsm_error_ok; return lsm_error_ok;
} }
@ -80,10 +86,7 @@ void lsm_str_zero(lsm_str *str) {
} }
void lsm_str_free(lsm_str *str) { void lsm_str_free(lsm_str *str) {
if (str->len > 8) { lsm_str_zero(str);
free(str->data.ptr);
}
free(str); free(str);
} }
@ -112,7 +115,7 @@ lsm_error lsm_str_substr(lsm_str *out, lsm_str *str, uint64_t start,
const char *str_ptr = lsm_str_ptr(str); const char *str_ptr = lsm_str_ptr(str);
if (len <= 8) { if (len <= 8) {
lsm_str_zero(out); /* lsm_str_zero(out); */
memcpy(out->data.val, &str_ptr[start], len); memcpy(out->data.val, &str_ptr[start], len);
} else { } else {
char *buf = malloc(len * sizeof(char)); char *buf = malloc(len * sizeof(char));
@ -123,7 +126,7 @@ lsm_error lsm_str_substr(lsm_str *out, lsm_str *str, uint64_t start,
memcpy(buf, &str_ptr[start], len); memcpy(buf, &str_ptr[start], len);
lsm_str_zero(out); /* lsm_str_zero(out); */
out->data.ptr = buf; out->data.ptr = buf;
} }
@ -185,3 +188,11 @@ lsm_error lsm_str_split(lsm_str *s, lsm_str *s2, uint64_t index) {
return lsm_str_truncate(s, index); return lsm_str_truncate(s, index);
} }
/**
 * Check whether two strings hold exactly the same bytes.
 *
 * Strings of different length are never equal; otherwise the first `len` bytes
 * of both strings are compared.
 *
 * @param s1 first string to compare
 * @param s2 second string to compare
 * @return true if both strings have the same length and contents
 */
bool lsm_str_eq(lsm_str *s1, lsm_str *s2) {
  const bool same_length = s1->len == s2->len;

  // Short-circuit keeps the memcmp from running on strings of unequal length
  return same_length &&
         memcmp(lsm_str_ptr(s1), lsm_str_ptr(s2), s1->len) == 0;
}

View File

@ -35,6 +35,8 @@ lsm_error lsm_trie_init(lsm_trie **ptr) {
return lsm_error_ok; return lsm_error_ok;
} }
/**
 * Read the trie's size counter.
 *
 * @param trie trie to query
 * @return current value of the trie's `size` field
 */
uint64_t lsm_trie_size(lsm_trie *trie) {
  const uint64_t entry_count = trie->size;

  return entry_count;
}
lsm_error lsm_trie_insert(lsm_trie *trie, lsm_str *key, void *data) { lsm_error lsm_trie_insert(lsm_trie *trie, lsm_str *key, void *data) {
// NULL is not allowed as a data value, as it's used to indicate a lack of // NULL is not allowed as a data value, as it's used to indicate a lack of
// data // data
@ -48,6 +50,7 @@ lsm_error lsm_trie_insert(lsm_trie *trie, lsm_str *key, void *data) {
if (key_len == 0) { if (key_len == 0) {
if (trie->root->data == NULL) { if (trie->root->data == NULL) {
trie->root->data = data; trie->root->data = data;
trie->size++;
return lsm_error_ok; return lsm_error_ok;
} else { } else {
@ -75,6 +78,8 @@ lsm_error lsm_trie_insert(lsm_trie *trie, lsm_str *key, void *data) {
} }
new_node->data = data; new_node->data = data;
trie->size++;
lsm_str_substr(&new_node->skip, key, index + 1, key_len); lsm_str_substr(&new_node->skip, key, index + 1, key_len);
return lsm_bt_insert(&node->bt, c, new_node); return lsm_bt_insert(&node->bt, c, new_node);
@ -125,6 +130,7 @@ lsm_error lsm_trie_insert(lsm_trie *trie, lsm_str *key, void *data) {
} }
node->data = data; node->data = data;
trie->size++;
return lsm_error_ok; return lsm_error_ok;
} }

91
lsm/test/str/str.c 100644
View File

@ -0,0 +1,91 @@
#include "test.h"
#include "lsm/str_internal.h"
/**
 * Exercise lsm_str_cmp with different offsets into both strings. The
 * expectations below treat the return value as the number of leading
 * characters both strings have in common, starting at the given offsets.
 */
void test_cmp() {
  // Zero-initialize: the overwrite functions are documented to release the
  // previous contents "if needed", so the structs must never hold
  // indeterminate values when they are first passed in.
  lsm_str s1 = {0}, s2 = {0}, s3 = {0};

  lsm_str_overwrite_copy(&s1, "some_string");
  lsm_str_overwrite_copy(&s2, "some");
  lsm_str_overwrite_copy(&s3, "some_string_extra");

  TEST_CHECK(lsm_str_cmp(&s1, 0, &s2, 0) == 4);
  TEST_CHECK(lsm_str_cmp(&s1, 0, &s2, 1) == 0);
  TEST_CHECK(lsm_str_cmp(&s1, 1, &s2, 1) == 3);
  TEST_CHECK(lsm_str_cmp(&s1, 1, &s2, 0) == 0);
  TEST_CHECK(lsm_str_cmp(&s1, 0, &s3, 0) == lsm_str_len(&s1));

  // lsm_str_zero is the stack-allocated counterpart of lsm_str_free; without
  // it the copies made above are leaked.
  lsm_str_zero(&s1);
  lsm_str_zero(&s2);
  lsm_str_zero(&s3);
}
/**
 * Check lsm_str_eq against pairs of strings that are equal or different, for
 * both short strings (at most 8 characters) and longer ones.
 */
void test_eq() {
  // Each case is built from scratch so no case depends on leftovers of a
  // previous one. The original test overwrote s1 twice in the
  // "short"/"shorte" case and never set s2.
  static const struct {
    char *lhs;
    char *rhs;
    bool equal;
  } cases[] = {
      {"longerthan8", "longerthan8", true},
      {"longerthan8", "lmaolongerthan8", false},
      {"short", "short", true},
      {"short", "shorte", false},
      {"longerthan8", "short", false},
  };

  for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
    // Zero-initialize so the overwrite functions never see indeterminate
    // contents
    lsm_str s1 = {0}, s2 = {0};

    lsm_str_overwrite_copy(&s1, cases[i].lhs);
    lsm_str_overwrite_copy(&s2, cases[i].rhs);

    TEST_CHECK_(lsm_str_eq(&s1, &s2) == cases[i].equal,
                "lsm_str_eq(\"%s\", \"%s\") should be %s", cases[i].lhs,
                cases[i].rhs, cases[i].equal ? "true" : "false");

    // Release the copies before the next case
    lsm_str_zero(&s1);
    lsm_str_zero(&s2);
  }
}
/**
 * Check that lsm_str_substr copies the requested range, and that a start
 * index beyond the end of the source yields a zero-length string.
 */
void test_substr() {
  // Zero-initialize so no function ever sees indeterminate struct contents
  lsm_str s1 = {0}, s2 = {0}, s3 = {0};

  lsm_str_overwrite_copy(&s1, "some_string");
  lsm_str_overwrite_copy(&s3, "string");

  lsm_str_substr(&s2, &s1, 5, lsm_str_len(&s1));
  TEST_CHECK(lsm_str_eq(&s2, &s3));

  // lsm_str_substr treats its output as uninitialized, so the previous
  // result has to be wiped by hand before reusing s2
  lsm_str_zero(&s2);
  lsm_str_substr(&s2, &s1, 25, lsm_str_len(&s1));
  TEST_CHECK(lsm_str_len(&s2) == 0);

  // Release everything that may have been heap-allocated above
  lsm_str_zero(&s1);
  lsm_str_zero(&s2);
  lsm_str_zero(&s3);
}
/**
 * Check in-place truncation, first to a length that is still longer than 8
 * characters and then down to a short string.
 */
void test_truncate() {
  // Zero-initialize so the overwrite functions never see indeterminate
  // contents
  lsm_str s1 = {0}, s2 = {0}, s3 = {0};

  lsm_str_overwrite_copy(&s1, "some_longer_string_thing");
  lsm_str_overwrite_copy(&s2, "some_longer_string");
  lsm_str_overwrite_copy(&s3, "some");

  lsm_str_truncate(&s1, 18);
  TEST_CHECK(lsm_str_eq(&s1, &s2));

  lsm_str_truncate(&s1, 4);
  TEST_CHECK(lsm_str_eq(&s1, &s3));

  // Release the copies made above
  lsm_str_zero(&s1);
  lsm_str_zero(&s2);
  lsm_str_zero(&s3);
}
void test_init_copy() {
char orig[] = "some_string";
lsm_str *s;
lsm_str_init_copy(&s, orig);
TEST_CHECK(s->data.ptr != orig);
TEST_CHECK(strcmp(s->data.ptr, orig) == 0);
lsm_str_free(s);
}
// Registration table for the lsm_str tests: each entry pairs a display name
// with the function implementing the test. The { NULL, NULL } entry marks the
// end of the list.
TEST_LIST = {
{ "str init_copy", test_init_copy },
{ "str cmp", test_cmp },
{ "str eq", test_eq },
{ "str substr", test_substr },
{ "str truncate", test_truncate },
{ NULL, NULL }
};

View File

@ -0,0 +1,222 @@
#ifndef LSM_TRIE_FUZZY_TEST
#define LSM_TRIE_FUZZY_TEST
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <stdio.h>
#include "lsm/trie.h"
#include "lsm/str_internal.h"
/**
 * Parameters for a single run of fuzzy_test_trie_seed.
 */
typedef struct fuzzyconfig {
// Value passed to srand() before any string is generated
int seed;
// Passed as max_len to lsm_random_string_matrix, which draws each string's
// length as rand() % max_len (so lengths stay strictly below this value)
int word_length;
// Number of strings to generate and insert
int word_count;
} FuzzyConfig;
/**
 * Fill a buffer with a random NUL-terminated string that only uses characters
 * from a fixed set: ASCII letters, digits, ',' and '?'.
 *
 * The string's length is drawn as rand() % (len - 1), so it lies between 0 and
 * len - 2 characters and the terminator always fits inside the buffer.
 *
 * @param s buffer to write to; must be able to hold len bytes
 * @param len size of the buffer in bytes. A length of 1 produces an empty
 * string; a non-positive length leaves the buffer untouched.
 */
void random_clean_string(char* s, int len) {
  static const char charset[] =
      "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789,?";
  const int charset_len = (int)(sizeof(charset) - 1);

  // Nothing can be written safely into a buffer without any room
  if (len <= 0) {
    return;
  }

  // len - 1 ensures that we can still set the null byte for the final byte.
  // For len == 1 the modulo would divide by zero, and the only possible
  // result is the empty string anyway.
  const int actual_len = (len > 1) ? rand() % (len - 1) : 0;

  for (int i = 0; i < actual_len; i++) {
    s[i] = charset[rand() % charset_len];
  }

  s[actual_len] = '\0';
}
/**
 * Fill a buffer with len - 1 random non-zero bytes followed by a NUL
 * terminator.
 *
 * @param s buffer to write to; must be able to hold len bytes
 * @param len size of the buffer in bytes, terminator included. A length of 1
 * yields an empty string; a non-positive length leaves the buffer untouched.
 */
void random_string(char* s, int len) {
  // Without this guard, s[0] and s[len - 1] would be written outside of the
  // buffer
  if (len <= 0) {
    return;
  }

  int val = rand();
  // String can't be an empty string as they aren't supported
  // (val % 255 + 1 is never zero, so no early terminator is produced)
  s[0] = (char)(val % 255 + 1);

  for (int i = 1; i < len - 1; i++) {
    val = rand();
    s[i] = (char)(val % 255 + 1);
  }

  // Terminate the string; for len == 1 this overwrites the byte set above
  s[len - 1] = '\0';
}
/**
 * Overwrite every row of a string matrix with a fresh random string.
 *
 * @param s array of `count` buffers, each able to hold `len` bytes
 * @param count number of rows in the matrix
 * @param len size of every row in bytes, as passed on to random_string
 */
void random_string_matrix(char** s, int count, int len) {
  int row = 0;

  while (row < count) {
    random_string(s[row], len);
    row++;
  }
}
/**
 * Allocate a matrix of `count` zero-filled character buffers of `len` bytes
 * each.
 *
 * @param count number of rows to allocate
 * @param len size of every row in bytes
 * @return the newly allocated matrix, or NULL if either argument is negative
 * or an allocation fails. The caller owns the matrix and must free every row
 * as well as the matrix itself.
 */
char** init_string_matrix(int count, int len) {
  if (count < 0 || len < 0) {
    return NULL;
  }

  char** matrix = malloc((size_t)count * sizeof(char*));

  if (matrix == NULL) {
    return NULL;
  }

  for (int i = 0; i < count; i++) {
    matrix[i] = calloc((size_t)len, sizeof(char));

    // A zero-sized calloc is allowed to return NULL, so only treat NULL as a
    // failure when memory was actually requested
    if (matrix[i] == NULL && len > 0) {
      // Undo the partial allocation so nothing leaks
      for (int j = 0; j < i; j++) {
        free(matrix[j]);
      }

      free(matrix);
      return NULL;
    }
  }

  return matrix;
}
/**
 * Allocate an array of lsm_str values filled with random strings.
 *
 * Every string gets a length of rand() % max_len and consists of random
 * non-zero bytes.
 *
 * @param count number of strings to generate
 * @param max_len exclusive upper bound for the length of each string
 * @return array of `count` strings, or NULL if count or max_len is not
 * positive or an allocation fails. The caller has to call lsm_str_zero on
 * every element and free the array afterwards.
 */
lsm_str *lsm_random_string_matrix(int count, int max_len) {
  // max_len <= 0 would make the modulo below divide by zero
  if (count <= 0 || max_len <= 0) {
    return NULL;
  }

  lsm_str *matrix = calloc((size_t)count, sizeof(lsm_str));

  if (matrix == NULL) {
    return NULL;
  }

  for (int i = 0; i < count; i++) {
    int len = rand() % max_len;

    // One extra byte for the terminator: lsm_str_overwrite determines the
    // length using strlen, so an unterminated buffer would be read past its
    // end
    char *buf = malloc((size_t)len + 1);

    if (buf == NULL) {
      // Release what was built so far
      for (int j = 0; j < i; j++) {
        lsm_str_zero(&matrix[j]);
      }

      free(matrix);
      return NULL;
    }

    for (int j = 0; j < len; j++) {
      buf[j] = (char)(rand() % 255 + 1);
    }

    buf[len] = '\0';

    // Ownership of buf is taken over by the lsm_str
    lsm_str_overwrite(&matrix[i], buf);
  }

  return matrix;
}
/**
 * Test the lsm_trie implementation using random strings generated from a
 * given seed.
 *
 * The strings are inserted one by one, checking the result of every insert
 * against what is expected given the strings inserted before it. Afterwards
 * the size reported by the trie is compared with the number of distinct
 * strings. The removal phase is not implemented yet.
 *
 * @param conf seed, number of strings and maximum string length to test with
 * @return exit code describing the failure, if any:
 *   0: success
 *   1: invalid add
 *   2: invalid remove (unused until the removal phase is enabled)
 *   3: bad size after adds
 *   4: bad size after removes (unused until the removal phase is enabled)
 *   5: the test could not be set up (failed allocation or trie init)
 */
int fuzzy_test_trie_seed(FuzzyConfig conf) {
  int exit_code = 0;

  // Everything that is cleaned up at END starts out as NULL, so the cleanup
  // code is safe no matter where the function bails out
  lsm_str *matrix = NULL;
  bool *contains = NULL;
  bool **contains_dedupped = NULL;
  lsm_trie *trie = NULL;

  // We keep track of the size as well so that we can check whether this is
  // also correct
  size_t size = 0;
  lsm_error status;

  srand(conf.seed);

  matrix = lsm_random_string_matrix(conf.word_count, conf.word_length);
  contains = calloc(conf.word_count, sizeof(bool));
  contains_dedupped = calloc(conf.word_count, sizeof(bool *));

  // With zero words there is nothing to dereference, and a zero-sized
  // allocation may legitimately yield NULL
  if (conf.word_count > 0 &&
      (matrix == NULL || contains == NULL || contains_dedupped == NULL)) {
    exit_code = 5;
    goto END;
  }

  // It's possible that the string matrix contains duplicate strings. Every
  // string points to the flag of its first occurrence, so duplicates share a
  // single "is in the trie" flag.
  for (int i = 0; i < conf.word_count; i++) {
    if (contains_dedupped[i] == NULL) {
      contains_dedupped[i] = &contains[i];

      for (int j = i + 1; j < conf.word_count; j++) {
        if (lsm_str_eq(&matrix[i], &matrix[j])) {
          contains_dedupped[j] = &contains[i];
        }
      }
    }
  }

  if (lsm_trie_init(&trie) != lsm_error_ok) {
    exit_code = 5;
    goto END;
  }

  // Add all strings to trie, checking for duplicates
  for (int i = 0; i < conf.word_count; i++) {
    // NULL is not accepted as a data value, so any non-NULL pointer will do
    status = lsm_trie_insert(trie, &matrix[i], (void *)1);

    bool inserted = (status == lsm_error_ok);

    // An insert may only succeed if the string isn't in the trie yet, and may
    // only fail if it already is. Both mismatches count as an invalid add.
    if (inserted == *contains_dedupped[i]) {
      exit_code = 1;
      goto END;
    }

    if (!*contains_dedupped[i]) {
      *contains_dedupped[i] = true;
      size++;
    }
  }

  // Ensure size is correct
  if (lsm_trie_size(trie) != size) {
    // Cast explicitly: neither uint64_t nor size_t is guaranteed to match %lu
    printf("%llu %llu\n", (unsigned long long)lsm_trie_size(trie),
           (unsigned long long)size);
    exit_code = 3;
    goto END;
  }

  // TODO: removal phase, disabled until the trie supports removing keys.
  // Remove all strings again, again taking duplicates into consideration
  /* for (int i = 0; i < conf.word_count; i++) { */
  /*   changed = remove_func(ct, matrix[i]); */
  /*   // The string shouldn't be in the trie, yet another add operation */
  /*   // says it added it as well */
  /*   if (changed != *contains_dedupped[i]) { */
  /*     exit_code = 2; */
  /*     goto END; */
  /*   } */
  /*   if (*contains_dedupped[i]) { */
  /*     *contains_dedupped[i] = false; */
  /*     size--; */
  /*   } */
  /* } */

  // Finally, check that the trie is completely empty
  /* if (size_func(ct) != 0) { */
  /*   exit_code = 4; */
  /* } */

END:
  // NOTE(review): the trie itself is not released here yet
  /* trie_free(ct); */

  // Even testing functions should properly free memory
  free(contains);
  free(contains_dedupped);

  if (matrix != NULL) {
    for (int i = 0; i < conf.word_count; i++) {
      lsm_str_zero(&matrix[i]);
    }

    free(matrix);
  }

  return exit_code;
}
/**
* Same as fuzzy_test_trie_seed, except that the seed is randomly generated.
*
* @param count how many strings to test with
* @param len maximum length of each string
* @param init_func function to creat a new trie of the wanted type
* @param free_func function to free the given trie
* @param add_func function to add a string to the given trie
* @param remove_func function to remove a string from the given trie
* @param size_func function to get the size of the given trie
* @return the generated seed if the test wasn't successful, -1 otherwise.
*/
/* int fuzzy_test_trie(int count, int len, void* (*init_func) (), void (*free_func) (void*), bool (*add_func) (void*, char*), bool (*remove_func) (void*, char*), int (*size_func) (void*)) { */
/* int seed = rand(); */
/* bool succeeded = fuzzy_test_trie_seed(seed, count, len, init_func, free_func, add_func, remove_func, size_func); */
/* if (!succeeded) { */
/* return seed; */
/* } */
/* return -1; */
/* } */
#endif

View File

@ -1,5 +1,5 @@
#include "lsm.h"
#include "test.h" #include "test.h"
#include "lsm.h"
#include "lsm/trie_internal.h" #include "lsm/trie_internal.h"
#define TRIE_INIT() \ #define TRIE_INIT() \

View File

@ -0,0 +1,35 @@
#include "test.h"
#include "lsm.h"
#include "lsm/trie_internal.h"
#include "fuzzy.h"
void test_fuzzy() {
// Randomize seed
srand(time(NULL));
FuzzyConfig config;
int counter = 0;
int res;
for (int len = 1; len < 25; len += 5) {
for (int count = 10; count <= 500; count += 10) {
for (int i = 0; i < 1; i++) {
counter++;
config.seed = rand();
config.word_length = len;
config.word_count = count;
res = fuzzy_test_trie_seed(config);
TEST_CHECK_(res == 0,
"Failed config, seed = %i, len = %i, count = %i, code = %i", config.seed, config.word_length, config.word_count, res);
}
}
}
TEST_MSG("fuzzy tests done = %i", counter);
}
// Registration table for the trie fuzzy tests. The only entry is commented
// out, so no test from this file is registered; the { NULL, NULL } entry marks
// the end of the list.
TEST_LIST = {
/* { "trie fuzzy", test_fuzzy }, */
{ NULL, NULL}
};