diff --git a/lsm/include/lsm/str.h b/lsm/include/lsm/str.h index 346fd54..52659c1 100644 --- a/lsm/include/lsm/str.h +++ b/lsm/include/lsm/str.h @@ -1,6 +1,8 @@ #ifndef LSM_STR #define LSM_STR +#include <stdbool.h> + #include "lsm.h" /** @@ -10,22 +12,6 @@ */ typedef struct lsm_str lsm_str; -/** - * Allocate a new string struct of length 0. - * - * @param ptr pointer to store newly allocated pointer in - */ -lsm_error lsm_str_init_zero(lsm_str **ptr); - -/** - * Update an existing lsm_str so it now represents the new provided string. The - * string pointer of the original object is free'd if needed. - * - * @param str lsm_str object to modify - * @param s string to convert into lsm string; ownership is taken over - */ -void lsm_str_init_prealloc(lsm_str *str, char *s); - /** * Allocate and initialize a new lsm_str object * @@ -35,17 +21,45 @@ void lsm_str_init_prealloc(lsm_str *str, char *s); lsm_error lsm_str_init(lsm_str **ptr, char *s); /** - * Same as lsm_str_init, except it copies the original string instead of taking - * over ownership, leaving the original string untouched. + * Allocate a new string struct of length 0. + * + * @param ptr pointer to store newly allocated pointer in + */ +lsm_error lsm_str_init_zero(lsm_str **ptr); + +/** + * Allocate and initialize a new lsm_str object, but copy the original string + * instead of taking over ownership, leaving the original string untouched. * * @param ptr pointer to store newly allocated pointer * @param s string to copy into lsm string */ lsm_error lsm_str_init_copy(lsm_str **ptr, char *s); +/** + * Overwrite an existing lsm_str so it now represents the new provided string. + * The string pointer of the original object is free'd if needed. Ownership of + * the pointer is taken over. 
+ * + * @param str lsm_str object to modify + * @param s string to convert into lsm string; ownership is taken over + */ +void lsm_str_overwrite(lsm_str *str, char *s); + +/** + * Overwrite an existing lsm_str so it now represents the new provided string. + * The string pointer of the original object is free'd if needed. The provided + * string is copied, leaving the original untouched. + * + * @param str lsm_str object to modify + * @param s string to copy into lsm string; the original is left untouched + */ +lsm_error lsm_str_overwrite_copy(lsm_str *str, char *s); + /** * Deallocate the existing internal string if needed and replace the lsm_str - * with a string of length 0, wiping its contents. + * with a string of length 0, wiping its contents. This function can be used as + * a substitute for lsm_str_free for stack-allocated structs. * * @param str string to wipe */ @@ -85,7 +99,8 @@ char lsm_str_char(lsm_str *str, uint64_t index); * Take a substring and copy it to a provided string object. * * @param out string to store new substring in. The contents of this string will - * be replaced. + * be replaced. This string is assumed to be uninitialized, so zero this string + * manually if you're overwriting an existing string. * @param str string to take substring from * @param start inclusive start index for the substring. If this is greater than * or equal to the string's length, out will be a zero-length string. @@ -109,7 +124,16 @@ uint64_t lsm_str_cmp(lsm_str *s1, uint64_t s1_offset, lsm_str *s2, uint64_t s2_offset); /** - * Truncate a string in-place. + * Checks whether the two strings are identical. + * + * @param s1 first string to compare + * @param s2 second string to compare + * @return true if their values are equal, false otherwise + */ +bool lsm_str_eq(lsm_str *s1, lsm_str *s2); + +/** + * Truncate an already initialized string in-place. * * @param s string to truncate * @param new_len new length of the string. 
If new_len is >= the original diff --git a/lsm/src/str/lsm_str.c b/lsm/src/str/lsm_str.c index a33c700..2244e52 100644 --- a/lsm/src/str/lsm_str.c +++ b/lsm/src/str/lsm_str.c @@ -7,6 +7,20 @@ #define MIN(x, y) (((x) < (y)) ? (x) : (y)) +lsm_error lsm_str_init(lsm_str **ptr, char *s) { + lsm_str *str = calloc(1, sizeof(lsm_str)); + + if (str == NULL) { + return lsm_error_failed_alloc; + } + + lsm_str_overwrite(str, s); + + *ptr = str; + + return lsm_error_ok; +} + lsm_error lsm_str_init_zero(lsm_str **ptr) { lsm_str *str = calloc(1, sizeof(lsm_str)); @@ -19,7 +33,21 @@ lsm_error lsm_str_init_zero(lsm_str **ptr) { return lsm_error_ok; } -void lsm_str_init_prealloc(lsm_str *str, char *s) { +lsm_error lsm_str_init_copy(lsm_str **ptr, char *s) { + lsm_str *str = calloc(1, sizeof(lsm_str)); + if (str == NULL) { + return lsm_error_failed_alloc; + } + lsm_error res = lsm_str_overwrite_copy(str, s); /* the copy can fail */ + if (res != lsm_error_ok) { + free(str); /* don't leak the struct or hand back a half-built string */ + return res; + } + *ptr = str; + return lsm_error_ok; +} + +void lsm_str_overwrite(lsm_str *str, char *s) { str->len = strlen(s); if (str->len <= 8) { @@ -30,27 +58,7 @@ void lsm_str_init_prealloc(lsm_str *str, char *s) { } } -lsm_error lsm_str_init(lsm_str **ptr, char *s) { - lsm_str *str = calloc(1, sizeof(lsm_str)); - - if (str == NULL) { - return lsm_error_failed_alloc; - } - - lsm_str_init_prealloc(str, s); - - *ptr = str; - - return lsm_error_ok; -} - -lsm_error lsm_str_init_copy(lsm_str **ptr, char *s) { - lsm_str *str = calloc(1, sizeof(lsm_str)); - - if (str == NULL) { - return lsm_error_failed_alloc; - } - +lsm_error lsm_str_overwrite_copy(lsm_str *str, char *s) { str->len = strlen(s); if (str->len <= 8) { @@ -66,8 +74,6 @@ lsm_error lsm_str_init_copy(lsm_str **ptr, char *s) { str->data.ptr = buf; } - *ptr = str; - return lsm_error_ok; } @@ -80,10 +86,7 @@ void lsm_str_zero(lsm_str *str) { } void lsm_str_free(lsm_str *str) { - if (str->len > 8) { - free(str->data.ptr); - } - + lsm_str_zero(str); free(str); } @@ -112,7 +115,7 @@ lsm_error lsm_str_substr(lsm_str *out, lsm_str *str, uint64_t 
start, const char *str_ptr = lsm_str_ptr(str); if (len <= 8) { - lsm_str_zero(out); + /* lsm_str_zero(out); */ memcpy(out->data.val, &str_ptr[start], len); } else { char *buf = malloc(len * sizeof(char)); @@ -123,7 +126,7 @@ lsm_error lsm_str_substr(lsm_str *out, lsm_str *str, uint64_t start, memcpy(buf, &str_ptr[start], len); - lsm_str_zero(out); + /* lsm_str_zero(out); */ out->data.ptr = buf; } @@ -185,3 +188,11 @@ lsm_error lsm_str_split(lsm_str *s, lsm_str *s2, uint64_t index) { return lsm_str_truncate(s, index); } + +bool lsm_str_eq(lsm_str *s1, lsm_str *s2) { + if (s1->len != s2->len) { + return false; + } + + return memcmp(lsm_str_ptr(s1), lsm_str_ptr(s2), s1->len) == 0; +} diff --git a/lsm/src/trie/lsm_trie.c b/lsm/src/trie/lsm_trie.c index 541f89f..e72c288 100644 --- a/lsm/src/trie/lsm_trie.c +++ b/lsm/src/trie/lsm_trie.c @@ -35,6 +35,8 @@ lsm_error lsm_trie_init(lsm_trie **ptr) { return lsm_error_ok; } +uint64_t lsm_trie_size(lsm_trie *trie) { return trie->size; } + lsm_error lsm_trie_insert(lsm_trie *trie, lsm_str *key, void *data) { // NULL is not allowed as a data value, as it's used to indicate a lack of // data @@ -48,6 +50,7 @@ lsm_error lsm_trie_insert(lsm_trie *trie, lsm_str *key, void *data) { if (key_len == 0) { if (trie->root->data == NULL) { trie->root->data = data; + trie->size++; return lsm_error_ok; } else { @@ -75,6 +78,8 @@ lsm_error lsm_trie_insert(lsm_trie *trie, lsm_str *key, void *data) { } new_node->data = data; + trie->size++; + lsm_str_substr(&new_node->skip, key, index + 1, key_len); return lsm_bt_insert(&node->bt, c, new_node); @@ -125,6 +130,7 @@ lsm_error lsm_trie_insert(lsm_trie *trie, lsm_str *key, void *data) { } node->data = data; + trie->size++; return lsm_error_ok; } diff --git a/lsm/test/str/str.c b/lsm/test/str/str.c new file mode 100644 index 0000000..4cbd2d1 --- /dev/null +++ b/lsm/test/str/str.c @@ -0,0 +1,91 @@ +#include "test.h" +#include "lsm/str_internal.h" + +void test_cmp() { + lsm_str s1, s2, s3; + 
lsm_str_overwrite_copy(&s1, "some_string"); + lsm_str_overwrite_copy(&s2, "some"); + lsm_str_overwrite_copy(&s3, "some_string_extra"); + + TEST_CHECK(lsm_str_cmp(&s1, 0, &s2, 0) == 4); + TEST_CHECK(lsm_str_cmp(&s1, 0, &s2, 1) == 0); + TEST_CHECK(lsm_str_cmp(&s1, 1, &s2, 1) == 3); + TEST_CHECK(lsm_str_cmp(&s1, 1, &s2, 0) == 0); + + TEST_CHECK(lsm_str_cmp(&s1, 0, &s3, 0) == lsm_str_len(&s1)); +} + +void test_eq() { + lsm_str s1, s2; + lsm_str_overwrite_copy(&s1, "longerthan8"); + lsm_str_overwrite_copy(&s2, "longerthan8"); + + TEST_CHECK(lsm_str_eq(&s1, &s2)); + + lsm_str_overwrite_copy(&s1, "longerthan8"); + lsm_str_overwrite_copy(&s2, "lmaolongerthan8"); + + TEST_CHECK(!lsm_str_eq(&s1, &s2)); + + lsm_str_overwrite_copy(&s1, "short"); + lsm_str_overwrite_copy(&s2, "short"); + + TEST_CHECK(lsm_str_eq(&s1, &s2)); + + lsm_str_overwrite_copy(&s1, "short"); + lsm_str_overwrite_copy(&s2, "shorte"); + + TEST_CHECK(!lsm_str_eq(&s1, &s2)); + + lsm_str_overwrite_copy(&s1, "longerthan8"); + lsm_str_overwrite_copy(&s2, "short"); + + TEST_CHECK(!lsm_str_eq(&s1, &s2)); +} + +void test_substr() { + lsm_str s1, s2, s3; + lsm_str_overwrite_copy(&s1, "some_string"); + lsm_str_overwrite_copy(&s3, "string"); + lsm_str_substr(&s2, &s1, 5, lsm_str_len(&s1)); + + TEST_CHECK(lsm_str_eq(&s2, &s3)); + + lsm_str_zero(&s2); + lsm_str_substr(&s2, &s1, 25, lsm_str_len(&s1)); + + TEST_CHECK(lsm_str_len(&s2) == 0); +} + +void test_truncate() { + lsm_str s1, s2, s3; + lsm_str_overwrite_copy(&s1, "some_longer_string_thing"); + lsm_str_overwrite_copy(&s2, "some_longer_string"); + lsm_str_overwrite_copy(&s3, "some"); + + lsm_str_truncate(&s1, 18); + TEST_CHECK(lsm_str_eq(&s1, &s2)); + + lsm_str_truncate(&s1, 4); + TEST_CHECK(lsm_str_eq(&s1, &s3)); +} + +void test_init_copy() { + char orig[] = "some_string"; + lsm_str *s; + lsm_str_init_copy(&s, orig); + + TEST_CHECK(s->data.ptr != orig); + TEST_CHECK(lsm_str_len(s) == strlen(orig) && memcmp(s->data.ptr, orig, strlen(orig)) == 0); /* length-bounded: the buffer may not be NUL-terminated */ + + lsm_str_free(s); +} + +TEST_LIST = { + { "str init_copy", 
test_init_copy }, + { "str cmp", test_cmp }, + { "str eq", test_eq }, + { "str substr", test_substr }, + { "str truncate", test_truncate }, + { NULL, NULL } +}; diff --git a/lsm/test/trie/fuzzy.h b/lsm/test/trie/fuzzy.h new file mode 100644 index 0000000..6cd0a07 --- /dev/null +++ b/lsm/test/trie/fuzzy.h @@ -0,0 +1,222 @@ +#ifndef LSM_TRIE_FUZZY_TEST +#define LSM_TRIE_FUZZY_TEST + +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "lsm/trie.h" +#include "lsm/str_internal.h" + +typedef struct fuzzyconfig { + int seed; + int word_length; + int word_count; +} FuzzyConfig; + +void random_clean_string(char* s, int len) { + char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789,?"; + int charset_len = strlen(charset); + + // len - 1 ensures that we can still set the null byte for the final byte + int actual_len = (len > 1) ? rand() % (len - 1) : 0; /* len <= 1 would be a modulo by zero or a negative modulus */ + int key; + int i; + + for (i = 0; i < actual_len; i++) { + key = rand() % charset_len; + s[i] = charset[key]; + } + + s[i] = '\0'; +} + +void random_string(char* s, int len) { + int val = rand(); + + // String can't be an empty string as they aren't supported + s[0] = (char)(val % 255 + 1); + + for (int i = 1; i < len - 1; i++) { + val = rand(); + s[i] = (char)(val % 255 + 1); + } + + // Just in case no null characters were created + s[len - 1] = '\0'; +} + +void random_string_matrix(char** s, int count, int len) { + for (int i = 0; i < count; i++) { + random_string(s[i], len); + } +} + +char** init_string_matrix(int count, int len) { + char** matrix = malloc(count * sizeof(char*)); + + for (int i = 0; i < count; i++) { + matrix[i] = calloc(len, sizeof(char)); + } + + return matrix; +} + +lsm_str *lsm_random_string_matrix(int count, int max_len) { + lsm_str *matrix = calloc(count, sizeof(lsm_str)); + + for (int i = 0; i < count; i++) { + int len = rand() % max_len; + char *buf = calloc(len + 1, sizeof(char)); /* +1 zeroed byte: lsm_str_overwrite() runs strlen() on buf */ + + for (int i = 0; i < len; i++) { + buf[i] = (char)(rand() % 255 + 1); + } + + 
lsm_str_overwrite(&matrix[i], buf); + } + + return matrix; +} + +/** + * Test a given trie implementation using randomly generated strings generated + * using a given seed. + * + * @param conf test configuration, used as follows: + * - conf.seed is passed to srand() before any string is generated + * - conf.word_count is the number of strings generated and inserted + * - conf.word_length is handed to lsm_random_string_matrix() as max_len + * The strings are inserted into a fresh lsm_trie via lsm_trie_insert(), and + * the resulting lsm_trie_size() is compared against the number of distinct + * strings. Removal is not exercised: that part of the test is still + * commented out. + * @return exit code describing failures, if any + */ +int fuzzy_test_trie_seed(FuzzyConfig conf) { + srand(conf.seed); + + lsm_str *matrix = lsm_random_string_matrix(conf.word_count, conf.word_length); + bool* contains = calloc(conf.word_count, sizeof(bool)); + + // It's possible that the string matrix contains duplicate strings + bool** contains_dedupped = calloc(conf.word_count, sizeof(bool*)); + + for (int i = 0; i < conf.word_count; i++) { + if (contains_dedupped[i] == NULL) { + contains_dedupped[i] = &contains[i]; + + for (int j = i + 1; j < conf.word_count; j++) { + if (lsm_str_eq(&matrix[i], &matrix[j])) { + contains_dedupped[j] = &contains[i]; + } + } + } + } + + // We keep track of the size as well so that we can check whether this is + // also correct + size_t size = 0; + + lsm_trie *trie; + lsm_trie_init(&trie); + + bool changed; + lsm_error status; + + // 0: success + // 1: invalid add + // 2: invalid remove + // 3: bad size after adds + // 4: bad size after removes + int exit_code = 0; + + // Add all strings to trie, checking for duplicates + for (int i = 0; i < conf.word_count; i++) { + status = lsm_trie_insert(trie, &matrix[i], (void *)1); + + // The insert must succeed exactly when the string is not in the trie yet: + // accepting a string that was already added, or rejecting a new one, is a 
+ // bug. (void *)1 is only a dummy payload, because NULL is not allowed as a + // data value. + if ((status == lsm_error_ok) == *contains_dedupped[i]) { + exit_code = 1; + goto END; + } + + if (!*contains_dedupped[i]) { + *contains_dedupped[i] = true; + size++; + } + } + + // Ensure size is correct + if (lsm_trie_size(trie) != size) { + printf("%llu %zu\n", (unsigned long long)lsm_trie_size(trie), size); + exit_code = 3; + goto END; + } + + // Remove all strings again, again taking duplicates into consideration + /* for (int i = 0; i < conf.word_count; i++) { */ + /* changed = remove_func(ct, matrix[i]); */ + + /* // The string shouldn't be in the trie, yet another add operation */ + /* // says it added it as well */ + /* if (changed != *contains_dedupped[i]) { */ + /* exit_code = 2; */ + /* goto END; */ + /* } */ + + /* if (*contains_dedupped[i]) { */ + /* *contains_dedupped[i] = false; */ + /* size--; */ + /* } */ + /* } */ + + // Finally, check that the trie is completely empty + /* if (size_func(ct) != 0) { */ + /* exit_code = 4; */ + /* } */ + +END: + /* trie_free(ct); */ + + // Even testing functions should properly free memory + free(contains); + free(contains_dedupped); + + for (int i = 0; i < conf.word_count; i++) { + lsm_str_zero(&matrix[i]); + } + + free(matrix); + + return exit_code; +} + +/** + * Same as fuzzy_test_trie_seed, except that the seed is randomly generated. + * + * @param count how many strings to test with + * @param len maximum length of each string + * @param init_func function to create a new trie of the wanted type + * @param free_func function to free the given trie + * @param add_func function to add a string to the given trie + * @param remove_func function to remove a string from the given trie + * @param size_func function to get the size of the given trie + * @return the generated seed if the test wasn't successful, -1 otherwise. 
+ */ +/* int fuzzy_test_trie(int count, int len, void* (*init_func) (), void (*free_func) (void*), bool (*add_func) (void*, char*), bool (*remove_func) (void*, char*), int (*size_func) (void*)) { */ +/* int seed = rand(); */ +/* bool succeeded = fuzzy_test_trie_seed(seed, count, len, init_func, free_func, add_func, remove_func, size_func); */ + +/* if (!succeeded) { */ +/* return seed; */ +/* } */ + +/* return -1; */ +/* } */ + +#endif diff --git a/lsm/test/trie/trie.c b/lsm/test/trie/trie.c index f3bf73b..db3e6ee 100644 --- a/lsm/test/trie/trie.c +++ b/lsm/test/trie/trie.c @@ -1,5 +1,5 @@ -#include "lsm.h" #include "test.h" +#include "lsm.h" #include "lsm/trie_internal.h" #define TRIE_INIT() \ diff --git a/lsm/test/trie/trie_fuzzy.c b/lsm/test/trie/trie_fuzzy.c new file mode 100644 index 0000000..9974a68 --- /dev/null +++ b/lsm/test/trie/trie_fuzzy.c @@ -0,0 +1,35 @@ +#include "test.h" +#include "lsm.h" +#include "lsm/trie_internal.h" +#include "fuzzy.h" + +void test_fuzzy() { + // Randomize seed + srand(time(NULL)); + + FuzzyConfig config; + int counter = 0; + int res; + + for (int len = 1; len < 25; len += 5) { + for (int count = 10; count <= 500; count += 10) { + for (int i = 0; i < 1; i++) { + counter++; + + config.seed = rand(); + config.word_length = len; + config.word_count = count; + + res = fuzzy_test_trie_seed(config); + TEST_CHECK_(res == 0, + "Failed config, seed = %i, len = %i, count = %i, code = %i", config.seed, config.word_length, config.word_count, res); + } + } + } + TEST_MSG("fuzzy tests done = %i", counter); +} + +TEST_LIST = { + /* { "trie fuzzy", test_fuzzy }, */ + { NULL, NULL} +};