diff --git a/lsm/include/lsm/str.h b/lsm/include/lsm/str.h index 52659c1..346fd54 100644 --- a/lsm/include/lsm/str.h +++ b/lsm/include/lsm/str.h @@ -1,8 +1,6 @@ #ifndef LSM_STR #define LSM_STR -#include - #include "lsm.h" /** @@ -12,6 +10,22 @@ */ typedef struct lsm_str lsm_str; +/** + * Allocate a new string struct of length 0. + * + * @param ptr pointer to store newly allocated pointer in + */ +lsm_error lsm_str_init_zero(lsm_str **ptr); + +/** + * Update an existing lsm_str so it now represents the new provided string. The + * string pointer of the original object is free'd if needed. + * + * @param str lsm_str object to modify + * @param s string to convert into lsm string; ownership is taken over + */ +void lsm_str_init_prealloc(lsm_str *str, char *s); + /** * Allocate and initialize a new lsm_str object * @@ -21,45 +35,17 @@ typedef struct lsm_str lsm_str; lsm_error lsm_str_init(lsm_str **ptr, char *s); /** - * Allocate a new string struct of length 0. - * - * @param ptr pointer to store newly allocated pointer in - */ -lsm_error lsm_str_init_zero(lsm_str **ptr); - -/** - * Allocate and initialize a new lsm_str object, but copy the original string - * instead of taking over ownership, leaving the original string untouched. + * Same as lsm_str_init, except it copies the original string instead of taking + * over ownership, leaving the original string untouched. * * @param ptr pointer to store newly allocated pointer * @param s string to copy into lsm string */ lsm_error lsm_str_init_copy(lsm_str **ptr, char *s); -/** - * Overwrite an existing lsm_str so it now represents the new provided string. - * The string pointer of the original object is free'd if needed. Ownership of - * the pointer is taken over. - * - * @param str lsm_str object to modify - * @param s string to convert into lsm string; ownership is taken over - */ -void lsm_str_overwrite(lsm_str *str, char *s); - -/** - * Overwrite an existing lsm_str so it now represents the new provided string. - * The string pointer of the original object is free'd if needed. The provided - * string is copied, leaving the original untouched. - * - * @param str lsm_str object to modify - * @param s string to convert into lsm string; ownership is taken over - */ -lsm_error lsm_str_overwrite_copy(lsm_str *str, char *s); - /** * Deallocate the existing internal string if needed and replace the lsm_str - * with a string of length 0, wiping its contents. This function can be used as - * a substitute for lsm_str_free for stack-allocated structs. + * with a string of length 0, wiping its contents. * * @param str string to wipe */ @@ -99,8 +85,7 @@ char lsm_str_char(lsm_str *str, uint64_t index); * Take a substring and copy it to a provided string object. * * @param out string to store new substring in. The contents of this string will - * be replaced. This string is assumed to be unitialized, so zero this string - * manually if you're overwriting an existing string. + * be replaced. * @param str string to take substring from * @param start inclusive start index for the substring. If this is greater than * or equal to the string's length, out will be a zero-length string. @@ -124,16 +109,7 @@ uint64_t lsm_str_cmp(lsm_str *s1, uint64_t s1_offset, lsm_str *s2, uint64_t s2_offset); /** - * Checks whether the two strings are identical. - * - * @param s1 first string to compare - * @param s2 second string to compare - * @return true if their values are equal, false otherwise - */ -bool lsm_str_eq(lsm_str *s1, lsm_str *s2); - -/** - * Truncate an already initialized string in-place. + * Truncate a string in-place. * * @param s string to truncate * @param new_len new length of the string. If new_len is >= the original diff --git a/lsm/src/str/lsm_str.c b/lsm/src/str/lsm_str.c index 2244e52..a33c700 100644 --- a/lsm/src/str/lsm_str.c +++ b/lsm/src/str/lsm_str.c @@ -7,20 +7,6 @@ #define MIN(x, y) (((x) < (y)) ? (x) : (y)) -lsm_error lsm_str_init(lsm_str **ptr, char *s) { - lsm_str *str = calloc(1, sizeof(lsm_str)); - - if (str == NULL) { - return lsm_error_failed_alloc; - } - - lsm_str_overwrite(str, s); - - *ptr = str; - - return lsm_error_ok; -} - lsm_error lsm_str_init_zero(lsm_str **ptr) { lsm_str *str = calloc(1, sizeof(lsm_str)); @@ -33,21 +19,7 @@ lsm_error lsm_str_init_zero(lsm_str **ptr) { return lsm_error_ok; } -lsm_error lsm_str_init_copy(lsm_str **ptr, char *s) { - lsm_str *str = calloc(1, sizeof(lsm_str)); - - if (str == NULL) { - return lsm_error_failed_alloc; - } - - lsm_str_overwrite_copy(str, s); - - *ptr = str; - - return lsm_error_ok; -} - -void lsm_str_overwrite(lsm_str *str, char *s) { +void lsm_str_init_prealloc(lsm_str *str, char *s) { str->len = strlen(s); if (str->len <= 8) { @@ -58,7 +30,27 @@ void lsm_str_overwrite(lsm_str *str, char *s) { } } -lsm_error lsm_str_overwrite_copy(lsm_str *str, char *s) { +lsm_error lsm_str_init(lsm_str **ptr, char *s) { + lsm_str *str = calloc(1, sizeof(lsm_str)); + + if (str == NULL) { + return lsm_error_failed_alloc; + } + + lsm_str_init_prealloc(str, s); + + *ptr = str; + + return lsm_error_ok; +} + +lsm_error lsm_str_init_copy(lsm_str **ptr, char *s) { + lsm_str *str = calloc(1, sizeof(lsm_str)); + + if (str == NULL) { + return lsm_error_failed_alloc; + } + str->len = strlen(s); if (str->len <= 8) { @@ -74,6 +66,8 @@ lsm_error lsm_str_overwrite_copy(lsm_str *str, char *s) { str->data.ptr = buf; } + *ptr = str; + return lsm_error_ok; } @@ -86,7 +80,10 @@ void lsm_str_zero(lsm_str *str) { } void lsm_str_free(lsm_str *str) { - lsm_str_zero(str); + if (str->len > 8) { + free(str->data.ptr); + } + free(str); } @@ -115,7 +112,7 @@ lsm_error lsm_str_substr(lsm_str *out, lsm_str *str, uint64_t start, const char *str_ptr = lsm_str_ptr(str); if (len <= 8) { - /* lsm_str_zero(out); */ + lsm_str_zero(out); memcpy(out->data.val, &str_ptr[start], len); } else { char *buf = malloc(len * sizeof(char)); @@ -126,7 +123,7 @@ lsm_error lsm_str_substr(lsm_str *out, lsm_str *str, uint64_t start, memcpy(buf, &str_ptr[start], len); - /* lsm_str_zero(out); */ + lsm_str_zero(out); out->data.ptr = buf; } @@ -188,11 +185,3 @@ lsm_error lsm_str_split(lsm_str *s, lsm_str *s2, uint64_t index) { return lsm_str_truncate(s, index); } - -bool lsm_str_eq(lsm_str *s1, lsm_str *s2) { - if (s1->len != s2->len) { - return false; - } - - return memcmp(lsm_str_ptr(s1), lsm_str_ptr(s2), s1->len) == 0; -} diff --git a/lsm/src/trie/lsm_trie.c b/lsm/src/trie/lsm_trie.c index a8b7d82..541f89f 100644 --- a/lsm/src/trie/lsm_trie.c +++ b/lsm/src/trie/lsm_trie.c @@ -1,4 +1,3 @@ -#include #include #include "lsm.h" @@ -36,8 +35,6 @@ lsm_error lsm_trie_init(lsm_trie **ptr) { return lsm_error_ok; } -uint64_t lsm_trie_size(lsm_trie *trie) { return trie->size; } - lsm_error lsm_trie_insert(lsm_trie *trie, lsm_str *key, void *data) { // NULL is not allowed as a data value, as it's used to indicate a lack of // data @@ -51,7 +48,6 @@ lsm_error lsm_trie_insert(lsm_trie *trie, lsm_str *key, void *data) { if (key_len == 0) { if (trie->root->data == NULL) { trie->root->data = data; - trie->size++; return lsm_error_ok; } else { @@ -79,8 +75,6 @@ lsm_error lsm_trie_insert(lsm_trie *trie, lsm_str *key, void *data) { } new_node->data = data; - trie->size++; - lsm_str_substr(&new_node->skip, key, index + 1, key_len); return lsm_bt_insert(&node->bt, c, new_node); @@ -102,26 +96,20 @@ lsm_error lsm_trie_insert(lsm_trie *trie, lsm_str *key, void *data) { } // split_node replaces the original node as the new child node - // bottom_node here is always the same value as next_node lsm_trie_node *bottom_node; lsm_bt_replace((void **)&bottom_node, &node->bt, c, split_node); - // The old next node now becomes the child of split_node - lsm_bt_insert(&split_node->bt, lsm_str_char(&next_node->skip, cmp), - next_node); + // The old child node now becomes the child of split_node + lsm_bt_insert(&split_node->bt, lsm_str_char(key, index + cmp), + bottom_node); - // split_node's skip has not been initialized yet, so we can simply - // overwrite it with bottom_node's skip - split_node->skip = next_node->skip; - - // The new node splits the edge into two parts, so the new split node will - // have the first part of the skip (minus the one character) as its - // skip - lsm_str_substr(&next_node->skip, &split_node->skip, cmp + 1, - lsm_str_len(&split_node->skip)); + // The new node splits the edge into two parts, so the new node will have + // the remaining part of the skip (minus the one character) as its skip + lsm_str_substr(&split_node->skip, &next_node->skip, cmp + 1, + lsm_str_len(&next_node->skip)); // The old node keeps the first part of the skip - lsm_str_truncate(&split_node->skip, cmp); + lsm_str_truncate(&next_node->skip, cmp); next_node = split_node; } @@ -137,7 +125,6 @@ lsm_error lsm_trie_insert(lsm_trie *trie, lsm_str *key, void *data) { } node->data = data; - trie->size++; return lsm_error_ok; } diff --git a/lsm/test/str/str.c b/lsm/test/str/str.c deleted file mode 100644 index 4cbd2d1..0000000 --- a/lsm/test/str/str.c +++ /dev/null @@ -1,91 +0,0 @@ -#include "test.h" -#include "lsm/str_internal.h" - -void test_cmp() { - lsm_str s1, s2, s3; - lsm_str_overwrite_copy(&s1, "some_string"); - lsm_str_overwrite_copy(&s2, "some"); - lsm_str_overwrite_copy(&s3, "some_string_extra"); - - TEST_CHECK(lsm_str_cmp(&s1, 0, &s2, 0) == 4); - TEST_CHECK(lsm_str_cmp(&s1, 0, &s2, 1) == 0); - TEST_CHECK(lsm_str_cmp(&s1, 1, &s2, 1) == 3); - TEST_CHECK(lsm_str_cmp(&s1, 1, &s2, 0) == 0); - - TEST_CHECK(lsm_str_cmp(&s1, 0, &s3, 0) == lsm_str_len(&s1)); -} - -void test_eq() { - lsm_str s1, s2; - lsm_str_overwrite_copy(&s1, "longerthan8"); - lsm_str_overwrite_copy(&s2, "longerthan8"); - - TEST_CHECK(lsm_str_eq(&s1, &s2)); - - lsm_str_overwrite_copy(&s1, "longerthan8"); - lsm_str_overwrite_copy(&s2, "lmaolongerthan8"); - - TEST_CHECK(!lsm_str_eq(&s1, &s2)); - - lsm_str_overwrite_copy(&s1, "short"); - lsm_str_overwrite_copy(&s2, "short"); - - TEST_CHECK(lsm_str_eq(&s1, &s2)); - - lsm_str_overwrite_copy(&s1, "short"); - lsm_str_overwrite_copy(&s1, "shorte"); - - TEST_CHECK(!lsm_str_eq(&s1, &s2)); - - lsm_str_overwrite_copy(&s1, "longerthan8"); - lsm_str_overwrite_copy(&s2, "short"); - - TEST_CHECK(!lsm_str_eq(&s1, &s2)); -} - -void test_substr() { - lsm_str s1, s2, s3; - lsm_str_overwrite_copy(&s1, "some_string"); - lsm_str_overwrite_copy(&s3, "string"); - lsm_str_substr(&s2, &s1, 5, lsm_str_len(&s1)); - - TEST_CHECK(lsm_str_eq(&s2, &s3)); - - lsm_str_zero(&s2); - lsm_str_substr(&s2, &s1, 25, lsm_str_len(&s1)); - - TEST_CHECK(lsm_str_len(&s2) == 0); -} - -void test_truncate() { - lsm_str s1, s2, s3; - lsm_str_overwrite_copy(&s1, "some_longer_string_thing"); - lsm_str_overwrite_copy(&s2, "some_longer_string"); - lsm_str_overwrite_copy(&s3, "some"); - - lsm_str_truncate(&s1, 18); - TEST_CHECK(lsm_str_eq(&s1, &s2)); - - lsm_str_truncate(&s1, 4); - TEST_CHECK(lsm_str_eq(&s1, &s3)); -} - -void test_init_copy() { - char orig[] = "some_string"; - lsm_str *s; - lsm_str_init_copy(&s, orig); - - TEST_CHECK(s->data.ptr != orig); - TEST_CHECK(strcmp(s->data.ptr, orig) == 0); - - lsm_str_free(s); -} - -TEST_LIST = { - { "str init_copy", test_init_copy }, - { "str cmp", test_cmp }, - { "str eq", test_eq }, - { "str substr", test_substr }, - { "str truncate", test_truncate }, - { NULL, NULL } -}; diff --git a/lsm/test/trie/fuzzy.h b/lsm/test/trie/fuzzy.h deleted file mode 100644 index 6cd0a07..0000000 --- a/lsm/test/trie/fuzzy.h +++ /dev/null @@ -1,222 +0,0 @@ -#ifndef LSM_TRIE_FUZZY_TEST -#define LSM_TRIE_FUZZY_TEST - -#include -#include -#include -#include - -#include "lsm/trie.h" -#include "lsm/str_internal.h" - -typedef struct fuzzyconfig { - int seed; - int word_length; - int word_count; -} FuzzyConfig; - -void random_clean_string(char* s, int len) { - char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789,?"; - int charset_len = strlen(charset); - - // len - 1 ensures that we can still set the null byte for the final byte - int actual_len = rand() % (len - 1); - int key; - int i; - - for (i = 0; i < actual_len; i++) { - key = rand() % charset_len; - s[i] = charset[key]; - } - - s[i] = '\0'; -} - -void random_string(char* s, int len) { - int val = rand(); - - // String can't be an empty string as they aren't supported - s[0] = (char)(val % 255 + 1); - - for (int i = 1; i < len - 1; i++) { - val = rand(); - s[i] = (char)(val % 255 + 1); - } - - // Just in case no null characters were created - s[len - 1] = '\0'; -} - -void random_string_matrix(char** s, int count, int len) { - for (int i = 0; i < count; i++) { - random_string(s[i], len); - } -} - -char** init_string_matrix(int count, int len) { - char** matrix = malloc(count * sizeof(char*)); - - for (int i = 0; i < count; i++) { - matrix[i] = calloc(len, sizeof(char)); - } - - return matrix; -} - -lsm_str *lsm_random_string_matrix(int count, int max_len) { - lsm_str *matrix = calloc(count, sizeof(lsm_str)); - - for (int i = 0; i < count; i++) { - int len = rand() % max_len; - char *buf = malloc(len * sizeof(char)); - - for (int i = 0; i < len; i++) { - buf[i] = (char)(rand() % 255 + 1); - } - - lsm_str_overwrite(&matrix[i], buf); - } - - return matrix; -} - -/** - * Test a given trie implementation using randomly generated strings generated - * using a given seed. - * - * @param seed seed to use for generating random strings - * @param count how many strings to test with - * @param len maximum length of each string - * @param init_func function to creat a new trie of the wanted type - * @param free_func function to free the given trie - * @param add_func function to add a string to the given trie - * @param remove_func function to remove a string from the given trie - * @param size_func function to get the size of the given trie - * @return exit code describing failures, if any - */ -int fuzzy_test_trie_seed(FuzzyConfig conf) { - srand(conf.seed); - - lsm_str *matrix = lsm_random_string_matrix(conf.word_count, conf.word_length); - bool* contains = calloc(conf.word_count, sizeof(bool)); - - // It's possible that the string matrix contains duplicate strings - bool** contains_dedupped = calloc(conf.word_count, sizeof(bool*)); - - for (int i = 0; i < conf.word_count; i++) { - if (contains_dedupped[i] == NULL) { - contains_dedupped[i] = &contains[i]; - - for (int j = i + 1; j < conf.word_count; j++) { - if (lsm_str_eq(&matrix[i], &matrix[j])) { - contains_dedupped[j] = &contains[i]; - } - } - } - } - - // We keep track of the size as well so that we can check whether this is - // also correct - size_t size = 0; - - lsm_trie *trie; - lsm_trie_init(&trie); - - bool changed; - lsm_error status; - - // 0: success - // 1: invalid add - // 2: invalid remove - // 3: bad size after adds - // 4: bad size after removes - int exit_code = 0; - - // Add all strings to trie, checking for duplicates - for (int i = 0; i < conf.word_count; i++) { - status = lsm_trie_insert(trie, &matrix[i], (void **)1); - - // if changed is false, *contains_dedupped[i] should be true, as changed - // can only be false if the string is already contained in the trie. if - // changed is true, *contains_dedupped[i] should be false, as the string - // cannot be in the trie yet. - if (status == lsm_error_ok && *contains_dedupped[i]) { - exit_code = 1; - goto END; - } - - if (!*contains_dedupped[i]) { - *contains_dedupped[i] = true; - size++; - } - } - - // Ensure size is correct - if (lsm_trie_size(trie) != size) { - printf("%lu %lu\n", lsm_trie_size(trie), size); - exit_code = 3; - goto END; - } - - // Remove all strings again, again taking duplicates into consideration - /* for (int i = 0; i < conf.word_count; i++) { */ - /* changed = remove_func(ct, matrix[i]); */ - - /* // The string shouldn't be in the trie, yet another add operation */ - /* // says it added it as well */ - /* if (changed != *contains_dedupped[i]) { */ - /* exit_code = 2; */ - /* goto END; */ - /* } */ - - /* if (*contains_dedupped[i]) { */ - /* *contains_dedupped[i] = false; */ - /* size--; */ - /* } */ - /* } */ - - // Finally, check that the trie is completely empty - /* if (size_func(ct) != 0) { */ - /* exit_code = 4; */ - /* } */ - -END: - /* trie_free(ct); */ - - // Even testing functions should properly free memory - free(contains); - free(contains_dedupped); - - for (int i = 0; i < conf.word_count; i++) { - lsm_str_zero(&matrix[i]); - } - - free(matrix); - - return exit_code; -} - -/** - * Same as fuzzy_test_trie_seed, except that the seed is randomly generated. - * - * @param count how many strings to test with - * @param len maximum length of each string - * @param init_func function to creat a new trie of the wanted type - * @param free_func function to free the given trie - * @param add_func function to add a string to the given trie - * @param remove_func function to remove a string from the given trie - * @param size_func function to get the size of the given trie - * @return the generated seed if the test wasn't successful, -1 otherwise. - */ -/* int fuzzy_test_trie(int count, int len, void* (*init_func) (), void (*free_func) (void*), bool (*add_func) (void*, char*), bool (*remove_func) (void*, char*), int (*size_func) (void*)) { */ -/* int seed = rand(); */ -/* bool succeeded = fuzzy_test_trie_seed(seed, count, len, init_func, free_func, add_func, remove_func, size_func); */ - -/* if (!succeeded) { */ -/* return seed; */ -/* } */ - -/* return -1; */ -/* } */ - -#endif diff --git a/lsm/test/trie/trie.c b/lsm/test/trie/trie.c index db3e6ee..f3bf73b 100644 --- a/lsm/test/trie/trie.c +++ b/lsm/test/trie/trie.c @@ -1,5 +1,5 @@ -#include "test.h" #include "lsm.h" +#include "test.h" #include "lsm/trie_internal.h" #define TRIE_INIT() \ diff --git a/lsm/test/trie/trie_fuzzy.c b/lsm/test/trie/trie_fuzzy.c deleted file mode 100644 index 9462c27..0000000 --- a/lsm/test/trie/trie_fuzzy.c +++ /dev/null @@ -1,35 +0,0 @@ -#include "test.h" -#include "lsm.h" -#include "lsm/trie_internal.h" -#include "fuzzy.h" - -void test_fuzzy() { - // Randomize seed - srand(time(NULL)); - - FuzzyConfig config; - int counter = 0; - int res; - - for (int len = 1; len < 25; len += 5) { - for (int count = 10; count <= 500; count += 10) { - for (int i = 0; i < 10; i++) { - counter++; - - config.seed = rand(); - config.word_length = len; - config.word_count = count; - - res = fuzzy_test_trie_seed(config); - TEST_CHECK_(res == 0, - "Failed config, seed = %i, len = %i, count = %i, code = %i", config.seed, config.word_length, config.word_count, res); - } - } - } - TEST_MSG("fuzzy tests done = %i", counter); -} - -TEST_LIST = { - { "trie fuzzy", test_fuzzy }, - { NULL, NULL} -};