feat: add some more tests; some optimisations

trie-skips
Jef Roosens 2022-11-29 20:59:28 +01:00
parent e1e3d7cb46
commit f9a5fc14e5
Signed by: Jef Roosens
GPG Key ID: B75D4F293C7052DB
7 changed files with 186 additions and 24 deletions

View File

@ -84,6 +84,8 @@ Entry *trie_search(Trie *trie, const char *key);
*/ */
bool trie_add(Trie *trie, const char *key, Entry *entry); bool trie_add(Trie *trie, const char *key, Entry *entry);
bool trie_add_no_lock(Trie *trie, const char *key, Entry *entry);
/** /**
* Add an entry by generating a random string as the key. * Add an entry by generating a random string as the key.
* *

View File

@ -102,7 +102,7 @@ int main() {
exit(1); exit(1);
} else { } else {
std::cout << "Added " << count << " entries to trie." << std::endl; std::cout << "Added " << count << " (" << trie_size(trie) << ") entries to trie." << std::endl;
} }
// Create pastes directory if not present // Create pastes directory if not present

View File

@ -149,27 +149,34 @@ SearchResult trie_search_node(Trie *trie, const char *key) {
} }
i++; i++;
offset = 0;
// We iterate over each character on the edge and compare it to the string. if (memcmp((*child_ptr)->string, key + i, (*child_ptr)->string_len) != 0) {
while (offset < (*child_ptr)->string_len) {
// Our string ends in the middle of an edge, so it's definitely not in
// the trie.
if (key[i + offset] == DELIMITER) {
return out; return out;
} }
// We compare each character with the characters in the skipped i += (*child_ptr)->string_len;
// substring. If they don't match, we know the string isn't in the
// trie.
if (key[i + offset] != ((*child_ptr)->string[offset])) {
return out;
}
offset++; /* offset = 0; */
}
i += offset; /* // We iterate over each character on the edge and compare it to the string. */
/* while (offset < (*child_ptr)->string_len) { */
/* // Our string ends in the middle of an edge, so it's definitely not in */
/* // the trie. */
/* if (key[i + offset] == DELIMITER) { */
/* return out; */
/* } */
/* // We compare each character with the characters in the skipped */
/* // substring. If they don't match, we know the string isn't in the */
/* // trie. */
/* if (key[i + offset] != ((*child_ptr)->string[offset])) { */
/* return out; */
/* } */
/* offset++; */
/* } */
/* i += offset; */
if (key[i] != DELIMITER) { if (key[i] != DELIMITER) {
node_ptr = child_ptr; node_ptr = child_ptr;
@ -236,10 +243,11 @@ bool trie_add_no_lock(Trie *trie, const char *string, Entry *entry) {
while (offset < TRIE_MAX_SKIP_SIZE && while (offset < TRIE_MAX_SKIP_SIZE &&
string[i + 1 + offset] != DELIMITER) { string[i + 1 + offset] != DELIMITER) {
child_node->string[offset] = string[i + 1 + offset];
offset++; offset++;
} }
memcpy(child_node->string, string + i + 1, offset);
child_node->string_len = offset; child_node->string_len = offset;
*child_node_ptr = child_node; *child_node_ptr = child_node;

View File

@ -32,11 +32,12 @@ typedef struct tnode {
TrieInnerNode *tree; TrieInnerNode *tree;
uint8_t tree_size; uint8_t tree_size;
// Skips are at most 8 characters, and are stored in the nodes // Skips are at most TRIE_MAX_SKIP_SIZE characters, and are stored in the
// nodes
char string[TRIE_MAX_SKIP_SIZE]; char string[TRIE_MAX_SKIP_SIZE];
uint8_t string_len : 4; uint8_t string_len;
bool represents : 1; bool represents;
} TrieNode; } TrieNode;
// Required for recursively freeing tree structure // Required for recursively freeing tree structure

View File

@ -1,4 +1,6 @@
add_compile_options(-Wno-incompatible-pointer-types) add_compile_options(-Wno-incompatible-pointer-types)
add_executable(test_trie test_trie.c ../src/trie/trie.c)
add_test(NAME test_trie COMMAND test_trie)
add_executable(test_trie_fuzzy test_trie_fuzzy.c ../src/trie/trie.c) add_executable(test_trie_fuzzy test_trie_fuzzy.c ../src/trie/trie.c)
add_test(NAME test_trie_fuzzy COMMAND test_trie_fuzzy) add_test(NAME test_trie_fuzzy COMMAND test_trie_fuzzy)

149
test/test_trie.c 100644
View File

@ -0,0 +1,149 @@
#include "test.h"
#include "trie.h"
#include "fuzzy.h"
// Checks that the trie `ct` reports exactly `size` entries via trie_size(),
// followed by a TEST_MSG carrying the size that was actually observed.
//
// `size` is parenthesised so that an argument containing low-precedence
// operators (e.g. `a & b`) is compared as a whole. The two steps are wrapped
// in do { ... } while (0) so the macro expands to a single statement and stays
// correct when used as the body of an unbraced `if`/`else`.
#define TEST_SIZE(ct, size)                 \
  do {                                      \
    TEST_CHECK(trie_size(ct) == (size));    \
    TEST_MSG("Size: %zu", trie_size(ct));   \
  } while (0)
// A trie returned by trie_init() must be non-NULL and report a size of zero.
void test_init() {
Trie* ct = trie_init();
TEST_CHECK(ct != NULL);
TEST_SIZE(ct, 0);
trie_free(ct);
}
// Adds a single key and verifies that the add succeeds, that searching for the
// same key yields the very entry pointer that was stored, and that the trie
// then reports a size of one.
void test_add_one() {
Trie* ct = trie_init();
TEST_CHECK(ct != NULL);
Entry *entry = entry_new(Redirect, "");
const char* string = "this is a test";
TEST_CHECK(trie_add(ct, string, entry));
// The lookup is compared by pointer identity, not by entry contents.
TEST_CHECK(trie_search(ct, string) == entry);
TEST_SIZE(ct, 1);
trie_free(ct);
}
// Searching for a key that was never added must return NULL.
void test_search_not_present() {
Trie* ct = trie_init();
TEST_CHECK(ct != NULL);
// The stored key and the searched key share the prefix "this string " and
// only diverge afterwards. The key is stored with a NULL entry.
TEST_CHECK(trie_add(ct, "this string exists", NULL));
TEST_CHECK(trie_search(ct, "this string does not exist") == NULL);
trie_free(ct);
}
// Adds several keys, three of which are prefixes of / share prefixes with each
// other ("two", "twenty", "twentytwo"), then verifies the size, that every key
// can be found again, and that adding any of them a second time is rejected.
void test_add_more() {
Trie* ct = trie_init();
TEST_CHECK(ct != NULL);
const char* one = "one";
const char* two = "two";
const char* twenty = "twenty";
const char* twentytwo = "twentytwo";
// A single entry object is shared by all four keys.
Entry *entry = entry_new(Redirect, "");
TEST_CHECK(trie_add(ct, one, entry));
TEST_CHECK(trie_add(ct, two, entry));
TEST_CHECK(trie_add(ct, twenty, entry));
TEST_CHECK(trie_add(ct, twentytwo, entry));
TEST_SIZE(ct, 4);
// Every key must resolve to the shared entry pointer.
TEST_CHECK(trie_search(ct, one) == entry);
TEST_CHECK(trie_search(ct, two) == entry);
TEST_CHECK(trie_search(ct, twenty) == entry);
TEST_CHECK(trie_search(ct, twentytwo) == entry);
// Re-adding a key that is already present is expected to return false.
TEST_CHECK(!trie_add(ct, one, NULL));
TEST_CHECK(!trie_add(ct, two, NULL));
TEST_CHECK(!trie_add(ct, twenty, NULL));
TEST_CHECK(!trie_add(ct, twentytwo, NULL));
trie_free(ct);
}
/* void test_remove_one() { */
/* Trie* ct = trie_init(); */
/* TEST_CHECK(ct != NULL); */
/* const char* string = "this is a test"; */
/* TEST_CHECK(trie_add(ct, string, NULL)); */
/* TEST_SIZE(ct, 1); */
/* TEST_CHECK(trie_remove(ct, string)); */
/* TEST_SIZE(ct, 0); */
/* trie_free(ct); */
/* } */
/* void test_remove_more() { */
/* Trie* ct = trie_init(); */
/* TEST_CHECK(ct != NULL); */
/* const char* one = "one"; */
/* const char* two = "two"; */
/* const char* twenty = "twenty"; */
/* const char* twentytwo = "twentytwo"; */
/* TEST_CHECK(trie_add(ct, one, NULL)); */
/* TEST_CHECK(trie_add(ct, two, NULL)); */
/* TEST_CHECK(trie_add(ct, twenty, NULL)); */
/* TEST_CHECK(trie_add(ct, twentytwo, NULL)); */
/* TEST_SIZE(ct, 4); */
/* TEST_CHECK(trie_remove(ct, one)); */
/* TEST_CHECK(trie_remove(ct, two)); */
/* TEST_CHECK(trie_remove(ct, twenty)); */
/* TEST_CHECK(trie_remove(ct, twentytwo)); */
/* TEST_SIZE(ct, 0); */
/* trie_free(ct); */
/* } */
/* void test_remove_not_present() { */
/* Trie* ct = trie_init(); */
/* TEST_CHECK(ct != NULL); */
/* TEST_CHECK(trie_add(ct, "this string exists", NULL)); */
/* TEST_CHECK(!trie_remove(ct, "this string does not exist")); */
/* trie_free(ct); */
/* } */
// Regression guard: replays a fixed list of fuzzy-test configurations whose
// seeds are known to fail, so that those failures cannot silently come back.
// Each configuration is run through fuzzy_test_trie_seed() and must yield 0.
// NOTE(review): the first and last configurations are identical -- confirm
// whether the duplicate is intentional.
void test_fuzzy_set() {
  FuzzyConfig configs[] = {
      { 403318210, 5, 500},
      { 588218406, 16, 460},
      { 297512224, 21, 500},
      { 403318210, 5, 500}
  };
  const int total = sizeof(configs) / sizeof(configs[0]);

  for (const FuzzyConfig *cfg = configs; cfg != configs + total; cfg++) {
    // NULL is passed where the original call passed NULL (fifth argument).
    int code = fuzzy_test_trie_seed(*cfg, trie_init, trie_free,
                                    trie_add_no_lock, NULL, trie_size);

    TEST_CHECK_(code == 0,
                "Failed config, seed = %i, len = %i, count = %i, code=%i",
                cfg->seed, cfg->word_length, cfg->word_count, code);
  }
}
// Test registry: each row pairs a name string with the function implementing
// that test; the table ends with a { NULL, NULL } row. The removal tests stay
// listed but disabled, like their commented-out implementations above.
TEST_LIST = {
    { "trie init", test_init },
    { "trie add one", test_add_one },
    { "trie add more", test_add_more },
    { "trie search not present", test_search_not_present },
    /* { "trie remove one", test_remove_one }, */
    /* { "trie remove more", test_remove_more }, */
    /* { "trie remove not present", test_remove_not_present }, */
    { "trie fuzzy set", test_fuzzy_set },
    { NULL, NULL }
};

View File

@ -20,7 +20,7 @@ void test_fuzzy() {
config.word_count = count; config.word_count = count;
res = fuzzy_test_trie_seed(config, trie_init, res = fuzzy_test_trie_seed(config, trie_init,
trie_free, trie_add, trie_free, trie_add_no_lock,
NULL, trie_size); NULL, trie_size);
TEST_CHECK_(res == 0, TEST_CHECK_(res == 0,
"Failed config, seed = %i, len = %i, count = %i, code = %i", config.seed, config.word_length, config.word_count, res); "Failed config, seed = %i, len = %i, count = %i, code = %i", config.seed, config.word_length, config.word_count, res);