lander/tries/src/ternarytrie.c

248 lines
5.7 KiB
C
Raw Normal View History

2022-11-15 16:21:27 +01:00
#include "ternarytrie.h"
#include "ternarytrie_node.c"
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
/**
 * A ternary trie: a handle holding the root node together with the number of
 * strings currently stored.
 */
typedef struct ttrie {
// Root node of the trie; assigned from ttnode_init() in ternarytrie_init().
TernaryTrieNode *root;
// Number of stored strings; incremented on every successful add and
// decremented on every successful remove.
size_t size;
} TernaryTrie;
/**
* Allocate and initialize an empty TernaryTrie
*
* @return pointer to the empty TernaryTrie
*/
2022-11-15 17:05:14 +01:00
TernaryTrie *ternarytrie_init() {
2022-11-15 16:21:27 +01:00
TernaryTrie *node = calloc(1, sizeof(TernaryTrie));
node->root = ttnode_init();
return node;
}
/**
* De-allocate a TernaryTree by freeing its entire underlying structure.
*
* @param trie trie to free
*/
2022-11-15 17:05:14 +01:00
void ternarytrie_free(TernaryTrie *trie) {
2022-11-15 16:21:27 +01:00
ttnode_free(trie->root);
free(trie);
}
/**
 * Result of looking up a string with ternarytrie_search_node().
 *
 * Both fields are NULL when the string was not found.
 */
typedef struct searchresult {
// Node in which the last lookup step that led to `child` was performed. Stays
// NULL when the match is the root itself (empty string).
TernaryTrieNode *parent;
// Node representing the string that was searched for, or NULL if absent.
TernaryTrieNode *child;
} SearchResult;
/**
 * Locate the node that represents the given string, together with the node
 * in which the final lookup step was performed.
 *
 * Node types as used here: 1 marks a node at which a stored string ends, and
 * 2 marks a "full leaf" that stores the remaining suffix in ptr.string.
 *
 * @param trie trie to look in
 * @param string string to look up
 * @return a SearchResult whose child is the matching node (NULL if the string
 * is absent) and whose parent is the node the child was found in (NULL for
 * the empty string, which is represented by the root itself)
 */
SearchResult ternarytrie_search_node(TernaryTrie *trie, const char *string) {
  SearchResult result = {NULL, NULL};

  // The empty string lives in the root and has no parent.
  if (string[0] == DELIMITER) {
    if (trie->root->type == 1) {
      result.child = trie->root;
    }
    return result;
  }

  const char *cursor = string;
  TernaryTrieNode **parent_slot = &(trie->root);
  TernaryTrieNode **child_slot = NULL;

  for (;;) {
    child_slot = ttnode_search(*parent_slot, *cursor, false);

    // Either the character is not present in this node, or its entry has no
    // node attached: the string cannot be in the trie.
    if (child_slot == NULL || *child_slot == NULL) {
      return result;
    }

    cursor++;

    // Stop once the input is exhausted or a full leaf is reached; a full leaf
    // cannot be descended into and is compared by suffix below.
    if (*cursor == DELIMITER || (*child_slot)->type == 2) {
      break;
    }

    parent_slot = child_slot;
  }

  TernaryTrieNode *candidate = *child_slot;
  bool matched;

  if (candidate->type == 2) {
    // A full leaf only matches when the unread part of the input is non-empty
    // and equals the suffix stored in the leaf.
    matched = *cursor != DELIMITER && strcmp(cursor, candidate->ptr.string) == 0;
  } else {
    // The whole input was consumed; it is present only if a string ends here.
    matched = candidate->type == 1;
  }

  if (matched) {
    result.child = candidate;
    result.parent = *parent_slot;
  }

  return result;
}
/**
* Returns whether the given string is present in the trie.
*
* @param trie trie to look in
* @param string string to look up
* @return true if the string is present in the trie, false otherwise
*/
2022-11-15 17:05:14 +01:00
bool ternarytrie_search(TernaryTrie *trie, const char *string) {
2022-11-15 16:21:27 +01:00
SearchResult res = ternarytrie_search_node(trie, string);
return res.child != NULL;
}
/**
 * Insert a string into the TernaryTrie.
 *
 * Node types as used here: 0 is a node at which no string ends, 1 is a node
 * at which a stored string ends, and 2 is a "full leaf" that stores the
 * remaining suffix of a single string in ptr.string.
 *
 * @param trie trie to add string to
 * @param string string to add
 * @return true if the string wasn't present in the trie and thus added, false
 * otherwise
 */
bool ternarytrie_add(TernaryTrie *trie, const char *string) {
  // The empty string is represented by the root node itself.
  if (string[0] == DELIMITER) {
    if (trie->root->type != 0) {
      return false;
    }
    trie->root->type = 1;
    trie->size++;
    return true;
  }

  TernaryTrieNode **slot = &(trie->root);
  const char *rest = string;

  while (*rest != DELIMITER) {
    TernaryTrieNode **next_slot = ttnode_search(*slot, *rest, true);

    // With create enabled, a NULL result means the current node is a full
    // leaf and cannot hold child entries.
    if (next_slot == NULL) {
      // The leaf may already store exactly the remainder of this string.
      if (strcmp(rest, (*slot)->ptr.string) == 0) {
        return false;
      }
      // Otherwise split the leaf and retry the same character on it.
      ttnode_split(*slot);
      continue;
    }

    // The character now has an entry in the node; advance past it.
    slot = next_slot;
    rest++;

    if (*slot != NULL) {
      // A node already exists along this path; keep descending.
      continue;
    }

    // Nothing lies beyond this character yet: attach a new node that either
    // stores the leftover suffix or marks the end of the string.
    TernaryTrieNode *fresh = ttnode_init();
    if (*rest != DELIMITER) {
      ttnode_set_string(fresh, rest);
    } else {
      fresh->type = 1;
    }
    *slot = fresh;
    trie->size++;
    return true;
  }

  // The whole string was consumed and ended on a node that already exists.
  if ((*slot)->type == 1) {
    // A string already ends here, so it is already in the trie.
    return false;
  }
  if ((*slot)->type == 2) {
    // The node is a full leaf for a longer string; split it so that this node
    // can mark the end of the new string.
    ttnode_split(*slot);
  }

  (*slot)->type = 1;
  trie->size++;
  return true;
}
/**
 * Delete a string from a TernaryTrie.
 *
 * The string is first located with ternarytrie_search_node(). Depending on
 * the node found, the node is either only unmarked (type reset to 0) or
 * detached from its parent and freed.
 *
 * @param trie trie to remove string from
 * @param string string to remove
 * @return true if the string was in the trie and thus removed, false otherwise
 */
bool ternarytrie_remove(TernaryTrie *trie, const char *string) {
  SearchResult found = ternarytrie_search_node(trie, string);
  TernaryTrieNode *target = found.child;
  TernaryTrieNode *owner = found.parent;

  if (target == NULL) {
    return false;
  }

  trie->size--;

  // No parent means the match is the root (empty string): just unmark it.
  if (owner == NULL) {
    target->type = 0;
    return true;
  }

  // Character under which the target is registered in its parent.
  char key;

  if (target->type == 2) {
    // Full leaf: the key is the character immediately preceding the suffix
    // stored in the leaf.
    size_t total_len = strlen(string);
    size_t tail_len = strlen(target->ptr.string);
    key = string[total_len - tail_len - 1];
  } else if (target->size == 0) {
    // Node whose size field is 0: the key is the last character of the string.
    const char *last = string;
    while (last[1] != DELIMITER) {
      last++;
    }
    key = *last;
  } else {
    // The node's size field is non-zero, so it is kept and only stops
    // marking the end of a string.
    target->type = 0;
    return true;
  }

  // Detach the node from its parent first, then release it.
  ttnode_remove(owner, key);
  ttnode_free(target);
  return true;
}
/**
* Return the current size of the given trie.
*
* @param trie trie to return size for
* @return size of the trie
*/
2022-11-15 17:05:14 +01:00
size_t ternarytrie_size(TernaryTrie *trie) { return trie->size; }