475 lines
11 KiB
C
475 lines
11 KiB
C
|
#include <pthread.h>
|
||
|
#include <stdint.h>
|
||
|
#include <stdio.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <string.h>
|
||
|
|
||
|
#include "trie.h"
|
||
|
#include "trie_node.c"
|
||
|
|
||
|
typedef struct ttrie {
|
||
|
TrieNode *root;
|
||
|
size_t size;
|
||
|
char *file_path;
|
||
|
pthread_rwlock_t lock;
|
||
|
} Trie;
|
||
|
|
||
|
/**
|
||
|
* Allocate and initialize an empty Trie
|
||
|
*
|
||
|
* @return pointer to the empty Trie
|
||
|
*/
|
||
|
Trie *trie_init() {
|
||
|
Trie *trie = calloc(1, sizeof(Trie));
|
||
|
trie->root = tnode_init();
|
||
|
pthread_rwlock_init(&trie->lock, NULL);
|
||
|
|
||
|
return trie;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* De-allocate a TernaryTree by freeing its entire underlying structure.
|
||
|
*
|
||
|
* @param trie trie to free
|
||
|
*/
|
||
|
void trie_free(Trie *trie) {
|
||
|
tnode_free(trie->root);
|
||
|
free(trie);
|
||
|
}
|
||
|
|
||
|
bool trie_add_no_lock(Trie *trie, const char *key, Entry *entry);
|
||
|
|
||
|
EntryType entry_type_from_char(char c) {
|
||
|
switch (c) {
|
||
|
case '0':
|
||
|
return Redirect;
|
||
|
case '1':
|
||
|
return Paste;
|
||
|
default:
|
||
|
return Unknown;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
 * Encode an EntryType as the single-character tag used in the data file.
 *
 * @param et entry type to encode
 * @return '0' for Redirect, '1' for Paste, and '\0' for any other value
 */
char entry_type_to_char(EntryType et) {
  if (et == Redirect) {
    return '0';
  }

  if (et == Paste) {
    return '1';
  }

  return '\0';
}
|
||
|
|
||
|
/**
 * Allocate a new Entry holding the given type and a private copy of the given
 * string.
 *
 * @param type type of the entry
 * @param string payload to copy into the entry; may be NULL, in which case
 * the entry's string is NULL as well
 * @return pointer to the heap-allocated entry, or NULL if an allocation failed
 */
Entry *entry_new(EntryType type, const char *string) {
  Entry *entry = malloc(sizeof *entry);

  if (entry == NULL) {
    return NULL;
  }

  entry->type = type;
  entry->string = NULL;

  if (string != NULL) {
    entry->string = strdup(string);

    // Don't hand out a half-built entry when the copy couldn't be made
    if (entry->string == NULL) {
      free(entry);
      return NULL;
    }
  }

  return entry;
}
|
||
|
|
||
|
/**
 * Load entries from the given file into the trie and remember the file path
 * so later additions can be appended to it.
 *
 * Every line is expected to look like "<key> <type char> <payload>". Lines
 * that don't fit this shape (no space, empty key, nothing after the type
 * character) and lines that don't fit in the read buffer are skipped.
 *
 * The path is stored before the file is opened, so it is remembered even when
 * the file can't be read.
 *
 * This function does not take the trie's lock.
 *
 * @param trie trie to populate
 * @param file_path path of the file to read
 * @return number of entries added to the trie, or -1 if the file couldn't be
 * opened or an allocation failed
 */
int trie_populate(Trie *trie, const char *file_path) {
  char *path_copy = strdup(file_path);

  if (path_copy == NULL) {
    return -1;
  }

  // Drop the path of an earlier call, if any, instead of leaking it
  free(trie->file_path);
  trie->file_path = path_copy;

  FILE *fp = fopen(file_path, "r");

  if (fp == NULL) {
    return -1;
  }

  // We read in lines of at most 8191 characters (sounds like enough)
  char buffer[8192];
  int entries = 0;

  while (fgets(buffer, sizeof buffer, fp) != NULL) {
    size_t len = strlen(buffer);

    if (len > 0 && buffer[len - 1] == '\n') {
      // Remove the newline character
      buffer[len - 1] = '\0';
    } else if (!feof(fp)) {
      // No newline and not at the end of the file: the buffer is full. If the
      // very next character ends the line, the content did fit; otherwise the
      // line is too long, so we discard the rest of it and skip it.
      int c = fgetc(fp);

      if (c != EOF && c != '\n') {
        while ((c = fgetc(fp)) != EOF && c != '\n') {
        }

        continue;
      }
    }

    // The key runs up to the first space character
    char *sep = strchr(buffer, ' ');

    // We need a non-empty key, a type character and one more character after
    // it, so that the payload at sep + 3 lies within the string
    if (sep == NULL || sep == buffer || sep[1] == '\0' || sep[2] == '\0') {
      continue;
    }

    // Split the buffer into two strings, the key and the rest
    *sep = '\0';

    // Payload starts after the type character & its surrounding spaces
    Entry *entry = entry_new(entry_type_from_char(sep[1]), sep + 3);

    if (entry == NULL) {
      fclose(fp);
      return -1;
    }

    if (trie_add_no_lock(trie, buffer, entry)) {
      entries++;
    } else {
      // The key was rejected (e.g. a duplicate), so the trie didn't take the
      // entry and we have to release it ourselves
      free(entry->string);
      free(entry);
    }
  }

  fclose(fp);

  return entries;
}
|
||
|
|
||
|
typedef struct searchresult {
|
||
|
TrieNode *parent;
|
||
|
TrieNode *child;
|
||
|
} SearchResult;
|
||
|
|
||
|
/**
 * Look up the node that represents the given key. This function does not take
 * the trie's lock; callers are responsible for locking.
 *
 * @param trie trie to look in
 * @param key DELIMITER-terminated key to look up
 * @return the node representing the key and its parent, or {NULL, NULL} if the
 * key is not present in the trie
 */
SearchResult trie_search_node(Trie *trie, const char *key) {
  SearchResult out = {NULL, NULL};

  // The loop below always consumes at least one character, so an empty key
  // has to be rejected up front
  if (key[0] == DELIMITER) {
    return out;
  }

  size_t i = 0;
  TrieNode **node_ptr = &(trie->root);
  TrieNode **child_ptr;

  do {
    child_ptr = tnode_search(*node_ptr, key[i], false);

    // No child for this character means the key isn't stored. The check on
    // *child_ptr is purely defensive.
    if (child_ptr == NULL || *child_ptr == NULL) {
      return out;
    }

    i++;

    // Compare the characters stored on the edge with the key one by one, so
    // we never read past the end of the key
    size_t edge_len = (*child_ptr)->string_len;

    for (size_t offset = 0; offset < edge_len; offset++) {
      // Either the key ends in the middle of the edge, or it diverges from
      // it; in both cases it's not in the trie
      if (key[i + offset] == DELIMITER ||
          key[i + offset] != (*child_ptr)->string[offset]) {
        return out;
      }
    }

    i += edge_len;

    if (key[i] != DELIMITER) {
      node_ptr = child_ptr;
    }
  } while (key[i] != DELIMITER);

  // We've consumed the entire key, so all that's left is checking whether the
  // node we ended up on actually represents a stored key
  if ((*child_ptr)->represents) {
    out.parent = *node_ptr;
    out.child = *child_ptr;
  }

  return out;
}
|
||
|
|
||
|
/**
|
||
|
* Returns whether the given string is present in the trie.
|
||
|
*
|
||
|
* @param trie trie to look in
|
||
|
* @param string string to look up
|
||
|
* @return true if the string is present in the trie, false otherwise
|
||
|
*/
|
||
|
Entry *trie_search(Trie *trie, const char *key) {
|
||
|
pthread_rwlock_rdlock(&trie->lock);
|
||
|
|
||
|
SearchResult res = trie_search_node(trie, key);
|
||
|
|
||
|
Entry *return_value = NULL;
|
||
|
|
||
|
if (res.child != NULL) {
|
||
|
return_value = res.child->entry;
|
||
|
}
|
||
|
|
||
|
pthread_rwlock_unlock(&trie->lock);
|
||
|
|
||
|
return return_value;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Add the given string to the Trie.
|
||
|
*
|
||
|
* @param trie trie to add string to
|
||
|
* @param string string to add
|
||
|
* @return true if the string wasn't present in the trie and thus added, false
|
||
|
* otherwise
|
||
|
*/
|
||
|
bool trie_add_no_lock(Trie *trie, const char *string, Entry *entry) {
|
||
|
size_t i = 0;
|
||
|
uint8_t offset;
|
||
|
TrieNode **node_ptr = &(trie->root);
|
||
|
TrieNode **child_node_ptr;
|
||
|
TrieNode *child_node;
|
||
|
|
||
|
do {
|
||
|
offset = 0;
|
||
|
child_node_ptr = tnode_search(*node_ptr, string[i], true);
|
||
|
|
||
|
i++;
|
||
|
|
||
|
// We've reached a NULL child, so we add the remaining part of the string
|
||
|
// here
|
||
|
if (*child_node_ptr == NULL) {
|
||
|
child_node = tnode_init();
|
||
|
|
||
|
while (offset < TRIE_MAX_SKIP_SIZE &&
|
||
|
string[i + offset] != DELIMITER) {
|
||
|
offset++;
|
||
|
}
|
||
|
|
||
|
memcpy(child_node->string, string + i, offset);
|
||
|
|
||
|
child_node->string_len = offset;
|
||
|
*child_node_ptr = child_node;
|
||
|
|
||
|
// If the remaining part of the string is still longer than the maximum
|
||
|
// allowed skip length, we continue through the loop. The next iteration
|
||
|
// will enter this if statement again, and perform the same loop, until
|
||
|
// the string is fully added to the trie.
|
||
|
if (string[i + offset] != DELIMITER) {
|
||
|
node_ptr = child_node_ptr;
|
||
|
i += offset;
|
||
|
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
child_node->represents = true;
|
||
|
child_node->entry = entry;
|
||
|
|
||
|
trie->size++;
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
while (offset < (*child_node_ptr)->string_len) {
|
||
|
// String no longer aligns with edge, so we have to split
|
||
|
if (string[i + offset] != (*child_node_ptr)->string[offset]) {
|
||
|
TrieNode *split_node = tnode_init();
|
||
|
child_node = *child_node_ptr;
|
||
|
|
||
|
// New string of the split node is the prefix that we were able
|
||
|
// to skip
|
||
|
if (offset > 0) {
|
||
|
memcpy(split_node->string, child_node->string, offset);
|
||
|
split_node->string_len = offset;
|
||
|
}
|
||
|
|
||
|
// split_node replaces child_node as the child of node
|
||
|
*child_node_ptr = split_node;
|
||
|
TrieNode **new_node_ptr =
|
||
|
tnode_search(split_node, child_node->string[offset], true);
|
||
|
*new_node_ptr = child_node;
|
||
|
|
||
|
// child_node has now become a child of split_node, so we update its
|
||
|
// string accordingely by removing the skipped prefix + the one
|
||
|
// character that's already stored by being a child of split_node
|
||
|
/* char *old_string = child_node->string.ptr; */
|
||
|
uint8_t new_skip_len = child_node->string_len - (offset + 1);
|
||
|
|
||
|
if (new_skip_len > 0) {
|
||
|
char old_string[TRIE_MAX_SKIP_SIZE];
|
||
|
memcpy(old_string, child_node->string + offset + 1, new_skip_len);
|
||
|
memcpy(child_node->string, old_string, new_skip_len);
|
||
|
}
|
||
|
|
||
|
child_node->string_len = new_skip_len;
|
||
|
|
||
|
// The while loop will exit either way after this has happened, as
|
||
|
// child_node is now split_node and split_node's len is already set to
|
||
|
// offset.
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
offset++;
|
||
|
}
|
||
|
|
||
|
node_ptr = child_node_ptr;
|
||
|
|
||
|
i += offset;
|
||
|
} while (string[i] != DELIMITER);
|
||
|
|
||
|
if ((*child_node_ptr)->represents) {
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
(*child_node_ptr)->represents = true;
|
||
|
trie->size++;
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
/**
 * Add the given key to the trie and, if the trie has a backing file, append
 * the entry to that file first as a "<key> <type char> <payload>" line.
 *
 * This function does not take the trie's lock; callers are expected to already
 * hold the write lock.
 *
 * @param trie trie to add the key to
 * @param key key to add
 * @param entry entry to store for the key; a NULL entry string is written to
 * the file as an empty payload
 * @return true if the key was added, false if it was already present or the
 * entry couldn't be written to the backing file
 */
bool trie_add_persistent(Trie *trie, const char *key, Entry *entry) {
  if (trie->file_path != NULL) {
    // Easiest way to make sure we don't add duplicate entries
    // We use an internal function that doesn't require a read lock, as we're
    // already inside a write lock
    if (trie_search_node(trie, key).child != NULL) {
      return false;
    }

    FILE *fp = fopen(trie->file_path, "a");

    if (fp == NULL) {
      return false;
    }

    // Passing a NULL pointer for %s is undefined, so substitute an empty
    // payload
    const char *payload = entry->string != NULL ? entry->string : "";

    int written = fprintf(fp, "%s %c %s\n", key,
                          entry_type_to_char(entry->type), payload);

    // Buffered write errors may only surface when the stream is closed
    int closed = fclose(fp);

    if (written < 0 || closed != 0) {
      return false;
    }
  }

  // This function *should* always return true when a backing file is set.
  // Otherwise, the function would've exited because the key was found in the
  // trie.
  return trie_add_no_lock(trie, key, entry);
}
|
||
|
|
||
|
/**
 * Add the given key and entry to the trie while holding the trie's write lock.
 * The actual work is delegated to trie_add_persistent.
 *
 * @param trie trie to add the key to
 * @param key key to add
 * @param entry entry to store for the key
 * @return whatever trie_add_persistent returned for this key
 */
bool trie_add(Trie *trie, const char *key, Entry *entry) {
  bool added;

  pthread_rwlock_wrlock(&trie->lock);
  added = trie_add_persistent(trie, key, entry);
  pthread_rwlock_unlock(&trie->lock);

  return added;
}
|
||
|
|
||
|
/**
 * Store the given entry under a randomly generated key that isn't in the trie
 * yet. Key generation and insertion happen while holding the trie's write
 * lock.
 *
 * @param trie trie to add the entry to
 * @param entry entry to store
 * @param secure if true, the key is RANDOM_KEY_LENGTH_LONG characters long,
 * otherwise RANDOM_KEY_LENGTH_SHORT
 * @return the generated key as a heap-allocated, NUL-terminated string that
 * the caller must free, or NULL if the key couldn't be allocated or added
 */
char *trie_add_random(Trie *trie, Entry *entry, bool secure) {
  int key_length = secure ? RANDOM_KEY_LENGTH_LONG : RANDOM_KEY_LENGTH_SHORT;
  char *key = malloc((size_t)key_length + 1);

  if (key == NULL) {
    return NULL;
  }

  key[key_length] = '\0';

  pthread_rwlock_wrlock(&trie->lock);

  // We naively generate new keys until we find a key that isn't in the trie
  // yet. With charset_len ** RANDOM_KEY_LENGTH sufficiently large, this isn't a
  // problem, because the chances of collisions are extremely small.
  do {
    for (int i = 0; i < key_length; i++) {
      key[i] = charset[rand() % charset_len];
    }
  } while (trie_search_node(trie, key).child != NULL);

  if (!trie_add_persistent(trie, key, entry)) {
    free(key);
    key = NULL;
  }

  pthread_rwlock_unlock(&trie->lock);

  return key;
}
|
||
|
|
||
|
/**
|
||
|
* Remove the given string from a Trie.
|
||
|
*
|
||
|
* @param trie trie to remove string from
|
||
|
* @param string string to remove
|
||
|
* @return true if the string was in the trie and thus removed, false otherwise
|
||
|
*/
|
||
|
/* bool trie_remove(Trie *trie, const char *string) { */
|
||
|
/* pthread_rwlock_wrlock(&trie->lock); */
|
||
|
|
||
|
/* bool return_value = false; */
|
||
|
|
||
|
/* SearchResult res = trie_search_node(trie, string); */
|
||
|
|
||
|
/* if (res.child == NULL) { */
|
||
|
/* goto end; */
|
||
|
/* } */
|
||
|
|
||
|
/* trie->size--; */
|
||
|
/* return_value = true; */
|
||
|
|
||
|
/* if (res.parent != NULL) { */
|
||
|
/* // We're removing a full leaf, so we calculate the offset of the
|
||
|
* character */
|
||
|
/* // to remove from the parent */
|
||
|
/* if (res.child->type == 2) { */
|
||
|
/* size_t str_len = strlen(string); */
|
||
|
/* size_t suffix_len = strlen(res.child->ptr.string); */
|
||
|
|
||
|
/* tnode_remove(res.parent, string[str_len - suffix_len - 1]); */
|
||
|
/* } */
|
||
|
/* // In the other case, the character to remove from the parent is the last
|
||
|
*/
|
||
|
/* // character of the string */
|
||
|
/* else if (res.child->size == 0) { */
|
||
|
/* size_t i = 0; */
|
||
|
|
||
|
/* while (string[i + 1] != DELIMITER) { */
|
||
|
/* i++; */
|
||
|
/* } */
|
||
|
|
||
|
/* tnode_remove(res.parent, string[i]); */
|
||
|
/* } else { */
|
||
|
/* res.child->type = 0; */
|
||
|
|
||
|
/* goto end; */
|
||
|
/* } */
|
||
|
|
||
|
/* tnode_free(res.child); */
|
||
|
/* } */
|
||
|
/* // We're in the root here */
|
||
|
/* else { */
|
||
|
/* res.child->type = 0; */
|
||
|
/* } */
|
||
|
|
||
|
/* end: */
|
||
|
/* pthread_rwlock_unlock(&trie->lock); */
|
||
|
|
||
|
/* return return_value; */
|
||
|
/* } */
|
||
|
|
||
|
/**
|
||
|
* Return the current size of the given trie.
|
||
|
*
|
||
|
* @param trie trie to return size for
|
||
|
* @return size of the trie
|
||
|
*/
|
||
|
size_t trie_size(Trie *trie) { return trie->size; }
|