lander/trie/src/trie/trie.c

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "trie.h"
#include "trie_entry.h"
#include "trie_node.h"

// A trie plus the bookkeeping data needed to persist and synchronize it.
typedef struct ttrie {
  // Root node of the trie; created with tnode_init() in trie_init
  TrieNode *root;
  // Number of keys stored; incremented on every successful add
  size_t size;
  // strdup'd copy of the backing file's path, or NULL if the trie has no
  // backing file
  char *file_path;
  // Read-write lock exposed through trie_rlock/trie_wlock/trie_unlock
  pthread_rwlock_t lock;
} Trie;

// Forward declaration: trie_init calls this before its definition further
// down in this file.
TrieExitCode trie_add_no_lock(Trie *trie, const char *key, Entry *entry);

/**
 * Allocate and initialize a Trie, optionally populating it from a file.
 *
 * When file_path is NULL an empty trie without a backing file is created.
 * Otherwise a copy of the path is stored in the trie and every line of the
 * file is parsed as "<key> <type char> <payload>\n" and added to the trie.
 * Blank lines are skipped.
 *
 * @param trie_ptr where to store the pointer to the new trie; only written to
 * when Ok is returned
 * @param file_path path of the backing file, or NULL for no backing file
 * @return Ok on success; FileError if the file can't be opened or read, or if
 * it contains a malformed or overlong line; otherwise the status returned by
 * trie_add_no_lock for the entry that couldn't be added
 */
TrieExitCode trie_init(Trie **trie_ptr, const char *file_path) {
  // Allocate & initialize trie
  // NOTE(review): failures of calloc, tnode_init and strdup are not handled;
  // no exit code for allocation failure is visible from this file.
  Trie *trie = calloc(1, sizeof(Trie));
  trie->root = tnode_init();
  pthread_rwlock_init(&trie->lock, NULL);

  if (file_path == NULL) {
    trie->file_path = NULL;
    *trie_ptr = trie;
    return Ok;
  }

  trie->file_path = strdup(file_path);

  // Populate trie with data from file
  FILE *fp = fopen(file_path, "r");

  if (fp == NULL) {
    // Don't leak the half-built trie when the file can't be opened
    trie_free(trie);
    return FileError;
  }

  // We read in lines of at most 8191 characters (sounds like enough)
  char buffer[8192];
  TrieExitCode status = Ok;

  while (fgets(buffer, sizeof(buffer), fp) != NULL) {
    size_t line_len = strcspn(buffer, "\n");

    // fgets only omits the newline on the final line of the file or when the
    // line didn't fit in the buffer. The latter can't be parsed reliably.
    if (buffer[line_len] != '\n' && !feof(fp)) {
      status = FileError;
      break;
    }

    // Remove the newline character (no-op if there wasn't one)
    buffer[line_len] = '\0';

    if (line_len == 0) {
      continue;
    }

    // Split the buffer into two strings, the key and the payload. The key
    // ends at the first space, followed by the type character, another space
    // and finally the payload (which may be empty).
    char *sep = strchr(buffer, ' ');

    if (sep == NULL || sep[1] == '\0' || sep[2] != ' ') {
      status = FileError;
      break;
    }

    *sep = '\0';

    EntryType type = entry_type_from_char(sep[1]);

    // Skip type character & its surrounding spaces
    Entry *entry = entry_new(type, sep + 3);

    // NOTE(review): if the add fails, entry was not stored in the trie and is
    // presumably leaked here; confirm how entries are meant to be released.
    status = trie_add_no_lock(trie, buffer, entry);

    if (status != Ok) {
      break;
    }
  }

  if (status == Ok && ferror(fp)) {
    status = FileError;
  }

  // Close the file on every path, including the error ones
  fclose(fp);

  if (status != Ok) {
    trie_free(trie);
    return status;
  }

  *trie_ptr = trie;

  return Ok;
}

/**
 * De-allocate a Trie by freeing its entire underlying structure.
 *
 * Releases the node tree (through tnode_free), the trie's copy of the backing
 * file path, the read-write lock and finally the Trie struct itself. Passing
 * NULL is a no-op. The lock must not be held by anyone when this is called.
 *
 * @param trie trie to free
 */
void trie_free(Trie *trie) {
  if (trie == NULL) {
    return;
  }

  tnode_free(trie->root);

  // file_path is a strdup'd copy owned by the trie; it's NULL when there's no
  // backing file, which free() accepts
  free(trie->file_path);

  pthread_rwlock_destroy(&trie->lock);
  free(trie);
}

// Result of a node lookup. Both fields are NULL when the key wasn't found.
typedef struct searchresult {
  // Node whose children contain `child`
  TrieNode *parent;
  // Node that represents the searched key
  TrieNode *child;
} SearchResult;

/**
 * Look up the node representing the given key.
 *
 * Only the first key_len bytes of key are read, so the key doesn't have to be
 * NUL-terminated. No locking is performed.
 *
 * @param trie trie to look in
 * @param key key to look up
 * @param key_len length of the key in bytes
 * @return the node representing the key and its parent, or {NULL, NULL} if the
 * key isn't present in the trie
 */
SearchResult trie_search_node_len(Trie *trie, const char *key, size_t key_len) {
  SearchResult out = {NULL, NULL};

  // An empty key is never reported as present, so don't even read key[0]
  if (key_len == 0) {
    return out;
  }

  size_t i = 0;
  TrieNode **node_ptr = &(trie->root);
  TrieNode **child_ptr;

  do {
    child_ptr = tnode_search(*node_ptr, key[i], false);

    // No child stored for this character, so the key can't be in the trie.
    // NOTE(review): a non-NULL child_ptr is assumed to point at a non-NULL
    // node - confirm against tnode_search.
    if (child_ptr == NULL) {
      return out;
    }

    i++;

    // The edge's skipped string is longer than what's left of the key, so it
    // can't match. Checking this first keeps memcmp from reading past the end
    // of the key.
    if ((*child_ptr)->string_len > key_len - i) {
      return out;
    }

    if (memcmp((*child_ptr)->string, key + i, (*child_ptr)->string_len) != 0) {
      return out;
    }

    i += (*child_ptr)->string_len;

    // Only descend if there's more key left; this way node_ptr ends up being
    // the parent of the final child
    if (i < key_len) {
      node_ptr = child_ptr;
    }
  } while (i < key_len);

  // The length check above guarantees i == key_len here, so all that's left is
  // to check whether the node we arrived at actually represents a string.
  if ((*child_ptr)->represents) {
    out.parent = *node_ptr;
    out.child = *child_ptr;
  }

  return out;
}

// Node lookup for a NUL-terminated key: measures the key with strlen and
// delegates to trie_search_node_len.
SearchResult trie_search_node(Trie *trie, const char *key) {
  size_t key_len = strlen(key);

  return trie_search_node_len(trie, key, key_len);
}

/**
 * Look up the entry stored for the given key.
 *
 * @param trie trie to look in
 * @param entry_ptr where to store the pointer to the found entry; left
 * untouched if the key isn't found
 * @param key key to look up
 * @param key_len length of the key in bytes
 * @return Ok if the key is present in the trie, NotFound otherwise
 */
TrieExitCode trie_search_len(Trie *trie, Entry **entry_ptr, const char *key,
                             size_t key_len) {
  TrieNode *match = trie_search_node_len(trie, key, key_len).child;

  if (match != NULL) {
    *entry_ptr = match->entry;

    return Ok;
  }

  return NotFound;
}

// Entry lookup for a NUL-terminated key: measures the key with strlen and
// delegates to trie_search_len.
TrieExitCode trie_search(Trie *trie, Entry **entry_ptr, const char *key) {
  size_t key_len = strlen(key);

  return trie_search_len(trie, entry_ptr, key, key_len);
}

/**
 * Add the given key with its entry to the in-memory Trie.
 *
 * This function performs no locking and doesn't touch the backing file. Only
 * the first key_len bytes of key are compared against existing edges, so the
 * key doesn't have to be NUL-terminated. The entry pointer is stored in the
 * trie as-is; it is not copied.
 *
 * NOTE(review): with key_len == 0 the loop still runs once and reads key[0];
 * the resulting node can't be found again by trie_search_node_len.
 *
 * @param trie trie to add the key to
 * @param key key to add
 * @param key_len length of the key in bytes
 * @param entry entry to associate with the key
 * @return Ok if the key wasn't present in the trie and thus added,
 * AlreadyPresent otherwise
 */
TrieExitCode trie_add_len_no_lock(Trie *trie, const char *key, size_t key_len,
                                  Entry *entry) {
  size_t i = 0;
  uint8_t offset;
  TrieNode **node_ptr = &(trie->root);
  TrieNode **child_node_ptr;
  TrieNode *child_node;

  do {
    offset = 0;
    child_node_ptr = tnode_search(*node_ptr, key[i], true);

    i++;

    // We've reached a NULL child, so we add the remaining part of the string
    // here
    if (*child_node_ptr == NULL) {
      child_node = tnode_init();

      // offset becomes min(TRIE_MAX_SKIP_SIZE, remaining key length)
      while (offset < TRIE_MAX_SKIP_SIZE && i + offset < key_len) {
        offset++;
      }

      memcpy(child_node->string, key + i, offset);

      child_node->string_len = offset;
      *child_node_ptr = child_node;

      // If the remaining part of the string is still longer than the maximum
      // allowed skip length, we continue through the loop. The next iteration
      // will enter this if statement again, and perform the same loop, until
      // the string is fully added to the trie.
      if (i + offset < key_len) {
        node_ptr = child_node_ptr;
        i += offset;

        continue;
      }

      child_node->represents = true;
      child_node->entry = entry;

      trie->size++;
      return Ok;
    }

    while (offset < (*child_node_ptr)->string_len) {
      // String no longer aligns with edge, so we have to split. This is also
      // the case when the key ends in the middle of the edge; checking the
      // length first avoids reading past the end of the key.
      if (i + offset >= key_len ||
          key[i + offset] != (*child_node_ptr)->string[offset]) {
        TrieNode *split_node = tnode_init();
        child_node = *child_node_ptr;

        // New string of the split node is the prefix that we were able
        // to skip
        if (offset > 0) {
          memcpy(split_node->string, child_node->string, offset);
          split_node->string_len = offset;
        }

        // split_node replaces child_node as the child of node
        *child_node_ptr = split_node;
        TrieNode **new_node_ptr =
            tnode_search(split_node, child_node->string[offset], true);
        *new_node_ptr = child_node;

        // child_node has now become a child of split_node, so we update its
        // string accordingly by removing the skipped prefix + the one
        // character that's already stored by being a child of split_node
        uint8_t new_skip_len = child_node->string_len - (offset + 1);

        if (new_skip_len > 0) {
          // Source and destination overlap, so go through a temporary buffer
          char old_string[TRIE_MAX_SKIP_SIZE];
          memcpy(old_string, child_node->string + offset + 1, new_skip_len);
          memcpy(child_node->string, old_string, new_skip_len);
        }

        child_node->string_len = new_skip_len;

        // The while loop will exit either way after this has happened, as
        // child_node is now split_node and split_node's len is already set to
        // offset.
        break;
      }

      offset++;
    }

    node_ptr = child_node_ptr;

    i += offset;
  } while (i < key_len);

  // The key ends exactly at an existing (or freshly split) node
  if ((*child_node_ptr)->represents) {
    return AlreadyPresent;
  }

  (*child_node_ptr)->represents = true;
  (*child_node_ptr)->entry = entry;
  trie->size++;
  return Ok;
}

// In-memory add for a NUL-terminated key: measures the key with strlen and
// delegates to trie_add_len_no_lock.
TrieExitCode trie_add_no_lock(Trie *trie, const char *key, Entry *entry) {
  size_t key_len = strlen(key);

  return trie_add_len_no_lock(trie, key, key_len, entry);
}

/**
 * Add the given key with its entry to the Trie and, if the trie has a backing
 * file, append a "<key> <type char> <payload>\n" line to that file.
 *
 * No locking is performed here.
 * NOTE(review): callers are presumably expected to hold the write lock
 * (trie_wlock) - confirm against the call sites.
 * NOTE(review): a key or payload containing a space or newline can't be
 * parsed back by trie_init; nothing here rejects them.
 *
 * @param trie trie to add the key to
 * @param key key to add
 * @param key_len length of the key in bytes
 * @param entry entry to associate with the key
 * @return Ok if the key was added, AlreadyPresent if it was already in the
 * trie, FileError if the backing file couldn't be opened or written
 */
TrieExitCode trie_add_len(Trie *trie, const char *key, size_t key_len,
                          Entry *entry) {
  if (trie->file_path != NULL) {
    // Easiest way to make sure we don't add duplicate entries to the file
    if (trie_search_node_len(trie, key, key_len).child != NULL) {
      return AlreadyPresent;
    }

    FILE *fp = fopen(trie->file_path, "a");

    if (fp == NULL) {
      return FileError;
    }

    // Write exactly key_len bytes of the key: it isn't necessarily
    // NUL-terminated at that position
    bool write_failed = fwrite(key, 1, key_len, fp) != key_len ||
                        fputc(' ', fp) == EOF ||
                        fputc(entry_type_to_char(entry->type), fp) == EOF ||
                        fputc(' ', fp) == EOF ||
                        fputs(entry->string, fp) == EOF ||
                        fputc('\n', fp) == EOF;

    // fclose flushes the buffered data, so its result matters as well
    if (fclose(fp) != 0) {
      write_failed = true;
    }

    // Don't let the in-memory trie diverge from the file
    if (write_failed) {
      return FileError;
    }
  }

  // This function *should* always return Ok if there's a backing file.
  // Otherwise, the function would've exited because the string was found in
  // the trie.
  return trie_add_len_no_lock(trie, key, key_len, entry);
}

// Add for a NUL-terminated key: measures the key with strlen and delegates to
// trie_add_len.
TrieExitCode trie_add(Trie *trie, const char *key, Entry *entry) {
  size_t key_len = strlen(key);

  return trie_add_len(trie, key, key_len, entry);
}

/**
 * Add the given entry to the Trie under a freshly generated random key.
 *
 * The key consists of characters picked from charset using rand() and is
 * RANDOM_KEY_LENGTH_LONG characters long if secure is set,
 * RANDOM_KEY_LENGTH_SHORT otherwise.
 *
 * NOTE(review): the result of malloc is not checked.
 *
 * @param trie trie to add the entry to
 * @param key_ptr where to store the heap-allocated, NUL-terminated key; only
 * written to when Ok is returned, after which the caller owns the key
 * @param entry entry to associate with the generated key
 * @param secure whether to generate a long key instead of a short one
 * @return the status returned by trie_add for the generated key
 */
TrieExitCode trie_add_random(Trie *trie, char **key_ptr, Entry *entry,
                             bool secure) {
  int len = secure ? RANDOM_KEY_LENGTH_LONG : RANDOM_KEY_LENGTH_SHORT;
  char *candidate = malloc(len + 1);
  candidate[len] = '\0';

  // Keep drawing keys until we hit one that isn't in the trie yet. As long as
  // charset_len ** len is sufficiently large, collisions are extremely
  // unlikely, so this naive approach rarely needs a second round.
  do {
    for (int pos = 0; pos < len; pos++) {
      candidate[pos] = charset[rand() % charset_len];
    }
  } while (trie_search_node(trie, candidate).child != NULL);

  TrieExitCode status = trie_add(trie, candidate, entry);

  if (status != Ok) {
    free(candidate);

    return status;
  }

  *key_ptr = candidate;

  return status;
}

/**
 * Remove the given string from a Trie.
 *
 * @param trie trie to remove string from
 * @param string string to remove
 * @return true if the string was in the trie and thus removed, false otherwise
 */
/* bool trie_remove(Trie *trie, const char *string) { */
/*   pthread_rwlock_wrlock(&trie->lock); */

/*   bool return_value = false; */

/*   SearchResult res = trie_search_node(trie, string); */

/*   if (res.child == NULL) { */
/*     goto end; */
/*   } */

/*   trie->size--; */
/*   return_value = true; */

/*   if (res.parent != NULL) { */
/*     // We're removing a full leaf, so we calculate the offset of the
 * character */
/*     // to remove from the parent */
/*     if (res.child->type == 2) { */
/*       size_t str_len = strlen(string); */
/*       size_t suffix_len = strlen(res.child->ptr.string); */

/*       tnode_remove(res.parent, string[str_len - suffix_len - 1]); */
/*     } */
/*     // In the other case, the character to remove from the parent is the last
 */
/*     // character of the string */
/*     else if (res.child->size == 0) { */
/*       size_t i = 0; */

/*       while (string[i + 1] != DELIMITER) { */
/*         i++; */
/*       } */

/*       tnode_remove(res.parent, string[i]); */
/*     } else { */
/*       res.child->type = 0; */

/*       goto end; */
/*     } */

/*     tnode_free(res.child); */
/*   } */
/*   // We're in the root here */
/*   else { */
/*     res.child->type = 0; */
/*   } */

/* end: */
/*   pthread_rwlock_unlock(&trie->lock); */

/*   return return_value; */
/* } */

/**
 * Report how many keys the given trie currently holds.
 *
 * @param trie trie to return size for
 * @return value of the trie's size counter
 */
size_t trie_size(Trie *trie) {
  size_t count = trie->size;

  return count;
}

// Acquire the trie's lock for reading; returns the result of
// pthread_rwlock_rdlock.
int trie_rlock(Trie *trie) {
  pthread_rwlock_t *lock = &trie->lock;

  return pthread_rwlock_rdlock(lock);
}

// Acquire the trie's lock for writing; returns the result of
// pthread_rwlock_wrlock.
int trie_wlock(Trie *trie) {
  pthread_rwlock_t *lock = &trie->lock;

  return pthread_rwlock_wrlock(lock);
}

// Release the trie's lock, whether it was taken for reading or for writing;
// returns the result of pthread_rwlock_unlock.
int trie_unlock(Trie *trie) {
  pthread_rwlock_t *lock = &trie->lock;

  return pthread_rwlock_unlock(lock);
}