2023-10-28 15:48:28 +02:00
|
|
|
#include <pthread.h>
|
2023-10-25 10:57:45 +02:00
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
#include "lsm.h"
|
2023-10-20 10:41:53 +02:00
|
|
|
#include "lsm/store.h"
|
|
|
|
#include "lsm/store_internal.h"
|
2023-10-28 15:48:28 +02:00
|
|
|
#include "lsm/trie.h"
|
2023-10-20 10:41:53 +02:00
|
|
|
|
2023-10-25 10:57:45 +02:00
|
|
|
/**
 * Allocate a new, zero-initialized store and initialize its trie.
 *
 * @param ptr where to write the pointer to the new store; only written on
 * success
 * @return lsm_error_ok on success, lsm_error_failed_alloc if the store itself
 * could not be allocated, or whatever error lsm_trie_init reported
 */
lsm_error lsm_store_init(lsm_store **ptr) {
  lsm_store *new_store = calloc(1, sizeof *new_store);

  if (new_store == NULL) {
    return lsm_error_failed_alloc;
  }

  lsm_error trie_res = lsm_trie_init(&new_store->trie);

  if (trie_res == lsm_error_ok) {
    *ptr = new_store;

    return lsm_error_ok;
  }

  // The trie could not be set up, so the half-built store is discarded
  free(new_store);

  return trie_res;
}
|
|
|
|
|
2023-10-29 12:19:59 +01:00
|
|
|
/**
 * Create a store and record the paths of its database file and data
 * directory.
 *
 * Reading the database file itself is not implemented yet, so the returned
 * store is always empty.
 *
 * @param ptr where to write the pointer to the new store
 * @param db_path path stored in the store's db_path field; the pointer is
 * stored as-is, not copied
 * @param data_path path stored in the store's data_path field; the pointer is
 * stored as-is, not copied
 * @return lsm_error_ok on success; a failing lsm_store_init is handled by
 * LSM_RES (presumably by returning its error code -- confirm against the macro
 * definition)
 */
lsm_error lsm_store_load(lsm_store **ptr, lsm_str *db_path,
                         lsm_str *data_path) {
  lsm_store *loaded;

  LSM_RES(lsm_store_init(&loaded));

  // TODO implement all of reading the db file

  loaded->data_path = data_path;
  loaded->db_path = db_path;

  *ptr = loaded;

  return lsm_error_ok;
}
|
|
|
|
|
2023-10-29 12:19:59 +01:00
|
|
|
lsm_error lsm_store_open_read(lsm_entry_handle **out, lsm_store *store,
|
|
|
|
lsm_str *key) {
|
2023-10-28 15:48:28 +02:00
|
|
|
lsm_entry_wrapper *wrapper;
|
|
|
|
|
|
|
|
LSM_RES(lsm_trie_search((void **)&wrapper, store->trie, key));
|
|
|
|
|
2023-10-29 12:19:59 +01:00
|
|
|
// Try to get a read lock on the entry's lock
|
2023-10-28 15:48:28 +02:00
|
|
|
if (pthread_rwlock_tryrdlock(&wrapper->lock) != 0) {
|
|
|
|
return lsm_error_lock_busy;
|
|
|
|
}
|
|
|
|
|
|
|
|
lsm_entry *entry = wrapper->entry;
|
|
|
|
|
|
|
|
// While the trie's data field will never be NULL, the actual entry pointer
|
|
|
|
// might be
|
|
|
|
if (entry == NULL) {
|
|
|
|
pthread_rwlock_unlock(&wrapper->lock);
|
|
|
|
|
|
|
|
return lsm_error_not_found;
|
|
|
|
}
|
|
|
|
|
2023-10-29 12:19:59 +01:00
|
|
|
lsm_entry_handle *handle;
|
|
|
|
lsm_error res = lsm_entry_handle_init(&handle);
|
|
|
|
|
|
|
|
if (res != lsm_error_ok) {
|
|
|
|
pthread_rwlock_unlock(&wrapper->lock);
|
|
|
|
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
2023-10-28 15:48:28 +02:00
|
|
|
// Open a new file descriptor if needed
|
2023-10-29 12:19:59 +01:00
|
|
|
if (entry->data.on_disk) {
|
2023-10-28 15:48:28 +02:00
|
|
|
char path[store->data_path->len + entry->key->len + 2];
|
|
|
|
sprintf(path, "%s/%s", lsm_str_ptr(store->data_path),
|
|
|
|
lsm_str_ptr(entry->key));
|
|
|
|
|
|
|
|
FILE *f = fopen(path, "rb");
|
|
|
|
|
|
|
|
if (f == NULL) {
|
2023-10-29 12:19:59 +01:00
|
|
|
free(handle);
|
|
|
|
|
|
|
|
return lsm_error_failed_io;
|
|
|
|
}
|
|
|
|
|
|
|
|
handle->f = f;
|
|
|
|
}
|
|
|
|
|
|
|
|
handle->wrapper = wrapper;
|
|
|
|
*out = handle;
|
|
|
|
|
|
|
|
return lsm_error_ok;
|
|
|
|
}
|
|
|
|
|
|
|
|
lsm_error lsm_store_open_write(lsm_entry_handle **out, lsm_store *store,
|
|
|
|
lsm_str *key) {
|
|
|
|
lsm_entry_wrapper *wrapper;
|
|
|
|
|
|
|
|
LSM_RES(lsm_trie_search((void **)&wrapper, store->trie, key));
|
|
|
|
|
|
|
|
// Try to get a write lock on the entry's lock
|
|
|
|
// TODO make this timeout to not block
|
|
|
|
if (pthread_rwlock_wrlock(&wrapper->lock) != 0) {
|
|
|
|
return lsm_error_lock_busy;
|
|
|
|
}
|
|
|
|
|
|
|
|
lsm_entry *entry = wrapper->entry;
|
|
|
|
|
|
|
|
// While the trie's data field will never be NULL, the actual entry pointer
|
|
|
|
// might be
|
|
|
|
if (entry == NULL) {
|
|
|
|
pthread_rwlock_unlock(&wrapper->lock);
|
|
|
|
|
|
|
|
return lsm_error_not_found;
|
|
|
|
}
|
|
|
|
|
|
|
|
lsm_entry_handle *handle;
|
|
|
|
lsm_error res = lsm_entry_handle_init(&handle);
|
|
|
|
|
|
|
|
if (res != lsm_error_ok) {
|
|
|
|
pthread_rwlock_unlock(&wrapper->lock);
|
|
|
|
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Open a new file descriptor if needed
|
|
|
|
if (entry->data.on_disk) {
|
|
|
|
char path[store->data_path->len + entry->key->len + 2];
|
|
|
|
sprintf(path, "%s/%s", lsm_str_ptr(store->data_path),
|
|
|
|
lsm_str_ptr(entry->key));
|
|
|
|
|
2023-10-29 12:33:07 +01:00
|
|
|
FILE *f = fopen(path, "ab");
|
2023-10-29 12:19:59 +01:00
|
|
|
|
|
|
|
if (f == NULL) {
|
|
|
|
free(handle);
|
|
|
|
|
2023-10-28 15:48:28 +02:00
|
|
|
return lsm_error_failed_io;
|
|
|
|
}
|
|
|
|
|
2023-10-29 12:19:59 +01:00
|
|
|
handle->f = f;
|
2023-10-28 15:48:28 +02:00
|
|
|
}
|
|
|
|
|
2023-10-29 12:19:59 +01:00
|
|
|
handle->wrapper = wrapper;
|
|
|
|
*out = handle;
|
|
|
|
|
2023-10-28 15:48:28 +02:00
|
|
|
return lsm_error_ok;
|
2023-10-25 10:57:45 +02:00
|
|
|
}
|
|
|
|
|
2023-10-29 12:19:59 +01:00
|
|
|
lsm_error lsm_store_insert(lsm_entry_handle **out, lsm_store *store,
|
|
|
|
lsm_str *key) {
|
|
|
|
// TODO what happens when two inserts to the same key happen at the same time?
|
2023-10-28 15:48:28 +02:00
|
|
|
lsm_entry_wrapper *wrapper;
|
|
|
|
LSM_RES(lsm_entry_wrapper_init(&wrapper));
|
2023-10-29 12:19:59 +01:00
|
|
|
pthread_rwlock_wrlock(&wrapper->lock);
|
|
|
|
|
|
|
|
lsm_error res = lsm_trie_insert(store->trie, key, wrapper);
|
|
|
|
|
|
|
|
// Check if entry isn't already present in advance
|
|
|
|
if (res != lsm_error_ok) {
|
|
|
|
lsm_entry_wrapper_free(wrapper);
|
|
|
|
|
|
|
|
return res;
|
|
|
|
}
|
2023-10-25 10:57:45 +02:00
|
|
|
|
2023-10-28 15:48:28 +02:00
|
|
|
lsm_entry *entry;
|
2023-10-25 10:57:45 +02:00
|
|
|
LSM_RES(lsm_entry_init(&entry));
|
|
|
|
|
|
|
|
entry->key = key;
|
2023-10-28 15:48:28 +02:00
|
|
|
wrapper->entry = entry;
|
|
|
|
|
2023-10-29 12:19:59 +01:00
|
|
|
lsm_entry_handle *handle;
|
|
|
|
LSM_RES(lsm_entry_handle_init(&handle));
|
|
|
|
|
|
|
|
// No need to set the handle's file, as the entry doesn't have any data yet
|
|
|
|
handle->wrapper = wrapper;
|
2023-10-28 15:48:28 +02:00
|
|
|
|
2023-10-29 12:19:59 +01:00
|
|
|
*out = handle;
|
2023-10-25 10:57:45 +02:00
|
|
|
|
|
|
|
return lsm_error_ok;
|
|
|
|
}
|
|
|
|
|
2023-10-29 12:19:59 +01:00
|
|
|
lsm_error lsm_entry_data_append(lsm_store *store, lsm_entry_handle *handle,
|
|
|
|
lsm_str *data) {
|
2023-10-29 12:33:07 +01:00
|
|
|
if (lsm_str_len(data) == 0) {
|
|
|
|
return lsm_error_ok;
|
|
|
|
}
|
|
|
|
|
2023-10-29 12:19:59 +01:00
|
|
|
lsm_entry *entry = handle->wrapper->entry;
|
|
|
|
|
2023-10-25 10:57:45 +02:00
|
|
|
uint64_t new_len = entry->data.len + lsm_str_len(data);
|
2023-10-28 15:48:28 +02:00
|
|
|
const char *data_s = lsm_str_ptr(data);
|
2023-10-25 10:57:45 +02:00
|
|
|
|
|
|
|
// Data is in memory and still fits -> keep it in memory
|
|
|
|
if ((new_len <= LSM_STORE_DISK_THRESHOLD) && (!entry->data.on_disk)) {
|
2023-10-29 13:41:16 +01:00
|
|
|
char *buf;
|
|
|
|
|
|
|
|
// Entries with no data do not have an allocated buffer yet
|
|
|
|
if (entry->data.len == 0) {
|
|
|
|
buf = malloc(new_len * sizeof(char));
|
|
|
|
} else {
|
|
|
|
buf = realloc(entry->data.value.ptr, new_len * sizeof(char));
|
|
|
|
}
|
2023-10-25 10:57:45 +02:00
|
|
|
|
|
|
|
if (buf == NULL) {
|
|
|
|
return lsm_error_failed_alloc;
|
|
|
|
}
|
|
|
|
|
2023-10-28 15:48:28 +02:00
|
|
|
memcpy(&buf[entry->data.len], data_s, lsm_str_len(data));
|
2023-10-25 10:57:45 +02:00
|
|
|
entry->data.value.ptr = buf;
|
|
|
|
}
|
|
|
|
// Data will end up on disk
|
|
|
|
else {
|
|
|
|
// Data is not yet on disk, so we create the file
|
|
|
|
if (!entry->data.on_disk) {
|
2023-10-28 15:48:28 +02:00
|
|
|
char path[store->data_path->len + entry->key->len + 2];
|
|
|
|
sprintf(path, "%s/%s", lsm_str_ptr(store->data_path),
|
|
|
|
lsm_str_ptr(entry->key));
|
|
|
|
|
2023-10-29 12:19:59 +01:00
|
|
|
FILE *f = fopen(path, "ab");
|
2023-10-28 15:48:28 +02:00
|
|
|
|
|
|
|
if (f == NULL) {
|
|
|
|
return lsm_error_failed_io;
|
|
|
|
}
|
|
|
|
|
2023-10-29 13:41:16 +01:00
|
|
|
// If there was data present in memory already, we sync this to disk.
|
|
|
|
// This check is required because it's possible that more than the
|
|
|
|
// treshold is written to an empty entry immediately, meaning there's no
|
|
|
|
// allocated memory buffer present.
|
|
|
|
if (entry->data.len > 0) {
|
|
|
|
size_t written = 0;
|
|
|
|
|
|
|
|
// Write original in-memory data to file
|
|
|
|
while (written < entry->data.len) {
|
|
|
|
written += fwrite(&entry->data.value.ptr[written], sizeof(char),
|
|
|
|
entry->data.len - written, f);
|
|
|
|
}
|
|
|
|
|
|
|
|
free(entry->data.value.ptr);
|
|
|
|
entry->data.value.ptr = NULL;
|
2023-10-29 12:33:07 +01:00
|
|
|
}
|
|
|
|
|
2023-10-29 12:19:59 +01:00
|
|
|
handle->f = f;
|
2023-10-28 15:48:28 +02:00
|
|
|
entry->data.on_disk = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t written = 0;
|
2023-10-25 10:57:45 +02:00
|
|
|
|
2023-10-28 15:48:28 +02:00
|
|
|
// TODO what happens when I/O fails?
|
|
|
|
while (written < data->len) {
|
|
|
|
written += fwrite(&data_s[written], sizeof(char), data->len - written,
|
2023-10-29 12:19:59 +01:00
|
|
|
handle->f);
|
2023-10-25 10:57:45 +02:00
|
|
|
}
|
|
|
|
}
|
2023-10-25 11:19:28 +02:00
|
|
|
|
2023-10-29 12:33:07 +01:00
|
|
|
entry->data.len = new_len;
|
|
|
|
|
2023-10-25 11:19:28 +02:00
|
|
|
return lsm_error_ok;
|
2023-10-25 10:57:45 +02:00
|
|
|
}
|
2023-10-29 14:41:40 +01:00
|
|
|
|
|
|
|
lsm_error lsm_entry_data_read(uint64_t *out, char *buf,
|
|
|
|
lsm_entry_handle *handle, uint64_t len) {
|
|
|
|
lsm_entry *entry = handle->wrapper->entry;
|
|
|
|
|
|
|
|
if (entry->data.len == 0) {
|
|
|
|
*out = 0;
|
|
|
|
|
|
|
|
return lsm_error_ok;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint64_t read;
|
|
|
|
|
|
|
|
if (entry->data.on_disk) {
|
|
|
|
read = fread(buf, sizeof(char), len, handle->f);
|
|
|
|
|
|
|
|
if ((read == 0) && (ferror(handle->f) != 0)) {
|
|
|
|
return lsm_error_failed_io;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
read = (entry->data.len - handle->pos) < len
|
|
|
|
? (entry->data.len - handle->pos)
|
|
|
|
: len;
|
|
|
|
memcpy(buf, &entry->data.value.ptr[handle->pos], read * sizeof(char));
|
|
|
|
}
|
|
|
|
|
|
|
|
handle->pos += read;
|
|
|
|
*out = read;
|
|
|
|
|
|
|
|
return lsm_error_ok;
|
|
|
|
}
|