refactor(lsm): switch to uint8_t attr types; refactor disk code

lsm
Jef Roosens 2023-11-08 13:42:46 +01:00
parent b5fc3a3612
commit 35c301955f
Signed by: Jef Roosens
GPG Key ID: B75D4F293C7052DB
6 changed files with 84 additions and 96 deletions

View File

@ -18,10 +18,10 @@ typedef struct lander_ctx {
uint64_t remaining_data; uint64_t remaining_data;
} lander_ctx; } lander_ctx;
typedef enum lander_attr_type : uint64_t { typedef enum lander_attr_type : uint8_t {
lander_attr_type_entry_type = 1 << 0, lander_attr_type_entry_type = 0,
lander_attr_type_content_type = 1 << 1, lander_attr_type_content_type = 1,
lander_attr_type_url = 1 << 2, lander_attr_type_url = 2,
} lander_attr_type; } lander_attr_type;
typedef enum lander_entry_type { typedef enum lander_entry_type {

View File

@ -21,7 +21,7 @@ typedef struct lsm_entry_handle lsm_entry_handle;
* @param handle handle of the entry to check * @param handle handle of the entry to check
* @param type type of attribute to check for * @param type type of attribute to check for
*/ */
bool lsm_entry_attr_present(lsm_entry_handle *handle, uint64_t type); bool lsm_entry_attr_present(lsm_entry_handle *handle, uint8_t type);
/** /**
* Retrieve the contents of an attribute from an entry, if present * Retrieve the contents of an attribute from an entry, if present
@ -31,7 +31,7 @@ bool lsm_entry_attr_present(lsm_entry_handle *handle, uint64_t type);
* @param type type of attribute to return * @param type type of attribute to return
*/ */
lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry_handle *handle, lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry_handle *handle,
uint64_t type); uint8_t type);
/** /**
* Convenience wrapper around `lsm_entry_attr_get` that can be used if we know * Convenience wrapper around `lsm_entry_attr_get` that can be used if we know
@ -42,7 +42,7 @@ lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry_handle *handle,
* @param type type of attribute to return * @param type type of attribute to return
*/ */
lsm_error lsm_entry_attr_get_num(uint64_t *out, lsm_entry_handle *handle, lsm_error lsm_entry_attr_get_num(uint64_t *out, lsm_entry_handle *handle,
uint64_t type); uint8_t type);
/** /**
* Add a new attribute to the entry. * Add a new attribute to the entry.
@ -51,7 +51,7 @@ lsm_error lsm_entry_attr_get_num(uint64_t *out, lsm_entry_handle *handle,
* @param type type of attribute to add * @param type type of attribute to add
* @param data data of attribute; ownership of pointer is taken over * @param data data of attribute; ownership of pointer is taken over
*/ */
lsm_error lsm_entry_attr_insert(lsm_entry_handle *handle, uint64_t type, lsm_error lsm_entry_attr_insert(lsm_entry_handle *handle, uint8_t type,
lsm_str *data); lsm_str *data);
/** /**
@ -62,7 +62,7 @@ lsm_error lsm_entry_attr_insert(lsm_entry_handle *handle, uint64_t type,
* @param type type of attribute to add * @param type type of attribute to add
* @param data data of attribute * @param data data of attribute
*/ */
lsm_error lsm_entry_attr_insert_num(lsm_entry_handle *handle, uint64_t type, lsm_error lsm_entry_attr_insert_num(lsm_entry_handle *handle, uint8_t type,
uint64_t data); uint64_t data);
/** /**
@ -74,7 +74,7 @@ lsm_error lsm_entry_attr_insert_num(lsm_entry_handle *handle, uint64_t type,
* @param type type of attribute to remove * @param type type of attribute to remove
*/ */
lsm_error lsm_entry_attr_remove(lsm_str **out, lsm_entry_handle *handle, lsm_error lsm_entry_attr_remove(lsm_str **out, lsm_entry_handle *handle,
uint64_t type); uint8_t type);
/** /**
* A store consisting of LSM entries. * A store consisting of LSM entries.

View File

@ -12,7 +12,7 @@
#define LSM_IDX_FILE_NAME "lsm.idx" #define LSM_IDX_FILE_NAME "lsm.idx"
typedef struct lsm_attr { typedef struct lsm_attr {
uint64_t type; uint8_t type;
lsm_str *str; lsm_str *str;
} lsm_attr; } lsm_attr;
@ -26,8 +26,8 @@ typedef struct lsm_attr {
typedef struct lsm_entry { typedef struct lsm_entry {
lsm_str *key; lsm_str *key;
struct { struct {
uint64_t count; uint64_t bitmap[4];
uint64_t bitmap; uint8_t count;
lsm_attr *items; lsm_attr *items;
} attrs; } attrs;
struct { struct {

View File

@ -1,6 +1,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include "lsm.h"
#include "lsm/store_internal.h" #include "lsm/store_internal.h"
lsm_error lsm_store_load(lsm_store **ptr, lsm_str *data_path) { lsm_error lsm_store_load(lsm_store **ptr, lsm_str *data_path) {
@ -79,48 +80,59 @@ lsm_error lsm_store_load(lsm_store **ptr, lsm_str *data_path) {
return lsm_error_ok; return lsm_error_ok;
} }
static lsm_error lsm_entry_read_attrs(lsm_entry_handle *handle, FILE *db_file) { static lsm_error lsm_fread(void *out, uint64_t *sum, FILE *f, uint64_t size,
uint64_t attr_count; uint64_t count) {
size_t res = fread(&attr_count, sizeof(uint64_t), 1, db_file); size_t res = fread(out, size, count, f);
if (res == 0) { if (res < count) {
return lsm_error_failed_io; return lsm_error_failed_io;
} }
// attr_type, val_len *sum += size * count;
uint64_t nums[2];
lsm_str *val;
for (uint64_t i = 0; i < attr_count; i++) { return lsm_error_ok;
res = fread(nums, sizeof(uint64_t), 2, db_file); }
if (res < 2) { static lsm_error lsm_entry_read_str(lsm_str **out, uint64_t *sum, FILE *f) {
return lsm_error_failed_io; uint64_t len;
} LSM_RES(lsm_fread(&len, sum, f, sizeof(uint64_t), 1));
char *val_s = malloc(nums[1] + 1); char *buf = malloc(len + 1);
val_s[nums[1]] = '\0';
if (val_s == NULL) { if (buf == NULL) {
return lsm_error_failed_alloc; return lsm_error_failed_alloc;
} }
uint64_t read = 0; uint64_t read = 0;
while (read < nums[1]) { while (read < len) {
read += fread(&val_s[read], 1, nums[1] - read, db_file); read += fread(&buf[read], 1, len - read, f);
} }
LSM_RES(lsm_str_init(&val, val_s)); *sum += len;
;
lsm_entry_attr_insert(handle, nums[0], val); return lsm_str_init(out, buf);
}
static lsm_error lsm_entry_read_attrs(uint64_t *sum, lsm_entry_handle *handle,
FILE *db_file) {
uint8_t attr_count;
LSM_RES(lsm_fread(&attr_count, sum, db_file, sizeof(uint8_t), 1));
// attr_type, val_len
uint8_t attr_type;
lsm_str *val;
for (uint64_t i = 0; i < attr_count; i++) {
LSM_RES(lsm_fread(&attr_type, sum, db_file, sizeof(uint8_t), 1));
LSM_RES(lsm_entry_read_str(&val, sum, db_file));
lsm_entry_attr_insert(handle, attr_type, val);
} }
return lsm_error_ok; return lsm_error_ok;
} }
lsm_error lsm_store_load_db(lsm_store *store) { lsm_error lsm_store_load_db(lsm_store *store) {
uint64_t key_len;
uint64_t db_dim[2]; uint64_t db_dim[2];
lsm_str *key; lsm_str *key;
lsm_entry_handle *handle; lsm_entry_handle *handle;
@ -128,43 +140,13 @@ lsm_error lsm_store_load_db(lsm_store *store) {
rewind(store->idx_file); rewind(store->idx_file);
// idx file starts with block count // idx file starts with block count
size_t res = LSM_RES(lsm_fread(&store->idx_file_block_count, &store->idx_file_size,
fread(&store->idx_file_block_count, sizeof(uint64_t), 1, store->idx_file); store->idx_file, sizeof(uint64_t), 1));
if (res == 0) {
return lsm_error_failed_io;
}
store->idx_file_size += sizeof(uint64_t);
for (uint64_t i = 0; i < store->idx_file_block_count; i++) { for (uint64_t i = 0; i < store->idx_file_block_count; i++) {
// Read in idx metadata LSM_RES(lsm_entry_read_str(&key, &store->idx_file_size, store->idx_file));
res = fread(&key_len, sizeof(uint64_t), 1, store->idx_file); LSM_RES(lsm_fread(&db_dim, &store->idx_file_size, store->idx_file,
sizeof(uint64_t), 2));
if (res == 0) {
return lsm_error_failed_io;
}
char *key_s = malloc(key_len + 1);
key_s[key_len] = '\0';
if (key_s == NULL) {
return lsm_error_failed_alloc;
}
res = fread(key_s, 1, key_len, store->idx_file);
if (res < key_len) {
return lsm_error_failed_io;
}
res = fread(db_dim, sizeof(uint64_t), 2, store->idx_file);
if (res < 2) {
return lsm_error_failed_io;
}
LSM_RES(lsm_str_init(&key, key_s));
LSM_RES(lsm_store_insert(&handle, store, key)); LSM_RES(lsm_store_insert(&handle, store, key));
// Read attributes from database file // Read attributes from database file
@ -172,10 +154,10 @@ lsm_error lsm_store_load_db(lsm_store *store) {
return lsm_error_failed_io; return lsm_error_failed_io;
} }
LSM_RES(lsm_entry_read_attrs(handle, store->db_file)); LSM_RES(
lsm_entry_read_attrs(&store->idx_file_size, handle, store->db_file));
lsm_entry_close(handle); lsm_entry_close(handle);
store->idx_file_size += 3 * sizeof(uint64_t) + key_len;
store->db_file_size += db_dim[1]; store->db_file_size += db_dim[1];
} }

View File

@ -1,9 +1,8 @@
#include "lsm/store_internal.h" #include "lsm/store_internal.h"
static lsm_error lsm_entry_write_uint64_t(FILE *f, uint64_t num) { static lsm_error lsm_entry_write_single(FILE *f, uint64_t size, void *val) {
size_t res = fwrite(&num, sizeof(uint64_t), 1, f); size_t res = fwrite(val, size, 1, f);
// Such a small write should succeed in one go
if (res == 0) { if (res == 0) {
return lsm_error_failed_io; return lsm_error_failed_io;
} }
@ -11,6 +10,10 @@ static lsm_error lsm_entry_write_uint64_t(FILE *f, uint64_t num) {
return lsm_error_ok; return lsm_error_ok;
} }
static lsm_error lsm_entry_write_uint64_t(FILE *f, uint64_t num) {
return lsm_entry_write_single(f, sizeof(uint64_t), &num);
}
static lsm_error lsm_entry_write_str(FILE *f, lsm_str *s) { static lsm_error lsm_entry_write_str(FILE *f, lsm_str *s) {
uint64_t to_write = lsm_str_len(s); uint64_t to_write = lsm_str_len(s);
uint64_t written = 0; uint64_t written = 0;
@ -35,17 +38,19 @@ lsm_error lsm_entry_write_db(uint64_t *size, FILE *db_file, lsm_entry *entry,
LSM_RES(lsm_seek(db_file, pos)); LSM_RES(lsm_seek(db_file, pos));
// First we write how many attributes follow // First we write how many attributes follow
LSM_RES(lsm_entry_write_uint64_t(db_file, entry->attrs.count)); LSM_RES(
*size = sizeof(uint64_t); lsm_entry_write_single(db_file, sizeof(uint8_t), &entry->attrs.count));
*size = sizeof(uint8_t);
for (uint64_t i = 0; i < entry->attrs.count; i++) { for (uint8_t i = 0; i < entry->attrs.count; i++) {
// Write attribute type, length & value // Write attribute type, length & value
LSM_RES(lsm_entry_write_uint64_t(db_file, entry->attrs.items[i].type)); LSM_RES(lsm_entry_write_single(db_file, sizeof(uint8_t),
&entry->attrs.items[i].type));
LSM_RES(lsm_entry_write_uint64_t(db_file, LSM_RES(lsm_entry_write_uint64_t(db_file,
lsm_str_len(entry->attrs.items[i].str))); lsm_str_len(entry->attrs.items[i].str)));
LSM_RES(lsm_entry_write_str(db_file, entry->attrs.items[i].str)); LSM_RES(lsm_entry_write_str(db_file, entry->attrs.items[i].str));
*size += 2 * sizeof(uint64_t) + *size += sizeof(uint8_t) + sizeof(uint64_t) +
lsm_str_len(entry->attrs.items[i].str) * sizeof(char); lsm_str_len(entry->attrs.items[i].str) * sizeof(char);
} }

View File

@ -56,12 +56,13 @@ void lsm_entry_close(lsm_entry_handle *handle) {
free(handle); free(handle);
} }
bool lsm_entry_attr_present(lsm_entry_handle *handle, uint64_t type) { bool lsm_entry_attr_present(lsm_entry_handle *handle, uint8_t type) {
return (handle->wrapper->entry->attrs.bitmap & type) != 0; return (handle->wrapper->entry->attrs.bitmap[type / 64] &
(((uint64_t)1) << (type % 64))) != 0;
} }
lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry_handle *handle, lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry_handle *handle,
uint64_t type) { uint8_t type) {
if (!lsm_entry_attr_present(handle, type)) { if (!lsm_entry_attr_present(handle, type)) {
return lsm_error_not_found; return lsm_error_not_found;
} }
@ -79,7 +80,7 @@ lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry_handle *handle,
} }
lsm_error lsm_entry_attr_get_num(uint64_t *out, lsm_entry_handle *handle, lsm_error lsm_entry_attr_get_num(uint64_t *out, lsm_entry_handle *handle,
uint64_t type) { uint8_t type) {
lsm_str *s; lsm_str *s;
LSM_RES(lsm_entry_attr_get(&s, handle, type)); LSM_RES(lsm_entry_attr_get(&s, handle, type));
@ -96,7 +97,7 @@ lsm_error lsm_entry_attr_get_num(uint64_t *out, lsm_entry_handle *handle,
} }
lsm_error lsm_entry_attr_remove(lsm_str **out, lsm_entry_handle *handle, lsm_error lsm_entry_attr_remove(lsm_str **out, lsm_entry_handle *handle,
uint64_t type) { uint8_t type) {
if (!lsm_entry_attr_present(handle, type)) { if (!lsm_entry_attr_present(handle, type)) {
return lsm_error_not_found; return lsm_error_not_found;
} }
@ -109,7 +110,7 @@ lsm_error lsm_entry_attr_remove(lsm_str **out, lsm_entry_handle *handle,
free(entry->attrs.items); free(entry->attrs.items);
entry->attrs.items = NULL; entry->attrs.items = NULL;
entry->attrs.count = 0; entry->attrs.count = 0;
entry->attrs.bitmap = 0; entry->attrs.bitmap[type / 64] &= ~(((uint64_t)1) << (type % 64));
return lsm_error_ok; return lsm_error_ok;
} }
@ -138,12 +139,12 @@ lsm_error lsm_entry_attr_remove(lsm_str **out, lsm_entry_handle *handle,
entry->attrs.items = new_attrs; entry->attrs.items = new_attrs;
entry->attrs.count--; entry->attrs.count--;
entry->attrs.bitmap &= ~type; entry->attrs.bitmap[type / 64] &= ~(((uint64_t)1) << (type % 64));
return lsm_error_ok; return lsm_error_ok;
} }
lsm_error lsm_entry_attr_insert(lsm_entry_handle *handle, uint64_t type, lsm_error lsm_entry_attr_insert(lsm_entry_handle *handle, uint8_t type,
lsm_str *data) { lsm_str *data) {
if (lsm_entry_attr_present(handle, type)) { if (lsm_entry_attr_present(handle, type)) {
return lsm_error_already_present; return lsm_error_already_present;
@ -163,12 +164,12 @@ lsm_error lsm_entry_attr_insert(lsm_entry_handle *handle, uint64_t type,
entry->attrs.items = new_attrs; entry->attrs.items = new_attrs;
entry->attrs.count++; entry->attrs.count++;
entry->attrs.bitmap |= type; entry->attrs.bitmap[type / 64] |= ((uint64_t)1) << (type % 64);
return lsm_error_ok; return lsm_error_ok;
} }
lsm_error lsm_entry_attr_insert_num(lsm_entry_handle *handle, uint64_t type, lsm_error lsm_entry_attr_insert_num(lsm_entry_handle *handle, uint8_t type,
uint64_t data) { uint64_t data) {
lsm_str *s; lsm_str *s;
LSM_RES( LSM_RES(