From 35c301955f50db7a3d76adb4a425a7bc86803289 Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Wed, 8 Nov 2023 13:42:46 +0100 Subject: [PATCH] refactor(lsm): switch to uint8_t attr types; refactor disk code --- include/lander.h | 8 +- lsm/include/lsm/store.h | 12 +-- lsm/src/_include/lsm/store_internal.h | 6 +- lsm/src/store/lsm_store_disk_read.c | 112 +++++++++++--------------- lsm/src/store/lsm_store_disk_write.c | 21 +++-- lsm/src/store/lsm_store_entry.c | 21 ++--- 6 files changed, 84 insertions(+), 96 deletions(-) diff --git a/include/lander.h b/include/lander.h index 0697cce..5abea75 100644 --- a/include/lander.h +++ b/include/lander.h @@ -18,10 +18,10 @@ typedef struct lander_ctx { uint64_t remaining_data; } lander_ctx; -typedef enum lander_attr_type : uint64_t { - lander_attr_type_entry_type = 1 << 0, - lander_attr_type_content_type = 1 << 1, - lander_attr_type_url = 1 << 2, +typedef enum lander_attr_type : uint8_t { + lander_attr_type_entry_type = 0, + lander_attr_type_content_type = 1, + lander_attr_type_url = 2, } lander_attr_type; typedef enum lander_entry_type { diff --git a/lsm/include/lsm/store.h b/lsm/include/lsm/store.h index 37fdb9d..cf0c9f7 100644 --- a/lsm/include/lsm/store.h +++ b/lsm/include/lsm/store.h @@ -21,7 +21,7 @@ typedef struct lsm_entry_handle lsm_entry_handle; * @param entry entry to check * @param type type of attribute to check for */ -bool lsm_entry_attr_present(lsm_entry_handle *handle, uint64_t type); +bool lsm_entry_attr_present(lsm_entry_handle *handle, uint8_t type); /** * Retrieve the contents of an attribute from an entry, if present @@ -31,7 +31,7 @@ bool lsm_entry_attr_present(lsm_entry_handle *handle, uint64_t type); * @param type type of attribute to return */ lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry_handle *handle, - uint64_t type); + uint8_t type); /** * Convenience wrapper around `lsm_entry_attr_get` that can be used if we know @@ -42,7 +42,7 @@ lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry_handle *handle, * @param type type of attribute to return */ lsm_error lsm_entry_attr_get_num(uint64_t *out, lsm_entry_handle *handle, - uint64_t type); + uint8_t type); /** * Add a new attribute to the entry. @@ -51,7 +51,7 @@ lsm_error lsm_entry_attr_get_num(uint64_t *out, lsm_entry_handle *handle, * @param type type of attribute to add * @param data data of attribute; ownership of pointer is taken over */ -lsm_error lsm_entry_attr_insert(lsm_entry_handle *handle, uint64_t type, +lsm_error lsm_entry_attr_insert(lsm_entry_handle *handle, uint8_t type, lsm_str *data); /** @@ -62,7 +62,7 @@ lsm_error lsm_entry_attr_insert(lsm_entry_handle *handle, uint64_t type, * @param type type of attribute to add * @param data data of attribute */ -lsm_error lsm_entry_attr_insert_num(lsm_entry_handle *handle, uint64_t type, +lsm_error lsm_entry_attr_insert_num(lsm_entry_handle *handle, uint8_t type, uint64_t data); /** @@ -74,7 +74,7 @@ lsm_error lsm_entry_attr_insert_num(lsm_entry_handle *handle, uint64_t type, * @param type type of attribute to remove */ lsm_error lsm_entry_attr_remove(lsm_str **out, lsm_entry_handle *handle, - uint64_t type); + uint8_t type); /** * A store consisting of LSM entries. diff --git a/lsm/src/_include/lsm/store_internal.h b/lsm/src/_include/lsm/store_internal.h index f426d34..b8c24dc 100644 --- a/lsm/src/_include/lsm/store_internal.h +++ b/lsm/src/_include/lsm/store_internal.h @@ -12,7 +12,7 @@ #define LSM_IDX_FILE_NAME "lsm.idx" typedef struct lsm_attr { - uint64_t type; + uint8_t type; lsm_str *str; } lsm_attr; @@ -26,8 +26,8 @@ typedef struct lsm_attr { typedef struct lsm_entry { lsm_str *key; struct { - uint64_t count; - uint64_t bitmap; + uint64_t bitmap[4]; + uint8_t count; lsm_attr *items; } attrs; struct { diff --git a/lsm/src/store/lsm_store_disk_read.c b/lsm/src/store/lsm_store_disk_read.c index 4c76b76..e644846 100644 --- a/lsm/src/store/lsm_store_disk_read.c +++ b/lsm/src/store/lsm_store_disk_read.c @@ -1,6 +1,7 @@ #include #include +#include "lsm.h" #include "lsm/store_internal.h" lsm_error lsm_store_load(lsm_store **ptr, lsm_str *data_path) { @@ -79,48 +80,59 @@ lsm_error lsm_store_load(lsm_store **ptr, lsm_str *data_path) { return lsm_error_ok; } -static lsm_error lsm_entry_read_attrs(lsm_entry_handle *handle, FILE *db_file) { - uint64_t attr_count; - size_t res = fread(&attr_count, sizeof(uint64_t), 1, db_file); +static lsm_error lsm_fread(void *out, uint64_t *sum, FILE *f, uint64_t size, + uint64_t count) { + size_t res = fread(out, size, count, f); - if (res == 0) { + if (res < count) { return lsm_error_failed_io; } + *sum += size * count; + + return lsm_error_ok; +} + +static lsm_error lsm_entry_read_str(lsm_str **out, uint64_t *sum, FILE *f) { + uint64_t len; + LSM_RES(lsm_fread(&len, sum, f, sizeof(uint64_t), 1)); + + char *buf = malloc(len + 1); + + if (buf == NULL) { + return lsm_error_failed_alloc; + } + + uint64_t read = 0; + + while (read < len) { + read += fread(&buf[read], 1, len - read, f); + } + + *sum += len; + + return lsm_str_init(out, buf); +} + +static lsm_error lsm_entry_read_attrs(uint64_t *sum, lsm_entry_handle *handle, + FILE *db_file) { + uint8_t attr_count; + LSM_RES(lsm_fread(&attr_count, sum, db_file, sizeof(uint8_t), 1)); + // attr_type, val_len - uint64_t nums[2]; + uint8_t attr_type; lsm_str *val; for (uint64_t i = 0; i < attr_count; i++) { - res = fread(nums, sizeof(uint64_t), 2, db_file); - - if (res < 2) { - return lsm_error_failed_io; - } - - char *val_s = malloc(nums[1] + 1); - val_s[nums[1]] = '\0'; - - if (val_s == NULL) { - return lsm_error_failed_alloc; - } - - uint64_t read = 0; - - while (read < nums[1]) { - read += fread(&val_s[read], 1, nums[1] - read, db_file); - } - - LSM_RES(lsm_str_init(&val, val_s)); - ; - lsm_entry_attr_insert(handle, nums[0], val); + LSM_RES(lsm_fread(&attr_type, sum, db_file, sizeof(uint8_t), 1)); + LSM_RES(lsm_entry_read_str(&val, sum, db_file)); + lsm_entry_attr_insert(handle, attr_type, val); } return lsm_error_ok; } lsm_error lsm_store_load_db(lsm_store *store) { - uint64_t key_len; uint64_t db_dim[2]; lsm_str *key; lsm_entry_handle *handle; @@ -128,43 +140,13 @@ lsm_error lsm_store_load_db(lsm_store *store) { rewind(store->idx_file); // idx file starts with block count - size_t res = - fread(&store->idx_file_block_count, sizeof(uint64_t), 1, store->idx_file); - - if (res == 0) { - return lsm_error_failed_io; - } - - store->idx_file_size += sizeof(uint64_t); + LSM_RES(lsm_fread(&store->idx_file_block_count, &store->idx_file_size, + store->idx_file, sizeof(uint64_t), 1)); for (uint64_t i = 0; i < store->idx_file_block_count; i++) { - // Read in idx metadata - res = fread(&key_len, sizeof(uint64_t), 1, store->idx_file); - - if (res == 0) { - return lsm_error_failed_io; - } - - char *key_s = malloc(key_len + 1); - key_s[key_len] = '\0'; - - if (key_s == NULL) { - return lsm_error_failed_alloc; - } - - res = fread(key_s, 1, key_len, store->idx_file); - - if (res < key_len) { - return lsm_error_failed_io; - } - - res = fread(db_dim, sizeof(uint64_t), 2, store->idx_file); - - if (res < 2) { - return lsm_error_failed_io; - } - - LSM_RES(lsm_str_init(&key, key_s)); + LSM_RES(lsm_entry_read_str(&key, &store->idx_file_size, store->idx_file)); + LSM_RES(lsm_fread(&db_dim, &store->idx_file_size, store->idx_file, + sizeof(uint64_t), 2)); LSM_RES(lsm_store_insert(&handle, store, key)); // Read attributes from database file @@ -172,10 +154,10 @@ lsm_error lsm_store_load_db(lsm_store *store) { return lsm_error_failed_io; } - LSM_RES(lsm_entry_read_attrs(handle, store->db_file)); + LSM_RES( + lsm_entry_read_attrs(&store->idx_file_size, handle, store->db_file)); lsm_entry_close(handle); - store->idx_file_size += 3 * sizeof(uint64_t) + key_len; store->db_file_size += db_dim[1]; } diff --git a/lsm/src/store/lsm_store_disk_write.c b/lsm/src/store/lsm_store_disk_write.c index 07034a1..4bdd297 100644 --- a/lsm/src/store/lsm_store_disk_write.c +++ b/lsm/src/store/lsm_store_disk_write.c @@ -1,9 +1,8 @@ #include "lsm/store_internal.h" -static lsm_error lsm_entry_write_uint64_t(FILE *f, uint64_t num) { - size_t res = fwrite(&num, sizeof(uint64_t), 1, f); +static lsm_error lsm_entry_write_single(FILE *f, uint64_t size, void *val) { + size_t res = fwrite(val, size, 1, f); - // Such a small write should succeed in one go if (res == 0) { return lsm_error_failed_io; } @@ -11,6 +10,10 @@ static lsm_error lsm_entry_write_uint64_t(FILE *f, uint64_t num) { return lsm_error_ok; } +static lsm_error lsm_entry_write_uint64_t(FILE *f, uint64_t num) { + return lsm_entry_write_single(f, sizeof(uint64_t), &num); +} + static lsm_error lsm_entry_write_str(FILE *f, lsm_str *s) { uint64_t to_write = lsm_str_len(s); uint64_t written = 0; @@ -35,17 +38,19 @@ lsm_error lsm_entry_write_db(uint64_t *size, FILE *db_file, lsm_entry *entry, LSM_RES(lsm_seek(db_file, pos)); // First we write how many attributes follow - LSM_RES(lsm_entry_write_uint64_t(db_file, entry->attrs.count)); - *size = sizeof(uint64_t); + LSM_RES( + lsm_entry_write_single(db_file, sizeof(uint8_t), &entry->attrs.count)); + *size = sizeof(uint8_t); - for (uint64_t i = 0; i < entry->attrs.count; i++) { + for (uint8_t i = 0; i < entry->attrs.count; i++) { // Write attribute type, length & value - LSM_RES(lsm_entry_write_uint64_t(db_file, entry->attrs.items[i].type)); + LSM_RES(lsm_entry_write_single(db_file, sizeof(uint8_t), + &entry->attrs.items[i].type)); LSM_RES(lsm_entry_write_uint64_t(db_file, lsm_str_len(entry->attrs.items[i].str))); LSM_RES(lsm_entry_write_str(db_file, entry->attrs.items[i].str)); - *size += 2 * sizeof(uint64_t) + + *size += sizeof(uint8_t) + sizeof(uint64_t) + lsm_str_len(entry->attrs.items[i].str) * sizeof(char); } diff --git a/lsm/src/store/lsm_store_entry.c b/lsm/src/store/lsm_store_entry.c index ea150f0..51dd998 100644 --- a/lsm/src/store/lsm_store_entry.c +++ b/lsm/src/store/lsm_store_entry.c @@ -56,12 +56,13 @@ void lsm_entry_close(lsm_entry_handle *handle) { free(handle); } -bool lsm_entry_attr_present(lsm_entry_handle *handle, uint64_t type) { - return (handle->wrapper->entry->attrs.bitmap & type) != 0; +bool lsm_entry_attr_present(lsm_entry_handle *handle, uint8_t type) { + return (handle->wrapper->entry->attrs.bitmap[type / 64] & + (((uint64_t)1) << (type % 64))) != 0; } lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry_handle *handle, - uint64_t type) { + uint8_t type) { if (!lsm_entry_attr_present(handle, type)) { return lsm_error_not_found; } @@ -79,7 +80,7 @@ lsm_error lsm_entry_attr_get(lsm_str **out, lsm_entry_handle *handle, } lsm_error lsm_entry_attr_get_num(uint64_t *out, lsm_entry_handle *handle, - uint64_t type) { + uint8_t type) { lsm_str *s; LSM_RES(lsm_entry_attr_get(&s, handle, type)); @@ -96,7 +97,7 @@ lsm_error lsm_entry_attr_get_num(uint64_t *out, lsm_entry_handle *handle, } lsm_error lsm_entry_attr_remove(lsm_str **out, lsm_entry_handle *handle, - uint64_t type) { + uint8_t type) { if (!lsm_entry_attr_present(handle, type)) { return lsm_error_not_found; } @@ -109,7 +110,7 @@ lsm_error lsm_entry_attr_remove(lsm_str **out, lsm_entry_handle *handle, free(entry->attrs.items); entry->attrs.items = NULL; entry->attrs.count = 0; - entry->attrs.bitmap = 0; + entry->attrs.bitmap[type / 64] &= ~(((uint64_t)1) << (type % 64)); return lsm_error_ok; } @@ -138,12 +139,12 @@ lsm_error lsm_entry_attr_remove(lsm_str **out, lsm_entry_handle *handle, entry->attrs.items = new_attrs; entry->attrs.count--; - entry->attrs.bitmap &= ~type; + entry->attrs.bitmap[type / 64] &= ~(((uint64_t)1) << (type % 64)); return lsm_error_ok; } -lsm_error lsm_entry_attr_insert(lsm_entry_handle *handle, uint64_t type, +lsm_error lsm_entry_attr_insert(lsm_entry_handle *handle, uint8_t type, lsm_str *data) { if (lsm_entry_attr_present(handle, type)) { return lsm_error_already_present; @@ -163,12 +164,12 @@ lsm_error lsm_entry_attr_insert(lsm_entry_handle *handle, uint64_t type, entry->attrs.items = new_attrs; entry->attrs.count++; - entry->attrs.bitmap |= type; + entry->attrs.bitmap[type / 64] |= ((uint64_t)1) << (type % 64); return lsm_error_ok; } -lsm_error lsm_entry_attr_insert_num(lsm_entry_handle *handle, uint64_t type, +lsm_error lsm_entry_attr_insert_num(lsm_entry_handle *handle, uint8_t type, uint64_t data) { lsm_str *s; LSM_RES(