From 51e4a203e988b5ca03690fc8a046b2346e8d5d5b Mon Sep 17 00:00:00 2001 From: Chewing_Bever Date: Sat, 11 Nov 2023 13:10:14 +0100 Subject: [PATCH] feat(lsm): move keys to db file; idx entries are now constant length --- lsm/src/store/lsm_store_disk_read.c | 77 ++++++++++++++-------------- lsm/src/store/lsm_store_disk_write.c | 14 ++--- 2 files changed, 43 insertions(+), 48 deletions(-) diff --git a/lsm/src/store/lsm_store_disk_read.c b/lsm/src/store/lsm_store_disk_read.c index 8ce315c..72e34bd 100644 --- a/lsm/src/store/lsm_store_disk_read.c +++ b/lsm/src/store/lsm_store_disk_read.c @@ -137,12 +137,41 @@ static lsm_error lsm_entry_read_attrs(uint64_t *sum, lsm_entry_handle *handle, return lsm_error_ok; } +static lsm_error lsm_fseek(FILE *f, uint64_t pos) { + if (fseek(f, pos, SEEK_SET) != 0) { + return lsm_error_failed_io; + } + + return lsm_error_ok; +} + +/** + * Insert a new entry by reading it from the db file + */ +lsm_error lsm_store_insert_from_db(lsm_store *store, uint64_t pos, + uint64_t idx_file_offset) { + LSM_RES(lsm_fseek(store->db.f, pos)); + + lsm_str *key; + LSM_RES(lsm_entry_read_str(&key, &store->db.size, store->db.f)); + + lsm_entry_handle *handle; + LSM_RES(lsm_store_insert(&handle, store, key)); + + LSM_RES(lsm_fread(&handle->wrapper->entry->data_len, &store->db.size, + store->db.f, sizeof(uint64_t), 1)); + LSM_RES(lsm_entry_read_attrs(&store->db.size, handle, store->db.f)); + + handle->wrapper->entry->idx_file_offset = idx_file_offset; + + handle->states = 0; + lsm_entry_close(handle); + + return lsm_error_ok; +} lsm_error lsm_store_load_db(lsm_store *store) { uint64_t db_dim[2]; - lsm_str *key; - lsm_entry_handle *handle; - bool valid_entry; rewind(store->idx.f); @@ -153,45 +182,15 @@ lsm_error lsm_store_load_db(lsm_store *store) { for (uint64_t i = 0; i < store->idx.block_count; i++) { uint64_t idx_file_offset = store->idx.size; - LSM_RES(lsm_fread(&valid_entry, &store->idx.size, store->idx.f, - sizeof(bool), 1)); + LSM_RES(lsm_fread(&db_dim, &store->idx.size, store->idx.f, sizeof(uint64_t), + 2)); - if (valid_entry) { - LSM_RES(lsm_entry_read_str(&key, &store->idx.size, store->idx.f)); - LSM_RES(lsm_fread(&db_dim, &store->idx.size, store->idx.f, - sizeof(uint64_t), 2)); - LSM_RES(lsm_store_insert(&handle, store, key)); - - // Read attributes from database file - if (fseek(store->db.f, db_dim[0], SEEK_SET) != 0) { - return lsm_error_failed_io; - } - - LSM_RES(lsm_fread(&handle->wrapper->entry->data_len, NULL, store->db.f, - sizeof(uint64_t), 1)); - LSM_RES(lsm_entry_read_attrs(NULL, handle, store->db.f)); - - handle->wrapper->entry->idx_file_offset = idx_file_offset; - - handle->states = 0; - lsm_entry_close(handle); - - store->db.size += db_dim[1]; + // We zero out the length of entries if they're no longer valid + if (db_dim[1] == 0) { + continue; } - // Simply skip the invalid entry - else { - uint64_t key_len; - LSM_RES(lsm_fread(&key_len, &store->idx.size, store->idx.f, - sizeof(uint64_t), 1)); - uint64_t remaining = key_len + 2 * sizeof(uint64_t); - - if (fseek(store->idx.f, remaining, SEEK_CUR) != 0) { - return lsm_error_failed_io; - } - - store->idx.size += remaining; - } + LSM_RES(lsm_store_insert_from_db(store, db_dim[0], idx_file_offset)); } return lsm_error_ok; diff --git a/lsm/src/store/lsm_store_disk_write.c b/lsm/src/store/lsm_store_disk_write.c index 3808bb8..acb0015 100644 --- a/lsm/src/store/lsm_store_disk_write.c +++ b/lsm/src/store/lsm_store_disk_write.c @@ -47,6 +47,7 @@ lsm_error lsm_write_db_entry(uint64_t *size, FILE *db_file, lsm_entry *entry, LSM_RES(lsm_fseek(db_file, pos)); + LSM_RES(lsm_write_str(size, db_file, entry->key)); LSM_RES(lsm_fwrite(size, db_file, sizeof(uint64_t), 1, &entry->data_len)); LSM_RES(lsm_fwrite(size, db_file, sizeof(uint8_t), 1, &entry->attrs.count)); @@ -59,16 +60,12 @@ lsm_error lsm_write_db_entry(uint64_t *size, FILE *db_file, lsm_entry *entry, return lsm_error_ok; } -lsm_error lsm_write_idx_entry(uint64_t *size, FILE *idx_file, lsm_entry *entry, - uint64_t offset, uint64_t len, uint64_t pos) { +lsm_error lsm_write_idx_entry(uint64_t *size, FILE *idx_file, uint64_t offset, + uint64_t len, uint64_t pos) { *size = 0; LSM_RES(lsm_fseek(idx_file, pos)); - bool valid_entry_marker = true; - LSM_RES(lsm_fwrite(size, idx_file, sizeof(bool), 1, &valid_entry_marker)); - - LSM_RES(lsm_write_str(size, idx_file, entry->key)); LSM_RES(lsm_fwrite(size, idx_file, sizeof(uint64_t), 1, &offset)); LSM_RES(lsm_fwrite(size, idx_file, sizeof(uint64_t), 1, &len)); @@ -99,9 +96,8 @@ lsm_error lsm_entry_disk_insert(lsm_entry_handle *handle) { uint64_t idx_entry_index = store->idx.size; uint64_t idx_entry_size; - res = - lsm_write_idx_entry(&idx_entry_size, store->idx.f, handle->wrapper->entry, - db_entry_index, db_entry_size, store->idx.size); + res = lsm_write_idx_entry(&idx_entry_size, store->idx.f, db_entry_index, + db_entry_size, store->idx.size); if (res == lsm_error_ok) { // Update the counter at the beginning of the file