From 11cd5377593b079d3e51af76b5b7ec85dfc03014 Mon Sep 17 00:00:00 2001
From: Chewing_Bever
Date: Wed, 24 May 2023 09:03:22 +0200
Subject: [PATCH] feat: switch to C-based compilation, start of simple event loop

---
 Makefile | 143 ++++++---
 include/picohttpparser.h | 87 +++++
 src/event_loop.c | 339 ++++++++++++++++++++
 src/picohttpparser.c | 665 +++++++++++++++++++++++++++++++++++++++
 trie/include/trie.h | 4 +-
 trie/src/trie_node.c | 41 +--
 trie/src/trie_node.h | 53 ++++
 7 files changed, 1238 insertions(+), 94 deletions(-)
 create mode 100644 include/picohttpparser.h
 create mode 100644 src/event_loop.c
 create mode 100644 src/picohttpparser.c
 create mode 100644 trie/src/trie_node.h

diff --git a/Makefile b/Makefile
index 0d9d919..3711c13 100644
--- a/Makefile
+++ b/Makefile
@@ -1,69 +1,108 @@
-# =====CONFIG=====
-BUILD_DIR := ./build
-SRC_DIRS := ./src ./trie/src
-INCLUDE_DIRS := ./trie/include
-TEST_DIR := test
-CORES != nproc
+# https://spin.atomicobject.com/2016/08/26/makefile-c-projects/ was a great
+# base for this Makefile
 
-SRCS := $(shell find $(SRC_DIRS) $(INCLUDE_DIRS) \( -iname '*.cpp' -or -iname '*.c' -or -iname '*.h' \))
+BIN_FILENAME ?= lander
+
+BUILD_DIR ?= build
+SRC_DIR ?= src
+TEST_DIR ?= test
+INC_DIRS ?= include
+
+BIN := $(BUILD_DIR)/$(BIN_FILENAME)
+
+SRCS != find '$(SRC_DIR)' -iname '*.c'
+SRCS_H != find $(INC_DIRS) -iname '*.h'
+SRCS_H_INTERNAL != find $(SRC_DIR) -iname '*.h'
+SRCS_TEST != find '$(TEST_DIR)' -iname '*.c'
+
+OBJS := $(SRCS:%=$(BUILD_DIR)/%.o)
+OBJS_TEST := $(SRCS_TEST:%=$(BUILD_DIR)/%.o)
+DEPS := $(SRCS:%=$(BUILD_DIR)/%.d) $(SRCS_TEST:%=$(BUILD_DIR)/%.d)
+
+BINS_TEST := $(OBJS_TEST:%.c.o=%)
+TARGETS_TEST := $(BINS_TEST:%=test-%)
+TARGETS_MEM_TEST := $(BINS_TEST:%=test-mem-%)
+
+INC_FLAGS := $(addprefix -I,$(INC_DIRS))
+
+# -MMD: generate a .d file for every source file. This file can be imported by
+# make and makes make aware that a header file has been changed, ensuring an
+# object file is also recompiled if only a header is changed.
+# -MP: generate a dummy target for every header file (according to the docs it
+# prevents some errors when removing header files)
+CFLAGS ?= -g
+INTERNALCFLAGS := $(INC_FLAGS) -MMD -MP $(CFLAGS) -Wall -Wextra
+
+.PHONY: all
+all: bin
 
 
-# =====RECIPES=====
-all: build
+# =====COMPILATION=====
+# Utility used by the CI to lint
+.PHONY: objs
+objs: $(OBJS)
 
-.PHONY: cmake
-cmake: $(BUILD_DIR)/Debug/Makefile
-$(BUILD_DIR)/Debug/Makefile: CMakeLists.txt
-	@ cmake -B'$(BUILD_DIR)/Debug' -DCMAKE_BUILD_TYPE=Debug -DCMAKE_EXPORT_COMPILE_COMMANDS=1 .
-	@ ln -sf '$(BUILD_DIR)/Debug/compile_commands.json' compile_commands.json
+.PHONY: bin
+bin: $(BIN)
+$(BIN): $(OBJS)
+	$(CC) $(INTERNALCFLAGS) -o $@ $^
 
-.PHONY: cmake-test
-cmake-test: $(BUILD_DIR)/Test/Makefile
-$(BUILD_DIR)/Test/Makefile: CMakeLists.txt
-	@ cmake -B'$(BUILD_DIR)/Test' -DCMAKE_BUILD_TYPE=Test .
+$(BUILD_DIR)/$(SRC_DIR)/%.c.o: $(SRC_DIR)/%.c
+	mkdir -p $(dir $@)
+	$(CC) $(INTERNALCFLAGS) -c $< -o $@
 
-.PHONY: build
-build: cmake
-	@ make -C '$(BUILD_DIR)/Debug'
+
+# =====TESTING=====
+.PHONY: test
+test: $(TARGETS_TEST)
+
+.PHONY: test-mem
+test-mem: $(TARGETS_MEM_TEST)
+
+.PHONY: $(TARGETS_TEST)
+$(TARGETS_TEST): test-%: %
+	./$^
+
+.PHONY: $(TARGETS_MEM_TEST)
+$(TARGETS_MEM_TEST): test-mem-%: %
+	valgrind --tool=memcheck --error-exitcode=1 --track-origins=yes --leak-check=full ./$^
 
 .PHONY: build-test
-build-test: cmake-test
-	@ make -C '$(BUILD_DIR)/Test'
+build-test: $(BINS_TEST)
 
-.PHONY: cmake-release
-cmake-release: $(BUILD_DIR)/Release/Makefile
-$(BUILD_DIR)/Release/Makefile: CMakeLists.txt
-	@ cmake -B'$(BUILD_DIR)/Release' -DCMAKE_BUILD_TYPE=Release .
+$(BINS_TEST): %: %.c.o $(BIN)
+	$(CC) \
+		$^ -o $@
 
-.PHONY: prod
-prod: cmake-release
-	@ make -C '$(BUILD_DIR)/Release'
+# Along with the include directory, each test includes $(TEST_DIR) (which
+# contains the acutest.h header file), and the src directory of the module it's
+# testing. This allows tests to access internal methods, which aren't publicly
+# exposed.
+$(BUILD_DIR)/$(TEST_DIR)/%.c.o: $(TEST_DIR)/%.c
+	mkdir -p $(dir $@)
+	$(CC) $(INTERNALCFLAGS) -I$(TEST_DIR) \
+		-I$(dir $(@:$(BUILD_DIR)/$(TEST_DIR)/%=$(SRC_DIR)/%)) \
+		-c $< -o $@
 
-.PHONY: run
-run: build
-	@ LANDER_DATA_DIR=data LANDER_BASE_URL=http://localhost:18080/ LANDER_API_KEY=test ./build/Debug/lander
+# =====MAINTENANCE=====
+.PHONY: lint
+lint:
+	clang-format -n --Werror $(SRCS) $(SRCS_H) $(SRCS_H_INTERNAL)
 
-.PHONY: valgrind
-valgrind: build
-	@ LANDER_DATA_DIR=data LANDER_BASE_URL=http://localhost:18080/ LANDER_API_KEY=test \
-		valgrind --tool=memcheck --error-exitcode=1 --track-origins=yes --leak-check=full ./build/Debug/lander
-
-.PHONY: gdb
-gdb: build
-	@ LANDER_DATA_DIR=data LANDER_BASE_URL=http://localhost:18080/ LANDER_API_KEY=test gdb --args ./build/Debug/lander
-
-.PHONY: test
-test: build-test
-	@ $(MAKE) -C '$(BUILD_DIR)/Test' test ARGS=-j$(CORES) CTEST_OUTPUT_ON_FAILURE=1
+.PHONY: fmt
+fmt:
+	clang-format -i $(SRCS) $(SRCS_H) $(SRCS_H_INTERNAL)
 
 .PHONY: clean
 clean:
-	@ rm -rf '$(BUILD_DIR)' compile_commands.json
+	rm -rf $(BUILD_DIR)
 
-.PHONY: lint
-lint:
-	@ clang-format --Werror -n $(SRCS)
 
-.PHONY: format
-format:
-	@ clang-format -i $(SRCS)
+.PHONY: bear
+bear: clean
+	bear -- $(MAKE)
+	bear --append -- $(MAKE) build-test
+
+
+# Make make aware of the .d files
+-include $(DEPS)
diff --git a/include/picohttpparser.h b/include/picohttpparser.h
new file mode 100644
index 0000000..07537cf
--- /dev/null
+++ b/include/picohttpparser.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase,
+ *                         Shigeo Mitsunari
+ *
+ * The software is licensed
under either the MIT License (below) or the Perl
+ * license.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef picohttpparser_h
+#define picohttpparser_h
+
+#include <sys/types.h>
+
+#ifdef _MSC_VER
+#define ssize_t intptr_t
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* contains name and value of a header (name == NULL if it is a continuing
+ * line of a multiline header) */
+struct phr_header {
+    const char *name;
+    size_t name_len;
+    const char *value;
+    size_t value_len;
+};
+
+/* returns number of bytes consumed if successful, -2 if request is partial,
+ * -1 if failed */
+int phr_parse_request(const char *buf, size_t len, const char **method, size_t *method_len, const char **path, size_t *path_len,
+                      int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len);
+
+/* ditto */
+int phr_parse_response(const char *_buf, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len,
+                       struct phr_header *headers, size_t *num_headers, size_t last_len);
+
+/* ditto */
+int phr_parse_headers(const char *buf, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len);
+
+/* should be zero-filled before start */
+struct phr_chunked_decoder {
+    size_t bytes_left_in_chunk; /* number of bytes left in current chunk */
+    char consume_trailer;       /* if trailing headers should be consumed */
+    char _hex_count;
+    char _state;
+};
+
+/* the function rewrites the buffer given as (buf, bufsz) removing the chunked-
+ * encoding headers. When the function returns without an error, bufsz is
+ * updated to the length of the decoded data available. Applications should
+ * repeatedly call the function while it returns -2 (incomplete) every time
+ * supplying newly arrived data. If the end of the chunked-encoded data is
+ * found, the function returns a non-negative number indicating the number of
+ * octets left undecoded, that starts from the offset returned by `*bufsz`.
+ * Returns -1 on error.
+ */
+ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *bufsz);
+
+/* returns if the chunked decoder is in middle of chunked data */
+int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/event_loop.c b/src/event_loop.c
new file mode 100644
index 0000000..2b0c333
--- /dev/null
+++ b/src/event_loop.c
@@ -0,0 +1,386 @@
+#include <arpa/inet.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <netinet/in.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include "picohttpparser.h"
+
+// Size in bytes of each connection's read and write buffer
+#define MAX_MSG_SIZE 1024
+// Number of pollfd slots: the listening socket plus at most 31 connections
+#define MAX_POLL_FDS 32
+
+const char http_200_ok[] =
+  "HTTP/1.1 200 OK\n"
+  "Connection: close\n";
+
+/**
+ * Switch a file descriptor to non-blocking mode. If the current flags cannot
+ * be read, the descriptor is left untouched.
+ */
+static void fd_set_nb(int fd) {
+  int flags = fcntl(fd, F_GETFL, 0);
+
+  // F_GETFL reports failure as -1; OR-ing O_NONBLOCK into that would hand
+  // F_SETFL a garbage flag set
+  if (flags == -1) {
+    perror("fcntl(F_GETFL)");
+    return;
+  }
+
+  if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) == -1) {
+    perror("fcntl(F_SETFL)");
+  }
+}
+
+// Lifecycle of a connection, stored in conn.state
+enum {
+  STATE_REQ = 0,  // waiting for / reading request data
+  STATE_RES = 1,  // writing the response
+  STATE_END = 2,  // finished or failed; the main loop closes and frees it
+};
+
+typedef struct conn {
+  int fd;
+  uint32_t state;
+  // buffer for reading
+  size_t rbuf_size;
+  uint8_t rbuf[MAX_MSG_SIZE];
+  // buffer for writing
+  size_t wbuf_size;
+  size_t wbuf_sent;
+  uint8_t wbuf[MAX_MSG_SIZE];
+  // optional hook, called by try_one_request before parsing when non-NULL
+  void (*process_func) (struct conn *);
+} conn;
+
+typedef struct event_loop {
+  // table of connections indexed by file descriptor; unused slots are NULL
+  conn **connections;
+  size_t connection_count;
+} event_loop;
+
+/**
+ * Register a connection in the loop, indexed by its file descriptor. The
+ * table grows as needed and newly added slots are set to NULL.
+ *
+ * Returns 0 on success, or -1 if the table could not be grown; in that case
+ * the loop is left unchanged and the caller keeps ownership of c.
+ */
+int event_loop_put(event_loop *loop, conn *c) {
+  size_t index = (size_t)c->fd;
+
+  if (index >= loop->connection_count) {
+    size_t new_count = index + 1;
+    conn **grown = realloc(loop->connections, sizeof(conn *) * new_count);
+
+    if (grown == NULL) {
+      return -1;
+    }
+
+    // realloc leaves the new tail uninitialised, but the poll loop treats
+    // every slot as either NULL or a live connection
+    for (size_t i = loop->connection_count; i < new_count; i++) {
+      grown[i] = NULL;
+    }
+
+    loop->connections = grown;
+    loop->connection_count = new_count;
+  }
+
+  printf("Add fd %i\n", c->fd);
+
+  loop->connections[index] = c;
+  return 0;
+}
+
+/**
+ * Accept one pending connection on the listening socket fd and register it
+ * with the loop in STATE_REQ.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+int event_loop_accept(event_loop *loop, int fd) {
+  struct sockaddr_in client_addr;
+  socklen_t socklen = sizeof(client_addr);
+  int connfd = accept(fd, (struct sockaddr *)&client_addr, &socklen);
+  if (connfd < 0) {
+    printf("accept() error");
+    return -1;
+  }
+
+  // set the new connection fd to nonblocking mode
+  fd_set_nb(connfd);
+
+  conn *c = calloc(1, sizeof(conn));
+
+  if (!c) {
+    close(connfd);
+    return -1;
+  }
+
+  c->fd = connfd;
+  c->state = STATE_REQ;
+
+  // Without a slot in the table the connection would never be polled or
+  // freed, so drop it right away
+  if (event_loop_put(loop, c) != 0) {
+    close(connfd);
+    free(c);
+    return -1;
+  }
+
+  return 0;
+}
+
+/**
+ * Write pending data from the write buffer. This performs at most one
+ * successful write syscall.
+ *
+ * Returns whether the function should be retried immediately or not.
+ */
+bool conn_write_to_fd(conn *c) {
+  ssize_t res = 0;
+  size_t remain = c->wbuf_size - c->wbuf_sent;
+
+  do {
+    res = write(c->fd, &c->wbuf[c->wbuf_sent], remain);
+  } while (res < 0 && errno == EINTR);
+
+  // EAGAIN doesn't mean there was an error, but rather that the socket can't
+  // take more data right now, aka "try again later"
+  if (res < 0 && errno == EAGAIN) {
+    return false;
+  }
+
+  // If it's not EAGAIN, there was an error writing so we simply end the request
+  if (res < 0) {
+    c->state = STATE_END;
+    return false;
+  }
+
+  c->wbuf_sent += (size_t)res;
+
+  // Everything is written from the buffer, so we exit
+  if (c->wbuf_sent == c->wbuf_size) {
+    c->state = STATE_END;
+
+    return false;
+  }
+
+  // still got some data in wbuf, could try to write again
+  return true;
+}
+
+/**
+ * Run the connection's process hook, if any, then try to parse the read
+ * buffer as an HTTP request. A malformed request ends the connection.
+ */
+void try_one_request(conn *c) {
+  if (c->process_func != NULL) {
+    c->process_func(c);
+  }
+
+  // phr_parse_request takes const char ** for its output pointers
+  const char *method, *path;
+  struct phr_header headers[16];
+  size_t method_len, path_len, num_headers;
+  int minor_version;
+
+  num_headers = sizeof(headers) / sizeof(headers[0]);
+
+  int res = phr_parse_request((const char *) c->rbuf, c->rbuf_size, &method, &method_len, &path, &path_len, &minor_version, headers, &num_headers, 0);
+
+  if (res > 0) {
+    // TODO a complete request was parsed, but nothing handles it yet
+  } else if (res == -1) {
+    c->state = STATE_END;
+  } else if (res == -2) {
+    // The request is still incomplete, so we wait for more data
+  }
+}
+
+/**
+ * Read new data into the read buffer. This command performs at most one
+ * successful read syscall.
+ *
+ * Returns whether the function should be retried immediately or not.
+ */
+bool conn_read_from_fd(conn *c) {
+  ssize_t res;
+  size_t cap = MAX_MSG_SIZE - c->rbuf_size;
+
+  // With a full buffer, read() would return 0 forever and the caller would
+  // spin; a request that doesn't fit in the buffer ends the connection
+  if (cap == 0) {
+    c->state = STATE_END;
+
+    return false;
+  }
+
+  // Try to read at most cap bytes from the file descriptor
+  do {
+    res = read(c->fd, &c->rbuf[c->rbuf_size], cap);
+  } while (res < 0 && errno == EINTR);
+
+  // EAGAIN means we try again later
+  if (res < 0 && errno == EAGAIN) {
+    return false;
+  }
+
+  // Any other negative error message means the read errored out
+  if (res < 0) {
+    c->state = STATE_END;
+
+    return false;
+  }
+
+  // An output of zero means the peer closed its end. No more data will ever
+  // arrive, so retrying would loop forever
+  if (res == 0) {
+    c->state = STATE_END;
+
+    return false;
+  }
+
+  c->rbuf_size += (size_t)res;
+  printf("rbuf size: %zu", c->rbuf_size);
+
+  try_one_request(c);
+
+  // We can keep reading as long as we're in request mode
+  return c->state == STATE_REQ;
+}
+
+void conn_state_res(conn *c) {
+  while (conn_write_to_fd(c)) {}
+}
+
+void conn_state_req(conn *c) {
+  while (conn_read_from_fd(c)) {}
+}
+
+/**
+ * Print the buffered request data, run the handler for the connection's
+ * current state and print the resulting state.
+ */
+static void connection_io(conn *c) {
+  // rbuf isn't NUL-terminated and may be empty, so bound the print by its
+  // length instead of writing a terminator into the buffer
+  printf("%.*s\n", (int)c->rbuf_size, (const char *)c->rbuf);
+  switch (c->state) {
+    case STATE_REQ:
+      conn_state_req(c); break;
+    case STATE_RES:
+      conn_state_res(c); break;
+  }
+  printf("%i\n", (int)c->state);
+}
+
+int main(void) {
+  setvbuf(stdout, NULL, _IONBF, 0);
+  int fd = socket(AF_INET, SOCK_STREAM, 0);
+  if (fd < 0) {
+    return -1;
+  }
+
+  int val = 1;
+  setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val));
+
+  // bind
+  struct sockaddr_in addr = {0};
+  addr.sin_family = AF_INET;
+  addr.sin_port = htons(8000);
+  addr.sin_addr.s_addr = htonl(INADDR_ANY); // wildcard address 0.0.0.0
+  int rv = bind(fd, (const struct sockaddr *)&addr, sizeof(addr));
+  if (rv) {
+    return -1;
+  }
+
+  // listen
+  rv = listen(fd, SOMAXCONN);
+  if (rv) {
+    return -1;
+  }
+
+  // set the listen fd to nonblocking mode
+  fd_set_nb(fd);
+
+  // The zeroed loop starts with an empty connection table, which
+  // event_loop_put grows on demand
+  event_loop *loop = calloc(1, sizeof(event_loop));
+  struct pollfd *poll_args = calloc(MAX_POLL_FDS, sizeof(struct pollfd));
+
+  if (loop == NULL || poll_args == NULL) {
+    return -1;
+  }
+
+  size_t poll_args_count;
+
+  // for convenience, the listening fd is put in the first position
+  struct pollfd listen_pfd = {fd, POLLIN, 0};
+  poll_args[0] = listen_pfd;
+
+  while (1) {
+    poll_args_count = 1;
+
+    // connection fds; we do at most MAX_POLL_FDS - 1 connections at a time
+    // for now
+    for (size_t i = 0; i < loop->connection_count && poll_args_count < MAX_POLL_FDS; i++) {
+      conn *c = loop->connections[i];
+
+      if (!c) {
+        continue;
+      }
+
+      int events = (c->state == STATE_REQ) ? POLLIN : POLLOUT;
+      events |= POLLERR;
+
+      struct pollfd conn_pfd = {c->fd, events, 0};
+
+      poll_args[poll_args_count] = conn_pfd;
+      poll_args_count++;
+    }
+
+    // poll for active fds
+    // the timeout argument doesn't matter here
+    int ready = poll(poll_args, (nfds_t)poll_args_count, 1000);
+    if (ready < 0) {
+      // A signal interrupting poll isn't a reason to stop serving
+      if (errno == EINTR) {
+        continue;
+      }
+
+      return -1;
+    }
+
+    // process active connections
+    for (size_t i = 1; i < poll_args_count; ++i) {
+      if (poll_args[i].revents) {
+        conn *c = loop->connections[poll_args[i].fd];
+        connection_io(c);
+
+        if (c->state == STATE_END) {
+          // client closed normally, or something bad happened.
+          // destroy this connection
+          loop->connections[c->fd] = NULL;
+          close(c->fd);
+          free(c);
+        }
+      }
+    }
+
+    // try to accept a new connection if the listening fd is active
+    if (poll_args[0].revents) {
+      (void)event_loop_accept(loop, fd);
+    }
+  }
+
+  return 0;
+}
diff --git a/src/picohttpparser.c b/src/picohttpparser.c
new file mode 100644
index 0000000..5e5783a
--- /dev/null
+++ b/src/picohttpparser.c
@@ -0,0 +1,665 @@
+/*
+ * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase,
+ *                         Shigeo Mitsunari
+ *
+ * The software is licensed under either the MIT License (below) or the Perl
+ * license.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#ifdef __SSE4_2__ +#ifdef _MSC_VER +#include +#else +#include +#endif +#endif +#include "picohttpparser.h" + +#if __GNUC__ >= 3 +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) +#else +#define likely(x) (x) +#define unlikely(x) (x) +#endif + +#ifdef _MSC_VER +#define ALIGNED(n) _declspec(align(n)) +#else +#define ALIGNED(n) __attribute__((aligned(n))) +#endif + +#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u) + +#define CHECK_EOF() \ + if (buf == buf_end) { \ + *ret = -2; \ + return NULL; \ + } + +#define EXPECT_CHAR_NO_CHECK(ch) \ + if (*buf++ != ch) { \ + *ret = -1; \ + return NULL; \ + } + +#define EXPECT_CHAR(ch) \ + CHECK_EOF(); \ + EXPECT_CHAR_NO_CHECK(ch); + +#define ADVANCE_TOKEN(tok, toklen) \ + do { \ + const char *tok_start = buf; \ + static const char ALIGNED(16) ranges2[16] = "\000\040\177\177"; \ + int found2; \ + buf = findchar_fast(buf, buf_end, ranges2, 4, &found2); \ + if (!found2) { \ + CHECK_EOF(); \ + } \ + while (1) { \ + if (*buf == ' ') { \ + break; \ + } else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { \ + if ((unsigned char)*buf < '\040' || *buf == '\177') { \ + *ret = -1; \ + return NULL; \ + } \ + } \ + ++buf; \ + CHECK_EOF(); \ + } \ + tok = tok_start; \ + toklen = buf - tok_start; \ + } while (0) + +static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + 
"\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0" + "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1" + "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0" + "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; + +static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found) +{ + *found = 0; +#if __SSE4_2__ + if (likely(buf_end - buf >= 16)) { + __m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges); + + size_t left = (buf_end - buf) & ~15; + do { + __m128i b16 = _mm_loadu_si128((const __m128i *)buf); + int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS); + if (unlikely(r != 16)) { + buf += r; + *found = 1; + break; + } + buf += 16; + left -= 16; + } while (likely(left != 0)); + } +#else + /* suppress unused parameter warning */ + (void)buf_end; + (void)ranges; + (void)ranges_size; +#endif + return buf; +} + +static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len, int *ret) +{ + const char *token_start = buf; + +#ifdef __SSE4_2__ + static const char ALIGNED(16) ranges1[16] = "\0\010" /* allow HT */ + "\012\037" /* allow SP and up to but not including DEL */ + "\177\177"; /* allow chars w. 
MSB set */ + int found; + buf = findchar_fast(buf, buf_end, ranges1, 6, &found); + if (found) + goto FOUND_CTL; +#else + /* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */ + while (likely(buf_end - buf >= 8)) { +#define DOIT() \ + do { \ + if (unlikely(!IS_PRINTABLE_ASCII(*buf))) \ + goto NonPrintable; \ + ++buf; \ + } while (0) + DOIT(); + DOIT(); + DOIT(); + DOIT(); + DOIT(); + DOIT(); + DOIT(); + DOIT(); +#undef DOIT + continue; + NonPrintable: + if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) { + goto FOUND_CTL; + } + ++buf; + } +#endif + for (;; ++buf) { + CHECK_EOF(); + if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { + if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) { + goto FOUND_CTL; + } + } + } +FOUND_CTL: + if (likely(*buf == '\015')) { + ++buf; + EXPECT_CHAR('\012'); + *token_len = buf - 2 - token_start; + } else if (*buf == '\012') { + *token_len = buf - token_start; + ++buf; + } else { + *ret = -1; + return NULL; + } + *token = token_start; + + return buf; +} + +static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret) +{ + int ret_cnt = 0; + buf = last_len < 3 ? 
buf : buf + last_len - 3; + + while (1) { + CHECK_EOF(); + if (*buf == '\015') { + ++buf; + CHECK_EOF(); + EXPECT_CHAR('\012'); + ++ret_cnt; + } else if (*buf == '\012') { + ++buf; + ++ret_cnt; + } else { + ++buf; + ret_cnt = 0; + } + if (ret_cnt == 2) { + return buf; + } + } + + *ret = -2; + return NULL; +} + +#define PARSE_INT(valp_, mul_) \ + if (*buf < '0' || '9' < *buf) { \ + buf++; \ + *ret = -1; \ + return NULL; \ + } \ + *(valp_) = (mul_) * (*buf++ - '0'); + +#define PARSE_INT_3(valp_) \ + do { \ + int res_ = 0; \ + PARSE_INT(&res_, 100) \ + *valp_ = res_; \ + PARSE_INT(&res_, 10) \ + *valp_ += res_; \ + PARSE_INT(&res_, 1) \ + *valp_ += res_; \ + } while (0) + +/* returned pointer is always within [buf, buf_end), or null */ +static const char *parse_token(const char *buf, const char *buf_end, const char **token, size_t *token_len, char next_char, + int *ret) +{ + /* We use pcmpestri to detect non-token characters. This instruction can take no more than eight character ranges (8*2*8=128 + * bits that is the size of a SSE register). Due to this restriction, characters `|` and `~` are handled in the slow loop. */ + static const char ALIGNED(16) ranges[] = "\x00 " /* control chars and up to SP */ + "\"\"" /* 0x22 */ + "()" /* 0x28,0x29 */ + ",," /* 0x2c */ + "//" /* 0x2f */ + ":@" /* 0x3a-0x40 */ + "[]" /* 0x5b-0x5d */ + "{\xff"; /* 0x7b-0xff */ + const char *buf_start = buf; + int found; + buf = findchar_fast(buf, buf_end, ranges, sizeof(ranges) - 1, &found); + if (!found) { + CHECK_EOF(); + } + while (1) { + if (*buf == next_char) { + break; + } else if (!token_char_map[(unsigned char)*buf]) { + *ret = -1; + return NULL; + } + ++buf; + CHECK_EOF(); + } + *token = buf_start; + *token_len = buf - buf_start; + return buf; +} + +/* returned pointer is always within [buf, buf_end), or null */ +static const char *parse_http_version(const char *buf, const char *buf_end, int *minor_version, int *ret) +{ + /* we want at least [HTTP/1.] 
to try to parse */ + if (buf_end - buf < 9) { + *ret = -2; + return NULL; + } + EXPECT_CHAR_NO_CHECK('H'); + EXPECT_CHAR_NO_CHECK('T'); + EXPECT_CHAR_NO_CHECK('T'); + EXPECT_CHAR_NO_CHECK('P'); + EXPECT_CHAR_NO_CHECK('/'); + EXPECT_CHAR_NO_CHECK('1'); + EXPECT_CHAR_NO_CHECK('.'); + PARSE_INT(minor_version, 1); + return buf; +} + +static const char *parse_headers(const char *buf, const char *buf_end, struct phr_header *headers, size_t *num_headers, + size_t max_headers, int *ret) +{ + for (;; ++*num_headers) { + CHECK_EOF(); + if (*buf == '\015') { + ++buf; + EXPECT_CHAR('\012'); + break; + } else if (*buf == '\012') { + ++buf; + break; + } + if (*num_headers == max_headers) { + *ret = -1; + return NULL; + } + if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) { + /* parsing name, but do not discard SP before colon, see + * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */ + if ((buf = parse_token(buf, buf_end, &headers[*num_headers].name, &headers[*num_headers].name_len, ':', ret)) == NULL) { + return NULL; + } + if (headers[*num_headers].name_len == 0) { + *ret = -1; + return NULL; + } + ++buf; + for (;; ++buf) { + CHECK_EOF(); + if (!(*buf == ' ' || *buf == '\t')) { + break; + } + } + } else { + headers[*num_headers].name = NULL; + headers[*num_headers].name_len = 0; + } + const char *value; + size_t value_len; + if ((buf = get_token_to_eol(buf, buf_end, &value, &value_len, ret)) == NULL) { + return NULL; + } + /* remove trailing SPs and HTABs */ + const char *value_end = value + value_len; + for (; value_end != value; --value_end) { + const char c = *(value_end - 1); + if (!(c == ' ' || c == '\t')) { + break; + } + } + headers[*num_headers].value = value; + headers[*num_headers].value_len = value_end - value; + } + return buf; +} + +static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len, const char **path, + size_t *path_len, int *minor_version, struct phr_header *headers, size_t 
*num_headers, + size_t max_headers, int *ret) +{ + /* skip first empty line (some clients add CRLF after POST content) */ + CHECK_EOF(); + if (*buf == '\015') { + ++buf; + EXPECT_CHAR('\012'); + } else if (*buf == '\012') { + ++buf; + } + + /* parse request line */ + if ((buf = parse_token(buf, buf_end, method, method_len, ' ', ret)) == NULL) { + return NULL; + } + do { + ++buf; + CHECK_EOF(); + } while (*buf == ' '); + ADVANCE_TOKEN(*path, *path_len); + do { + ++buf; + CHECK_EOF(); + } while (*buf == ' '); + if (*method_len == 0 || *path_len == 0) { + *ret = -1; + return NULL; + } + if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) { + return NULL; + } + if (*buf == '\015') { + ++buf; + EXPECT_CHAR('\012'); + } else if (*buf == '\012') { + ++buf; + } else { + *ret = -1; + return NULL; + } + + return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret); +} + +int phr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path, + size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len) +{ + const char *buf = buf_start, *buf_end = buf_start + len; + size_t max_headers = *num_headers; + int r; + + *method = NULL; + *method_len = 0; + *path = NULL; + *path_len = 0; + *minor_version = -1; + *num_headers = 0; + + /* if last_len != 0, check if the request is complete (a fast countermeasure + againt slowloris */ + if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) { + return r; + } + + if ((buf = parse_request(buf, buf_end, method, method_len, path, path_len, minor_version, headers, num_headers, max_headers, + &r)) == NULL) { + return r; + } + + return (int)(buf - buf_start); +} + +static const char *parse_response(const char *buf, const char *buf_end, int *minor_version, int *status, const char **msg, + size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t max_headers, int *ret) +{ + /* parse "HTTP/1.x" 
*/ + if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) { + return NULL; + } + /* skip space */ + if (*buf != ' ') { + *ret = -1; + return NULL; + } + do { + ++buf; + CHECK_EOF(); + } while (*buf == ' '); + /* parse status code, we want at least [:digit:][:digit:][:digit:] to try to parse */ + if (buf_end - buf < 4) { + *ret = -2; + return NULL; + } + PARSE_INT_3(status); + + /* get message including preceding space */ + if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) { + return NULL; + } + if (*msg_len == 0) { + /* ok */ + } else if (**msg == ' ') { + /* Remove preceding space. Successful return from `get_token_to_eol` guarantees that we would hit something other than SP + * before running past the end of the given buffer. */ + do { + ++*msg; + --*msg_len; + } while (**msg == ' '); + } else { + /* garbage found after status code */ + *ret = -1; + return NULL; + } + + return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret); +} + +int phr_parse_response(const char *buf_start, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len, + struct phr_header *headers, size_t *num_headers, size_t last_len) +{ + const char *buf = buf_start, *buf_end = buf + len; + size_t max_headers = *num_headers; + int r; + + *minor_version = -1; + *status = 0; + *msg = NULL; + *msg_len = 0; + *num_headers = 0; + + /* if last_len != 0, check if the response is complete (a fast countermeasure + against slowloris */ + if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) { + return r; + } + + if ((buf = parse_response(buf, buf_end, minor_version, status, msg, msg_len, headers, num_headers, max_headers, &r)) == NULL) { + return r; + } + + return (int)(buf - buf_start); +} + +int phr_parse_headers(const char *buf_start, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len) +{ + const char *buf = buf_start, *buf_end = buf + len; + size_t max_headers = *num_headers; + 
int r; + + *num_headers = 0; + + /* if last_len != 0, check if the input is complete (a fast countermeasure + against slowloris) */ + if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) { + return r; + } + + if ((buf = parse_headers(buf, buf_end, headers, num_headers, max_headers, &r)) == NULL) { + return r; + } + + return (int)(buf - buf_start); +} +/* States of the chunked decoder; phr_decode_chunked() switches on decoder->_state using these values. */ +enum { + CHUNKED_IN_CHUNK_SIZE, + CHUNKED_IN_CHUNK_EXT, + CHUNKED_IN_CHUNK_DATA, + CHUNKED_IN_CHUNK_CRLF, + CHUNKED_IN_TRAILERS_LINE_HEAD, + CHUNKED_IN_TRAILERS_LINE_MIDDLE +}; +/* Return the numeric value (0 to 15) of the hexadecimal digit ch, accepting both letter cases, or -1 when ch is not a hex digit. */ +static int decode_hex(int ch) +{ + if ('0' <= ch && ch <= '9') { + return ch - '0'; + } else if ('A' <= ch && ch <= 'F') { + return ch - 'A' + 0xa; + } else if ('a' <= ch && ch <= 'f') { + return ch - 'a' + 0xa; + } else { + return -1; + } +} +/* Decode chunked transfer encoding in place. buf holds *_bufsz input bytes; decoded chunk data is compacted to the front of buf and *_bufsz is set to the decoded byte count. Progress is stored in decoder (_state, _hex_count, bytes_left_in_chunk), presumably so that a later call can continue with more input. Returns -2 while the input is incomplete, -1 on malformed input, and after the zero-size chunk (plus the trailers when consume_trailer is set) the number of bytes left in buf behind the chunked data. */ +ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *_bufsz) +{ + size_t dst = 0, src = 0, bufsz = *_bufsz; + ssize_t ret = -2; /* incomplete */ + + while (1) { + switch (decoder->_state) { + case CHUNKED_IN_CHUNK_SIZE: + for (;; ++src) { + int v; + if (src == bufsz) + goto Exit; + if ((v = decode_hex(buf[src])) == -1) { + if (decoder->_hex_count == 0) { + ret = -1; + goto Exit; + } + break; + } + if (decoder->_hex_count == sizeof(size_t) * 2) { /* NOTE(review): presumably guards bytes_left_in_chunk against overflow by capping the digit count at what a size_t can hold; confirm the field type */ + ret = -1; + goto Exit; + } + decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v; + ++decoder->_hex_count; + } + decoder->_hex_count = 0; + decoder->_state = CHUNKED_IN_CHUNK_EXT; + /* fallthru */ + case CHUNKED_IN_CHUNK_EXT: + /* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */ + for (;; ++src) { + if (src == bufsz) + goto Exit; + if (buf[src] == '\012') + break; + } + ++src; + if (decoder->bytes_left_in_chunk == 0) { + if (decoder->consume_trailer) { + decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD; + break; + } else { + goto Complete; + } + } + decoder->_state = CHUNKED_IN_CHUNK_DATA; + /* fallthru */ + case CHUNKED_IN_CHUNK_DATA: { + size_t avail = bufsz - src; + if 
(avail < decoder->bytes_left_in_chunk) { /* only part of the chunk is buffered: compact what is available and report incomplete */ + if (dst != src) + memmove(buf + dst, buf + src, avail); + src += avail; + dst += avail; + decoder->bytes_left_in_chunk -= avail; + goto Exit; + } + if (dst != src) + memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk); + src += decoder->bytes_left_in_chunk; + dst += decoder->bytes_left_in_chunk; + decoder->bytes_left_in_chunk = 0; + decoder->_state = CHUNKED_IN_CHUNK_CRLF; + } + /* fallthru */ + case CHUNKED_IN_CHUNK_CRLF: /* skip any CR bytes, then require LF after the chunk data */ + for (;; ++src) { + if (src == bufsz) + goto Exit; + if (buf[src] != '\015') + break; + } + if (buf[src] != '\012') { + ret = -1; + goto Exit; + } + ++src; + decoder->_state = CHUNKED_IN_CHUNK_SIZE; + break; + case CHUNKED_IN_TRAILERS_LINE_HEAD: /* a line that is empty apart from CR bytes ends the trailers */ + for (;; ++src) { + if (src == bufsz) + goto Exit; + if (buf[src] != '\015') + break; + } + if (buf[src++] == '\012') + goto Complete; + decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE; + /* fallthru */ + case CHUNKED_IN_TRAILERS_LINE_MIDDLE: + for (;; ++src) { + if (src == bufsz) + goto Exit; + if (buf[src] == '\012') + break; + } + ++src; + decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD; + break; + default: + assert(!"decoder is corrupt"); + } + } +/* Complete: ret becomes the count of bytes in buf that follow the chunked data. Exit: the unconsumed bytes buf[src..bufsz) are moved to directly follow the decoded output and *_bufsz reports the decoded length dst. */ +Complete: + ret = bufsz - src; +Exit: + if (dst != src) + memmove(buf + dst, buf + src, bufsz - src); + *_bufsz = dst; + return ret; +} +/* Return nonzero when decoder->_state is CHUNKED_IN_CHUNK_DATA, i.e. the decoder stopped in the middle of chunk data. */ +int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder) +{ + return decoder->_state == CHUNKED_IN_CHUNK_DATA; +} + +#undef CHECK_EOF +#undef EXPECT_CHAR +#undef ADVANCE_TOKEN diff --git a/trie/include/trie.h b/trie/include/trie.h index c32dadc..322ad53 100644 --- a/trie/include/trie.h +++ b/trie/include/trie.h @@ -19,9 +19,9 @@ #include #include -static const char charset[] = +/* NOTE(review): placing static after const is marked obsolescent by the C standard, and GCC -Wextra (-Wold-style-declaration) warns about it; the removed line used static const. Same applies to charset_len below. */ const static char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; -static const size_t charset_len = sizeof(charset) - 1; +const static size_t charset_len = sizeof(charset) - 1; // Length of randomly generated keys #define RANDOM_KEY_LENGTH_SHORT 4 
diff --git a/trie/src/trie_node.c b/trie/src/trie_node.c index bb1be5c..0da5163 100644 --- a/trie/src/trie_node.c +++ b/trie/src/trie_node.c @@ -2,46 +2,7 @@ #include #include -#include "trie.h" - -/** - * Represents a node of the binary tree contained within each non-leaf - * TrieNode. - */ -typedef struct tinode { - struct tinode *left; - struct tinode *right; - struct tnode *next; - char key; -} TrieInnerNode; - -/** - * Represents a node inside a Trie. A node can be in one of three states: - * - Internal node: a node that's part of a path to a leaf node. This node will - * always have a size greater than one, and an initialized root. - * - Leaf: a node solely used to represent a string ending there. Its size is 0, - * its ptr is unitialized and represents is true. - * - Full leaf: a leaf node that contains a string. This occurs when a string is - * added whose path is not fully in the tree yet, causing its remaining suffix - * to be stored as a single node. Its size will be zero, represents its true, - * and its string pointer is initialized. - */ -typedef struct tnode { - Entry *entry; - - TrieInnerNode *tree; - uint8_t tree_size; - - // Skips are at most TRIE_MAX_SKIP_SIZE characters, and are stored in the - // nodes - char string[TRIE_MAX_SKIP_SIZE]; - uint8_t string_len; - - bool represents; -} TrieNode; - -// Required for recursively freeing tree structure -void tnode_free(TrieNode *node); +#include "trie_node.h" /** * Allocate and initialize a new TrieInnerNode representing a given diff --git a/trie/src/trie_node.h b/trie/src/trie_node.h new file mode 100644 index 0000000..229f5a6 --- /dev/null +++ b/trie/src/trie_node.h @@ -0,0 +1,53 @@ +#include + +#include "trie.h" + +/** + * Represents a node of the binary tree contained within each non-leaf + * TrieNode. + */ +typedef struct tinode { /* left and right are the child links of the binary tree; key is the character of this node; next links to a TrieNode, presumably the trie node reached through key (confirm against tnode_search) */ + struct tinode *left; + struct tinode *right; + struct tnode *next; + char key; +} TrieInnerNode; +/* NOTE(review): no include guard is visible anywhere in this new header, so including it twice in one translation unit would redefine struct tinode and struct tnode. */ +/** + * Represents a node inside a Trie. 
A node can be in one of three states: + * - Internal node: a node that's part of a path to a leaf node. This node will + * always have a size greater than one, and an initialized root. + * - Leaf: a node solely used to represent a string ending there. Its size is 0, + * its ptr is uninitialized and represents is true. + * - Full leaf: a leaf node that contains a string. This occurs when a string is + * added whose path is not fully in the tree yet, causing its remaining suffix + * to be stored as a single node. Its size will be zero, represents is true, + * and its string pointer is initialized. + */ +typedef struct tnode { + Entry *entry; + + TrieInnerNode *tree; + uint8_t tree_size; + + // Skips are at most TRIE_MAX_SKIP_SIZE characters, and are stored in the + // nodes + char string[TRIE_MAX_SKIP_SIZE]; + uint8_t string_len; + + bool represents; +} TrieNode; + +TrieInnerNode *tinode_init(char c); + +TrieNode *tnode_init(); + +void tinode_free_cascade(TrieInnerNode *node); + +void tnode_free(TrieNode *node); + +TrieNode **tnode_search(TrieNode *node, const char c, bool create); + +void tinode_remove(TrieInnerNode *node, const char c); + +void tnode_remove(TrieNode *node, const char c);