feat: switch to C-based compilation, start of simple event loop

c-web-server
Jef Roosens 2023-05-24 09:03:22 +02:00
parent 01eb5ece55
commit 11cd537759
Signed by: Jef Roosens
GPG Key ID: B75D4F293C7052DB
7 changed files with 1238 additions and 94 deletions

143
Makefile
View File

@ -1,69 +1,108 @@
# =====CONFIG===== # https://spin.atomicobject.com/2016/08/26/makefile-c-projects/ was a great
BUILD_DIR := ./build # base for this Makefile
SRC_DIRS := ./src ./trie/src
INCLUDE_DIRS := ./trie/include
TEST_DIR := test
CORES != nproc
SRCS := $(shell find $(SRC_DIRS) $(INCLUDE_DIRS) \( -iname '*.cpp' -or -iname '*.c' -or -iname '*.h' \)) BIN_FILENAME ?= lander
BUILD_DIR ?= build
SRC_DIR ?= src
TEST_DIR ?= test
INC_DIRS ?= include
BIN := $(BUILD_DIR)/$(BIN_FILENAME)
SRCS != find '$(SRC_DIR)' -iname '*.c'
SRCS_H != find $(INC_DIRS) -iname '*.h'
SRCS_H_INTERNAL != find $(SRC_DIR) -iname '*.h'
SRCS_TEST != find '$(TEST_DIR)' -iname '*.c'
OBJS := $(SRCS:%=$(BUILD_DIR)/%.o)
OBJS_TEST := $(SRCS_TEST:%=$(BUILD_DIR)/%.o)
DEPS := $(SRCS:%=$(BUILD_DIR)/%.d) $(SRCS_TEST:%=$(BUILD_DIR)/%.d)
BINS_TEST := $(OBJS_TEST:%.c.o=%)
TARGETS_TEST := $(BINS_TEST:%=test-%)
TARGETS_MEM_TEST := $(BINS_TEST:%=test-mem-%)
INC_FLAGS := $(addprefix -I,$(INC_DIRS))
# -MMD: generate a .d file for every source file. This file can be imported by
# make and makes make aware that a header file has been changed, ensuring an
# object file is also recompiled if only a header is changed.
# -MP: generate a dummy target for every header file (according to the docs it
# prevents some errors when removing header files)
CFLAGS ?= -MMD -MP -g
INTERNALCFLAGS := $(INC_FLAGS) $(CFLAGS) -Wall -Wextra
.PHONY: all
all: bin
# =====RECIPES===== # =====COMPILATION=====
all: build # Utility used by the CI to lint
.PHONY: objs
objs: $(OBJS)
.PHONY: cmake .PHONY: bin
cmake: $(BUILD_DIR)/Debug/Makefile bin: $(BIN)
$(BUILD_DIR)/Debug/Makefile: CMakeLists.txt $(BIN): $(OBJS)
@ cmake -B'$(BUILD_DIR)/Debug' -DCMAKE_BUILD_TYPE=Debug -DCMAKE_EXPORT_COMPILE_COMMANDS=1 . $(CC) $(INTERNALCFLAGS) -o $@ $^
@ ln -sf '$(BUILD_DIR)/Debug/compile_commands.json' compile_commands.json
.PHONY: cmake-test $(BUILD_DIR)/$(SRC_DIR)/%.c.o: $(SRC_DIR)/%.c
cmake-test: $(BUILD_DIR)/Test/Makefile mkdir -p $(dir $@)
$(BUILD_DIR)/Test/Makefile: CMakeLists.txt $(CC) $(INTERNALCFLAGS) -c $< -o $@
@ cmake -B'$(BUILD_DIR)/Test' -DCMAKE_BUILD_TYPE=Test .
.PHONY: build
build: cmake # =====TESTING=====
@ make -C '$(BUILD_DIR)/Debug' .PHONY: test
test: $(TARGETS_TEST)
.PHONY: test-mem
test-mem: $(TARGETS_MEM_TEST)
.PHONY: $(TARGETS_TEST)
$(TARGETS_TEST): test-%: %
./$^
.PHONY: $(TARGETS_MEM_TEST)
$(TARGETS_MEM_TEST): test-mem-%: %
valgrind --tool=memcheck --error-exitcode=1 --track-origins=yes --leak-check=full ./$^
.PHONY: build-test .PHONY: build-test
build-test: cmake-test build-test: $(BINS_TEST)
@ make -C '$(BUILD_DIR)/Test'
.PHONY: cmake-release $(BINS_TEST): %: %.c.o $(BIN)
cmake-release: $(BUILD_DIR)/Release/Makefile $(CC) \
$(BUILD_DIR)/Release/Makefile: CMakeLists.txt $^ -o $@
@ cmake -B'$(BUILD_DIR)/Release' -DCMAKE_BUILD_TYPE=Release .
.PHONY: prod # Along with the include directory, each test includes $(TEST_DIR) (which
prod: cmake-release # contains the acutest.h header file), and the src directory of the module it's
@ make -C '$(BUILD_DIR)/Release' # testing. This allows tests to access internal methods, which aren't publicly
# exposed.
$(BUILD_DIR)/$(TEST_DIR)/%.c.o: $(TEST_DIR)/%.c
mkdir -p $(dir $@)
$(CC) $(INTERNALCFLAGS) -I$(TEST_DIR) \
-I$(dir $(@:$(BUILD_DIR)/$(TEST_DIR)/%=$(SRC_DIR)/%)) \
-c $< -o $@
.PHONY: run # =====MAINTENANCE=====
run: build .PHONY: lint
@ LANDER_DATA_DIR=data LANDER_BASE_URL=http://localhost:18080/ LANDER_API_KEY=test ./build/Debug/lander lint:
clang-format -n --Werror $(SRCS) $(SRCS_H) $(SRCS_H_INTERNAL)
.PHONY: valgrind .PHONY: fmt
valgrind: build fmt:
@ LANDER_DATA_DIR=data LANDER_BASE_URL=http://localhost:18080/ LANDER_API_KEY=test \ clang-format -i $(SRCS) $(SRCS_H) $(SRCS_H_INTERNAL)
valgrind --tool=memcheck --error-exitcode=1 --track-origins=yes --leak-check=full ./build/Debug/lander
.PHONY: gdb
gdb: build
@ LANDER_DATA_DIR=data LANDER_BASE_URL=http://localhost:18080/ LANDER_API_KEY=test gdb --args ./build/Debug/lander
.PHONY: test
test: build-test
@ $(MAKE) -C '$(BUILD_DIR)/Test' test ARGS=-j$(CORES) CTEST_OUTPUT_ON_FAILURE=1
.PHONY: clean .PHONY: clean
clean: clean:
@ rm -rf '$(BUILD_DIR)' compile_commands.json rm -rf $(BUILD_DIR)
.PHONY: lint
lint:
@ clang-format --Werror -n $(SRCS)
.PHONY: format .PHONY: bear
format: bear: clean
@ clang-format -i $(SRCS) bear -- make
bear --append -- make build-test
# Make make aware of the .d files
-include $(DEPS)

View File

@ -0,0 +1,87 @@
/*
* Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase,
* Shigeo Mitsunari
*
* The software is licensed under either the MIT License (below) or the Perl
* license.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef picohttpparser_h
#define picohttpparser_h
#include <sys/types.h>
#ifdef _MSC_VER
#define ssize_t intptr_t
#endif
#ifdef __cplusplus
extern "C" {
#endif
/* contains name and value of a header (name == NULL if it is a continuing
 * line of a multiline header) */
struct phr_header {
const char *name;
size_t name_len;
const char *value;
size_t value_len;
};
/* returns number of bytes consumed if successful, -2 if request is partial,
* -1 if failed */
int phr_parse_request(const char *buf, size_t len, const char **method, size_t *method_len, const char **path, size_t *path_len,
int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len);
/* ditto */
int phr_parse_response(const char *_buf, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len,
struct phr_header *headers, size_t *num_headers, size_t last_len);
/* ditto */
int phr_parse_headers(const char *buf, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len);
/* should be zero-filled before start */
struct phr_chunked_decoder {
size_t bytes_left_in_chunk; /* number of bytes left in current chunk */
char consume_trailer; /* if trailing headers should be consumed */
char _hex_count;
char _state;
};
/* the function rewrites the buffer given as (buf, bufsz) removing the chunked-
* encoding headers. When the function returns without an error, bufsz is
* updated to the length of the decoded data available. Applications should
* repeatedly call the function while it returns -2 (incomplete) every time
* supplying newly arrived data. If the end of the chunked-encoded data is
* found, the function returns a non-negative number indicating the number of
* octets left undecoded, that starts from the offset returned by `*bufsz`.
* Returns -1 on error.
*/
ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *bufsz);
/* returns if the chunked decoder is in middle of chunked data */
int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder);
#ifdef __cplusplus
}
#endif
#endif

339
src/event_loop.c 100644
View File

@ -0,0 +1,339 @@
#include <stdint.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <fcntl.h>
#include <stdio.h>
#include <poll.h>
#include <unistd.h>
#include <netinet/in.h>
#include <errno.h>
#include <stdbool.h>
#include <string.h>
#include "picohttpparser.h"
#define MAX_MSG_SIZE 1024
const char http_200_ok[] =
"HTTP/1.1 200 OK\n"
"Connection: close\n";
/**
 * Put the given file descriptor into non-blocking mode.
 *
 * The current status flags are read first so that only O_NONBLOCK is added.
 * If reading or writing the flags fails, the error is reported on stderr and
 * the descriptor is left as it was; the function has no way of returning an
 * error to its caller.
 */
static void fd_set_nb(int fd) {
    int flags = fcntl(fd, F_GETFL, 0);

    // fcntl signals failure with -1; OR-ing O_NONBLOCK into that value would
    // hand a garbage flag set to F_SETFL
    if (flags == -1) {
        perror("fcntl(F_GETFL)");
        return;
    }

    if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) == -1) {
        perror("fcntl(F_SETFL)");
    }
}
// States a connection can be in; stored in conn.state.
enum {
// The connection is being read from (polled for POLLIN)
STATE_REQ = 0,
// The connection is being written to (polled for POLLOUT)
STATE_RES = 1,
// The connection is finished; the main loop closes and frees it
STATE_END = 2,
};
// State kept for a single client connection.
typedef struct conn {
// File descriptor of the client socket
int fd;
// One of the STATE_* values
uint32_t state;
// buffer for reading
// Number of bytes currently stored in rbuf; new data is appended after them
size_t rbuf_size;
uint8_t rbuf[MAX_MSG_SIZE];
// buffer for writing
// Number of bytes in wbuf that should be sent
size_t wbuf_size;
// Number of bytes of wbuf that have already been written to the socket
size_t wbuf_sent;
uint8_t wbuf[MAX_MSG_SIZE];
// Optional callback; when not NULL it's invoked at the start of
// try_one_request
void (*process_func) (struct conn *);
} conn;
// Collection of all connections known to the event loop.
typedef struct event_loop {
// Array of connection pointers, indexed by the connection's file descriptor
conn **connections;
// Number of slots in the connections array
size_t connection_count;
} event_loop;
/**
 * Register a connection with the event loop, storing it in the slot indexed by
 * its file descriptor and growing the slot array if that index doesn't exist
 * yet.
 *
 * Slots created while growing are set to NULL, so code walking the array can
 * tell unused slots from live connections. If the array can't be grown, the
 * connection is closed and freed instead of being stored.
 */
void event_loop_put(event_loop *loop, conn *c) {
    size_t slot = (size_t)c->fd;

    if (slot >= loop->connection_count) {
        size_t grown_count = slot + 1;
        // The array stores pointers, so it's sized in units of conn *
        conn **grown = realloc(loop->connections, grown_count * sizeof(conn *));

        if (grown == NULL) {
            // The existing array is still valid; only this connection is dropped
            fprintf(stderr, "Failed to grow connection table for fd %i\n", c->fd);
            close(c->fd);
            free(c);
            return;
        }

        // realloc doesn't initialize the memory it adds
        for (size_t i = loop->connection_count; i < grown_count; i++) {
            grown[i] = NULL;
        }

        loop->connections = grown;
        loop->connection_count = grown_count;
    }

    printf("Add fd %i\n", c->fd);
    loop->connections[slot] = c;
}
/**
 * Accept one pending connection on the listening socket `fd` and register it
 * with the event loop in the STATE_REQ state.
 *
 * Returns 0 when a connection was accepted and handed to the loop, or -1 if
 * accept() failed or no memory was available for the connection.
 */
int event_loop_accept(event_loop *loop, int fd) {
    struct sockaddr_in peer;
    socklen_t peer_len = sizeof(peer);

    int client_fd = accept(fd, (struct sockaddr *)&peer, &peer_len);

    if (client_fd < 0) {
        printf("accept() error");
        return -1;
    }

    // Connections are always serviced in non-blocking mode
    fd_set_nb(client_fd);

    // Zeroed allocation, so both buffers start out empty
    conn *accepted = calloc(1, sizeof(conn));

    if (accepted == NULL) {
        close(client_fd);
        return -1;
    }

    accepted->state = STATE_REQ;
    accepted->fd = client_fd;

    event_loop_put(loop, accepted);

    return 0;
}
/**
 * Write the unsent part of the write buffer to the connection. This performs
 * at most one successful write syscall.
 *
 * Returns whether the function should be retried immediately, which is only
 * the case when part of the buffer is still unsent after a successful write.
 */
bool conn_write_to_fd(conn *c) {
    const size_t pending = c->wbuf_size - c->wbuf_sent;
    ssize_t written;

    // Retry for as long as the call gets interrupted by a signal
    for (;;) {
        written = write(c->fd, c->wbuf + c->wbuf_sent, pending);

        if (written >= 0 || errno != EINTR) {
            break;
        }
    }

    if (written < 0) {
        // EAGAIN isn't an error, it means "try again later"; anything else
        // ends the connection
        if (errno != EAGAIN) {
            c->state = STATE_END;
        }

        return false;
    }

    c->wbuf_sent += (size_t)written;

    // Part of the buffer is still unsent, so another write can be attempted
    if (c->wbuf_sent != c->wbuf_size) {
        return true;
    }

    // The entire buffer has been written
    c->state = STATE_END;

    return false;
}
/**
 * Try to parse the contents of the read buffer as an HTTP request.
 *
 * If the connection has a process_func, it's invoked first. Afterwards the
 * buffer is handed to phr_parse_request, which returns the number of bytes
 * consumed on success, -2 if the request is still incomplete, and -1 if it's
 * malformed. Only a malformed request changes the connection: its state is
 * set to STATE_END. The parsed request itself isn't used yet.
 */
void try_one_request(conn *c) {
    if (c->process_func != NULL) {
        c->process_func(c);
    }

    // The parser hands back pointers into the (read-only) input buffer, so
    // these have to be pointers to const
    const char *method, *path;
    size_t method_len, path_len;
    int minor_version;

    struct phr_header headers[16];
    // On input this is the capacity of the headers array
    size_t num_headers = sizeof(headers) / sizeof(headers[0]);

    int res = phr_parse_request((const char *)c->rbuf, c->rbuf_size, &method, &method_len, &path, &path_len,
                                &minor_version, headers, &num_headers, 0);

    // A complete (> 0) or partial (-2) request leaves the connection as is
    if (res == -1) {
        c->state = STATE_END;
    }
}
/**
 * Read new data into the read buffer. This command performs at most one
 * successful read syscall.
 *
 * The connection is moved to STATE_END when the peer closes its side of the
 * connection, when the read fails, or when the read buffer is full, as no
 * further progress can be made in any of these cases.
 *
 * Returns whether the function should be retried immediately or not.
 */
bool conn_read_from_fd(conn *c) {
    size_t cap = MAX_MSG_SIZE - c->rbuf_size;

    // A read of zero bytes returns 0 without ever making progress, so a full
    // buffer means the request is too large to handle
    if (cap == 0) {
        c->state = STATE_END;
        return false;
    }

    ssize_t res;

    // Try to read at most cap bytes from the file descriptor
    do {
        res = read(c->fd, &c->rbuf[c->rbuf_size], cap);
    } while (res < 0 && errno == EINTR);

    if (res < 0) {
        // EAGAIN means we try again later; any other error means the read
        // errored out
        if (errno != EAGAIN && errno != EWOULDBLOCK) {
            c->state = STATE_END;
        }

        return false;
    }

    // An output of zero means we've reached the end of the input. Without
    // this, every following read would return 0 again and the caller would
    // loop forever.
    if (res == 0) {
        c->state = STATE_END;
        return false;
    }

    c->rbuf_size += (size_t)res;
    printf("rbuf size: %zu", c->rbuf_size);

    try_one_request(c);

    // We can keep reading as long as we're in request mode
    return c->state == STATE_REQ;
}
/**
 * Handle a connection in the STATE_RES state by writing to it for as long as
 * conn_write_to_fd asks to be retried.
 */
void conn_state_res(conn *c) {
    bool retry;

    do {
        retry = conn_write_to_fd(c);
    } while (retry);
}
/**
 * Handle a connection in the STATE_REQ state by reading from it for as long as
 * conn_read_from_fd asks to be retried.
 */
void conn_state_req(conn *c) {
    bool retry;

    do {
        retry = conn_read_from_fd(c);
    } while (retry);
}
/* void conn_state_process(conn *c) { */
/* printf("bruh"); */
/* memcpy(c->wbuf, c->rbuf, c->rbuf_size); */
/* c->wbuf_size = c->rbuf_size; */
/* c->state = STATE_WRITE; */
/* } */
/**
 * Perform the I/O matching the connection's current state: reading for
 * STATE_REQ, writing for STATE_RES. The contents of the read buffer are
 * printed beforehand and the resulting state afterwards, for debugging.
 */
static void connection_io(conn *c) {
    // Print using an explicit length rather than writing a terminator into the
    // buffer: with an empty buffer there's no valid position for one, and
    // otherwise it would overwrite the last byte received before it's parsed
    printf("%.*s\n", (int)c->rbuf_size, (const char *)c->rbuf);

    switch (c->state) {
        case STATE_REQ:
            conn_state_req(c);
            break;
        case STATE_RES:
            conn_state_res(c);
            break;
        default:
            // Nothing to do for any other state
            break;
    }

    printf("%i\n", (int)c->state);
}
int main() {
setvbuf(stdout, NULL, _IONBF, 0);
int fd = socket(AF_INET, SOCK_STREAM, 0);
if (fd < 0) {
return -1;
/* die("socket()"); */
}
int val = 1;
setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val));
// bind
struct sockaddr_in addr = {};
addr.sin_family = AF_INET;
addr.sin_port = ntohs(8000);
addr.sin_addr.s_addr = ntohl(0); // wildcard address 0.0.0.0
int rv = bind(fd, (const struct sockaddr *)&addr, sizeof(addr));
if (rv) {
/* die("bind()"); */
return -1;
}
// listen
rv = listen(fd, SOMAXCONN);
if (rv) {
/* die("listen()"); */
return -1;
}
// set the listen fd to nonblocking mode
fd_set_nb(fd);
event_loop *loop = calloc(sizeof(event_loop), 1);
loop->connections = calloc(sizeof(conn), 1);
loop->connection_count = 1;
struct pollfd *poll_args = calloc(sizeof(struct pollfd), 32);
size_t poll_args_count;
// for convenience, the listening fd is put in the first position
struct pollfd pfd = {fd, POLLIN, 0};
poll_args[0] = pfd;
conn *c;
int events;
while (1) {
poll_args_count = 1;
// connection fds
for (size_t i = 0; i < loop->connection_count; i++) {
c = loop->connections[i];
if (!c) {
continue;
}
events = (c->state == STATE_REQ) ? POLLIN : POLLOUT;
events |= POLLERR;
struct pollfd pfd = {c->fd, events, 0};
poll_args[poll_args_count] = pfd;
poll_args_count++;
// We do at most 32 connections at a time for now
if (poll_args_count == 32)
break;
}
// poll for active fds
// the timeout argument doesn't matter here
int rv = poll(poll_args, (nfds_t)poll_args_count, 1000);
if (rv < 0) {
/* die("poll"); */
return -1;
}
// process active connections
for (size_t i = 1; i < poll_args_count; ++i) {
if (poll_args[i].revents) {
conn *c = loop->connections[poll_args[i].fd];
connection_io(c);
if (c->state == STATE_END) {
// client closed normally, or something bad happened.
// destroy this connection
loop->connections[c->fd] = NULL;
close(c->fd);
free(c);
}
}
}
// try to accept a new connection if the listening fd is active
if (poll_args[0].revents) {
(void)event_loop_accept(loop, fd);
}
}
return 0;
}

View File

@ -0,0 +1,665 @@
/*
* Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase,
* Shigeo Mitsunari
*
* The software is licensed under either the MIT License (below) or the Perl
* license.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <assert.h>
#include <stddef.h>
#include <string.h>
#ifdef __SSE4_2__
#ifdef _MSC_VER
#include <nmmintrin.h>
#else
#include <x86intrin.h>
#endif
#endif
#include "picohttpparser.h"
#if __GNUC__ >= 3
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#else
#define likely(x) (x)
#define unlikely(x) (x)
#endif
#ifdef _MSC_VER
#define ALIGNED(n) _declspec(align(n))
#else
#define ALIGNED(n) __attribute__((aligned(n)))
#endif
#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u)
#define CHECK_EOF() \
if (buf == buf_end) { \
*ret = -2; \
return NULL; \
}
#define EXPECT_CHAR_NO_CHECK(ch) \
if (*buf++ != ch) { \
*ret = -1; \
return NULL; \
}
#define EXPECT_CHAR(ch) \
CHECK_EOF(); \
EXPECT_CHAR_NO_CHECK(ch);
#define ADVANCE_TOKEN(tok, toklen) \
do { \
const char *tok_start = buf; \
static const char ALIGNED(16) ranges2[16] = "\000\040\177\177"; \
int found2; \
buf = findchar_fast(buf, buf_end, ranges2, 4, &found2); \
if (!found2) { \
CHECK_EOF(); \
} \
while (1) { \
if (*buf == ' ') { \
break; \
} else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { \
if ((unsigned char)*buf < '\040' || *buf == '\177') { \
*ret = -1; \
return NULL; \
} \
} \
++buf; \
CHECK_EOF(); \
} \
tok = tok_start; \
toklen = buf - tok_start; \
} while (0)
static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
"\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
"\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
"\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
/* Looks for the first byte in [buf, buf_end) that falls within one of the byte ranges given in `ranges` (`ranges_size` bytes,
 * interpreted as pairs of range bounds by the _SIDD_CMP_RANGES comparison). Only done when __SSE4_2__ is set, and then only for
 * whole 16-byte blocks; any remaining tail is left for the caller to examine.
 *
 * Returns the position of the match with *found set to 1, or the position at which scanning stopped with *found set to 0.
 * Without SSE4.2, `buf` is returned unchanged with *found set to 0. */
static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found)
{
*found = 0;
#if __SSE4_2__
if (likely(buf_end - buf >= 16)) {
__m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges);
/* number of bytes that are part of a whole 16-byte block */
size_t left = (buf_end - buf) & ~15;
do {
__m128i b16 = _mm_loadu_si128((const __m128i *)buf);
int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
/* a result other than 16 is the offset of the first matching byte within the block */
if (unlikely(r != 16)) {
buf += r;
*found = 1;
break;
}
buf += 16;
left -= 16;
} while (likely(left != 0));
}
#else
/* suppress unused parameter warning */
(void)buf_end;
(void)ranges;
(void)ranges_size;
#endif
return buf;
}
static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len, int *ret)
{
const char *token_start = buf;
#ifdef __SSE4_2__
static const char ALIGNED(16) ranges1[16] = "\0\010" /* allow HT */
"\012\037" /* allow SP and up to but not including DEL */
"\177\177"; /* allow chars w. MSB set */
int found;
buf = findchar_fast(buf, buf_end, ranges1, 6, &found);
if (found)
goto FOUND_CTL;
#else
/* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */
while (likely(buf_end - buf >= 8)) {
#define DOIT() \
do { \
if (unlikely(!IS_PRINTABLE_ASCII(*buf))) \
goto NonPrintable; \
++buf; \
} while (0)
DOIT();
DOIT();
DOIT();
DOIT();
DOIT();
DOIT();
DOIT();
DOIT();
#undef DOIT
continue;
NonPrintable:
if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
goto FOUND_CTL;
}
++buf;
}
#endif
for (;; ++buf) {
CHECK_EOF();
if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {
if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
goto FOUND_CTL;
}
}
}
FOUND_CTL:
if (likely(*buf == '\015')) {
++buf;
EXPECT_CHAR('\012');
*token_len = buf - 2 - token_start;
} else if (*buf == '\012') {
*token_len = buf - token_start;
++buf;
} else {
*ret = -1;
return NULL;
}
*token = token_start;
return buf;
}
/* Checks whether the buffer contains two consecutive line endings (each either CRLF or a lone LF). Scanning starts 3 bytes
 * before offset `last_len` (or at the start of the buffer if last_len < 3).
 *
 * Returns the position right after the second line ending. Otherwise returns NULL with *ret set to -2 if the end of the
 * buffer was reached first, or to -1 if a CR wasn't followed by an LF. */
static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret)
{
/* number of consecutive line endings seen so far */
int ret_cnt = 0;
buf = last_len < 3 ? buf : buf + last_len - 3;
while (1) {
CHECK_EOF();
if (*buf == '\015') {
++buf;
/* NOTE(review): EXPECT_CHAR performs this same end-of-buffer check itself */
CHECK_EOF();
EXPECT_CHAR('\012');
++ret_cnt;
} else if (*buf == '\012') {
++buf;
++ret_cnt;
} else {
++buf;
ret_cnt = 0;
}
if (ret_cnt == 2) {
return buf;
}
}
/* NOTE(review): not reachable, the loop above is only ever left through a return */
*ret = -2;
return NULL;
}
#define PARSE_INT(valp_, mul_) \
if (*buf < '0' || '9' < *buf) { \
buf++; \
*ret = -1; \
return NULL; \
} \
*(valp_) = (mul_) * (*buf++ - '0');
#define PARSE_INT_3(valp_) \
do { \
int res_ = 0; \
PARSE_INT(&res_, 100) \
*valp_ = res_; \
PARSE_INT(&res_, 10) \
*valp_ += res_; \
PARSE_INT(&res_, 1) \
*valp_ += res_; \
} while (0)
/* returned pointer is always within [buf, buf_end), or null */
static const char *parse_token(const char *buf, const char *buf_end, const char **token, size_t *token_len, char next_char,
int *ret)
{
/* We use pcmpestri to detect non-token characters. This instruction can take no more than eight character ranges (8*2*8=128
* bits that is the size of a SSE register). Due to this restriction, characters `|` and `~` are handled in the slow loop. */
static const char ALIGNED(16) ranges[] = "\x00 " /* control chars and up to SP */
"\"\"" /* 0x22 */
"()" /* 0x28,0x29 */
",," /* 0x2c */
"//" /* 0x2f */
":@" /* 0x3a-0x40 */
"[]" /* 0x5b-0x5d */
"{\xff"; /* 0x7b-0xff */
const char *buf_start = buf;
int found;
buf = findchar_fast(buf, buf_end, ranges, sizeof(ranges) - 1, &found);
if (!found) {
CHECK_EOF();
}
while (1) {
if (*buf == next_char) {
break;
} else if (!token_char_map[(unsigned char)*buf]) {
*ret = -1;
return NULL;
}
++buf;
CHECK_EOF();
}
*token = buf_start;
*token_len = buf - buf_start;
return buf;
}
/* Parses "HTTP/1." followed by a single digit, which is stored in *minor_version.
 *
 * If fewer than 9 bytes are available, NULL is returned with *ret set to -2. If the input doesn't match, NULL is returned with
 * *ret set to -1.
 *
 * returned pointer is always within [buf, buf_end), or null */
static const char *parse_http_version(const char *buf, const char *buf_end, int *minor_version, int *ret)
{
/* we want at least [HTTP/1.<two chars>] to try to parse */
if (buf_end - buf < 9) {
*ret = -2;
return NULL;
}
/* the length check above is what makes the unchecked reads below safe */
EXPECT_CHAR_NO_CHECK('H');
EXPECT_CHAR_NO_CHECK('T');
EXPECT_CHAR_NO_CHECK('T');
EXPECT_CHAR_NO_CHECK('P');
EXPECT_CHAR_NO_CHECK('/');
EXPECT_CHAR_NO_CHECK('1');
EXPECT_CHAR_NO_CHECK('.');
PARSE_INT(minor_version, 1);
return buf;
}
static const char *parse_headers(const char *buf, const char *buf_end, struct phr_header *headers, size_t *num_headers,
size_t max_headers, int *ret)
{
for (;; ++*num_headers) {
CHECK_EOF();
if (*buf == '\015') {
++buf;
EXPECT_CHAR('\012');
break;
} else if (*buf == '\012') {
++buf;
break;
}
if (*num_headers == max_headers) {
*ret = -1;
return NULL;
}
if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) {
/* parsing name, but do not discard SP before colon, see
* http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
if ((buf = parse_token(buf, buf_end, &headers[*num_headers].name, &headers[*num_headers].name_len, ':', ret)) == NULL) {
return NULL;
}
if (headers[*num_headers].name_len == 0) {
*ret = -1;
return NULL;
}
++buf;
for (;; ++buf) {
CHECK_EOF();
if (!(*buf == ' ' || *buf == '\t')) {
break;
}
}
} else {
headers[*num_headers].name = NULL;
headers[*num_headers].name_len = 0;
}
const char *value;
size_t value_len;
if ((buf = get_token_to_eol(buf, buf_end, &value, &value_len, ret)) == NULL) {
return NULL;
}
/* remove trailing SPs and HTABs */
const char *value_end = value + value_len;
for (; value_end != value; --value_end) {
const char c = *(value_end - 1);
if (!(c == ' ' || c == '\t')) {
break;
}
}
headers[*num_headers].value = value;
headers[*num_headers].value_len = value_end - value;
}
return buf;
}
static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len, const char **path,
size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers,
size_t max_headers, int *ret)
{
/* skip first empty line (some clients add CRLF after POST content) */
CHECK_EOF();
if (*buf == '\015') {
++buf;
EXPECT_CHAR('\012');
} else if (*buf == '\012') {
++buf;
}
/* parse request line */
if ((buf = parse_token(buf, buf_end, method, method_len, ' ', ret)) == NULL) {
return NULL;
}
do {
++buf;
CHECK_EOF();
} while (*buf == ' ');
ADVANCE_TOKEN(*path, *path_len);
do {
++buf;
CHECK_EOF();
} while (*buf == ' ');
if (*method_len == 0 || *path_len == 0) {
*ret = -1;
return NULL;
}
if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
return NULL;
}
if (*buf == '\015') {
++buf;
EXPECT_CHAR('\012');
} else if (*buf == '\012') {
++buf;
} else {
*ret = -1;
return NULL;
}
return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
}
/* Parses an HTTP request (request line and headers) from the first `len` bytes of `buf_start`.
 *
 * On entry, *num_headers is the capacity of `headers`; on return it's the number of headers that were stored. All output
 * parameters are reset before parsing starts. The returned method, path and header pointers point into the input buffer.
 *
 * Returns the number of bytes consumed if successful, -2 if the request is partial, -1 if parsing failed. */
int phr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path,
size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len)
{
const char *buf = buf_start, *buf_end = buf_start + len;
/* remember the capacity, as *num_headers is reused as the output counter */
size_t max_headers = *num_headers;
int r;
*method = NULL;
*method_len = 0;
*path = NULL;
*path_len = 0;
*minor_version = -1;
*num_headers = 0;
/* if last_len != 0, check if the request is complete (a fast countermeasure
against slowloris) */
if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
return r;
}
if ((buf = parse_request(buf, buf_end, method, method_len, path, path_len, minor_version, headers, num_headers, max_headers,
&r)) == NULL) {
return r;
}
return (int)(buf - buf_start);
}
static const char *parse_response(const char *buf, const char *buf_end, int *minor_version, int *status, const char **msg,
size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t max_headers, int *ret)
{
/* parse "HTTP/1.x" */
if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
return NULL;
}
/* skip space */
if (*buf != ' ') {
*ret = -1;
return NULL;
}
do {
++buf;
CHECK_EOF();
} while (*buf == ' ');
/* parse status code, we want at least [:digit:][:digit:][:digit:]<other char> to try to parse */
if (buf_end - buf < 4) {
*ret = -2;
return NULL;
}
PARSE_INT_3(status);
/* get message including preceding space */
if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) {
return NULL;
}
if (*msg_len == 0) {
/* ok */
} else if (**msg == ' ') {
/* Remove preceding space. Successful return from `get_token_to_eol` guarantees that we would hit something other than SP
* before running past the end of the given buffer. */
do {
++*msg;
--*msg_len;
} while (**msg == ' ');
} else {
/* garbage found after status code */
*ret = -1;
return NULL;
}
return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
}
/* Parses an HTTP response (status line and headers) from the first `len` bytes of `buf_start`.
 *
 * On entry, *num_headers is the capacity of `headers`; on return it's the number of headers that were stored. All output
 * parameters are reset before parsing starts. The returned message and header pointers point into the input buffer.
 *
 * Returns the number of bytes consumed if successful, or the error code set by the parsing step that failed (-2 for incomplete
 * input, -1 for malformed input). */
int phr_parse_response(const char *buf_start, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len,
struct phr_header *headers, size_t *num_headers, size_t last_len)
{
const char *buf = buf_start, *buf_end = buf + len;
/* remember the capacity, as *num_headers is reused as the output counter */
size_t max_headers = *num_headers;
int r;
*minor_version = -1;
*status = 0;
*msg = NULL;
*msg_len = 0;
*num_headers = 0;
/* if last_len != 0, check if the response is complete (a fast countermeasure
against slowloris) */
if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
return r;
}
if ((buf = parse_response(buf, buf_end, minor_version, status, msg, msg_len, headers, num_headers, max_headers, &r)) == NULL) {
return r;
}
return (int)(buf - buf_start);
}
/* Parses a block of headers from the first `len` bytes of `buf_start`.
 *
 * On entry, *num_headers is the capacity of `headers`; on return it's the number of headers that were stored.
 *
 * Returns the number of bytes consumed if successful, or the error code set by the parsing step that failed (-2 for incomplete
 * input, -1 for malformed input). */
int phr_parse_headers(const char *buf_start, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len)
{
const char *buf = buf_start, *buf_end = buf + len;
/* remember the capacity, as *num_headers is reused as the output counter */
size_t max_headers = *num_headers;
int r;
*num_headers = 0;
/* if last_len != 0, check if the response is complete (a fast countermeasure
against slowloris) */
if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
return r;
}
if ((buf = parse_headers(buf, buf_end, headers, num_headers, max_headers, &r)) == NULL) {
return r;
}
return (int)(buf - buf_start);
}
/* States of the chunked decoder, stored in phr_chunked_decoder._state. The first state has the value 0, which is the state a
 * zero-filled decoder starts in. */
enum {
/* reading the hexadecimal chunk size */
CHUNKED_IN_CHUNK_SIZE,
/* skipping the rest of the chunk size line, up to and including the LF */
CHUNKED_IN_CHUNK_EXT,
/* copying the data of the current chunk */
CHUNKED_IN_CHUNK_DATA,
/* expecting the line ending that follows the chunk data */
CHUNKED_IN_CHUNK_CRLF,
/* at the start of a trailer line; an empty line completes the decoding */
CHUNKED_IN_TRAILERS_LINE_HEAD,
/* skipping the rest of a trailer line, up to and including the LF */
CHUNKED_IN_TRAILERS_LINE_MIDDLE
};
/* Converts a single hexadecimal digit (either case) to its numeric value, returning -1 for any other character. */
static int decode_hex(int ch)
{
    if (ch >= '0' && ch <= '9')
        return ch - '0';
    if (ch >= 'A' && ch <= 'F')
        return 0xa + (ch - 'A');
    if (ch >= 'a' && ch <= 'f')
        return 0xa + (ch - 'a');
    return -1;
}
ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *_bufsz)
{
size_t dst = 0, src = 0, bufsz = *_bufsz;
ssize_t ret = -2; /* incomplete */
while (1) {
switch (decoder->_state) {
case CHUNKED_IN_CHUNK_SIZE:
for (;; ++src) {
int v;
if (src == bufsz)
goto Exit;
if ((v = decode_hex(buf[src])) == -1) {
if (decoder->_hex_count == 0) {
ret = -1;
goto Exit;
}
break;
}
if (decoder->_hex_count == sizeof(size_t) * 2) {
ret = -1;
goto Exit;
}
decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v;
++decoder->_hex_count;
}
decoder->_hex_count = 0;
decoder->_state = CHUNKED_IN_CHUNK_EXT;
/* fallthru */
case CHUNKED_IN_CHUNK_EXT:
/* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */
for (;; ++src) {
if (src == bufsz)
goto Exit;
if (buf[src] == '\012')
break;
}
++src;
if (decoder->bytes_left_in_chunk == 0) {
if (decoder->consume_trailer) {
decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
break;
} else {
goto Complete;
}
}
decoder->_state = CHUNKED_IN_CHUNK_DATA;
/* fallthru */
case CHUNKED_IN_CHUNK_DATA: {
size_t avail = bufsz - src;
if (avail < decoder->bytes_left_in_chunk) {
if (dst != src)
memmove(buf + dst, buf + src, avail);
src += avail;
dst += avail;
decoder->bytes_left_in_chunk -= avail;
goto Exit;
}
if (dst != src)
memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk);
src += decoder->bytes_left_in_chunk;
dst += decoder->bytes_left_in_chunk;
decoder->bytes_left_in_chunk = 0;
decoder->_state = CHUNKED_IN_CHUNK_CRLF;
}
/* fallthru */
case CHUNKED_IN_CHUNK_CRLF:
for (;; ++src) {
if (src == bufsz)
goto Exit;
if (buf[src] != '\015')
break;
}
if (buf[src] != '\012') {
ret = -1;
goto Exit;
}
++src;
decoder->_state = CHUNKED_IN_CHUNK_SIZE;
break;
case CHUNKED_IN_TRAILERS_LINE_HEAD:
for (;; ++src) {
if (src == bufsz)
goto Exit;
if (buf[src] != '\015')
break;
}
if (buf[src++] == '\012')
goto Complete;
decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE;
/* fallthru */
case CHUNKED_IN_TRAILERS_LINE_MIDDLE:
for (;; ++src) {
if (src == bufsz)
goto Exit;
if (buf[src] == '\012')
break;
}
++src;
decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
break;
default:
assert(!"decoder is corrupt");
}
}
Complete:
ret = bufsz - src;
Exit:
if (dst != src)
memmove(buf + dst, buf + src, bufsz - src);
*_bufsz = dst;
return ret;
}
/* Returns 1 if the decoder is currently in the middle of the data of a chunk, 0 otherwise. */
int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder)
{
    if (decoder->_state != CHUNKED_IN_CHUNK_DATA) {
        return 0;
    }
    return 1;
}
#undef CHECK_EOF
#undef EXPECT_CHAR
#undef ADVANCE_TOKEN

View File

@ -19,9 +19,9 @@
#include <stddef.h> #include <stddef.h>
#include <string.h> #include <string.h>
static const char charset[] = const static char charset[] =
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
static const size_t charset_len = sizeof(charset) - 1; const static size_t charset_len = sizeof(charset) - 1;
// Length of randomly generated keys // Length of randomly generated keys
#define RANDOM_KEY_LENGTH_SHORT 4 #define RANDOM_KEY_LENGTH_SHORT 4

View File

@ -2,46 +2,7 @@
#include <stdint.h> #include <stdint.h>
#include <stdlib.h> #include <stdlib.h>
#include "trie.h" #include "trie_node.h"
/**
* Represents a node of the binary tree contained within each non-leaf
* TrieNode.
*/
typedef struct tinode {
struct tinode *left;
struct tinode *right;
struct tnode *next;
char key;
} TrieInnerNode;
/**
* Represents a node inside a Trie. A node can be in one of three states:
* - Internal node: a node that's part of a path to a leaf node. This node will
* always have a size greater than one, and an initialized root.
* - Leaf: a node solely used to represent a string ending there. Its size is 0,
* its ptr is unitialized and represents is true.
* - Full leaf: a leaf node that contains a string. This occurs when a string is
* added whose path is not fully in the tree yet, causing its remaining suffix
* to be stored as a single node. Its size will be zero, represents its true,
* and its string pointer is initialized.
*/
typedef struct tnode {
Entry *entry;
TrieInnerNode *tree;
uint8_t tree_size;
// Skips are at most TRIE_MAX_SKIP_SIZE characters, and are stored in the
// nodes
char string[TRIE_MAX_SKIP_SIZE];
uint8_t string_len;
bool represents;
} TrieNode;
// Required for recursively freeing tree structure
void tnode_free(TrieNode *node);
/** /**
* Allocate and initialize a new TrieInnerNode representing a given * Allocate and initialize a new TrieInnerNode representing a given

View File

@ -0,0 +1,53 @@
#include <stdint.h>
#include "trie.h"
/**
* Represents a node of the binary tree contained within each non-leaf
* TrieNode.
*/
typedef struct tinode {
struct tinode *left;
struct tinode *right;
struct tnode *next;
char key;
} TrieInnerNode;
/**
 * Represents a node inside a Trie. A node can be in one of three states:
 * - Internal node: a node that's part of a path to a leaf node. This node will
 *   always have a size greater than one, and an initialized root.
 * - Leaf: a node solely used to represent a string ending there. Its size is 0,
 *   its ptr is uninitialized and represents is true.
 * - Full leaf: a leaf node that contains a string. This occurs when a string is
 *   added whose path is not fully in the tree yet, causing its remaining suffix
 *   to be stored as a single node. Its size will be zero, represents is true,
 *   and its string pointer is initialized.
 */
typedef struct tnode {
Entry *entry;
TrieInnerNode *tree;
uint8_t tree_size;
// Skips are at most TRIE_MAX_SKIP_SIZE characters, and are stored in the
// nodes
char string[TRIE_MAX_SKIP_SIZE];
uint8_t string_len;
bool represents;
} TrieNode;
TrieInnerNode *tinode_init(char c);
TrieNode *tnode_init();
void tinode_free_cascade(TrieInnerNode *node);
void tnode_free(TrieNode *node);
TrieNode **tnode_search(TrieNode *node, const char c, bool create);
void tinode_remove(TrieInnerNode *node, const char c);
void tnode_remove(TrieNode *node, const char c);