Split off utf8 encoding
parent
25af8ba5fc
commit
010fb66cb6
|
@ -8,20 +8,18 @@ endif()
|
|||
set(SYSTEM_TYPE x64)
|
||||
|
||||
|
||||
# =====COMPILER=====
|
||||
# =====COMMON SETTINGS=====
|
||||
set(CMAKE_C_COMPILER "clang-11")
|
||||
set(CMAKE_C_STANDARD 11)
|
||||
|
||||
|
||||
project(stj VERSION 0.1)
|
||||
|
||||
|
||||
# =====COMPILE FLAGS=====
|
||||
add_definitions(-DVERSION="${CMAKE_PROJECT_VERSION}" -D_XOPEN_SOURCE=600)
|
||||
|
||||
# Debug flags
|
||||
# -g flag gets auto-added by CMake for the debug build
|
||||
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -g -Weverything -O0 -fsanitize=address -fno-omit-frame-pointer")
|
||||
|
||||
# =====BUILD TYPES=====
|
||||
# Debug
|
||||
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -g -Wall -O0 -fsanitize=address -fno-omit-frame-pointer")
|
||||
set(CMAKE_EXE_LINKER_FLAGS_DEBUG "${CMAKE_EXE_LINKER_FLAGS_DEBUG} -fno-omit-frame-pointer -fsanitize=address -pedantic")
|
||||
|
||||
# Arch doesn't use static libraries
|
||||
|
@ -30,13 +28,9 @@ if(NOT EXISTS "/etc/arch-release")
|
|||
set(CMAKE_EXE_LINKER_FLAGS_DEBUG "${CMAKE_EXE_LINKER_FLAGS_DEBUG} -static-libasan")
|
||||
endif()
|
||||
|
||||
# Release flags
|
||||
# Release
|
||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O3 -Werror -pedantic-errors")
|
||||
|
||||
# MinSizeRel flags
|
||||
# This one's just here for fun
|
||||
set(CMAKE_C_FLAGS_MINSIZEREL "${CMAKE_C_FLAGS_MINSIZEREL} -Oz -Werror -pedantic-errors")
|
||||
|
||||
|
||||
# =====EXECUTABLE=====
|
||||
file(GLOB st_SRC "st/*.c" "st/*.h")
|
||||
|
|
105
src/st/st.c
105
src/st/st.c
|
@ -17,6 +17,7 @@
|
|||
#include <unistd.h>
|
||||
#include <wchar.h>
|
||||
|
||||
#include "utf8.h"
|
||||
#include "../win.h"
|
||||
#include "macros.h"
|
||||
#include "st.h"
|
||||
|
@ -30,8 +31,6 @@
|
|||
#endif
|
||||
|
||||
/* Arbitrary sizes */
|
||||
#define UTF_INVALID 0xFFFD
|
||||
#define UTF_SIZE 4
|
||||
#define ESC_BUF_SIZ (128 * UTF_SIZE)
|
||||
#define ESC_ARG_SIZ 16
|
||||
#define STR_BUF_SIZ ESC_BUF_SIZ
|
||||
|
@ -205,10 +204,6 @@ static void selnormalize(void);
|
|||
static void selscroll(int, int);
|
||||
static void selsnap(int *, int *, int);
|
||||
|
||||
static size_t utf8decode(const char *, Rune *, size_t);
|
||||
static Rune utf8decodebyte(char, size_t *);
|
||||
static char utf8encodebyte(Rune, size_t);
|
||||
static size_t utf8validate(Rune *, size_t);
|
||||
|
||||
static char *base64dec(const char *);
|
||||
static char base64dec_getc(const char **);
|
||||
|
@ -224,11 +219,6 @@ static int iofd = 1;
|
|||
static int cmdfd;
|
||||
static pid_t pid;
|
||||
|
||||
static uchar utfbyte[UTF_SIZE + 1] = {0x80, 0, 0xC0, 0xE0, 0xF0};
|
||||
static uchar utfmask[UTF_SIZE + 1] = {0xC0, 0x80, 0xE0, 0xF0, 0xF8};
|
||||
static Rune utfmin[UTF_SIZE + 1] = {0, 0, 0x80, 0x800, 0x10000};
|
||||
static Rune utfmax[UTF_SIZE + 1] = {0x10FFFF, 0x7F, 0x7FF, 0xFFFF, 0x10FFFF};
|
||||
|
||||
/**
|
||||
* Same as write, but ensures that all bytes are written to the descriptor
|
||||
* (write sometimes isn't able to write all bytes)
|
||||
|
@ -299,99 +289,6 @@ char *safe_strdup(char *p_str) {
|
|||
return p_str;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode a given char array into a utf8 Rune
|
||||
*
|
||||
* @param p_char char array to decode
|
||||
* @param p_rune rune pointer to decode to
|
||||
* @param p_char_len length of the char array
|
||||
* @return size of the decoded rune
|
||||
*/
|
||||
size_t utf8decode(const char *p_char, Rune *p_rune, size_t p_char_len) {
|
||||
size_t decoded_len, type, i;
|
||||
Rune decoded_rune;
|
||||
|
||||
*p_rune = UTF_INVALID;
|
||||
if (!p_char_len) // p_char_len is 0, so just return 0
|
||||
return 0;
|
||||
|
||||
decoded_rune = utf8decodebyte(p_char[0], &decoded_len);
|
||||
if (!BETWEEN(decoded_len, 1, UTF_SIZE))
|
||||
return 1;
|
||||
|
||||
for (i = 1; i < p_char_len && i < decoded_len; ++i) {
|
||||
decoded_rune = (decoded_rune << 6) | utf8decodebyte(p_char[i], &type);
|
||||
|
||||
if (type != 0)
|
||||
return i;
|
||||
}
|
||||
|
||||
if (i < decoded_len)
|
||||
return 0;
|
||||
|
||||
*p_rune = decoded_rune;
|
||||
utf8validate(p_rune, decoded_len);
|
||||
|
||||
return decoded_len;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode a single byte to UTF-8
|
||||
*
|
||||
* @param p_char char to decode
|
||||
* @param p_i counter used in internal for loop
|
||||
* @return decoded rune
|
||||
*/
|
||||
Rune utf8decodebyte(char p_char, size_t *p_i) {
|
||||
for (*p_i = 0; *p_i < LEN(utfmask); ++(*p_i)) {
|
||||
if (((uchar)p_char & utfmask[*p_i]) == utfbyte[*p_i])
|
||||
return (uchar)p_char & ~utfmask[*p_i];
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Encode a UTF-8 rune
|
||||
*
|
||||
* @param p_rune rune to encode
|
||||
* @param p_char char array to encode to
|
||||
* @return
|
||||
*/
|
||||
size_t utf8encode(Rune p_rune, char *p_char) {
|
||||
size_t len, i;
|
||||
|
||||
len = utf8validate(&p_rune, 0);
|
||||
if (len > UTF_SIZE)
|
||||
return 0;
|
||||
|
||||
for (i = len - 1; i != 0; --i) {
|
||||
p_char[i] = utf8encodebyte(p_rune, 0);
|
||||
p_rune >>= 6;
|
||||
}
|
||||
p_char[0] = utf8encodebyte(p_rune, len);
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
/**
 * Build one byte of an encoded sequence.
 *
 * @param u rune whose low bits supply the payload
 * @param i byte class: 0 for a continuation byte, otherwise the sequence
 *          length whose leading byte is wanted
 * @return the marker bits for class i combined with the bits of u that lie
 *         outside utfmask[i]
 */
char utf8encodebyte(Rune u, size_t i) {
    const Rune payload = u & ~utfmask[i];

    return (char)(utfbyte[i] | payload);
}
|
||||
|
||||
/**
|
||||
* Check if a given rune is a valid UTF-8 rune
|
||||
*
|
||||
* @param p_rune rune to validate
|
||||
* @param i
|
||||
*/
|
||||
size_t utf8validate(Rune *p_rune, size_t i) {
|
||||
if (!BETWEEN(*p_rune, utfmin[i], utfmax[i]) || BETWEEN(*p_rune, 0xD800, 0xDFFF))
|
||||
*p_rune = UTF_INVALID;
|
||||
|
||||
// Count up i until you find a utfmax entry that's greater than *p_rune
|
||||
for (i = 1; *p_rune > utfmax[i]; ++i);
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
static const char base64_digits[] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
|
|
@ -7,6 +7,9 @@
|
|||
#include <stdint.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "types.h"
|
||||
#include "utf8.h"
|
||||
|
||||
enum glyph_attribute {
|
||||
ATTR_NULL = 0,
|
||||
ATTR_BOLD = 1 << 0,
|
||||
|
@ -29,12 +32,6 @@ enum selection_type { SEL_REGULAR = 1, SEL_RECTANGULAR = 2 };
|
|||
|
||||
enum selection_snap { SNAP_WORD = 1, SNAP_LINE = 2 };
|
||||
|
||||
typedef unsigned char uchar;
|
||||
typedef unsigned int uint;
|
||||
typedef unsigned long ulong;
|
||||
typedef unsigned short ushort;
|
||||
|
||||
typedef uint_least32_t Rune;
|
||||
|
||||
#define Glyph Glyph_
|
||||
typedef struct {
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
#ifndef TYPES_H
#define TYPES_H

/* Short aliases for the unsigned integer types, narrowest first. */
typedef unsigned char uchar;
typedef unsigned short ushort;
typedef unsigned int uint;
typedef unsigned long ulong;

#endif /* TYPES_H */
|
|
@ -0,0 +1,101 @@
|
|||
#include "utf8.h"
|
||||
#include "macros.h"
|
||||
#include "types.h"
|
||||
|
||||
static uchar utfbyte[UTF_SIZE + 1] = {0x80, 0, 0xC0, 0xE0, 0xF0};
|
||||
static uchar utfmask[UTF_SIZE + 1] = {0xC0, 0x80, 0xE0, 0xF0, 0xF8};
|
||||
static Rune utfmin[UTF_SIZE + 1] = {0, 0, 0x80, 0x800, 0x10000};
|
||||
static Rune utfmax[UTF_SIZE + 1] = {0x10FFFF, 0x7F, 0x7FF, 0xFFFF, 0x10FFFF};
|
||||
|
||||
/**
|
||||
* Decode a given char array into a utf8 Rune
|
||||
*
|
||||
* @param p_char char array to decode
|
||||
* @param p_rune rune pointer to decode to
|
||||
* @param p_char_len length of the char array
|
||||
* @return size of the decoded rune
|
||||
*/
|
||||
size_t utf8decode(const char *p_char, Rune *p_rune, size_t p_char_len) {
|
||||
size_t decoded_len, type, i;
|
||||
Rune decoded_rune;
|
||||
|
||||
*p_rune = UTF_INVALID;
|
||||
if (!p_char_len) // p_char_len is 0, so just return 0
|
||||
return 0;
|
||||
|
||||
decoded_rune = utf8decodebyte(p_char[0], &decoded_len);
|
||||
if (!BETWEEN(decoded_len, 1, UTF_SIZE))
|
||||
return 1;
|
||||
|
||||
for (i = 1; i < p_char_len && i < decoded_len; ++i) {
|
||||
decoded_rune = (decoded_rune << 6) | utf8decodebyte(p_char[i], &type);
|
||||
|
||||
if (type != 0)
|
||||
return i;
|
||||
}
|
||||
|
||||
if (i < decoded_len)
|
||||
return 0;
|
||||
|
||||
*p_rune = decoded_rune;
|
||||
utf8validate(p_rune, decoded_len);
|
||||
|
||||
return decoded_len;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode a single byte to UTF-8
|
||||
*
|
||||
* @param p_char char to decode
|
||||
* @param p_i counter used in internal for loop
|
||||
* @return decoded rune
|
||||
*/
|
||||
Rune utf8decodebyte(char p_char, size_t *p_i) {
|
||||
for (*p_i = 0; *p_i < LEN(utfmask); ++(*p_i)) {
|
||||
if (((uchar)p_char & utfmask[*p_i]) == utfbyte[*p_i])
|
||||
return (uchar)p_char & ~utfmask[*p_i];
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Encode a UTF-8 rune
|
||||
*
|
||||
* @param p_rune rune to encode
|
||||
* @param p_char char array to encode to
|
||||
* @return
|
||||
*/
|
||||
size_t utf8encode(Rune p_rune, char *p_char) {
|
||||
size_t len, i;
|
||||
|
||||
len = utf8validate(&p_rune, 0);
|
||||
if (len > UTF_SIZE)
|
||||
return 0;
|
||||
|
||||
for (i = len - 1; i != 0; --i) {
|
||||
p_char[i] = utf8encodebyte(p_rune, 0);
|
||||
p_rune >>= 6;
|
||||
}
|
||||
p_char[0] = utf8encodebyte(p_rune, len);
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
/**
 * Build one byte of an encoded sequence.
 *
 * @param u rune whose low bits supply the payload
 * @param i byte class: 0 for a continuation byte, otherwise the sequence
 *          length whose leading byte is wanted
 * @return the marker bits for class i combined with the bits of u that lie
 *         outside utfmask[i]
 */
char utf8encodebyte(Rune u, size_t i) {
    const Rune payload = u & ~utfmask[i];

    return (char)(utfbyte[i] | payload);
}
|
||||
|
||||
/**
|
||||
* Check if a given rune is a valid UTF-8 rune
|
||||
*
|
||||
* @param p_rune rune to validate
|
||||
* @param i
|
||||
*/
|
||||
size_t utf8validate(Rune *p_rune, size_t i) {
|
||||
if (!BETWEEN(*p_rune, utfmin[i], utfmax[i]) || BETWEEN(*p_rune, 0xD800, 0xDFFF))
|
||||
*p_rune = UTF_INVALID;
|
||||
|
||||
// Count up i until you find a utfmax entry that's greater than *p_rune
|
||||
for (i = 1; *p_rune > utfmax[i]; ++i);
|
||||
|
||||
return i;
|
||||
}
|
|
@ -0,0 +1,17 @@
|
|||
#ifndef UTF8_H
#define UTF8_H

#include <stdlib.h>
#include <stdint.h>

/* Rune stored in place of a value that fails decoding or validation. */
#define UTF_INVALID 0xFFFD
/* Largest number of bytes a single encoded rune may occupy. */
#define UTF_SIZE 4

/* A single decoded code point. */
typedef uint_least32_t Rune;

/* Decode the rune at the start of p_char (p_char_len bytes available) into
 * *p_rune; returns the number of bytes consumed, 0 if more input is needed. */
size_t utf8decode(const char *p_char, Rune *p_rune, size_t p_char_len);

/* Classify a single byte: stores its class index in *p_i and returns its
 * payload bits. */
Rune utf8decodebyte(char p_char, size_t *p_i);

/* Encode p_rune into p_char (at most UTF_SIZE bytes, no terminator);
 * returns the number of bytes written. */
size_t utf8encode(Rune p_rune, char *p_char);

/* Build one encoded byte of class i (0 = continuation byte) from the low
 * bits of u. */
char utf8encodebyte(Rune u, size_t i);

/* Replace *p_rune with UTF_INVALID if it is not acceptable for class i;
 * returns the number of bytes needed to encode the resulting rune. */
size_t utf8validate(Rune *p_rune, size_t i);

#endif /* UTF8_H */
|
Reference in New Issue