Some refactoring

develop
Jef Roosens 2020-12-02 14:26:01 +01:00
parent 1052dd6244
commit a13536d0c7
1 changed files with 44 additions and 25 deletions

View File

@@ -308,67 +308,86 @@ char *safe_strdup(char *p_str) {
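* @return length in bytes of the decoded sequence on success; 0 if p_char_len
* is 0 or the input ends before the sequence is complete; 1 if the first byte
* is rejected; otherwise the index of the first following byte for which
* utf8decodebyte reports a nonzero type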
*/
size_t utf8decode(const char *p_char, Rune *p_rune, size_t p_char_len) {
size_t decoded_len, type;
Rune rune_decoded;
size_t decoded_len, type, i;
Rune decoded_rune;
// Report UTF_INVALID on every path except the successful one at the bottom
*p_rune = UTF_INVALID;
if (!p_char_len) // chr_len is 0, so just return 0
if (!p_char_len) // p_char_len is 0, so just return 0
return 0;
rune_decoded = utf8decodebyte(p_char[0], &decoded_len);
// decoded_len receives the index of the utfmask/utfbyte entry that matched
// the first byte; below it doubles as the expected sequence length
decoded_rune = utf8decodebyte(p_char[0], &decoded_len);
// First byte failed the BETWEEN(decoded_len, 1, UTF_SIZE) check: it cannot
// start a sequence, so return 1 with *p_rune still UTF_INVALID
if (!BETWEEN(decoded_len, 1, UTF_SIZE))
return 1;
size_t i, j;
for (i = 1, j = 1; i < p_char_len && j < decoded_len; ++i, ++j) {
rune_decoded = (rune_decoded << 6) | utf8decodebyte(p_char[i], &type);
// Fold each following byte into the rune, shifting the accumulated value
// left by 6 bits per byte
for (i = 1; i < p_char_len && i < decoded_len; ++i) {
decoded_rune = (decoded_rune << 6) | utf8decodebyte(p_char[i], &type);
// A nonzero type means the byte matched some entry other than entry 0
// NOTE(review): presumably entry 0 describes a continuation byte; the
// utfmask/utfbyte tables are not visible here, confirm
if (type != 0)
return j;
return i;
}
if (j < decoded_len)
// The loop stopped because the input ran out before decoded_len bytes were read
if (i < decoded_len)
return 0;
*p_rune = rune_decoded;
*p_rune = decoded_rune;
// May overwrite *p_rune with UTF_INVALID (range and surrogate checks); the
// length utf8validate returns is deliberately ignored here
utf8validate(p_rune, decoded_len);
return decoded_len;
}
Rune utf8decodebyte(char p_char, size_t *i) {
for (*i = 0; *i < LEN(utfmask); ++(*i)) {
if (((uchar)p_char & utfmask[*i]) == utfbyte[*i])
return (uchar)p_char & ~utfmask[*i];
/**
* Decode a single byte of a UTF-8 sequence
*
* Walks the utfmask/utfbyte tables in parallel and stops at the first index
* for which (p_char & utfmask[index]) == utfbyte[index].
*
* @param p_char byte to decode
* @param p_i output: index of the matching utfmask/utfbyte entry, or
* LEN(utfmask) if no entry matches; callers read this value
* @return the bits of p_char not covered by the matching mask, or 0 if no
* entry matches
*/
Rune utf8decodebyte(char p_char, size_t *p_i) {
for (*p_i = 0; *p_i < LEN(utfmask); ++(*p_i)) {
if (((uchar)p_char & utfmask[*p_i]) == utfbyte[*p_i])
return (uchar)p_char & ~utfmask[*p_i];
}
return 0;
}
size_t utf8encode(Rune u, char *c) {
/**
* Encode a rune as a UTF-8 byte sequence
*
* A rune rejected by utf8validate is encoded as UTF_INVALID instead. No
* terminating NUL byte is written.
*
* @param p_rune rune to encode
* @param p_char buffer receiving the encoded bytes; must have room for the
* returned number of bytes (at most UTF_SIZE)
* @return number of bytes written to p_char, or 0 if the length reported by
* utf8validate exceeds UTF_SIZE (nothing is written in that case)
*/
size_t utf8encode(Rune p_rune, char *p_char) {
size_t len, i;
len = utf8validate(&u, 0);
// p_rune is a by-value copy, so a replacement by UTF_INVALID stays local;
// index 0 selects the utfmin[0]/utfmax[0] range for the check
len = utf8validate(&p_rune, 0);
if (len > UTF_SIZE)
return 0;
// Fill bytes len-1 down to 1, back to front: each is built with table entry 0,
// then 6 bits of the rune are shifted out
for (i = len - 1; i != 0; --i) {
c[i] = utf8encodebyte(u, 0);
u >>= 6;
p_char[i] = utf8encodebyte(p_rune, 0);
p_rune >>= 6;
}
c[0] = utf8encodebyte(u, len);
// What is left of the rune goes into the first byte, built with table entry len
p_char[0] = utf8encodebyte(p_rune, len);
return len;
}
/**
* Build one byte of an encoded sequence
*
* @param u rune supplying the payload bits
* @param i index into utfbyte/utfmask selecting the byte pattern
* @return utfbyte[i] OR-ed with the bits of u not covered by utfmask[i],
* converted to char
*/
char utf8encodebyte(Rune u, size_t i) { return utfbyte[i] | (u & ~utfmask[i]); }
size_t utf8validate(Rune *u, size_t i) {
if (!BETWEEN(*u, utfmin[i], utfmax[i]) || BETWEEN(*u, 0xD800, 0xDFFF))
*u = UTF_INVALID;
for (i = 1; *u > utfmax[i]; ++i)
;
/**
* Sanitize a rune and report which utfmax bucket it falls into
*
* The rune is overwritten with UTF_INVALID when it fails the
* BETWEEN(*p_rune, utfmin[i], utfmax[i]) check or passes the
* BETWEEN(*p_rune, 0xD800, 0xDFFF) check.
*
* @param p_rune rune to validate; overwritten with UTF_INVALID if rejected
* @param i index into utfmin/utfmax selecting the range to check against
* (callers in this file pass 0 or a decoded sequence length); reused as the
* loop counter afterwards
* @return smallest index >= 1 whose utfmax entry is not smaller than *p_rune;
* utf8encode uses this value as the encoded length in bytes
*/
size_t utf8validate(Rune *p_rune, size_t i) {
if (!BETWEEN(*p_rune, utfmin[i], utfmax[i]) || BETWEEN(*p_rune, 0xD800, 0xDFFF))
*p_rune = UTF_INVALID;
// Count up i from 1 until utfmax[i] is greater than or equal to *p_rune
for (i = 1; *p_rune > utfmax[i]; ++i);
return i;
}