Some refactoring

develop
Jef Roosens 2020-12-02 14:26:01 +01:00
parent 1052dd6244
commit a13536d0c7
1 changed file with 44 additions and 25 deletions

View File

@ -308,67 +308,86 @@ char *safe_strdup(char *p_str) {
* @return size of the decoded rune * @return size of the decoded rune
*/ */
size_t utf8decode(const char *p_char, Rune *p_rune, size_t p_char_len) {
    size_t decoded_len, type, i;
    Rune decoded_rune;

    // Every early return below leaves *p_rune set to UTF_INVALID
    *p_rune = UTF_INVALID;

    if (!p_char_len) // p_char_len is 0, so just return 0
        return 0;

    // The first byte supplies the leading payload bits; decoded_len receives
    // the index of the utfmask/utfbyte entry that byte matched
    decoded_rune = utf8decodebyte(p_char[0], &decoded_len);
    if (!BETWEEN(decoded_len, 1, UTF_SIZE))
        return 1;

    // Fold the following bytes in, shifting 6 bits per byte. Stops at the end
    // of the input or of the expected sequence, whichever comes first
    for (i = 1; i < p_char_len && i < decoded_len; ++i) {
        decoded_rune = (decoded_rune << 6) | utf8decodebyte(p_char[i], &type);
        // A following byte must match table entry 0; otherwise return how
        // many bytes were accepted before it
        if (type != 0)
            return i;
    }

    // The input ended before the expected number of bytes was read
    if (i < decoded_len)
        return 0;

    *p_rune = decoded_rune;
    // utf8validate may still overwrite *p_rune with UTF_INVALID; its return
    // value is not used here
    utf8validate(p_rune, decoded_len);

    return decoded_len;
}
Rune utf8decodebyte(char p_char, size_t *i) { /**
* Decode a single byte to UTF-8
for (*i = 0; *i < LEN(utfmask); ++(*i)) { *
if (((uchar)p_char & utfmask[*i]) == utfbyte[*i]) * @param p_char char to decode
return (uchar)p_char & ~utfmask[*i]; * @param p_i counter used in internal for loop
* @return decoded rune
*/
Rune utf8decodebyte(char p_char, size_t *p_i) {
for (*p_i = 0; *p_i < LEN(utfmask); ++(*p_i)) {
if (((uchar)p_char & utfmask[*p_i]) == utfbyte[*p_i])
return (uchar)p_char & ~utfmask[*p_i];
} }
return 0; return 0;
} }
/**
 * Encode a rune as a UTF-8 byte sequence
 *
 * The rune is first passed through utf8validate, which may replace it with
 * UTF_INVALID; in that case the replacement is what gets encoded.
 *
 * @param p_rune rune to encode
 * @param p_char buffer receiving the encoded bytes; at most UTF_SIZE bytes
 *               are written and no terminating NUL is appended
 * @return number of bytes written to p_char, or 0 (nothing written) if the
 *         length reported by utf8validate exceeds UTF_SIZE
 */
size_t utf8encode(Rune p_rune, char *p_char) {
    size_t len, i;

    len = utf8validate(&p_rune, 0);
    if (len > UTF_SIZE)
        return 0;

    // Fill the trailing bytes from the end backwards, consuming 6 bits of the
    // rune per byte; each uses the prefix pattern of table entry 0
    for (i = len - 1; i != 0; --i) {
        p_char[i] = utf8encodebyte(p_rune, 0);
        p_rune >>= 6;
    }

    // The first byte uses the prefix pattern of table entry len
    p_char[0] = utf8encodebyte(p_rune, len);

    return len;
}
/**
 * Build one byte of an encoded UTF-8 sequence
 *
 * @param u rune whose low bits supply the payload of the byte
 * @param i index into utfbyte/utfmask selecting the prefix pattern to apply
 * @return utfbyte[i] combined with the bits of u that utfmask[i] leaves free
 */
char utf8encodebyte(Rune u, size_t i) {
    return utfbyte[i] | (u & ~utfmask[i]);
}
/**
 * Sanitize a rune and report the table index (byte length) that fits it
 *
 * The rune is overwritten with UTF_INVALID when it fails the BETWEEN check
 * against utfmin[i]..utfmax[i], or when it passes the BETWEEN check for the
 * surrogate range 0xD800..0xDFFF.
 *
 * @param p_rune rune to validate; overwritten with UTF_INVALID when rejected
 * @param i index into utfmin/utfmax selecting the range the rune is checked
 *          against (utf8decode passes the decoded sequence length,
 *          utf8encode passes 0)
 * @return the smallest index >= 1 whose utfmax entry is not below the
 *         (possibly replaced) rune; utf8encode uses it as the encoded length
 */
size_t utf8validate(Rune *p_rune, size_t i) {
    if (!BETWEEN(*p_rune, utfmin[i], utfmax[i]) || BETWEEN(*p_rune, 0xD800, 0xDFFF))
        *p_rune = UTF_INVALID;

    // Count up i until you find a utfmax entry that is not below *p_rune.
    // The incoming value of i is discarded here; the search restarts at 1
    for (i = 1; *p_rune > utfmax[i]; ++i)
        ;

    return i;
}