Dillo v3.1.1-98-g318d1f14
|
#include "d_size.h"
Go to the source code of this file.
Functions | |
uint_t | a_Utf8_end_of_char (const char *str, uint_t i) |
Return index of the last byte of the UTF-8-encoded character that str + i points to or into. | |
uint_t | a_Utf8_decode (const char *, const char *end, int *len) |
Decode a single UTF-8-encoded character starting at str. | |
int | a_Utf8_encode (unsigned int ucs, char *buf) |
Write UTF-8 encoding of ucs into buf and return number of bytes written. | |
int | a_Utf8_test (const char *src, unsigned int srclen) |
Examine first srclen bytes of src. | |
bool_t | a_Utf8_ideographic (const char *s, const char *end, int *len) |
Does s point to a UTF-8-encoded ideographic character? | |
bool_t | a_Utf8_combining_char (int unicode) |
int | a_Utf8_char_count (const char *str, int len) |
Variables | |
static const char | utf8_replacement_char [] = "\xEF\xBF\xBD" |
Unicode replacement character U+FFFD. | |
static const char | utf8_zero_width_space [] = "\xE2\x80\x8B" |
Unicode zero width space U+200B. | |
int a_Utf8_char_count | ( | const char * | str, |
int | len | ||
) |
Definition at line 104 of file utf8.cc.
Referenced by Keys::parseKey().
bool_t a_Utf8_combining_char | ( | int | unicode | ) |
Definition at line 96 of file utf8.cc.
Referenced by a_Misc_expand_tabs().
uint_t a_Utf8_decode | ( | const char * | str, |
const char * | end, | ||
int * | len | ||
) |
Decode a single UTF-8-encoded character starting at str.
The resulting Unicode value (in the range 0-0x10ffff) is returned, and len is set to the number of bytes in the UTF-8 encoding. Note that utf8decode(), if given non-UTF-8 data, will interpret it as ISO-8859-1 or CP1252 if possible.
Definition at line 46 of file utf8.cc.
Referenced by a_Misc_expand_tabs(), a_Utf8_ideographic(), Keys::getKeyCmd(), and Keys::parseKey().
int a_Utf8_encode | ( | unsigned int | ucs, |
char * | buf | ||
) |
Write UTF-8 encoding of ucs into buf and return number of bytes written.
Definition at line 54 of file utf8.cc.
Referenced by Html_parse_numeric_charref().
uint_t a_Utf8_end_of_char | ( | const char * | str, |
uint_t | i | ||
) |
Return index of the last byte of the UTF-8-encoded character that str + i points to or into.
Definition at line 23 of file utf8.cc.
Referenced by a_Misc_get_content_type_from_data(), a_UIcmd_set_page_title(), and a_Utf8_ideographic().
bool_t a_Utf8_ideographic | ( | const char * | s, |
const char * | end, | ||
int * | len | ||
) |
Does s point to a UTF-8-encoded ideographic character?
This is based on http://unicode.org/reports/tr14/#ID plus some guesses for what might make the most sense for Dillo. Surprisingly, they include Hangul Compatibility Jamo, but they're the experts, so I'll follow along.
Definition at line 76 of file utf8.cc.
References a_Utf8_decode(), a_Utf8_end_of_char(), FALSE, and TRUE.
Referenced by Html_process_word().
int a_Utf8_test | ( | const char * | src, |
unsigned int | srclen | ||
) |
Examine first srclen bytes of src.
Return 0 if not legal UTF-8, 1 if all ASCII, 2 if all below 0x800, 3 if all below 0x10000, and 4 otherwise.
Definition at line 64 of file utf8.cc.
Referenced by a_Misc_get_content_type_from_data().
const char utf8_replacement_char [] = "\xEF\xBF\xBD" |
static |
Unicode replacement character U+FFFD.
"used to replace an incoming character whose value is unknown or otherwise unrepresentable in Unicode"
Definition at line 16 of file utf8.hh.
Referenced by Decode_charset().
const char utf8_zero_width_space [] = "\xE2\x80\x8B" |
static |
Unicode zero width space U+200B.
Definition at line 19 of file utf8.hh.
Referenced by Html_process_word().