Dillo v3.1.1-46-g8a360e32
Loading...
Searching...
No Matches
utf8.cc
Go to the documentation of this file.
/*
 * File: utf8.cc
 *
 * Copyright (C) 2009 Jorge Arellano Cid <jcid@dillo.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 */
11
12#include <FL/fl_utf8.h>
13
14#include "../dlib/dlib.h" /* TRUE/FALSE */
15#include "utf8.hh"
16
// C++ functions with C linkage ----------------------------------------------
18
23uint_t a_Utf8_end_of_char(const char *str, uint_t i)
24{
25 /* We can almost get what we want from utf8fwd(p+1,...)-1, but that
26 * does not work for the last character in a string, and the fn makes some
27 * assumptions that do not suit us.
28 * Here's something very simpleminded instead:
29 */
30 if (str && *str && (str[i] & 0x80)) {
31 int internal_bytes = (str[i] & 0x40) ? 0 : 1;
32
33 while (((str[i + 1] & 0xc0) == 0x80) && (++internal_bytes < 4))
34 i++;
35 }
36 return i;
37}
38
46uint_t a_Utf8_decode(const char* str, const char* end, int* len)
47{
48 return fl_utf8decode(str, end, len);
49}
50
54int a_Utf8_encode(unsigned int ucs, char *buf)
55{
56 return fl_utf8encode(ucs, buf);
57}
58
64int a_Utf8_test(const char* src, unsigned int srclen)
65{
66 return fl_utf8test(src, srclen);
67}
68
76bool_t a_Utf8_ideographic(const char *s, const char *end, int *len)
77{
78 bool_t ret = FALSE;
79
80 if ((uchar_t)*s >= 0xe2) {
81 /* Unicode char >= U+2000. */
82 unsigned unicode = a_Utf8_decode(s, end, len);
83
84 if (unicode >= 0x2e80 &&
85 ((unicode <= 0xa4cf) ||
86 (unicode >= 0xf900 && unicode <= 0xfaff) ||
87 (unicode >= 0xff00 && unicode <= 0xff9f))) {
88 ret = TRUE;
89 }
90 } else {
91 *len = 1 + (int)a_Utf8_end_of_char(s, 0);
92 }
93 return ret;
94}
95
97{
98 return ((unicode >= 0x0300 && unicode <= 0x036f) ||
99 (unicode >= 0x1dc0 && unicode <= 0x1dff) ||
100 (unicode >= 0x20d0 && unicode <= 0x20ff) ||
101 (unicode >= 0xfe20 && unicode <= 0xfe2f));
102}
103
104int a_Utf8_char_count(const char *str, int len)
105{
106 return fl_utf_nb_char((const uchar_t*)str, len);
107}
unsigned char uchar_t
Definition d_size.h:17
unsigned int uint_t
Definition d_size.h:20
unsigned char bool_t
Definition d_size.h:21
#define TRUE
Definition dlib.h:23
#define FALSE
Definition dlib.h:19
uint_t a_Utf8_decode(const char *str, const char *end, int *len)
Decode a single UTF-8-encoded character starting at p.
Definition utf8.cc:46
bool_t a_Utf8_ideographic(const char *s, const char *end, int *len)
Does s point to a UTF-8-encoded ideographic character?.
Definition utf8.cc:76
int a_Utf8_char_count(const char *str, int len)
Definition utf8.cc:104
int a_Utf8_test(const char *src, unsigned int srclen)
Examine first srclen bytes of src.
Definition utf8.cc:64
uint_t a_Utf8_end_of_char(const char *str, uint_t i)
Return index of the last byte of the UTF-8-encoded character that str + i points to or into.
Definition utf8.cc:23
int a_Utf8_encode(unsigned int ucs, char *buf)
Write UTF-8 encoding of ucs into buf and return number of bytes written.
Definition utf8.cc:54
bool_t a_Utf8_combining_char(int unicode)
Definition utf8.cc:96