Dillo v3.1.1-119-g140d9ebd
Loading...
Searching...
No Matches
hyphenator.hh
Go to the documentation of this file.
1#ifndef __DW_HYPHENATOR_HH__
2#define __DW_HYPHENATOR_HH__
3
4#include "../lout/object.hh"
5#include "../lout/container.hh"
6#include "../dw/core.hh"
7
8namespace dw {
9
// Trie stored in a flat TrieNode array; one lookup step indexes the array by
// "current state + character" (see getData).
// NOTE(review): this listing omits some lines of the class (the embedded
// numbering skips 19, 21, 22 and 26), so the declaration is incomplete here.
10class Trie {
11 public:
   // One slot of the node array.
12 struct TrieNode {
   // Character this slot is valid for; getData() compares it with the
   // character being looked up.
13 unsigned char c;
   // Follow-up state; getData() treats 0 as "no follow-up state".
14 uint16_t next;
   // Value handed back by getData() when this slot matches.
15 const char *data;
16 };
17
18 private:
   // Exclusive upper bound for valid states (see validState).
20 int size;
23
24 public:
   // NOTE(review): the rest of this constructor declaration is missing from
   // the listing.
25 Trie (TrieNode *array = NULL, int size = 0, bool freeArray = false,
27 ~Trie ();
28
   // Presumably the state from which a lookup starts -- confirm with callers.
29 static const int root = 0;
   // True if "state" lies in the range [0, size).
30 inline bool validState (int state) { return state >= 0 && state < size; };
   // Performs one lookup step for character "c", starting from "*state".
   //
   // If "*state" is not a valid state, NULL is returned and "*state" is left
   // untouched. Otherwise the node at "array + *state + c" is examined:
   //  - if its character equals "c", "*state" becomes the node's "next"
   //    value (or -1 when "next" is 0) and the node's data is returned;
   //  - otherwise "*state" is set to -1 and NULL is returned.
   //
   // NOTE(review): only "*state" is range-checked, not "*state + c";
   // presumably the array is laid out so that this index stays in bounds --
   // confirm against the code that builds or loads the array.
31 inline const char *getData (unsigned char c, int *state)
32 {
33 if (!validState (*state))
34 return NULL;
35
36 TrieNode *tn = array + *state + c;
37
38 if (tn->c == c) {
39 *state = tn->next > 0 ? tn->next : -1;
40 return tn->data;
41 } else {
42 *state = -1;
43 return NULL;
44 }
45 };
   // Declared here, defined elsewhere. Presumably write/read the trie to/from
   // "file"; the meaning of load()'s int result is not visible here.
46 void save (FILE *file);
47 int load (FILE *file);
48};
49
// NOTE(review): interior of class TrieBuilder. The class's opening line and a
// few member declarations are not part of this listing (the embedded
// numbering skips 50, 66, 70 and 83).
51 private:
   // Entry type of stateStack. "next" and "data" each hold 256 slots,
   // presumably one per possible unsigned char value -- confirm.
52 struct StackEntry {
53 unsigned char c;
54 int count;
55 int next[256];
56 const char *data[256];
57 const char *data1;
58 };
59
   // Entry type of dataList: a key together with its value.
60 struct DataEntry {
61 unsigned char *key;
62 const char *value;
63 };
64
   // Set via the constructor argument of the same name; its exact meaning is
   // not visible in this header.
65 int pack;
   // Vectors of trie nodes, key/value entries and stack entries.
67 lout::misc::SimpleVector <Trie::TrieNode> *tree;
68 lout::misc::SimpleVector <DataEntry> *dataList;
69 lout::misc::SimpleVector <StackEntry> *stateStack;
71
   // Comparison callback taking two untyped pointers; presumably used to
   // order DataEntry items by key -- confirm in the implementation.
72 static int keyCompare (const void *p1, const void *p2);
73 void stateStackPush (unsigned char c);
74 int stateStackPop ();
75 int insertState (StackEntry *state, bool root);
76 void insertSorted (unsigned char *key, const char *value);
77
78 public:
79 TrieBuilder (int pack);
80 ~TrieBuilder ();
81
   // Adds a key/value pair to the builder (defined elsewhere).
82 void insert (const char *key, const char *value);
84};
85
// NOTE(review): body of class Hyphenator. The class's opening line and several
// member declarations are not part of this listing (the embedded numbering
// skips 86, 88-90 and 92-94); "trie", used by saveTrie() below, is among the
// members not shown.
87{
91
95
96 void insertPattern (TrieBuilder *trieBuilder, char *s);
97 void insertException (char *s);
98
   // Hyphenate a single word, which only consists of lowercase characters.
99 void hyphenateSingleWord(core::Platform *platform, char *wordLc, int offset,
100 lout::misc::SimpleVector <int> *breakPos);
   // Test whether the (UTF-8) character to which "s" points is an actual
   // part of the word.
101 bool isCharPartOfActualWord (char *s);
102
103public:
104 Hyphenator (const char *patFile, const char *excFile, int pack = 256);
105 ~Hyphenator();
106
107 static Hyphenator *getHyphenator (const char *language);
   // Simple pre-check on "word", meant to avoid unnecessary cost.
108 static bool isHyphenationCandidate (const char *word);
   // Given a word, returns a list of the possible hyphenation points.
   // "numBreaks" is presumably set to the number of entries in that list --
   // confirm in the implementation.
109 int *hyphenateWord(core::Platform *platform, const char *word, int *numBreaks);
   // Forwards to trie->save (fp).
110 void saveTrie (FILE *fp) { trie->save (fp); };
111};
112
113} // namespace dw
114
115#endif // __DW_HYPHENATOR_HH__
void saveTrie(FILE *fp)
void insertException(char *s)
bool isCharPartOfActualWord(char *s)
Test whether the character to which "s" points (UTF-8) is an actual part of the word.
void hyphenateSingleWord(core::Platform *platform, char *wordLc, int offset, lout::misc::SimpleVector< int > *breakPos)
Hyphenate a single word, which only consists of lowercase characters.
static lout::container::typed::HashTable< lout::object::String, Hyphenator > * hyphenators
Definition hyphenator.hh:89
static Hyphenator * getHyphenator(const char *language)
int * hyphenateWord(core::Platform *platform, const char *word, int *numBreaks)
Given a word, returns a list of the possible hyphenation points.
static bool isHyphenationCandidate(const char *word)
Simple test to avoid unnecessary cost.
lout::container::typed::HashTable< lout::object::ConstString, lout::container::typed::Vector< lout::object::Integer > > * exceptions
Definition hyphenator.hh:94
void insertPattern(TrieBuilder *trieBuilder, char *s)
void insertSorted(unsigned char *key, const char *value)
lout::misc::ZoneAllocator * dataZone
Definition hyphenator.hh:70
static int keyCompare(const void *p1, const void *p2)
int insertState(StackEntry *state, bool root)
lout::misc::SimpleVector< Trie::TrieNode > * tree
Definition hyphenator.hh:67
void stateStackPush(unsigned char c)
lout::misc::SimpleVector< StackEntry > * stateStack
Definition hyphenator.hh:69
lout::misc::SimpleVector< DataEntry > * dataList
Definition hyphenator.hh:68
Trie * createTrie()
void insert(const char *key, const char *value)
static Trie::TrieNode trieNodeNull
Definition hyphenator.hh:66
bool freeArray
Definition hyphenator.hh:21
void save(FILE *file)
TrieNode * array
Definition hyphenator.hh:19
bool validState(int state)
Definition hyphenator.hh:30
lout::misc::ZoneAllocator * dataZone
Definition hyphenator.hh:22
const char * getData(unsigned char c, int *state)
Definition hyphenator.hh:31
int load(FILE *file)
static const int root
Definition hyphenator.hh:29
An interface to encapsulate some platform dependencies.
Definition platform.hh:17
Typed version of container::untyped::HashTable.
Definition container.hh:536
Typed version of container::untyped::Vector.
Definition container.hh:447
A simple allocator optimized to handle many small chunks of memory.
Definition misc.hh:631
An object::Object wrapper for constant strings (char*).
Definition object.hh:163
An object::Object wrapper for int's.
Definition object.hh:127
This is the base class for many other classes, which defines very common virtual methods.
Definition object.hh:25
An object::Object wrapper for strings (char*).
Definition object.hh:186
static FltkPlatform * platform
Dw is in this namespace, or sub namespaces of this one.
const char * data[256]
Definition hyphenator.hh:56
unsigned char c
Definition hyphenator.hh:13
const char * data
Definition hyphenator.hh:15