Dillo
hyphenator.hh
Go to the documentation of this file.
1 #ifndef __DW_HYPHENATOR_HH__
2 #define __DW_HYPHENATOR_HH__
3 
4 #include "../lout/object.hh"
5 #include "../lout/container.hh"
6 #include "../dw/core.hh"
7 
8 namespace dw {
9 
10 class Trie {
11  public:
12  struct TrieNode {
13  unsigned char c;
14  uint16_t next;
15  const char *data;
16  };
17 
18  private:
20  int size;
21  bool freeArray;
23 
24  public:
25  Trie (TrieNode *array = NULL, int size = 0, bool freeArray = false,
26  lout::misc::ZoneAllocator *dataZone = NULL);
27  ~Trie ();
28 
29  static const int root = 0;
30  inline bool validState (int state) { return state >= 0 && state < size; };
31  inline const char *getData (unsigned char c, int *state)
32  {
33  if (!validState (*state))
34  return NULL;
35 
36  TrieNode *tn = array + *state + c;
37 
38  if (tn->c == c) {
39  *state = tn->next > 0 ? tn->next : -1;
40  return tn->data;
41  } else {
42  *state = -1;
43  return NULL;
44  }
45  };
46  void save (FILE *file);
47  int load (FILE *file);
48 };
49 
50 class TrieBuilder {
51  private:
52  struct StackEntry {
53  unsigned char c;
54  int count;
55  int next[256];
56  const char *data[256];
57  const char *data1;
58  };
59 
60  struct DataEntry {
61  unsigned char *key;
62  const char *value;
63  };
64 
65  int pack;
71 
72  static int keyCompare (const void *p1, const void *p2);
73  void stateStackPush (unsigned char c);
74  int stateStackPop ();
75  int insertState (StackEntry *state, bool root);
76  void insertSorted (unsigned char *key, const char *value);
77 
78  public:
79  TrieBuilder (int pack);
80  ~TrieBuilder ();
81 
82  void insert (const char *key, const char *value);
83  Trie *createTrie();
84 };
85 
87 {
91 
95 
96  void insertPattern (TrieBuilder *trieBuilder, char *s);
97  void insertException (char *s);
98 
99  void hyphenateSingleWord(core::Platform *platform, char *wordLc, int offset,
101  bool isCharPartOfActualWord (char *s);
102 
103 public:
104  Hyphenator (const char *patFile, const char *excFile, int pack = 256);
105  ~Hyphenator();
106 
107  static Hyphenator *getHyphenator (const char *language);
108  static bool isHyphenationCandidate (const char *word);
109  int *hyphenateWord(core::Platform *platform, const char *word, int *numBreaks);
110  void saveTrie (FILE *fp) { trie->save (fp); };
111 };
112 
113 } // namespace dw
114 
115 #endif // __DW_HYPHENATOR_HH__
const char * value
Definition: hyphenator.hh:62
TrieNode * array
Definition: hyphenator.hh:19
Typed version of container::untyped::Vector.
Definition: container.hh:425
void save(FILE *file)
Definition: hyphenator.cc:544
static Hyphenator * getHyphenator(const char *language)
Definition: hyphenator.cc:115
Definition: hyphenator.hh:12
void insertPattern(TrieBuilder *trieBuilder, char *s)
Definition: hyphenator.cc:148
An object::Object wrapper for strings (char*).
Definition: object.hh:185
void insertException(char *s)
Definition: hyphenator.cc:183
Typed version of container::untyped::HashTable.
Definition: container.hh:514
const char * data1
Definition: hyphenator.hh:57
Trie * trie
Definition: hyphenator.hh:90
This is the base class for many other classes, which defines very common virtual methods.
Definition: object.hh:24
lout::misc::SimpleVector< Trie::TrieNode > * tree
Definition: hyphenator.hh:67
bool validState(int state)
Definition: hyphenator.hh:30
A simple allocator optimized to handle many small chunks of memory. The chunks cannot be freed individually.
Definition: misc.hh:626
int size
Definition: hyphenator.hh:20
uint16_t next
Definition: hyphenator.hh:14
void hyphenateSingleWord(core::Platform *platform, char *wordLc, int offset, lout::misc::SimpleVector< int > *breakPos)
Definition: hyphenator.cc:292
~TrieBuilder()
Definition: hyphenator.cc:387
lout::misc::SimpleVector< StackEntry > * stateStack
Definition: hyphenator.hh:69
~Trie()
Definition: hyphenator.cc:537
Trie(TrieNode *array=NULL, int size=0, bool freeArray=false, lout::misc::ZoneAllocator *dataZone=NULL)
Definition: hyphenator.cc:529
static Trie::TrieNode trieNodeNull
Definition: hyphenator.hh:66
Definition: hyphenator.hh:10
unsigned char c
Definition: hyphenator.hh:13
Definition: hyphenator.hh:52
bool isCharPartOfActualWord(char *s)
Definition: hyphenator.cc:225
void insert(const char *key, const char *value)
Definition: hyphenator.cc:395
TrieBuilder(int pack)
Definition: hyphenator.cc:377
const char * getData(unsigned char c, int *state)
Definition: hyphenator.hh:31
lout::misc::SimpleVector< DataEntry > * dataList
Definition: hyphenator.hh:68
int * hyphenateWord(core::Platform *platform, const char *word, int *numBreaks)
Definition: hyphenator.cc:233
~Hyphenator()
Definition: hyphenator.cc:109
int insertState(StackEntry *state, bool root)
Definition: hyphenator.cc:410
static bool isHyphenationCandidate(const char *word)
Definition: hyphenator.cc:211
Simple (simpler than container::untyped::Vector and container::typed::Vector) template-based vector.
Definition: misc.hh:93
bool freeArray
Definition: hyphenator.hh:21
Trie * createTrie()
Definition: hyphenator.cc:487
const char * data[256]
Definition: hyphenator.hh:56
unsigned char c
Definition: hyphenator.hh:53
unsigned char * key
Definition: hyphenator.hh:61
int pack
Definition: hyphenator.hh:65
lout::container::typed::HashTable< lout::object::ConstString, lout::container::typed::Vector< lout::object::Integer > > * exceptions
Definition: hyphenator.hh:94
lout::misc::ZoneAllocator * dataZone
Definition: hyphenator.hh:70
Definition: hyphenator.hh:60
An object::Object wrapper for constant strings (char*).
Definition: object.hh:162
void stateStackPush(unsigned char c)
Definition: hyphenator.cc:460
Dw is in this namespace, or sub namespaces of this one.
Definition: alignedtablecell.cc:28
An object::Object wrapper for int's.
Definition: object.hh:126
Definition: hyphenator.hh:50
static lout::container::typed::HashTable< lout::object::String, Hyphenator > * hyphenators
Definition: hyphenator.hh:89
void insertSorted(unsigned char *key, const char *value)
Definition: hyphenator.cc:507
int load(FILE *file)
Definition: hyphenator.cc:556
An interface to encapsulate some platform dependencies.
Definition: platform.hh:16
void saveTrie(FILE *fp)
Definition: hyphenator.hh:110
int next[256]
Definition: hyphenator.hh:55
lout::misc::ZoneAllocator * dataZone
Definition: hyphenator.hh:22
Hyphenator(const char *patFile, const char *excFile, int pack=256)
Definition: hyphenator.cc:48
static FltkPlatform * platform
Definition: dw_anchors_test.cc:37
const char * data
Definition: hyphenator.hh:15
static int keyCompare(const void *p1, const void *p2)
Definition: hyphenator.cc:402
int count
Definition: hyphenator.hh:54
int stateStackPop()
Definition: hyphenator.cc:468
Definition: hyphenator.hh:86
static const int root
Definition: hyphenator.hh:29