From 4760752af1c9b7507b51917ff4e4d8eb0491e353 Mon Sep 17 00:00:00 2001
Date: Tue, 1 Jun 2010 17:27:23 -0700
Subject: [PATCH] Ehhance hyphenation dictionary reading from character buffer.

Previous file reading is kept and enhanced with mmap.

This is the prepration for reading the dictionary from asset.

issue: 2672163
Change-Id: I0527b7b1260dc103a3be63856b9f4e4c10ed2857
---
 hyphen.c |   70 +++++++++++++++++++++++++++++++++++++++++++++++++++----------
 hyphen.h |    2 +
 2 files changed, 60 insertions(+), 12 deletions(-)

diff --git a/hyphen.c b/hyphen.c
index 974d87f..446d5bd 100644
--- a/hyphen.c
+++ b/hyphen.c
@@ -36,13 +36,13 @@
  * MPL.
  *
  */
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
 #include <stdlib.h> /* for NULL, malloc */
 #include <stdio.h>  /* for fprintf */
 #include <string.h> /* for strdup */
-
-#ifdef UNX
-#include <unistd.h> /* for exit */
-#endif
+#include <unistd.h> /* for close */
 
 #define noVERBOSE
 
@@ -230,12 +230,57 @@ get_state_str (int state)
 }
 #endif
 
+// Get a line from the dictionary contents.
+static char *
+get_line (char *s, int size, const char *dict_contents, int dict_length,
+    int *dict_ptr)
+{
+    int len = 0;
+    while (len < (size - 1) && *dict_ptr < dict_length) {
+        s[len++] = *(dict_contents + *dict_ptr);
+        (*dict_ptr)++;
+        if (s[len - 1] == '\n')
+            break;
+    }
+    s[len] = '\0';
+    if (len > 0) {
+        return s;
+    } else {
+        return NULL;
+    }
+}
+
 HyphenDict *
 hnj_hyphen_load (const char *fn)
 {
+    if (fn == NULL)
+        return NULL;
+    const int fd = open(fn, O_RDONLY);
+    if (fd == -1)
+        return NULL;
+    struct stat sb;
+    if (fstat(fd, &sb) == -1)  {  /* To obtain file size */
+        close(fd);
+        return NULL;
+    }
+
+    const char *addr = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+    if (addr == MAP_FAILED) {
+        close(fd);
+        return NULL;
+    }
+    HyphenDict *dict = hnj_hyphen_load_from_buffer(addr, sb.st_size);
+    munmap((void *)addr, sb.st_size);
+    close(fd);
+
+    return dict;
+}
+
+HyphenDict *
+hnj_hyphen_load_from_buffer (const char *dict_contents, int dict_length)
+{
     HyphenDict *dict[2];
     HashTab *hashtab;
-    FILE *f;
     char buf[MAX_CHARS];
     char word[MAX_CHARS];
     char pattern[MAX_CHARS];
@@ -249,10 +294,10 @@ hnj_hyphen_load (const char *fn)
     HashEntry *e;
     int nextlevel = 0;
 
-    f = fopen (fn, "r");
-    if (f == NULL)
+    if (dict_contents == NULL)
         return NULL;
 
+    int dict_ptr = 0;
 // loading one or two dictionaries (separated by NEXTLEVEL keyword)
     for (k = 0; k == 0 || (k == 1 && nextlevel); k++) { 
         hashtab = hnj_hash_new ();
@@ -277,7 +322,8 @@ hnj_hyphen_load (const char *fn)
         /* read in character set info */
         if (k == 0) {
             for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
-            fgets(dict[k]->cset,  sizeof(dict[k]->cset),f);
+            get_line(dict[k]->cset, sizeof(dict[k]->cset), dict_contents,
+                dict_length, &dict_ptr);
             for (i=0;i<MAX_NAME;i++)
                 if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
                     dict[k]->cset[i] = 0;
@@ -287,7 +333,8 @@ hnj_hyphen_load (const char *fn)
             dict[k]->utf8 = dict[0]->utf8;
         }
 
-        while (fgets (buf, sizeof(buf), f) != NULL)
+        while (get_line(buf, sizeof(buf), dict_contents, dict_length,
+                &dict_ptr) != NULL)
         {
             if (buf[0] != '%')
             {
@@ -446,7 +493,6 @@ hnj_hyphen_load (const char *fn)
 #endif
         state_num = 0;
     }
-    fclose(f);
     if (k == 2) dict[0]->nextlevel = dict[1];
     return dict[0];
 }
@@ -870,8 +916,8 @@ int hnj_hyphen_hyph_(HyphenDict *dict, const char *word, int word_size,
             hyphens2 = hnj_malloc (word_size);
         }
         for (i = 0; i < word_size; i++) rep2[i] = NULL;
-        for (i = 0; i < word_size; i++) if 
-                                            (hyphens[i]&1 || (begin > 0 && i + 1 == word_size)) {
+        for (i = 0; i < word_size; i++)
+            if (hyphens[i]&1 || (begin > 0 && i + 1 == word_size)) {
                 if (i - begin > 1) {
                     int hyph = 0;
                     prep_word[i + 2] = '\0';
diff --git a/hyphen.h b/hyphen.h
index 5d79308..29a0701 100644
--- a/hyphen.h
+++ b/hyphen.h
@@ -91,6 +91,8 @@ struct _HyphenTrans {
 };
 
 HyphenDict *hnj_hyphen_load (const char *fn);
+HyphenDict *hnj_hyphen_load_from_buffer (const char *dict_contents,
+    int dict_length);
 void hnj_hyphen_free (HyphenDict *dict);
 
 /* obsolete, use hnj_hyphen_hyphenate2() or *hyphenate3() functions) */
-- 
1.7.0.1