1 /*
2 * Copyright (c) 2003 Nara Institute of Science and Technology
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name Nara Institute of Science and Technology may not be used to
15 * endorse or promote products derived from this software without
16 * specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY Nara Institute of Science and Technology
19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
21 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE Nara Institute
22 * of Science and Technology BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
24 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 * $Id: dumpdic.c,v 1.1.1.1 2007/03/13 07:40:10 masayu-a Exp $
31 */
32
33 #include <stdlib.h>
34 #include <stdio.h>
35 #include <limits.h>
36
37 #include "chadic.h"
38 #include "dartsdic.h"
39
40 #define NO_COMPOUND LONG_MAX
41
42 static long
dump_dat(lexicon_t * lex,FILE * datfile,long compound)43 dump_dat(lexicon_t *lex, FILE *datfile, long compound)
44 {
45 long index;
46 da_dat_t dat;
47
48 index = ftell(datfile);
49 dat.stem_len = lex->stem_len;
50 dat.reading_len = lex->reading_len;
51 dat.pron_len = lex->pron_len;
52 dat.base_len = strlen(lex->base);
53 dat.info_len = strlen(lex->info);
54 dat.compound = compound;
55 if (fwrite(&dat, sizeof(dat), 1, datfile) != 1)
56 cha_exit_perror("datfile");
57
58 if (fputs(lex->reading, datfile) < 0 || fputc('\0', datfile) < 0 ||
59 fputs(lex->pron, datfile) < 0 || fputc('\0', datfile) < 0 ||
60 fputs(lex->base, datfile) < 0 || fputc('\0', datfile) < 0 ||
61 fputs(lex->info, datfile) < 0 || fputc('\0', datfile) < 0)
62 cha_exit_perror("datfile");
63
64 if (ftell(datfile) % 2)
65 if (fputc('\0', datfile) < 0)
66 cha_exit_perror("datfile");
67
68 if (index < 0)
69 cha_exit_perror("datfile");
70
71 return index;
72 }
73
74 static long
dump_lex(da_lex_t * lex,FILE * output)75 dump_lex(da_lex_t *lex, FILE *output)
76 {
77 long index;
78
79 index = ftell(output);
80 if (fwrite(lex, sizeof(da_lex_t), 1, output) != 1)
81 cha_exit_perror("lexfile");
82
83 return index;
84 }
85
86 static da_lex_t *
assemble_lex(da_lex_t * lex,lexicon_t * entry,long dat_index)87 assemble_lex(da_lex_t *lex, lexicon_t *entry, long dat_index)
88 {
89 lex->posid = entry->pos;
90 lex->inf_type = entry->inf_type;
91 lex->inf_form = entry->inf_form;
92 lex->weight = entry->weight;
93 lex->con_tbl = entry->con_tbl;
94 lex->dat_index = dat_index;
95
96 return lex;
97 }
98
99 static long
dump_compound(lexicon_t * entries,FILE * lexfile,FILE * datfile)100 dump_compound(lexicon_t *entries, FILE *lexfile, FILE *datfile)
101 {
102 int i;
103 short has_next;
104 long compound_index = ftell(lexfile);
105 long marker = 0L;
106
107 for (i = 1; entries[i].pos; i++) {
108 short hw_len = strlen(entries[i].headword);
109 da_lex_t lex;
110 long dat_index;
111
112 has_next = 1;
113 dat_index = dump_dat(entries + i, datfile, NO_COMPOUND);
114 assemble_lex(&lex, entries + i, dat_index);
115 fwrite(&hw_len, sizeof(short), 1, lexfile);
116 marker = ftell(lexfile);
117 if (fwrite(&has_next, sizeof(short), 1, lexfile) != 1)
118 cha_exit_perror("lexfile");
119 dump_lex(&lex, lexfile);
120 }
121 has_next = 0;
122 fseek(lexfile, marker, SEEK_SET);
123 if (fwrite(&has_next, sizeof(short), 1, lexfile) != 1)
124 cha_exit_perror("lexfile");
125 fseek(lexfile, 0L, SEEK_END);
126
127 return compound_index;
128 }
129
130 int
dump_dic(lexicon_t * entries,FILE * output[],da_build_t * builder)131 dump_dic(lexicon_t *entries, FILE *output[], da_build_t *builder)
132 {
133 FILE *datfile = output[0];
134 FILE *lexfile = output[1];
135 FILE *tmpfile = output[2];
136 long dat_index, lex_index;
137 da_lex_t lex;
138 long compound = NO_COMPOUND;
139
140 if (entries[1].pos)
141 compound = dump_compound(entries, lexfile, datfile);
142
143 dat_index = dump_dat(entries, datfile, compound);
144
145 assemble_lex(&lex, entries, dat_index);
146 if (entries[0].inf_type == 0 || entries[0].inf_form > 0) {
147 lex_index = dump_lex(&lex, tmpfile);
148 da_build_add(builder, entries[0].headword, lex_index);
149 } else {
150 int stem_len = strlen(entries[0].headword);
151 unsigned short con_tbl = lex.con_tbl;
152 int i;
153
154 for (i = 1; Cha_form[lex.inf_type][i].name; i++) {
155 lex.inf_form = i;
156 lex.con_tbl = con_tbl + i - 1;
157 strcpy(entries[0].headword + stem_len,
158 Cha_form[lex.inf_type][i].gobi);
159 if (!entries[0].headword[0])
160 continue;
161 lex_index = dump_lex(&lex, tmpfile);
162 da_build_add(builder, entries[0].headword, lex_index);
163 }
164 }
165
166 return 0;
167 }
168