1 /*************************************************************************************************
2  * Pure Java interface of Hyper Estraier
3  *                                                      Copyright (C) 2004-2007 Mikio Hirabayashi
4  *                                                                           All rights reserved.
5  * This file is part of Hyper Estraier.
6  * Redistribution and use in source and binary forms, with or without modification, are
7  * permitted provided that the following conditions are met:
8  *
9  *   * Redistributions of source code must retain the above copyright notice, this list of
10  *     conditions and the following disclaimer.
11  *   * Redistributions in binary form must reproduce the above copyright notice, this list of
12  *     conditions and the following disclaimer in the documentation and/or other materials
13  *     provided with the distribution.
14  *   * Neither the name of Mikio Hirabayashi nor the names of its contributors may be used to
15  *     endorse or promote products derived from this software without specific prior written
16  *     permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
19  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
20  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
21  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23  * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
26  * OF THE POSSIBILITY OF SUCH DAMAGE.
27  *************************************************************************************************/
28 
29 
30 package estraier.pure;
31 
32 import java.util.*;
33 import java.io.*;
34 import java.net.*;
35 
36 
37 
38 /**
39  * Abstraction of document.
40  */
41 public class Document {
42   //----------------------------------------------------------------
43   // private fields
44   //----------------------------------------------------------------
45   private int id;
46   private Map attrs;
47   private List dtexts;
48   private List htexts;
49   private Map kwords;
50   private int score;
51   //----------------------------------------------------------------
52   // constructors
53   //----------------------------------------------------------------
54   /**
55    * Create a document object.
56    */
Document()57   public Document(){
58     id = -1;
59     attrs = new HashMap(31);
60     dtexts = new ArrayList(31);
61     htexts = new ArrayList(31);
62     kwords = null;
63     score = -1;
64   }
65   /**
66    * Create a document object made from draft data.
67    * @param draft a string of draft data.
68    */
Document(String draft)69   public Document(String draft){
70     this();
71     String[] lines = Utility.split_lines(draft);
72     int lnum = 0;
73     while(lnum < lines.length){
74       String line = lines[lnum++].trim();
75       if(line.length() < 1) break;
76       if(line.startsWith("%")){
77         if(line.startsWith("%VECTOR\t")){
78           if(kwords == null) kwords = new HashMap(32);
79           String[] fields = Utility.split_fields(line);
80           for(int i = 1; i < fields.length - 1; i += 2){
81             if(fields[i].length() < 0 || fields[i+1].length() < 0 ||
82                fields[i].charAt(0) <= ' ') continue;
83             kwords.put(fields[i], fields[i+1]);
84           }
85         } else if(line.startsWith("%SCORE\t")){
86           String[] fields = Utility.split_fields(line);
87           score = Integer.parseInt(fields[1]);
88         }
89         continue;
90       }
91       int lidx = line.indexOf('=');
92       if(lidx != -1) add_attr(line.substring(0, lidx), line.substring(lidx + 1, line.length()));
93     }
94     while(lnum < lines.length){
95       String line = lines[lnum++];
96       if(line.length() < 1) continue;
97       if(line.charAt(0) == '\t'){
98         line = line.substring(1, line.length());
99         if(line.length() > 0) add_hidden_text(line);
100       } else {
101         add_text(line);
102       }
103     }
104   }
105   //----------------------------------------------------------------
106   // public methods
107   //----------------------------------------------------------------
108   /**
109    * Add an attribute.
110    * @param name the name of an attribute.
111    * @param value the value of the attribute.  If it is `null', the attribute is removed.
112    */
add_attr(String name, String value)113   public void add_attr(String name, String value){
114     if(value != null){
115       attrs.put(name, value.trim());
116     } else {
117       attrs.remove(name);
118     }
119   }
120   /**
121    * Add a sentence of text.
122    * @param text sentence of text.
123    */
add_text(String text)124   public void add_text(String text){
125     text = text.trim();
126     if(text.length() > 0) dtexts.add(text);
127   }
128   /**
129    * Add a hidden sentence.
130    * @param text a hidden sentence.
131    */
add_hidden_text(String text)132   public void add_hidden_text(String text){
133     text = text.trim();
134     if(text.length() > 0) htexts.add(text);
135   }
136   /**
137    * Attach keywords.
138    * @param kwords a map object of keywords.  Keys of the map should be keywords of the document
139    * and values should be their scores in decimal string.
140    */
set_keywords(Map kwords)141   public void set_keywords(Map kwords){
142     this.kwords = kwords;
143   }
144   /**
145    * Set the substitute score.
146    * @param score the substitute score.  It it is negative, the substitute score setting is
147    * nullified.
148    */
set_score(int score)149   private void set_score(int score){
150     this.score = score;
151   }
152   /**
153    * Get the ID number.
154    * @return the ID number.  If this object has never been registered, -1 is returned.
155    */
id()156   public int id(){
157     return id;
158   }
159   /**
160    * Get a list of attribute names.
161    * @return a list object of attribute names.
162    */
attr_names()163   public List attr_names(){
164     List names = new ArrayList(attrs.size());
165     Iterator it = attrs.keySet().iterator();
166     while(it.hasNext()){
167       names.add(it.next());
168     }
169     Collections.sort(names);
170     return names;
171   }
172   /**
173    * Get the value of an attribute.
174    * @param name the name of an attribute.
175    * @return the value of the attribute or `null' if it does not exist.
176    */
attr(String name)177   public String attr(String name){
178     return (String)attrs.get(name);
179   }
180   /**
181    * Get a list of sentences of the text.
182    * @return a list object of sentences of the text.
183    */
texts()184   public List texts(){
185     return dtexts;
186   }
187   /**
188    * Concatenate sentences of the text.
189    * @return concatenated sentences.
190    */
cat_texts()191   public String cat_texts(){
192     StringBuffer sb = new StringBuffer();
193     Iterator it = dtexts.iterator();
194     for(int i = 0; it.hasNext(); i++){
195       if(i > 0) sb.append(" ");
196       sb.append(it.next());
197     }
198     return sb.toString();
199   }
200   /**
201    * Get attached keywords.
202    * @return a map object of keywords and their scores in decimal string.  If no keyword is
203    * attached, `null' is returned.
204    */
keywords()205   public Map keywords(){
206     return kwords;
207   }
208   /**
209    * Get the substitute score.
210    * @return the substitute score or -1 if it is not set.
211    */
score()212   public int score(){
213     if(score < 0) return -1;
214     return score;
215   }
216   /**
217    * Dump draft data.
218    * @return draft data.
219    */
dump_draft()220   public String dump_draft(){
221     StringBuffer sb = new StringBuffer();
222     List names = attr_names();
223     Iterator attrit = names.iterator();
224     while(attrit.hasNext()){
225       String name = (String)attrit.next();
226       sb.append(name);
227       sb.append("=");
228       sb.append((String)attrs.get(name));
229       sb.append("\n");
230     }
231     if(kwords != null){
232       sb.append("%VECTOR");
233       Iterator kwit = kwords.keySet().iterator();
234       while(kwit.hasNext()){
235         String key = (String)kwit.next();
236         sb.append("\t");
237         sb.append(key);
238         sb.append("\t");
239         sb.append((String)kwords.get(key));
240       }
241       sb.append("\n");
242     }
243     if(score >= 0) sb.append("%SCORE\t" + score + "\n");
244     sb.append("\n");
245     Iterator dtit = dtexts.iterator();
246     while(dtit.hasNext()){
247       sb.append(dtit.next());
248       sb.append("\n");
249     }
250     Iterator htit = htexts.iterator();
251     while(htit.hasNext()){
252       sb.append("\t");
253       sb.append(htit.next());
254       sb.append("\n");
255     }
256     return sb.toString();
257   }
258 }
259 
260 
261 
262 /* END OF FILE */
263