1 /************************************************************************************************* 2 * Pure Java interface of Hyper Estraier 3 * Copyright (C) 2004-2007 Mikio Hirabayashi 4 * All rights reserved. 5 * This file is part of Hyper Estraier. 6 * Redistribution and use in source and binary forms, with or without modification, are 7 * permitted provided that the following conditions are met: 8 * 9 * * Redistributions of source code must retain the above copyright notice, this list of 10 * conditions and the following disclaimer. 11 * * Redistributions in binary form must reproduce the above copyright notice, this list of 12 * conditions and the following disclaimer in the documentation and/or other materials 13 * provided with the distribution. 14 * * Neither the name of Mikio Hirabayashi nor the names of its contributors may be used to 15 * endorse or promote products derived from this software without specific prior written 16 * permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS 19 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 20 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 21 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 22 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 23 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 25 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 26 * OF THE POSSIBILITY OF SUCH DAMAGE. 27 *************************************************************************************************/ 28 29 30 package estraier.pure; 31 32 import java.util.*; 33 import java.io.*; 34 import java.net.*; 35 36 37 38 /** 39 * Abstraction of document. 40 */ 41 public class Document { 42 //---------------------------------------------------------------- 43 // private fields 44 //---------------------------------------------------------------- 45 private int id; 46 private Map attrs; 47 private List dtexts; 48 private List htexts; 49 private Map kwords; 50 private int score; 51 //---------------------------------------------------------------- 52 // constructors 53 //---------------------------------------------------------------- 54 /** 55 * Create a document object. 56 */ Document()57 public Document(){ 58 id = -1; 59 attrs = new HashMap(31); 60 dtexts = new ArrayList(31); 61 htexts = new ArrayList(31); 62 kwords = null; 63 score = -1; 64 } 65 /** 66 * Create a document object made from draft data. 67 * @param draft a string of draft data. 68 */ Document(String draft)69 public Document(String draft){ 70 this(); 71 String[] lines = Utility.split_lines(draft); 72 int lnum = 0; 73 while(lnum < lines.length){ 74 String line = lines[lnum++].trim(); 75 if(line.length() < 1) break; 76 if(line.startsWith("%")){ 77 if(line.startsWith("%VECTOR\t")){ 78 if(kwords == null) kwords = new HashMap(32); 79 String[] fields = Utility.split_fields(line); 80 for(int i = 1; i < fields.length - 1; i += 2){ 81 if(fields[i].length() < 0 || fields[i+1].length() < 0 || 82 fields[i].charAt(0) <= ' ') continue; 83 kwords.put(fields[i], fields[i+1]); 84 } 85 } else if(line.startsWith("%SCORE\t")){ 86 String[] fields = Utility.split_fields(line); 87 score = Integer.parseInt(fields[1]); 88 } 89 continue; 90 } 91 int lidx = line.indexOf('='); 92 if(lidx != -1) add_attr(line.substring(0, lidx), line.substring(lidx + 1, line.length())); 93 } 94 while(lnum < lines.length){ 95 String line = lines[lnum++]; 96 if(line.length() < 1) continue; 97 if(line.charAt(0) == '\t'){ 98 line = line.substring(1, line.length()); 99 if(line.length() > 0) add_hidden_text(line); 100 } else { 101 add_text(line); 102 } 103 } 104 } 105 //---------------------------------------------------------------- 106 // public methods 107 //---------------------------------------------------------------- 108 /** 109 * Add an attribute. 110 * @param name the name of an attribute. 111 * @param value the value of the attribute. If it is `null', the attribute is removed. 112 */ add_attr(String name, String value)113 public void add_attr(String name, String value){ 114 if(value != null){ 115 attrs.put(name, value.trim()); 116 } else { 117 attrs.remove(name); 118 } 119 } 120 /** 121 * Add a sentence of text. 122 * @param text sentence of text. 123 */ add_text(String text)124 public void add_text(String text){ 125 text = text.trim(); 126 if(text.length() > 0) dtexts.add(text); 127 } 128 /** 129 * Add a hidden sentence. 130 * @param text a hidden sentence. 131 */ add_hidden_text(String text)132 public void add_hidden_text(String text){ 133 text = text.trim(); 134 if(text.length() > 0) htexts.add(text); 135 } 136 /** 137 * Attach keywords. 138 * @param kwords a map object of keywords. Keys of the map should be keywords of the document 139 * and values should be their scores in decimal string. 140 */ set_keywords(Map kwords)141 public void set_keywords(Map kwords){ 142 this.kwords = kwords; 143 } 144 /** 145 * Set the substitute score. 146 * @param score the substitute score. It it is negative, the substitute score setting is 147 * nullified. 148 */ set_score(int score)149 private void set_score(int score){ 150 this.score = score; 151 } 152 /** 153 * Get the ID number. 154 * @return the ID number. If this object has never been registered, -1 is returned. 155 */ id()156 public int id(){ 157 return id; 158 } 159 /** 160 * Get a list of attribute names. 161 * @return a list object of attribute names. 162 */ attr_names()163 public List attr_names(){ 164 List names = new ArrayList(attrs.size()); 165 Iterator it = attrs.keySet().iterator(); 166 while(it.hasNext()){ 167 names.add(it.next()); 168 } 169 Collections.sort(names); 170 return names; 171 } 172 /** 173 * Get the value of an attribute. 174 * @param name the name of an attribute. 175 * @return the value of the attribute or `null' if it does not exist. 176 */ attr(String name)177 public String attr(String name){ 178 return (String)attrs.get(name); 179 } 180 /** 181 * Get a list of sentences of the text. 182 * @return a list object of sentences of the text. 183 */ texts()184 public List texts(){ 185 return dtexts; 186 } 187 /** 188 * Concatenate sentences of the text. 189 * @return concatenated sentences. 190 */ cat_texts()191 public String cat_texts(){ 192 StringBuffer sb = new StringBuffer(); 193 Iterator it = dtexts.iterator(); 194 for(int i = 0; it.hasNext(); i++){ 195 if(i > 0) sb.append(" "); 196 sb.append(it.next()); 197 } 198 return sb.toString(); 199 } 200 /** 201 * Get attached keywords. 202 * @return a map object of keywords and their scores in decimal string. If no keyword is 203 * attached, `null' is returned. 204 */ keywords()205 public Map keywords(){ 206 return kwords; 207 } 208 /** 209 * Get the substitute score. 210 * @return the substitute score or -1 if it is not set. 211 */ score()212 public int score(){ 213 if(score < 0) return -1; 214 return score; 215 } 216 /** 217 * Dump draft data. 218 * @return draft data. 219 */ dump_draft()220 public String dump_draft(){ 221 StringBuffer sb = new StringBuffer(); 222 List names = attr_names(); 223 Iterator attrit = names.iterator(); 224 while(attrit.hasNext()){ 225 String name = (String)attrit.next(); 226 sb.append(name); 227 sb.append("="); 228 sb.append((String)attrs.get(name)); 229 sb.append("\n"); 230 } 231 if(kwords != null){ 232 sb.append("%VECTOR"); 233 Iterator kwit = kwords.keySet().iterator(); 234 while(kwit.hasNext()){ 235 String key = (String)kwit.next(); 236 sb.append("\t"); 237 sb.append(key); 238 sb.append("\t"); 239 sb.append((String)kwords.get(key)); 240 } 241 sb.append("\n"); 242 } 243 if(score >= 0) sb.append("%SCORE\t" + score + "\n"); 244 sb.append("\n"); 245 Iterator dtit = dtexts.iterator(); 246 while(dtit.hasNext()){ 247 sb.append(dtit.next()); 248 sb.append("\n"); 249 } 250 Iterator htit = htexts.iterator(); 251 while(htit.hasNext()){ 252 sb.append("\t"); 253 sb.append(htit.next()); 254 sb.append("\n"); 255 } 256 return sb.toString(); 257 } 258 } 259 260 261 262 /* END OF FILE */ 263