/* StreamQualifier.java
 *
 * created: Wed Dec 30 1998
 *
 * This file is part of Artemis
 *
 * Copyright (C) 1998,1999,2000  Genome Research Limited
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 *
 * $Header: //tmp/pathsoft/artemis/uk/ac/sanger/artemis/io/StreamQualifier.java,v 1.3 2008-11-07 17:54:26 tjc Exp $
 */

package uk.ac.sanger.artemis.io;

import uk.ac.sanger.artemis.util.*;

import java.io.BufferedReader;
import java.io.IOException;
import java.util.regex.Pattern;

/**
 *  This class contains routines for reading and writing Qualifiers.
 *
 *  @author Kim Rutherford
 *  @version $Id: StreamQualifier.java,v 1.3 2008-11-07 17:54:26 tjc Exp $
 **/

public // XXX

class StreamQualifier {
  /**
   *  Matches a double quote that has no double quote directly before or
   *  after it.  Lookarounds are used so that the neighbouring characters are
   *  not consumed, which means two lone quotes separated by a single
   *  character (as in a"b"c) are both found.
   **/
  private static final Pattern LONE_QUOTE_PATTERN =
    Pattern.compile ("(?<!\")\"(?!\")");

  /**
   *  Create a new Qualifier object by unquoting the value part of the
   *  Qualifier and then calling the Qualifier constructor.  This object
   *  consists of a name and a value.  In the raw embl file we have
   *  /name=value.
   *  If entry_information does not consider the name valid, a new
   *  QualifierInfo for the name is added to it first: QUOTED_TEXT if the
   *  value starts with a double quote, otherwise TEXT (a null value is
   *  treated as not starting with a quote).
   *  @param name The name of this qualifier (ie. the text immediately after
   *    the / in the qualifier)
   *  @param value The value of this qualifier (ie the text immediately after
   *    the = in the qualifier).  This argument may be null if the qualifier
   *    has no value.  Unlike the Qualifier constructor the value String
   *    should include the quote characters (),"" or [] if the original
   *    qualifier contains them.  For example if the original qualifier was
   *    /citation=[3] then the value String should be: [3].
   *  @param entry_information Queried with isValidQualifier () and, for an
   *    unknown name, updated with addQualifierInfo ().
   *  @exception QualifierParseException Thrown if the value String is
   *    incorrectly quoted.
   **/
  public static Qualifier
    makeStreamQualifier (final String name,
                         final String value,
                         final EntryInformation entry_information)
      throws QualifierParseException {

    if (!entry_information.isValidQualifier (name)) {
      // use this qualifier value to decide how qualifiers with this name
      // should be quoted
      final boolean starts_with_quote =
        value != null && value.startsWith ("\"");

      final QualifierInfo new_qualifier_info;
      if (starts_with_quote) {
        new_qualifier_info =
          new QualifierInfo (name, QualifierInfo.QUOTED_TEXT,
                             null, null, false);
      } else {
        new_qualifier_info =
          new QualifierInfo (name, QualifierInfo.TEXT, null, null, false);
      }

      try {
        entry_information.addQualifierInfo (new_qualifier_info);
      } catch (QualifierInfoException e) {
        // this shouldn't happen because we have just checked that there is no
        // qualifier with this name.  keep the cause so the original stack
        // trace is not lost
        throw new Error ("internal error - unexpected exception: " + e, e);
      }
    }

    return new Qualifier (name, unquote (value));
  }

  /**
   *  Return a String version of the given Qualifier.  Each value is written
   *  as /name=value (or just /name for a null value) and the pieces are
   *  concatenated without any separator.
   *  @param qualifier_info Used to determine how to quote the qualifiers
   *  @param qualifier The Qualifier to convert.
   **/
  public static String toString (final QualifierInfo qualifier_info,
                                 final Qualifier qualifier) {
    final StringVector values = qualifier.getValues ();
    final String name = qualifier.getName ();

    if (values == null) {
      return '/' + name;
    }

    // the number we pick for the initial StringBuffer size is not critical,
    // but should cover most possibilities
    final StringBuffer buffer = new StringBuffer (50);

    for (int i = 0 ; i < values.size () ; ++i) {
      appendQualifier (buffer, qualifier_info, name,
                       (String) values.elementAt (i));
    }

    return buffer.toString ();
  }

  /**
   *  Return a StringVector containing one String (of the form /name=value)
   *  for each of the values of the given qualifier.  A qualifier whose value
   *  vector is null gives a single /name entry.
   *  @param qualifier_info Used to determine how to quote the qualifiers
   *  @param qualifier The Qualifier to convert.
   **/
  public static StringVector
    toStringVector (final QualifierInfo qualifier_info,
                    final Qualifier qualifier) {
    final StringVector values = qualifier.getValues ();
    final String name = qualifier.getName ();

    final StringVector return_vector = new StringVector ();

    if (values == null) {
      return_vector.add ('/' + name);
      return return_vector;
    }

    for (int i = 0 ; i < values.size () ; ++i) {
      // the number we pick for the initial StringBuffer size is not
      // critical
      final StringBuffer buffer = new StringBuffer (50);

      appendQualifier (buffer, qualifier_info, name,
                       (String) values.elementAt (i));

      return_vector.add (buffer.toString ());
    }

    return return_vector;
  }

  /**
   *  Append /name, followed by =value when the value is not null, to the
   *  given buffer.  The value is escaped and quoted before it is appended.
   **/
  private static void appendQualifier (final StringBuffer buffer,
                                       final QualifierInfo qualifier_info,
                                       final String name,
                                       final String value) {
    buffer.append ('/');
    buffer.append (name);

    if (value != null) {
      buffer.append ('=');
      buffer.append (quotedValue (qualifier_info, name,
                                  escapeValue (value)));
    }
  }

  /**
   *  Prepare a raw value for writing: every lone double quote is doubled
   *  (quotes that are already doubled are left alone) and line breaks
   *  (e.g. in notes/history) are masked with spaces.
   **/
  private static String escapeValue (final String value) {
    final String escaped_quotes =
      LONE_QUOTE_PATTERN.matcher (value).replaceAll ("\"\"");

    return escaped_quotes.replace ('\n', ' ');
  }

  /**
   *  Read a qualifier name from a stream.  Leading whitespace is skipped,
   *  then a '/' is expected, then the name itself (letters, digits, '_' and
   *  '+').  The character that ends the name is pushed back with reset ()
   *  so that the caller can read it.
   *  @param buffered_reader the stream to read from
   *  @return the qualifier name if successful, or null if the end of the
   *    stream is reached before a '/' is seen
   *  @exception QualifierParseException Thrown if a character other than
   *    whitespace or '/' is seen first, or if the name is empty.
   */
  static String readName (final BufferedReader buffered_reader)
      throws QualifierParseException, IOException {

    int current_char;

    while ((current_char = buffered_reader.read ()) != -1 &&
           0 != current_char    // Kaffe 1.00 returns 0 at end of string
           ) {
      if (' ' == current_char ||
          '\n' == current_char ||
          '\r' == current_char ||
          '\t' == current_char) {
        // read a whitespace character so go back to the top of the loop
        continue;
      }

      if ('/' == current_char) {
        // we have found the start of the qualifier name
        break;
      }

      // if the character isn't a / or space then something is wrong
      throw new QualifierParseException ("failed to read a qualifier " +
                                         "name from this string: " +
                                         (char)current_char +
                                         buffered_reader.readLine ());
    }

    if (-1 == current_char ||
        0 == current_char       // Kaffe 1.00 returns 0 at end of string
        ) {
      // end of file
      return null;
    }

    buffered_reader.mark (1);

    // a local buffer is used (rather than a shared static one) so that
    // concurrent calls cannot corrupt each other's names.  the initial size
    // is not critical
    final StringBuffer name_buffer = new StringBuffer (20);

    while ((current_char = buffered_reader.read ()) != -1) {
      final boolean is_name_char =
        Character.isLetter ((char) current_char) ||
        Character.isDigit ((char) current_char) ||
        '_' == current_char ||
        '+' == current_char;

      if (!is_name_char) {
        // we have read one character too many
        buffered_reader.reset ();
        break;
      }

      name_buffer.append ((char) current_char);

      // save the new position and go around the loop again
      buffered_reader.mark (1);
    }

    final String return_string = name_buffer.toString ();

    if (return_string.length () == 0) {
      throw new QualifierParseException ("zero length qualifier name read " +
                                         "from this string: " +
                                         buffered_reader.readLine ());
    }

    return return_string;
  }

  /**
   *  This is used by readValue () as temporary storage.  It is a class
   *  member rather than a local variable so that we don't need to allocate
   *  an object for each call.  The number we pick for the initial array
   *  size is not critical, but should cover most possibilities to prevent
   *  reallocation.
   **/
  private static char [] read_value_buffer = new char [5000];

  /**
   *  The index into read_value_buffer - used by readValue () to keep track of
   *  where to put the next character.
   **/
  private static int buffer_index = 0;


  /**
   *  Append the given char to read_value_buffer (at the position
   *  buffer_index), reallocating the buffer (doubling its size) if necessary.
   **/
  private static void appendToValueBuffer (final char new_char) {
    if (buffer_index >= read_value_buffer.length) {
      // reallocate as the buffer is full

      final char [] temp_buffer = new char [read_value_buffer.length * 2];

      System.arraycopy (read_value_buffer, 0,
                        temp_buffer, 0,
                        read_value_buffer.length);
      read_value_buffer = temp_buffer;
    }

    read_value_buffer [buffer_index++] = new_char;
  }

  /**
   *  Read a qualifier value from a stream.  A value that starts with ", [
   *  or ( is read up to the matching closing character (brackets are
   *  balanced and a doubled quote inside a quoted value is kept as two
   *  quotes); any other value is read up to, but not including, the next
   *  '/'.  Control characters other than tab are changed to spaces and
   *  trailing whitespace is removed.  The enclosing delimiter characters are
   *  part of the returned String.
   *  This method is synchronized because it works in the shared static
   *  read_value_buffer.
   *  @param buffered_reader the stream to read from
   *  @return the qualifier value, or the empty String if the stream is
   *    already at its end
   *  @exception QualifierParseException Thrown if the end of the stream is
   *    reached while a round or square bracket is still open.
   */
  static synchronized String readValue (final BufferedReader buffered_reader)
      throws QualifierParseException, IOException {

    buffer_index = 0;

    buffered_reader.mark (1);

    int current_char = buffered_reader.read ();

    if (-1 == current_char) {
      return "";
    }

    // this is the character the marks the end of the value string.  the
    // default value of 0 means a '/' should end the string.
    char final_char = 0;

    // this will be set to [ or ( if the value starts with one of those
    // characters (it stays 0 for a value that starts with ")
    char start_char = 0;

    // this is is used to balance the round or square brackets.  it is
    // incremented each time an open bracket is seen (after the first one) and
    // decremented each time a close bracket is seen (except for the last).
    int bracket_count = 0;

    if ('"' == current_char) {
      final_char = '"';
    }
    if ('[' == current_char) {
      final_char = ']';
      start_char = '[';
      ++bracket_count;
    }
    if ('(' == current_char) {
      final_char = ')';
      start_char = '(';
      ++bracket_count;
    }

    if (0 == final_char) {
      // the character we read isn't one of the delimiter characters so put it
      // back
      buffered_reader.reset ();
    } else {
      // append the char now so that loop doesn't stop immediately in the '"'
      // case
      appendToValueBuffer ((char) current_char);
    }

    buffered_reader.mark (1);

    while ((current_char = buffered_reader.read ()) != -1) {

      // change newlines and other control characters to spaces
      if (Character.isISOControl ((char)current_char) &&
          current_char != '\t') {
        current_char = ' ';
      }

      // quotes never take part in bracket balancing
      if (current_char != '"') {
        if (current_char == start_char) {
          ++bracket_count;
        } else {
          if (current_char == final_char) {
            --bracket_count;
          }
        }
      }

      if (current_char == final_char && bracket_count == 0) {

        if (current_char == '"') {
          // check for two quotes in a row

          // since the current character is a quote we know we can change the
          // mark

          buffered_reader.mark (1);

          final int next_char = buffered_reader.read ();

          if (next_char == '"') {
            // we have hit a quoted quote
            appendToValueBuffer ('"');
            appendToValueBuffer ('"');
            continue;
          } else {
            // end of line or next qualifier

            if (next_char != -1) {
              buffered_reader.reset ();
            }

            appendToValueBuffer ('"');
            break;
          }
        } else {
          // end of value
          appendToValueBuffer ((char) current_char);
          break;
        }
      } else {

        if (0 == final_char && '/' == current_char) {
          // in this case '/' marks the end of the value.  we need to push back
          // the '/' so that reading the next qualifier will work
          buffered_reader.reset ();
          break;
        } else {
          appendToValueBuffer ((char) current_char);

          // save the new position and go around the loop again
          buffered_reader.mark (1);
          continue;
        }
      }
    }

    if (bracket_count > 0) {
      throw new QualifierParseException ("hit the end of line while looking " +
                                         "for a \"" + final_char + "\"");

    }

    // move buffer_index back past any whitespace
    while (buffer_index > 0 &&
           Character.isWhitespace (read_value_buffer[buffer_index-1])) {
      --buffer_index;
    }

    return new String (read_value_buffer, 0, buffer_index);
  }

  /**
   *  Return the value part of a Qualifier correctly quoted for insertion into
   *  a embl entry.  The value is wrapped in double quotes if the qualifier
   *  type is QUOTED_TEXT or OPTIONAL_QUOTED_TEXT, or if it contains a '/'
   *  (which would otherwise end an unquoted value when read back);
   *  otherwise it is returned unchanged.
   *  @param qualifier_info The type of the qualifier that we will quote.
   *    May be null, in which case only the '/' rule applies.
   *  @param name The name part of qualifier.  Currently not used by this
   *    method.
   *  @param value Quote this value.
   **/
  private static String quotedValue (final QualifierInfo qualifier_info,
                                     final String name, final String value) {
    final boolean quoted_type =
      qualifier_info != null &&
      (qualifier_info.getType () == QualifierInfo.QUOTED_TEXT ||
       qualifier_info.getType () == QualifierInfo.OPTIONAL_QUOTED_TEXT);

    if (quoted_type || value.indexOf ('/') != -1) {
      // values containing '/' are quoted anyway
      return '"' + value + '"';
    }

    return value;
  }

  /**
   *  Return the value part of a qualifier with any enclosing double quote
   *  characters removed.
   *  @param value This is the value String to strip the quote characters
   *    from.  This may be null if this qualifier has no value part (for
   *    example /partial).
   *  @return The unquoted version of the qualifier value or null if the value
   *    passed to unquote() is null.  Values shorter than two characters are
   *    returned unchanged.
   *  @exception QualifierParseException Thrown if the value String is
   *    incorrectly quoted, ie. there is a quote at one end of the value and
   *    not the other.
   **/
  private static String unquote (final String value)
      throws QualifierParseException {
    if (value == null || value.length () < 2) {
      // nothing to strip: no value at all, or too short to hold a quote pair
      return value;
    }

    final boolean starts_with_quote = value.charAt (0) == '"';
    final boolean ends_with_quote =
      value.charAt (value.length () - 1) == '"';

    if (starts_with_quote && ends_with_quote) {
      return value.substring (1, value.length () - 1);
    }

    if (!starts_with_quote && !ends_with_quote) {
      return value;
    }

    throw new QualifierParseException ("unbalanced quotes: " + value);
  }
}