1 /* StreamQualifier.java
2  *
3  * created: Wed Dec 30 1998
4  *
5  * This file is part of Artemis
6  *
7  * Copyright (C) 1998,1999,2000  Genome Research Limited
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License
11  * as published by the Free Software Foundation; either version 2
12  * of the License, or (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
22  *
23  * $Header: //tmp/pathsoft/artemis/uk/ac/sanger/artemis/io/StreamQualifier.java,v 1.3 2008-11-07 17:54:26 tjc Exp $
24  */
25 
26 package uk.ac.sanger.artemis.io;
27 
28 import uk.ac.sanger.artemis.util.*;
29 
30 import java.io.BufferedReader;
31 import java.io.IOException;
32 
33 /**
34  *  This class contains routines for reading and writing Qualifiers.
35  *
36  *  @author Kim Rutherford
37  *  @version $Id: StreamQualifier.java,v 1.3 2008-11-07 17:54:26 tjc Exp $
38  **/
39 
40 public // XXX
41 
42 class StreamQualifier {
43   /**
44    *  Create a new Qualifier object by unquoting the value part of the
45    *  Qualifier and the calling the Qualifier constructor.  This object
46    *  consists of a name and a value.  In the raw embl file we have
47    *  /name=value.
48    *  @param name The name of this qualifier (ie. the text immediately after
49    *    the / in the qualifier)
50    *  @param value The value of this qualifier (ie the text immediately after
51    *    the = in the qualifier).  This argument may be null if the qualifier
52    *    has no value.  Unlike the Qualifier constructor the value String
53    *    should include the quote characters (),"" or [] if the original
54    *    qualifier contains them.  For example if the original qualifier was
55    *    /citation=[3] then the value String should be: [3].
56    *  @exception QualifierParseException Thrown if the value String is
57    *    incorrectly quoted.
58    **/
59   public static Qualifier
makeStreamQualifier(final String name, final String value, final EntryInformation entry_information)60     makeStreamQualifier (final String name,
61                          final String value,
62                          final EntryInformation entry_information)
63       throws QualifierParseException {
64 
65     if (!entry_information.isValidQualifier (name)) {
66       // use this qualifier value to decide how qualifiers with this name
67       // should be quoted
68       final QualifierInfo new_qualifier_info;
69       if (value.startsWith ("\"")) {
70         new_qualifier_info =
71           new QualifierInfo (name, QualifierInfo.QUOTED_TEXT,
72                              null, null, false);
73       } else {
74         new_qualifier_info =
75           new QualifierInfo (name, QualifierInfo.TEXT, null, null, false);
76       }
77 
78       try {
79         entry_information.addQualifierInfo (new_qualifier_info);
80       } catch (QualifierInfoException e) {
81         // this shouldn't happen because we have just checked that there is no
82         // qualifier with this name
83         throw new Error ("internal error - unexpected exception: " + e);
84       }
85     }
86 
87     return new Qualifier (name, unquote (value));
88   }
89 
90   /**
91    *  Return a String version of the given Qualifier.
92    *  @param qualifier_info Used to determine how to quote the qualifiers
93    **/
toString(final QualifierInfo qualifier_info, final Qualifier qualifier)94   public static String toString (final QualifierInfo qualifier_info,
95                                  final Qualifier qualifier) {
96     final StringVector values = qualifier.getValues ();
97 
98     if (values == null) {
99       return '/' + qualifier.getName ();
100     } else {
101       // the number we pick for the initial StringBuffer size is not critical,
102       // but should cover most possibilities
103       final StringBuffer buffer = new StringBuffer (50);
104 
105       for (int i = 0 ; i < values.size () ; ++i) {
106         buffer.append ('/');
107         buffer.append (qualifier.getName ());
108         if (values.elementAt (i) != null) {
109           /* Escape double quotes */
110           String processedValue = (String)values.elementAt (i).replaceAll("(^|[^\"])\"([^\"]|$)","$1\"\"$2");
111           /* Mask line breaks in entries (e.g. notes/history) */
112           processedValue = processedValue.replaceAll("\n", " ");
113           buffer.append ('=');
114           buffer.append (quotedValue(qualifier_info,
115                                       qualifier.getName(),
116                                       processedValue));
117         }
118       }
119 
120       return buffer.toString ();
121     }
122   }
123 
124   /**
125    *  Return a StringVector containing one String (of the form /name=value)
126    *  for each of the values of the given qualifier.
127    *  @param qualifier_info Used to determine how to quote the qualifiers
128    **/
129   public static StringVector
toStringVector(final QualifierInfo qualifier_info, final Qualifier qualifier)130     toStringVector (final QualifierInfo qualifier_info,
131                     final Qualifier qualifier) {
132     final StringVector values = qualifier.getValues ();
133 
134     final StringVector return_vector = new StringVector ();
135 
136     if (values == null) {
137       return_vector.add ('/' + qualifier.getName ());
138     } else {
139       for (int i = 0 ; i < values.size () ; ++i) {
140         // the number we pick for the initial StringBuffer size is not
141         // critical
142         final StringBuffer buffer = new StringBuffer (50);
143 
144         buffer.append ('/');
145         buffer.append (qualifier.getName ());
146         if (values.elementAt (i) != null) {
147           /* Escape double quotes */
148           String processedValue = (String)values.elementAt (i).replaceAll("(^|[^\"])\"([^\"]|$)","$1\"\"$2");
149           /* Mask line breaks in entries (e.g. notes/history) */
150           processedValue = processedValue.replaceAll("\n", " ");
151           buffer.append ('=');
152           buffer.append (quotedValue (qualifier_info,
153                                       qualifier.getName (),
154                                       processedValue));
155         }
156         return_vector.add (buffer.toString ());
157       }
158     }
159 
160     return return_vector;
161   }
162 
163   /**
164    *  This is used by readFromStream () as temporary storage.  It is a class
165    *  member rather than a local variable so that we don't need to allocate a
166    *  object for each call.  The number we pick for the initial StringBuffer
167    *  size is not critical, but should cover most possibilities to prevent
168    *  reallocation.
169    **/
170   final private static StringBuffer read_name_string_buffer =
171     new StringBuffer (20);
172 
173   /**
174    *  Read a qualifier name from a stream.
175    *  @param buffered_reader the stream to read from
176    *  @return the qualifier name if successful, otherwise null
177    */
readName(final BufferedReader buffered_reader)178   static String readName (final BufferedReader buffered_reader)
179       throws QualifierParseException, IOException {
180 
181     int current_char;
182 
183     while ((current_char = buffered_reader.read ()) != -1 &&
184            0 != current_char    // Kaffe 1.00 returns 0 at end of string
185            ) {
186       if (' ' == current_char ||
187           '\n' == current_char ||
188           '\r' == current_char ||
189           '\t' == current_char) {
190         // read a whitespace character so go back to the top of the loop
191         continue;
192       } else {
193         if ('/' == current_char) {
194           // we have found the start of the qualifier name
195           break;
196         } else {
197           // if the character isn't a / or space then something is wrong
198           throw new QualifierParseException ("failed to read a qualifier " +
199                                              "name from this string: " +
200                                              (char)current_char +
201                                              buffered_reader.readLine ());
202         }
203       }
204     }
205 
206     if (-1 == current_char ||
207         0 == current_char       // Kaffe 1.00 returns 0 at end of string
208         ) {
209       // end of file
210       return null;
211     }
212 
213     buffered_reader.mark (1);
214 
215     read_name_string_buffer.setLength (0);
216 
217     while ((current_char = buffered_reader.read ()) != -1) {
218       if (Character.isLetter ((char) current_char) ||
219           Character.isDigit ((char) current_char) ||
220           '_' == current_char ||
221           '+' == current_char) {
222 
223         read_name_string_buffer.append ((char) current_char);
224 
225         // save the new position and go around the loop again
226         buffered_reader.mark (1);
227         continue;
228       } else {
229         // we have read one character too many
230         buffered_reader.reset ();
231         break;
232       }
233     }
234 
235     final String return_string = read_name_string_buffer.toString ();
236 
237     if (return_string.length () == 0) {
238       throw new QualifierParseException ("zero length qualifier name read " +
239                                          "from this string: " +
240                                          buffered_reader.readLine ());
241     } else {
242       return return_string;
243     }
244   }
245 
246   /**
247    *  This is used by readFromStream () as temporary storage.  It is a class
248    *  member rather than a local variable so that we don't need to allocate a
249    *  object for each call.  The number we pick for the initial array
250    *  size is not critical, but should cover most possibilities to prevent
251    *  reallocation.
252    **/
253   private static char [] read_value_buffer = new char [5000];
254 
255   /**
256    *  The index into read_value_buffer - used by readValue () to keep track of
257    *  where to put the next character.
258    **/
259   private static int buffer_index = 0;
260 
261 
262   /**
263    *  Append the given char to read_value_buffer (at the position
264    *  buffer_index), reallocating the buffer if necessary
265    **/
appendToValueBuffer(final char new_char)266   private static void appendToValueBuffer (final char new_char) {
267     if (buffer_index >= read_value_buffer.length) {
268       // reallocate as the buffer is full
269 
270       final char [] temp_buffer = new char [read_value_buffer.length*2];
271 
272       System.arraycopy (read_value_buffer, 0,
273                         temp_buffer, 0,
274                         read_value_buffer.length);
275       read_value_buffer = temp_buffer;
276     }
277 
278     read_value_buffer [buffer_index++] = (char) new_char;
279   }
280 
281   /**
282    *  Read a qualifier value from a stream.
283    *  @param buffered_reader the stream to read from
284    *  @return the qualifier value if successful, otherwise null
285    *  @exception QualifierParseException Thrown if the format of the
286    *    value String is not appropriate for a Qualifier with the given name or
287    *    if the qualifier can't be read.
288    *    Each qualifier has a specific format for the value part which depends
289    *    on the name, for example the value part of /codon_start qualifier must
290    *    be a number: 1, 2 or 3.
291    */
readValue(final BufferedReader buffered_reader)292   static synchronized String readValue (final BufferedReader buffered_reader)
293       throws QualifierParseException, IOException {
294 
295     buffer_index = 0;
296 
297     buffered_reader.mark (1);
298 
299     int current_char = buffered_reader.read ();
300 
301     if (-1 == current_char) {
302       return "";
303     }
304 
305     // this is the character the marks the end of the value string.  the
306     // default value of 0 means a '/' should end the string.
307     char final_char = 0;
308 
309     // this will be set to ", [ or ( if the value starts with one of those
310     // characters
311     char start_char = 0;
312 
313     // this is is used to balance the round or square brackets.  it is
314     // incremented each time an open bracket is seen (after the first one) and
315     // decremented each time a close bracket is seen (except for the last).
316     //
317     int bracket_count = 0;
318 
319     if ('"' == current_char) {
320       final_char = '"';
321     }
322     if ('[' == current_char) {
323       final_char = ']';
324       start_char = '[';
325       ++bracket_count;
326     }
327     if ('(' == current_char) {
328       final_char = ')';
329       start_char = '(';
330       ++bracket_count;
331     }
332 
333     if (0 == final_char) {
334       // the character we read isn't one of the delimiter characters so put it
335       // back
336       buffered_reader.reset ();
337     } else {
338       // append the char now so that loop doesn't stop immediately in the '"'
339       // case
340       appendToValueBuffer ((char) current_char);
341     }
342 
343     buffered_reader.mark (1);
344 
345     while ((current_char = buffered_reader.read ()) != -1) {
346 
347       // change newlines and other control characters to spaces
348       if (Character.isISOControl ((char)current_char) &&
349           current_char != '\t') {
350         current_char = ' ';
351       }
352 
353       if (current_char != '"') {
354         if (current_char == start_char) {
355           ++bracket_count;
356         } else {
357           if (current_char == final_char) {
358             --bracket_count;
359           }
360         }
361       }
362 
363       if (current_char == final_char && bracket_count == 0) {
364 
365         if (current_char == '"') {
366           // check for two quotes in a row
367 
368           // since the current character is a quote we know we can change the
369           // mark
370 
371           buffered_reader.mark (1);
372 
373           final int next_char = buffered_reader.read ();
374 
375           if (next_char == '"') {
376             // we have hit a quoted quote
377             appendToValueBuffer ('"');
378             appendToValueBuffer ('"');
379             continue;
380           } else {
381             // end of line or next qualifier
382 
383             if (next_char != -1) {
384               buffered_reader.reset ();
385             }
386 
387             appendToValueBuffer ('"');
388             break;
389           }
390         } else {
391           // end of value
392           appendToValueBuffer ((char) current_char);
393           break;
394         }
395       } else {
396 
397         if (0 == final_char && '/' == current_char) {
398           // in this case '/' marks the end of the value. we need to push back
399           // the '/' so that reading the next qualifier will work
400           buffered_reader.reset ();
401           break;
402         } else {
403           appendToValueBuffer ((char) current_char);
404 
405           // save the new position and go around the loop again
406           buffered_reader.mark (1);
407           continue;
408         }
409       }
410     }
411 
412     if (bracket_count > 0) {
413       throw new QualifierParseException ("hit the end of line while looking " +
414                                          "for a \"" + final_char + "\"");
415 
416     }
417 
418     // move buffer_index back past any whitespace
419     while (buffer_index > 0 &&
420            Character.isWhitespace (read_value_buffer[buffer_index-1])) {
421       --buffer_index;
422     }
423 
424     return new String (read_value_buffer, 0, buffer_index);
425   }
426 
427   /**
428    *  Return the value part of a Qualifier correctly quoted for insertion into
429    *  a embl entry.
430    *  @param qualifier_info The type of the qualifier that we will quote.
431    *  @param name The name part of qualifier.  The quote characters to check
432    *    for depend on this name.
433    *  @param value Quote this value.
434    **/
quotedValue(final QualifierInfo qualifier_info, final String name, final String value)435   private static String quotedValue (final QualifierInfo qualifier_info,
436                                      final String name, final String value) {
437     if (qualifier_info != null &&
438         (qualifier_info.getType () == QualifierInfo.QUOTED_TEXT ||
439          qualifier_info.getType () == QualifierInfo.OPTIONAL_QUOTED_TEXT)) {
440       return '"' + value + '"';
441     } else {
442       if (value.indexOf ('/') != -1) {
443         // quote it anyway
444         return '"' + value + '"';
445       } else {
446         return value;
447       }
448     }
449   }
450 
451   /**
452    *  Return the value part of a qualifier with any quote characters removed.
453    *  @param name The name part of qualifier.  The quote characters to check
454    *    for depend on this name.
455    *  @param value This is the value String to strip the quote characters
456    *    from.  This may be null if this qualifier has no value part (for
457    *    example /partial).
458    *  @return The unquoted version of the qualifier value or null if the value
459    *    passed to unquote() is null.
460    *  @exception QualifierParseException Thrown if the value String is
461    *    incorrectly quoted for a qualifier with the given name.  For example
462    *    there is a quote at one end of the value and not the other.
463    **/
unquote(final String value)464   private static String unquote (final String value)
465       throws QualifierParseException {
466     if (value.length () >= 2) {
467       final char first_char = value.charAt (0);
468       final char last_char = value.charAt (value.length () - 1);
469 
470       if (first_char == '"' && last_char == '"') {
471         return value.substring (1, value.length () - 1);
472       }
473       if (first_char != '"' && last_char != '"') {
474         return value;
475       }
476 
477       throw new QualifierParseException ("unbalanced quotes: " + value);
478     } else {
479       return value;
480     }
481   }
482 }
483