1 /* textPreProcessor.java --
2    Copyright (C) 2005 Free Software Foundation, Inc.
3 
4 This file is part of GNU Classpath.
5 
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10 
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING.  If not, write to the
18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301 USA.
20 
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library.  Thus, the terms and
23 conditions of the GNU General Public License cover the whole
24 combination.
25 
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module.  An independent module is a module which is not derived from
33 or based on this library.  If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so.  If you do not wish to do so, delete this
36 exception statement from your version. */
37 
38 
39 package gnu.javax.swing.text.html.parser.support;
40 
41 import gnu.javax.swing.text.html.parser.support.low.Constants;
42 
43 /**
 * Pre-processes the text found in the text parts of an HTML document.
45  *
46  * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
47  */
48 public class textPreProcessor
49 {
50   /**
51    * Pre - process non-preformatted text. \t, \r and \n mutate into spaces, then
52    * multiple spaces mutate into single one, all whitespace around tags is
53    * consumed. The content of the passed buffer is destroyed.
54    *
55    * @param a_text A text to pre-process.
56    */
preprocess(StringBuffer a_text)57   public char[] preprocess(StringBuffer a_text)
58   {
59     if (a_text.length() == 0)
60       return null;
61 
62     char[] text = toCharArray(a_text);
63 
64     int a = 0;
65     int b = text.length - 1;
66 
67     // Remove leading/trailing whitespace, leaving at most one character
68     int len = text.length;
69     while (a + 1 < len && Constants.bWHITESPACE.get(text[a])
70            && Constants.bWHITESPACE.get(text[a + 1]))
71       a++;
72 
73     while (b > a && Constants.bWHITESPACE.get(text[b])
74                && Constants.bWHITESPACE.get(text[b - 1]))
75       b--;
76 
77     a_text.setLength(0);
78 
79     boolean spacesWere = false;
80     boolean spaceNow;
81     char c;
82 
83     chars: for (int i = a; i <= b; i++)
84       {
85         c = text[i];
86         spaceNow = Constants.bWHITESPACE.get(c);
87         if (spacesWere && spaceNow)
88           continue chars;
89         if (spaceNow)
90           a_text.append(' ');
91         else
92           a_text.append(c);
93         spacesWere = spaceNow;
94       }
95 
96     if (a_text.length() == text.length)
97       {
98         a_text.getChars(0, a_text.length(), text, 0);
99         return text;
100       }
101     else
102       return toCharArray(a_text);
103   }
104 
105   /**
106    * Pre - process pre-formatted text.
107    * Heading/closing spaces and tabs preserved.
108    * ONE  bounding \r, \n or \r\n is removed.
109    * \r or \r\n mutate into \n. Tabs are
110    * preserved.
111    * The content of the passed buffer is destroyed.
112    * @param a_text
113    * @return
114    */
preprocessPreformatted(StringBuffer a_text)115   public char[] preprocessPreformatted(StringBuffer a_text)
116   {
117     if (a_text.length() == 0)
118       return null;
119 
120     char[] text = toCharArray(a_text);
121 
122     int a = 0;
123     int n = text.length - 1;
124     int b = n;
125 
126     if (text [ 0 ] == '\n')
127       a++;
128     else
129       {
130         if (text [ 0 ] == '\r')
131           {
132             a++;
133             if (text.length > 1 && text [ 1 ] == '\n')
134               a++;
135           }
136       }
137 
138     if (text [ n ] == '\r')
139       b--;
140     else
141       {
142         if (text [ n ] == '\n')
143           {
144             b--;
145             if (n > 0 && text [ n - 1 ] == '\r')
146               b--;
147           }
148       }
149 
150     a_text.setLength(0);
151 
152     if (a > b)
153       return null;
154 
155     char c;
156 
157     for (int i = a; i <= b; i++)
158       {
159         c = text [ i ];
160         if (c == '\r')
161           {
162             if (i == b || text [ i + 1 ] != '\n')
163               a_text.append('\n');
164           }
165         else
166           a_text.append(c);
167       }
168 
169     if (a_text.length() == text.length)
170       {
171         a_text.getChars(0, a_text.length(), text, 0);
172         return text;
173       }
174     else
175       return toCharArray(a_text);
176   }
177 
178   /**
179    * Return array of chars, present in the given buffer.
180    * @param a_text The buffer
181    * @return
182    */
toCharArray(StringBuffer a_text)183   private static char[] toCharArray(StringBuffer a_text)
184   {
185     char[] text = new char[ a_text.length() ];
186     a_text.getChars(0, text.length, text, 0);
187     return text;
188   }
189 }
190