1 /* textPreProcessor.java -- 2 Copyright (C) 2005 Free Software Foundation, Inc. 3 4 This file is part of GNU Classpath. 5 6 GNU Classpath is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2, or (at your option) 9 any later version. 10 11 GNU Classpath is distributed in the hope that it will be useful, but 12 WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with GNU Classpath; see the file COPYING. If not, write to the 18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 19 02110-1301 USA. 20 21 Linking this library statically or dynamically with other modules is 22 making a combined work based on this library. Thus, the terms and 23 conditions of the GNU General Public License cover the whole 24 combination. 25 26 As a special exception, the copyright holders of this library give you 27 permission to link this library with independent modules to produce an 28 executable, regardless of the license terms of these independent 29 modules, and to copy and distribute the resulting executable under 30 terms of your choice, provided that you also meet, for each linked 31 independent module, the terms and conditions of the license of that 32 module. An independent module is a module which is not derived from 33 or based on this library. If you modify this library, you may extend 34 this exception to your version of the library, but you are not 35 obligated to do so. If you do not wish to do so, delete this 36 exception statement from your version. */ 37 38 39 package gnu.javax.swing.text.html.parser.support; 40 41 import gnu.javax.swing.text.html.parser.support.low.Constants; 42 43 /** 44 * Pre - processes text in text parts of the html document. 45 * 46 * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org) 47 */ 48 public class textPreProcessor 49 { 50 /** 51 * Pre - process non-preformatted text. \t, \r and \n mutate into spaces, then 52 * multiple spaces mutate into single one, all whitespace around tags is 53 * consumed. The content of the passed buffer is destroyed. 54 * 55 * @param a_text A text to pre-process. 56 */ preprocess(StringBuffer a_text)57 public char[] preprocess(StringBuffer a_text) 58 { 59 if (a_text.length() == 0) 60 return null; 61 62 char[] text = toCharArray(a_text); 63 64 int a = 0; 65 int b = text.length - 1; 66 67 // Remove leading/trailing whitespace, leaving at most one character 68 int len = text.length; 69 while (a + 1 < len && Constants.bWHITESPACE.get(text[a]) 70 && Constants.bWHITESPACE.get(text[a + 1])) 71 a++; 72 73 while (b > a && Constants.bWHITESPACE.get(text[b]) 74 && Constants.bWHITESPACE.get(text[b - 1])) 75 b--; 76 77 a_text.setLength(0); 78 79 boolean spacesWere = false; 80 boolean spaceNow; 81 char c; 82 83 chars: for (int i = a; i <= b; i++) 84 { 85 c = text[i]; 86 spaceNow = Constants.bWHITESPACE.get(c); 87 if (spacesWere && spaceNow) 88 continue chars; 89 if (spaceNow) 90 a_text.append(' '); 91 else 92 a_text.append(c); 93 spacesWere = spaceNow; 94 } 95 96 if (a_text.length() == text.length) 97 { 98 a_text.getChars(0, a_text.length(), text, 0); 99 return text; 100 } 101 else 102 return toCharArray(a_text); 103 } 104 105 /** 106 * Pre - process pre-formatted text. 107 * Heading/closing spaces and tabs preserved. 108 * ONE bounding \r, \n or \r\n is removed. 109 * \r or \r\n mutate into \n. Tabs are 110 * preserved. 111 * The content of the passed buffer is destroyed. 112 * @param a_text 113 * @return 114 */ preprocessPreformatted(StringBuffer a_text)115 public char[] preprocessPreformatted(StringBuffer a_text) 116 { 117 if (a_text.length() == 0) 118 return null; 119 120 char[] text = toCharArray(a_text); 121 122 int a = 0; 123 int n = text.length - 1; 124 int b = n; 125 126 if (text [ 0 ] == '\n') 127 a++; 128 else 129 { 130 if (text [ 0 ] == '\r') 131 { 132 a++; 133 if (text.length > 1 && text [ 1 ] == '\n') 134 a++; 135 } 136 } 137 138 if (text [ n ] == '\r') 139 b--; 140 else 141 { 142 if (text [ n ] == '\n') 143 { 144 b--; 145 if (n > 0 && text [ n - 1 ] == '\r') 146 b--; 147 } 148 } 149 150 a_text.setLength(0); 151 152 if (a > b) 153 return null; 154 155 char c; 156 157 for (int i = a; i <= b; i++) 158 { 159 c = text [ i ]; 160 if (c == '\r') 161 { 162 if (i == b || text [ i + 1 ] != '\n') 163 a_text.append('\n'); 164 } 165 else 166 a_text.append(c); 167 } 168 169 if (a_text.length() == text.length) 170 { 171 a_text.getChars(0, a_text.length(), text, 0); 172 return text; 173 } 174 else 175 return toCharArray(a_text); 176 } 177 178 /** 179 * Return array of chars, present in the given buffer. 180 * @param a_text The buffer 181 * @return 182 */ toCharArray(StringBuffer a_text)183 private static char[] toCharArray(StringBuffer a_text) 184 { 185 char[] text = new char[ a_text.length() ]; 186 a_text.getChars(0, text.length, text, 0); 187 return text; 188 } 189 } 190