1 // Jericho HTML Parser - Java based library for analysing and manipulating HTML 2 // Version 3.2 3 // Copyright (C) 2004-2009 Martin Jericho 4 // http://jericho.htmlparser.net/ 5 // 6 // This library is free software; you can redistribute it and/or 7 // modify it under the terms of either one of the following licences: 8 // 9 // 1. The Eclipse Public License (EPL) version 1.0, 10 // included in this distribution in the file licence-epl-1.0.html 11 // or available at http://www.eclipse.org/legal/epl-v10.html 12 // 13 // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, 14 // included in this distribution in the file licence-lgpl-2.1.txt 15 // or available at http://www.gnu.org/licenses/lgpl.txt 16 // 17 // This library is distributed on an "AS IS" basis, 18 // WITHOUT WARRANTY OF ANY KIND, either express or implied. 19 // See the individual licence texts for more details. 20 21 package net.htmlparser.jericho; 22 23 import java.util.*; 24 25 /** 26 * Represents a <em>field</em> in an HTML <a target="_blank" href="http://www.w3.org/TR/html401/interact/forms.html">form</a>, 27 * a <em>field</em> being defined as the group of all {@linkplain FormControl form controls} 28 * having the same {@linkplain FormControl#getName() name}. 29 * <p> 30 * The {@link #getFormControls()} method can be used to obtain the collection of this field's constituent 31 * {@link FormControl} objects. 32 * <p> 33 * The {@link FormFields} class, which represents a collection of <code>FormField</code> objects, provides the highest level 34 * interface for dealing with form fields and controls. For the most common tasks it can be used directly without 35 * the need to work with its constituent <code>FormField</code> or {@link FormControl} objects. 36 * <p> 37 * The <code>FormField</code> class serves two main purposes: 38 * <ol> 39 * <li style="margin-bottom: 1.5em"> 40 * Provide methods for the modification and retrieval of form control <a href="FormControl.html#SubmissionValue">submission values</a> 41 * while ensuring that the states of all the field's constituent form controls remain consistent with each other. 42 * <p> 43 * The methods available for this purpose are:<br /> 44 * {@link #getValues() List getValues()}<br /> 45 * {@link #clearValues() void clearValues()}<br /> 46 * {@link #setValues(Collection) void setValues(Collection)}<br /> 47 * {@link #setValue(String) boolean setValue(String)}<br /> 48 * {@link #addValue(String) boolean addValue(String)}<br /> 49 * <p> 50 * Although the {@link FormControl} class provides methods for directly modifying the submission values 51 * of individual form controls, it is generally recommended to use the interface provided by the {@link FormFields} class 52 * unless there is a specific requirement for the lower level functionality. 53 * The {@link FormFields} class contains convenience methods providing most of the functionality of the above methods, 54 * as well as some higher level functionality such as the ability to set the form 55 * <a href="#SubmissionValue">submission values</a> as a complete <a href="FormFields.html#FieldDataSet">field data set</a> 56 * using the {@link FormFields#setDataSet(Map)} method. 57 * <li><a name="DataStructureProperties"></a> 58 * Provide a means of determining the data structure of the field, allowing a server receiving a 59 * <a target="_blank" href="http://www.w3.org/TR/html401/interact/forms.html#submit-format">submitted</a> 60 * <a target="_blank" href="http://www.w3.org/TR/html401/interact/forms.html#form-data-set">form data set</a> 61 * to interpret and store the data in an appropriate way. 62 * <p> 63 * The properties available for this purpose are:<br /> 64 * {@link #allowsMultipleValues() boolean allowsMultipleValues()}<br /> 65 * {@link #getUserValueCount() int getUserValueCount()}<br /> 66 * {@link #getPredefinedValues() Collection getPredefinedValues()}<br /> 67 * <p> 68 * The {@link FormFields#getColumnLabels()} and {@link FormFields#getColumnValues(Map)} methods utilise these properties 69 * to convert data from a <a target="_blank" href="http://www.w3.org/TR/html401/interact/forms.html#form-data-set">form data set</a> 70 * (represented as a <a href="#FieldDataSet">field data set</a>) into a simple array format, 71 * suitable for storage in a tabular format such as a database table or <code>.CSV</code> file. 72 * <p> 73 * The properties need only be utilised directly in the event that a 74 * <a target="_blank" href="http://www.w3.org/TR/html401/interact/forms.html#form-data-set">form data set</a> is to be converted 75 * from its <a href="FormFields.html#FieldDataSet">normal format</a> into some other type of data structure. 76 * </ol> 77 * A form field which allows user values normally consists of a single 78 * <a href="FormControl.html#UserValueControl">user value control</a>, 79 * such as a {@link FormControlType#TEXT TEXT} control. 80 * <p> 81 * When a form field consists of more than one control, these controls are normally all 82 * <a href="FormControl.html#PredefinedValueControl">predefined value controls</a> of the same 83 * {@linkplain FormControlType type}, such as {@link FormControlType#CHECKBOX CHECKBOX} controls. 84 * <p> 85 * Form fields consisting of more than one control do not necessarily return {@linkplain #allowsMultipleValues() multiple values}. 86 * A form field consisting of {@link FormControlType#CHECKBOX CHECKBOX} controls can return multiple values, whereas 87 * a form field consisting of {@link FormControlType#CHECKBOX RADIO} controls returns at most one value. 88 * <p> 89 * The HTML author can disregard convention and mix all types of controls with the same name in the same form, 90 * or include multiple <a href="FormControl.html#UserValueControl">user value controls</a> of the same name. 91 * The evidence that such an unusual combination is present is when {@link #getUserValueCount()}<code>>1</code>. 92 * <p> 93 * <code>FormField</code> instances are created automatically with the creation of a {@link FormFields} collection. 94 * <p> 95 * The case sensitivity of form field names is determined by the static 96 * {@link Config#CurrentCompatibilityMode}<code>.</code>{@link Config.CompatibilityMode#isFormFieldNameCaseInsensitive() FormFieldNameCaseInsensitive} property. 97 * 98 * @see FormFields 99 * @see FormControl 100 * @see FormControlType 101 */ 102 public final class FormField { 103 private final String name; 104 private int userValueCount=0; 105 private boolean allowsMultipleValues=false; 106 private LinkedHashSet<String> predefinedValues=null; // String objects, null if none 107 private final LinkedHashSet<FormControl> formControls=new LinkedHashSet<FormControl>(); 108 private transient FormControl firstFormControl=null; // this field is simply a cache for the getFirstFormControl() method 109 int columnIndex; // see FormFields.initColumns() 110 111 /** Constructor called from FormFields class. */ FormField(final String name)112 FormField(final String name) { 113 this.name=name; 114 } 115 116 /** 117 * Returns the <a target="_blank" href="http://www.w3.org/TR/html401/interact/forms.html#control-name">control name</a> shared by all of this field's constituent {@linkplain FormControl controls}. 118 * <p> 119 * If the static {@link Config#CurrentCompatibilityMode}<code>.</code>{@link Config.CompatibilityMode#isFormFieldNameCaseInsensitive() isFormFieldNameCaseInsensitive()} 120 * property is set to <code>true</code>, the grouping of the controls by name is case insensitive 121 * and this method always returns the name in lower case. 122 * <p> 123 * Since a form field is simply a group of controls with the same name, the terms <i>control name</i> and 124 * <i>field name</i> are for the most part synonymous, with only a possible difference in case differentiating them. 125 * 126 * @return the <a target="_blank" href="http://www.w3.org/TR/html401/interact/forms.html#control-name">control name</a> shared by all of this field's constituent {@linkplain FormControl controls}. 127 * @see FormControl#getName() 128 */ getName()129 public String getName() { 130 return name; 131 } 132 133 /** 134 * Returns a collection of all the constituent {@linkplain FormControl form controls} in this field. 135 * <p> 136 * An iterator over this collection returns the controls in the order of appearance in the source. 137 * 138 * @return a collection of all the constituent {@linkplain FormControl form controls} in this field. 139 * @see #getFormControl() 140 * @see #getFormControl(String predefinedValue) 141 */ getFormControls()142 public Collection<FormControl> getFormControls() { 143 return formControls; 144 } 145 146 /** 147 * Returns the constituent {@link FormControl} with the specified {@linkplain FormControl#getPredefinedValue() predefined value}. 148 * <p> 149 * Specifying a predefined value of <code>null</code> returns the first control without a predefined value. 150 * 151 * @param predefinedValue the predefined value of the control to be returned, or <code>null</code> to return the first control without a predefined value. 152 * @return the constituent {@link FormControl} with the specified {@linkplain FormControl#getPredefinedValue() predefined value}, or <code>null</code> if none exists. 153 * @see #getFormControl() 154 * @see #getFormControls() 155 */ getFormControl(final String predefinedValue)156 public FormControl getFormControl(final String predefinedValue) { 157 if (predefinedValue==null) { 158 for (FormControl formControl : formControls) { 159 if (!formControl.getFormControlType().hasPredefinedValue()) return formControl; 160 if (formControl.getFormControlType().getElementName()!=HTMLElementName.SELECT && formControl.getPredefinedValue()==null) return formControl; 161 } 162 } else { 163 for (FormControl formControl : formControls) { 164 if (formControl.getFormControlType().getElementName()==HTMLElementName.SELECT) { 165 if (formControl.getPredefinedValues().contains(predefinedValue)) return formControl; 166 } else { 167 if (predefinedValue.equals(formControl.getPredefinedValue())) return formControl; 168 } 169 } 170 } 171 return null; 172 } 173 174 /** 175 * Returns the first {@link FormControl} from this field. 176 * @return the first {@link FormControl} from this field, guaranteed not <code>null</code>. 177 * @see #getFormControl(String predefinedValue) 178 * @see #getFormControls() 179 */ getFormControl()180 public FormControl getFormControl() { 181 return formControls.iterator().next(); 182 } 183 184 /** 185 * Indicates whether the field allows multiple values. 186 * <p> 187 * Returns <code>false</code> in any one of the following circumstances: 188 * <ul> 189 * <li>The field consists of only one control (unless it is a 190 * {@linkplain FormControlType#SELECT_MULTIPLE multiple select} with more than one option) 191 * <li>The field consists entirely of {@linkplain FormControlType#RADIO radio buttons} 192 * <li>The field consists entirely of {@linkplain FormControlType#isSubmit() submit} buttons 193 * </ul> 194 * If none of these three conditions are met, the method returns <code>true</code>. 195 * 196 * @return <code>true</code> if the field allows multiple values, otherwise <code>false</code>. 197 */ allowsMultipleValues()198 public boolean allowsMultipleValues() { 199 return allowsMultipleValues; 200 } 201 202 /** 203 * Returns the number of constituent <a href="FormControl.html#UserValueControl">user value controls</a> in this field. 204 * This should in most cases be either <code>0</code> or <code>1</code>. 205 * <p> 206 * A value of <code>0</code> indicates the field values consist only of 207 * {@linkplain #getPredefinedValues() predefined values}, which is the case when the field consists only of 208 * <a href="FormControl.html#PredefinedValueControl">predefined value controls</a>. 209 * <p> 210 * A value of <code>1</code> indicates the field values consist of at most one value set by the user. 211 * It is still possible in this case to receive multiple values in the unlikely event that the HTML author mixed 212 * controls of different types with the same name, but any other values would consist only of 213 * {@linkplain #getPredefinedValues() predefined values}. 214 * <p> 215 * A value greater than <code>1</code> indicates that the HTML author has included more than one 216 * <a href="FormControl.html#UserValueControl">user value control</a> with the same name. 217 * This would nearly always indicate an unintentional error in the HTML source document, 218 * in which case your application can either log a warning that a poorly designed form has been encountered, 219 * or take special action to try to interpret the multiple user values that might be submitted. 220 * 221 * @return the number of constituent <a href="FormControl.html#UserValueControl">user value controls</a> in this field. 222 */ getUserValueCount()223 public int getUserValueCount() { 224 return userValueCount; 225 } 226 227 /** 228 * Returns a collection of the {@linkplain FormControl#getPredefinedValue() predefined values} of all constituent {@linkplain FormControl controls} in this field. 229 * <p> 230 * All objects in the returned collection are of type <code>String</code>, with no <code>null</code> entries. 231 * <p> 232 * An interator over this collection returns the values in the order of appearance in the source document. 233 * 234 * @return a collection of the {@linkplain FormControl#getPredefinedValue() predefined values} of all constituent {@linkplain FormControl controls} in this field, or <code>null</code> if none. 235 * @see FormControl#getPredefinedValues() 236 */ getPredefinedValues()237 public Collection<String> getPredefinedValues() { 238 if (predefinedValues==null) return Collections.emptySet(); 239 return predefinedValues; 240 } 241 242 /** 243 * Returns a list of the <a href="#FieldSubmissionValues">field submission values</a> in order of appearance. 244 * <p> 245 * The term <i><a name="FieldSubmissionValues">field submission values</a></i> is used in this library to refer to the aggregate of all the 246 * <a href="FormControl.html#SubmissionValue">submission values</a> of a field's constituent {@linkplain #getFormControls() form controls}. 247 * <p> 248 * All objects in the returned list are of type <code>String</code>, with no <code>null</code> entries. 249 * <p> 250 * The list may contain duplicates if the this field has multiple controls with the same value. 251 * 252 * @return a list of the <a href="#FieldSubmissionValues">field submission values</a> in order of appearance, guaranteed not <code>null</code>. 253 */ getValues()254 public List<String> getValues() { 255 final List<String> values=new ArrayList<String>(); 256 for (FormControl formControl : formControls) formControl.addValuesTo(values); 257 return values; 258 } 259 260 /** 261 * Clears the <a href="FormControl.html#SubmissionValue">submission values</a> of all the constituent {@linkplain #getFormControls() form controls} in this field. 262 * @see FormControl#clearValues() 263 */ clearValues()264 public void clearValues() { 265 for (FormControl formControl : formControls) formControl.clearValues(); 266 } 267 268 /** 269 * Sets the <a href="#FieldSubmissionValues">field submission values</a> of this field to the specified values. 270 * <p> 271 * This is equivalent to calling {@link #clearValues()} followed by {@link #addValue(String) addValue(value)} for each 272 * value in the specified collection. 273 * <p> 274 * The specified collection must not contain any <code>null</code> values. 275 * 276 * @param values the new <a href="#FieldSubmissionValues">field submission values</a> of this field. 277 * @see #addValue(String value) 278 */ setValues(final Collection<String> values)279 public void setValues(final Collection<String> values) { 280 clearValues(); 281 addValues(values); 282 } 283 284 /** 285 * Sets the <a href="#FieldSubmissionValues">field submission values</a> of this field to the single specified value. 286 * <p> 287 * This is equivalent to calling {@link #clearValues()} followed by {@link #addValue(String) addValue(value)}. 288 * <p> 289 * The return value indicates whether any of the constituent form controls "accepted" the value. 290 * A return value of <code>false</code> implies an error condition as the specified value is not compatible with this field. 291 * <p> 292 * Specifying a <code>null</code> value is equivalent to calling {@link #clearValues()} alone, and always returns <code>true</code>. 293 * <p> 294 * See the {@link #addValue(String value)} method for more information. 295 * 296 * @param value the new <a href="#FieldSubmissionValues">field submission value</a> of this field, or <code>null</code> to {@linkplain #clearValues() clear} the field of all submission values. 297 * @return <code>true</code> if one of the constituent {@linkplain #getFormControls() form controls} accepts the value, otherwise <code>false</code>. 298 * @see FormFields#setValue(String fieldName, String value) 299 */ setValue(final String value)300 public boolean setValue(final String value) { 301 clearValues(); 302 return value!=null ? addValue(value) : true; 303 } 304 305 /** 306 * Adds the specified value to the <a href="#FieldSubmissionValues">field submission values</a> of this field. 307 * <p> 308 * This is achieved internally by attempting to {@linkplain FormControl#addValue(String) add the value} to every constituent 309 * {@linkplain #getFormControls() form control} until one "accepts" it. 310 * <p> 311 * The return value indicates whether any of the constituent form controls accepted the value. 312 * A return value of <code>false</code> implies an error condition as the specified value is not compatible with this field. 313 * <p> 314 * In the unusual case that this field consists of multiple form controls, but not all of them are 315 * <a href="FormControl.html#PredefinedValueControl">predefined value controls</a>, priority is given to the predefined value controls 316 * before attempting to add the value to the <a href="FormControl.html#UserValueControl">user value controls</a>. 317 * 318 * @param value the new <a href="#FieldSubmissionValues">field submission value</a> to add to this field, must not be <code>null</code>. 319 * @return <code>true</code> if one of the constituent {@linkplain #getFormControls() form controls} accepts the value, otherwise <code>false</code>. 320 */ addValue(final String value)321 public boolean addValue(final String value) { 322 if (value==null) throw new IllegalArgumentException("value argument must not be null"); 323 if (formControls.size()==1) return getFirstFormControl().addValue(value); 324 List<FormControl> userValueControls=null; 325 for (FormControl formControl : formControls) { 326 if (!formControl.getFormControlType().hasPredefinedValue()) { 327 // A user value control has been found, but is not the only control with this name. 328 // This shouldn't normally happen in a well designed form, but we will save the user value control 329 // for later and give all predefined value controls first opportunity to take the value. 330 if (userValueControls==null) userValueControls=new LinkedList<FormControl>(); 331 userValueControls.add(formControl); 332 continue; 333 } 334 if (formControl.addValue(value)) return true; // return value of true from formControl.addValue(value) means the value was taken by the control 335 } 336 if (userValueControls==null) return false; 337 for (FormControl userFormControl : userValueControls) { 338 if (userFormControl.addValue(value)) return true; 339 } 340 return false; 341 } 342 343 /** 344 * Returns a string representation of this object useful for debugging purposes. 345 * @return a string representation of this object useful for debugging purposes. 346 */ getDebugInfo()347 public String getDebugInfo() { 348 final StringBuilder sb=new StringBuilder(); 349 sb.append("Field: ").append(name).append(", UserValueCount=").append(userValueCount).append(", AllowsMultipleValues=").append(allowsMultipleValues); 350 if (predefinedValues!=null) { 351 for (String predefinedValue : predefinedValues) sb.append(Config.NewLine).append("PredefinedValue: ").append(predefinedValue); 352 } 353 for (FormControl formControl : formControls) sb.append(Config.NewLine).append("FormControl: ").append(formControl.getDebugInfo()); 354 sb.append(Config.NewLine).append(Config.NewLine); 355 return sb.toString(); 356 } 357 358 /** 359 * Returns a string representation of this object useful for debugging purposes. 360 * <p> 361 * This is equivalent to {@link #getDebugInfo()}. 362 * 363 * @return a string representation of this object useful for debugging purposes. 364 */ toString()365 public String toString() { 366 return getDebugInfo(); 367 } 368 addValues(final Collection<String> values)369 void addValues(final Collection<String> values) { 370 if (values!=null) for (String value : values) addValue(value); 371 } 372 addValues(final String[] values)373 void addValues(final String[] values) { 374 if (values!=null) for (String value : values) addValue(value); 375 } 376 addFormControl(final FormControl formControl, final String predefinedValue)377 void addFormControl(final FormControl formControl, final String predefinedValue) { 378 // predefinedValue==null if we are adding a user value 379 if (predefinedValue==null) { 380 userValueCount++; 381 } else { 382 if (predefinedValues==null) predefinedValues=new LinkedHashSet<String>(); 383 predefinedValues.add(predefinedValue); 384 } 385 formControls.add(formControl); 386 allowsMultipleValues=calculateAllowsMultipleValues(formControl); 387 } 388 calculateAllowsMultipleValues(final FormControl newFormControl)389 private boolean calculateAllowsMultipleValues(final FormControl newFormControl) { 390 // false if only one control (unless it is a multiple select with more than one option), 391 // or all of the controls are radio buttons, or all of the controls are submit buttons 392 if (allowsMultipleValues || userValueCount>1) return true; 393 if (userValueCount==1) return predefinedValues!=null; 394 // at this stage we know userValueCount==0 && predefinedValues.size()>=1 395 if (predefinedValues.size()==1) return false; 396 final FormControlType newFormControlType=newFormControl.getFormControlType(); 397 if (formControls.size()==1) return newFormControlType==FormControlType.SELECT_MULTIPLE; 398 // at this stage we know there are multiple predefined values in multiple controls. 399 // if all of the controls are radio buttons or all are submit buttons, allowsMultipleValues is false, otherwise true. 400 // checking only the first control and the new control is equivalent to checking them all because if they weren't all 401 // the same allowsMultipleValues would already be true. 402 final FormControlType firstFormControlType=getFirstFormControl().getFormControlType(); 403 if (newFormControlType==FormControlType.RADIO && firstFormControlType==FormControlType.RADIO) return false; 404 if (newFormControlType.isSubmit() && firstFormControlType.isSubmit()) return false; 405 return true; 406 } 407 getFirstFormControl()408 FormControl getFirstFormControl() { 409 // formControls must be ordered collection for this method to work. 410 // It has to return the first FormControl entered into the collection 411 // for the algorithm in calculateAllowsMultipleValues() to work. 412 if (firstFormControl==null) firstFormControl=formControls.iterator().next(); 413 return firstFormControl; 414 } 415 416 /** only called from FormFields class */ merge(final FormField formField)417 void merge(final FormField formField) { 418 if (formField.userValueCount>userValueCount) userValueCount=formField.userValueCount; 419 allowsMultipleValues=allowsMultipleValues || formField.allowsMultipleValues; 420 if (predefinedValues==null) { 421 predefinedValues=formField.predefinedValues; 422 } else if (formField.predefinedValues!=null) { 423 for (String predefinedValue : predefinedValues) predefinedValues.add(predefinedValue); 424 } 425 for (FormControl formControl : formField.getFormControls()) formControls.add(formControl); 426 } 427 } 428 429