1 // Jericho HTML Parser - Java based library for analysing and manipulating HTML
2 // Version 3.2
3 // Copyright (C) 2004-2009 Martin Jericho
4 // http://jericho.htmlparser.net/
5 //
6 // This library is free software; you can redistribute it and/or
7 // modify it under the terms of either one of the following licences:
8 //
9 // 1. The Eclipse Public License (EPL) version 1.0,
10 // included in this distribution in the file licence-epl-1.0.html
11 // or available at http://www.eclipse.org/legal/epl-v10.html
12 //
13 // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
14 // included in this distribution in the file licence-lgpl-2.1.txt
15 // or available at http://www.gnu.org/licenses/lgpl.txt
16 //
17 // This library is distributed on an "AS IS" basis,
18 // WITHOUT WARRANTY OF ANY KIND, either express or implied.
19 // See the individual licence texts for more details.
20 
21 package net.htmlparser.jericho;
22 
23 import java.util.*;
24 
25 /**
26  * Represents a <em>field</em> in an HTML <a target="_blank" href="http://www.w3.org/TR/html401/interact/forms.html">form</a>,
27  * a <em>field</em> being defined as the group of all {@linkplain FormControl form controls}
28  * having the same {@linkplain FormControl#getName() name}.
29  * <p>
30  * The {@link #getFormControls()} method can be used to obtain the collection of this field's constituent
31  * {@link FormControl} objects.
32  * <p>
33  * The {@link FormFields} class, which represents a collection of <code>FormField</code> objects, provides the highest level
34  * interface for dealing with form fields and controls.  For the most common tasks it can be used directly without
35  * the need to work with its constituent <code>FormField</code> or {@link FormControl} objects.
36  * <p>
37  * The <code>FormField</code> class serves two main purposes:
38  * <ol>
39  *  <li style="margin-bottom: 1.5em">
40  *   Provide methods for the modification and retrieval of form control <a href="FormControl.html#SubmissionValue">submission values</a>
41  *   while ensuring that the states of all the field's constituent form controls remain consistent with each other.
42  *   <p>
43  *   The methods available for this purpose are:<br />
44  *   {@link #getValues() List getValues()}<br />
45  *   {@link #clearValues() void clearValues()}<br />
46  *   {@link #setValues(Collection) void setValues(Collection)}<br />
47  *   {@link #setValue(String) boolean setValue(String)}<br />
48  *   {@link #addValue(String) boolean addValue(String)}<br />
49  *   <p>
50  *   Although the {@link FormControl} class provides methods for directly modifying the submission values
51  *   of individual form controls, it is generally recommended to use the interface provided by the {@link FormFields} class
52  *   unless there is a specific requirement for the lower level functionality.
53  *   The {@link FormFields} class contains convenience methods providing most of the functionality of the above methods,
54  *   as well as some higher level functionality such as the ability to set the form
 *   <a href="FormControl.html#SubmissionValue">submission values</a> as a complete <a href="FormFields.html#FieldDataSet">field data set</a>
56  *   using the {@link FormFields#setDataSet(Map)} method.
57  *  <li><a name="DataStructureProperties"></a>
58  *   Provide a means of determining the data structure of the field, allowing a server receiving a
59  *   <a target="_blank" href="http://www.w3.org/TR/html401/interact/forms.html#submit-format">submitted</a>
60  *   <a target="_blank" href="http://www.w3.org/TR/html401/interact/forms.html#form-data-set">form data set</a>
61  *   to interpret and store the data in an appropriate way.
62  *   <p>
63  *   The properties available for this purpose are:<br />
64  *   {@link #allowsMultipleValues() boolean allowsMultipleValues()}<br />
65  *   {@link #getUserValueCount() int getUserValueCount()}<br />
66  *   {@link #getPredefinedValues() Collection getPredefinedValues()}<br />
67  *   <p>
68  *   The {@link FormFields#getColumnLabels()} and {@link FormFields#getColumnValues(Map)} methods utilise these properties
69  *   to convert data from a <a target="_blank" href="http://www.w3.org/TR/html401/interact/forms.html#form-data-set">form data set</a>
 *   (represented as a <a href="FormFields.html#FieldDataSet">field data set</a>) into a simple array format,
71  *   suitable for storage in a tabular format such as a database table or <code>.CSV</code> file.
72  *   <p>
73  *   The properties need only be utilised directly in the event that a
74  *   <a target="_blank" href="http://www.w3.org/TR/html401/interact/forms.html#form-data-set">form data set</a> is to be converted
75  *   from its <a href="FormFields.html#FieldDataSet">normal format</a> into some other type of data structure.
76  * </ol>
77  * A form field which allows user values normally consists of a single
78  * <a href="FormControl.html#UserValueControl">user value control</a>,
79  * such as a {@link FormControlType#TEXT TEXT} control.
80  * <p>
81  * When a form field consists of more than one control, these controls are normally all
82  * <a href="FormControl.html#PredefinedValueControl">predefined value controls</a> of the same
83  * {@linkplain FormControlType type}, such as {@link FormControlType#CHECKBOX CHECKBOX} controls.
84  * <p>
85  * Form fields consisting of more than one control do not necessarily return {@linkplain #allowsMultipleValues() multiple values}.
86  * A form field consisting of {@link FormControlType#CHECKBOX CHECKBOX} controls can return multiple values, whereas
 * a form field consisting of {@link FormControlType#RADIO RADIO} controls returns at most one value.
88  * <p>
89  * The HTML author can disregard convention and mix all types of controls with the same name in the same form,
90  * or include multiple <a href="FormControl.html#UserValueControl">user value controls</a> of the same name.
91  * The evidence that such an unusual combination is present is when {@link #getUserValueCount()}<code>&gt;1</code>.
92  * <p>
93  * <code>FormField</code> instances are created automatically with the creation of a {@link FormFields} collection.
94  * <p>
95  * The case sensitivity of form field names is determined by the static
96  * {@link Config#CurrentCompatibilityMode}<code>.</code>{@link Config.CompatibilityMode#isFormFieldNameCaseInsensitive() FormFieldNameCaseInsensitive} property.
97  *
98  * @see FormFields
99  * @see FormControl
100  * @see FormControlType
101  */
102 public final class FormField {
103 	private final String name;
104 	private int userValueCount=0;
105 	private boolean allowsMultipleValues=false;
106 	private LinkedHashSet<String> predefinedValues=null; // String objects, null if none
107 	private final LinkedHashSet<FormControl> formControls=new LinkedHashSet<FormControl>();
108 	private transient FormControl firstFormControl=null; // this field is simply a cache for the getFirstFormControl() method
109 	int columnIndex; // see FormFields.initColumns()
110 
111 	/** Constructor called from FormFields class. */
FormField(final String name)112 	FormField(final String name) {
113 		this.name=name;
114 	}
115 
116 	/**
117 	 * Returns the <a target="_blank" href="http://www.w3.org/TR/html401/interact/forms.html#control-name">control name</a> shared by all of this field's constituent {@linkplain FormControl controls}.
118 	 * <p>
119 	 * If the static {@link Config#CurrentCompatibilityMode}<code>.</code>{@link Config.CompatibilityMode#isFormFieldNameCaseInsensitive() isFormFieldNameCaseInsensitive()}
120 	 * property is set to <code>true</code>, the grouping of the controls by name is case insensitive
121 	 * and this method always returns the name in lower case.
122 	 * <p>
123 	 * Since a form field is simply a group of controls with the same name, the terms <i>control name</i> and
124 	 * <i>field name</i> are for the most part synonymous, with only a possible difference in case differentiating them.
125 	 *
126 	 * @return the <a target="_blank" href="http://www.w3.org/TR/html401/interact/forms.html#control-name">control name</a> shared by all of this field's constituent {@linkplain FormControl controls}.
127 	 * @see FormControl#getName()
128 	 */
getName()129 	public String getName() {
130 		return name;
131 	}
132 
133 	/**
134 	 * Returns a collection of all the constituent {@linkplain FormControl form controls} in this field.
135 	 * <p>
136 	 * An iterator over this collection returns the controls in the order of appearance in the source.
137 	 *
138 	 * @return a collection of all the constituent {@linkplain FormControl form controls} in this field.
139 	 * @see #getFormControl()
140 	 * @see #getFormControl(String predefinedValue)
141 	 */
getFormControls()142 	public Collection<FormControl> getFormControls() {
143 		return formControls;
144 	}
145 
146 	/**
147 	 * Returns the constituent {@link FormControl} with the specified {@linkplain FormControl#getPredefinedValue() predefined value}.
148 	 * <p>
149 	 * Specifying a predefined value of <code>null</code> returns the first control without a predefined value.
150 	 *
151 	 * @param predefinedValue  the predefined value of the control to be returned, or <code>null</code> to return the first control without a predefined value.
152 	 * @return the constituent {@link FormControl} with the specified {@linkplain FormControl#getPredefinedValue() predefined value}, or <code>null</code> if none exists.
153 	 * @see #getFormControl()
154 	 * @see #getFormControls()
155 	 */
getFormControl(final String predefinedValue)156 	public FormControl getFormControl(final String predefinedValue) {
157 		if (predefinedValue==null) {
158 			for (FormControl formControl : formControls) {
159 				if (!formControl.getFormControlType().hasPredefinedValue()) return formControl;
160 				if (formControl.getFormControlType().getElementName()!=HTMLElementName.SELECT && formControl.getPredefinedValue()==null) return formControl;
161 			}
162 		} else {
163 			for (FormControl formControl : formControls) {
164 				if (formControl.getFormControlType().getElementName()==HTMLElementName.SELECT) {
165 					if (formControl.getPredefinedValues().contains(predefinedValue)) return formControl;
166 				} else {
167 					if (predefinedValue.equals(formControl.getPredefinedValue())) return formControl;
168 				}
169 			}
170 		}
171 		return null;
172 	}
173 
174 	/**
175 	 * Returns the first {@link FormControl} from this field.
176 	 * @return the first {@link FormControl} from this field, guaranteed not <code>null</code>.
177 	 * @see #getFormControl(String predefinedValue)
178 	 * @see #getFormControls()
179 	 */
getFormControl()180 	public FormControl getFormControl() {
181 		return formControls.iterator().next();
182 	}
183 
184 	/**
185 	 * Indicates whether the field allows multiple values.
186 	 * <p>
187 	 * Returns <code>false</code> in any one of the following circumstances:
188 	 * <ul>
189 	 *  <li>The field consists of only one control (unless it is a
190 	 *   {@linkplain FormControlType#SELECT_MULTIPLE multiple select} with more than one option)
191 	 *  <li>The field consists entirely of {@linkplain FormControlType#RADIO radio buttons}
192 	 *  <li>The field consists entirely of {@linkplain FormControlType#isSubmit() submit} buttons
193 	 * </ul>
194 	 * If none of these three conditions are met, the method returns <code>true</code>.
195 	 *
196 	 * @return <code>true</code> if the field allows multiple values, otherwise <code>false</code>.
197 	 */
allowsMultipleValues()198 	public boolean allowsMultipleValues() {
199 		return allowsMultipleValues;
200 	}
201 
202 	/**
203 	 * Returns the number of constituent <a href="FormControl.html#UserValueControl">user value controls</a> in this field.
204 	 * This should in most cases be either <code>0</code> or <code>1</code>.
205 	 * <p>
206 	 * A value of <code>0</code> indicates the field values consist only of
207 	 * {@linkplain #getPredefinedValues() predefined values}, which is the case when the field consists only of
208 	 * <a href="FormControl.html#PredefinedValueControl">predefined value controls</a>.
209 	 * <p>
210 	 * A value of <code>1</code> indicates the field values consist of at most one value set by the user.
211 	 * It is still possible in this case to receive multiple values in the unlikely event that the HTML author mixed
212 	 * controls of different types with the same name, but any other values would consist only of
213 	 * {@linkplain #getPredefinedValues() predefined values}.
214 	 * <p>
215 	 * A value greater than <code>1</code> indicates that the HTML author has included more than one
216 	 * <a href="FormControl.html#UserValueControl">user value control</a> with the same name.
217 	 * This would nearly always indicate an unintentional error in the HTML source document,
218 	 * in which case your application can either log a warning that a poorly designed form has been encountered,
219 	 * or take special action to try to interpret the multiple user values that might be submitted.
220 	 *
221 	 * @return the number of constituent <a href="FormControl.html#UserValueControl">user value controls</a> in this field.
222 	 */
getUserValueCount()223 	public int getUserValueCount() {
224 		return userValueCount;
225 	}
226 
227 	/**
228 	 * Returns a collection of the {@linkplain FormControl#getPredefinedValue() predefined values} of all constituent {@linkplain FormControl controls} in this field.
229 	 * <p>
230 	 * All objects in the returned collection are of type <code>String</code>, with no <code>null</code> entries.
231 	 * <p>
232 	 * An interator over this collection returns the values in the order of appearance in the source document.
233 	 *
234 	 * @return a collection of the {@linkplain FormControl#getPredefinedValue() predefined values} of all constituent {@linkplain FormControl controls} in this field, or <code>null</code> if none.
235 	 * @see FormControl#getPredefinedValues()
236 	 */
getPredefinedValues()237 	public Collection<String> getPredefinedValues() {
238 		if (predefinedValues==null) return Collections.emptySet();
239 		return predefinedValues;
240 	}
241 
242 	/**
243 	 * Returns a list of the <a href="#FieldSubmissionValues">field submission values</a> in order of appearance.
244 	 * <p>
245 	 * The term <i><a name="FieldSubmissionValues">field submission values</a></i> is used in this library to refer to the aggregate of all the
246 	 * <a href="FormControl.html#SubmissionValue">submission values</a> of a field's constituent {@linkplain #getFormControls() form controls}.
247 	 * <p>
248 	 * All objects in the returned list are of type <code>String</code>, with no <code>null</code> entries.
249 	 * <p>
250 	 * The list may contain duplicates if the this field has multiple controls with the same value.
251 	 *
252 	 * @return a list of the <a href="#FieldSubmissionValues">field submission values</a> in order of appearance, guaranteed not <code>null</code>.
253 	 */
getValues()254 	public List<String> getValues() {
255 		final List<String> values=new ArrayList<String>();
256 		for (FormControl formControl : formControls) formControl.addValuesTo(values);
257 		return values;
258 	}
259 
260 	/**
261 	 * Clears the <a href="FormControl.html#SubmissionValue">submission values</a> of all the constituent {@linkplain #getFormControls() form controls} in this field.
262 	 * @see FormControl#clearValues()
263 	 */
clearValues()264 	public void clearValues() {
265 		for (FormControl formControl : formControls) formControl.clearValues();
266 	}
267 
268 	/**
269 	 * Sets the <a href="#FieldSubmissionValues">field submission values</a> of this field to the specified values.
270 	 * <p>
271 	 * This is equivalent to calling {@link #clearValues()} followed by {@link #addValue(String) addValue(value)} for each
272 	 * value in the specified collection.
273 	 * <p>
274 	 * The specified collection must not contain any <code>null</code> values.
275 	 *
276 	 * @param values  the new <a href="#FieldSubmissionValues">field submission values</a> of this field.
277 	 * @see #addValue(String value)
278 	 */
setValues(final Collection<String> values)279 	public void setValues(final Collection<String> values) {
280 		clearValues();
281 		addValues(values);
282 	}
283 
284 	/**
285 	 * Sets the <a href="#FieldSubmissionValues">field submission values</a> of this field to the single specified value.
286 	 * <p>
287 	 * This is equivalent to calling {@link #clearValues()} followed by {@link #addValue(String) addValue(value)}.
288 	 * <p>
289 	 * The return value indicates whether any of the constituent form controls "accepted" the value.
290 	 * A return value of <code>false</code> implies an error condition as the specified value is not compatible with this field.
291 	 * <p>
292 	 * Specifying a <code>null</code> value is equivalent to calling {@link #clearValues()} alone, and always returns <code>true</code>.
293 	 * <p>
294 	 * See the {@link #addValue(String value)} method for more information.
295 	 *
296 	 * @param value  the new <a href="#FieldSubmissionValues">field submission value</a> of this field, or <code>null</code> to {@linkplain #clearValues() clear} the field of all submission values.
297 	 * @return <code>true</code> if one of the constituent {@linkplain #getFormControls() form controls} accepts the value, otherwise <code>false</code>.
298 	 * @see FormFields#setValue(String fieldName, String value)
299 	 */
setValue(final String value)300 	public boolean setValue(final String value) {
301 		clearValues();
302 		return value!=null ? addValue(value) : true;
303 	}
304 
305 	/**
306 	 * Adds the specified value to the <a href="#FieldSubmissionValues">field submission values</a> of this field.
307 	 * <p>
308 	 * This is achieved internally by attempting to {@linkplain FormControl#addValue(String) add the value} to every constituent
309 	 * {@linkplain #getFormControls() form control} until one "accepts" it.
310 	 * <p>
311 	 * The return value indicates whether any of the constituent form controls accepted the value.
312 	 * A return value of <code>false</code> implies an error condition as the specified value is not compatible with this field.
313 	 * <p>
314 	 * In the unusual case that this field consists of multiple form controls, but not all of them are
315 	 * <a href="FormControl.html#PredefinedValueControl">predefined value controls</a>, priority is given to the predefined value controls
316 	 * before attempting to add the value to the <a href="FormControl.html#UserValueControl">user value controls</a>.
317 	 *
318 	 * @param value  the new <a href="#FieldSubmissionValues">field submission value</a> to add to this field, must not be <code>null</code>.
319 	 * @return <code>true</code> if one of the constituent {@linkplain #getFormControls() form controls} accepts the value, otherwise <code>false</code>.
320 	 */
addValue(final String value)321 	public boolean addValue(final String value) {
322 		if (value==null) throw new IllegalArgumentException("value argument must not be null");
323 		if (formControls.size()==1) return getFirstFormControl().addValue(value);
324 		List<FormControl> userValueControls=null;
325 		for (FormControl formControl : formControls) {
326 			if (!formControl.getFormControlType().hasPredefinedValue()) {
327 				// A user value control has been found, but is not the only control with this name.
328 				// This shouldn't normally happen in a well designed form, but we will save the user value control
329 				// for later and give all predefined value controls first opportunity to take the value.
330 				if (userValueControls==null) userValueControls=new LinkedList<FormControl>();
331 				userValueControls.add(formControl);
332 				continue;
333 			}
334 			if (formControl.addValue(value)) return true; // return value of true from formControl.addValue(value) means the value was taken by the control
335 		}
336 		if (userValueControls==null) return false;
337 		for (FormControl userFormControl : userValueControls) {
338 			if (userFormControl.addValue(value)) return true;
339 		}
340 		return false;
341 	}
342 
343 	/**
344 	 * Returns a string representation of this object useful for debugging purposes.
345 	 * @return a string representation of this object useful for debugging purposes.
346 	 */
getDebugInfo()347 	public String getDebugInfo() {
348 		final StringBuilder sb=new StringBuilder();
349 		sb.append("Field: ").append(name).append(", UserValueCount=").append(userValueCount).append(", AllowsMultipleValues=").append(allowsMultipleValues);
350 		if (predefinedValues!=null) {
351 			for (String predefinedValue : predefinedValues) sb.append(Config.NewLine).append("PredefinedValue: ").append(predefinedValue);
352 		}
353 		for (FormControl formControl : formControls) sb.append(Config.NewLine).append("FormControl: ").append(formControl.getDebugInfo());
354 		sb.append(Config.NewLine).append(Config.NewLine);
355 		return sb.toString();
356 	}
357 
358 	/**
359 	 * Returns a string representation of this object useful for debugging purposes.
360 	 * <p>
361 	 * This is equivalent to {@link #getDebugInfo()}.
362 	 *
363 	 * @return a string representation of this object useful for debugging purposes.
364 	 */
toString()365 	public String toString() {
366 		return getDebugInfo();
367 	}
368 
addValues(final Collection<String> values)369 	void addValues(final Collection<String> values) {
370 		if (values!=null) for (String value : values) addValue(value);
371 	}
372 
addValues(final String[] values)373 	void addValues(final String[] values) {
374 		if (values!=null) for (String value : values) addValue(value);
375 	}
376 
addFormControl(final FormControl formControl, final String predefinedValue)377 	void addFormControl(final FormControl formControl, final String predefinedValue) {
378 		// predefinedValue==null if we are adding a user value
379 		if (predefinedValue==null) {
380 			userValueCount++;
381 		} else {
382 			if (predefinedValues==null) predefinedValues=new LinkedHashSet<String>();
383 			predefinedValues.add(predefinedValue);
384 		}
385 		formControls.add(formControl);
386 		allowsMultipleValues=calculateAllowsMultipleValues(formControl);
387 	}
388 
calculateAllowsMultipleValues(final FormControl newFormControl)389 	private boolean calculateAllowsMultipleValues(final FormControl newFormControl) {
390 		// false if only one control (unless it is a multiple select with more than one option),
391 		// or all of the controls are radio buttons, or all of the controls are submit buttons
392 		if (allowsMultipleValues || userValueCount>1) return true;
393 		if (userValueCount==1) return predefinedValues!=null;
394 		// at this stage we know userValueCount==0  && predefinedValues.size()>=1
395 		if (predefinedValues.size()==1) return false;
396 		final FormControlType newFormControlType=newFormControl.getFormControlType();
397 		if (formControls.size()==1) return newFormControlType==FormControlType.SELECT_MULTIPLE;
398 		// at this stage we know there are multiple predefined values in multiple controls.
399 		// if all of the controls are radio buttons or all are submit buttons, allowsMultipleValues is false, otherwise true.
400 		// checking only the first control and the new control is equivalent to checking them all because if they weren't all
401 		// the same allowsMultipleValues would already be true.
402 		final FormControlType firstFormControlType=getFirstFormControl().getFormControlType();
403 		if (newFormControlType==FormControlType.RADIO && firstFormControlType==FormControlType.RADIO) return false;
404 		if (newFormControlType.isSubmit() && firstFormControlType.isSubmit()) return false;
405 		return true;
406 	}
407 
getFirstFormControl()408 	FormControl getFirstFormControl() {
409 		// formControls must be ordered collection for this method to work.
410 		// It has to return the first FormControl entered into the collection
411 		// for the algorithm in calculateAllowsMultipleValues() to work.
412 		if (firstFormControl==null) firstFormControl=formControls.iterator().next();
413 		return firstFormControl;
414 	}
415 
416 	/** only called from FormFields class */
merge(final FormField formField)417 	void merge(final FormField formField) {
418 		if (formField.userValueCount>userValueCount) userValueCount=formField.userValueCount;
419 		allowsMultipleValues=allowsMultipleValues || formField.allowsMultipleValues;
420 		if (predefinedValues==null) {
421 			predefinedValues=formField.predefinedValues;
422 		} else if (formField.predefinedValues!=null) {
423 			for (String predefinedValue : predefinedValues) predefinedValues.add(predefinedValue);
424 		}
425 		for (FormControl formControl : formField.getFormControls()) formControls.add(formControl);
426 	}
427 }
428 
429