1 // Jericho HTML Parser - Java based library for analysing and manipulating HTML
2 // Version 3.2
3 // Copyright (C) 2004-2009 Martin Jericho
4 // http://jericho.htmlparser.net/
5 //
6 // This library is free software; you can redistribute it and/or
7 // modify it under the terms of either one of the following licences:
8 //
9 // 1. The Eclipse Public License (EPL) version 1.0,
10 // included in this distribution in the file licence-epl-1.0.html
11 // or available at http://www.eclipse.org/legal/epl-v10.html
12 //
13 // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
14 // included in this distribution in the file licence-lgpl-2.1.txt
15 // or available at http://www.gnu.org/licenses/lgpl.txt
16 //
17 // This library is distributed on an "AS IS" basis,
18 // WITHOUT WARRANTY OF ANY KIND, either express or implied.
19 // See the individual licence texts for more details.
20 
21 package net.htmlparser.jericho;
22 
23 import java.util.*;
24 
25 /**
26  * Represents the <a target="_blank" href="http://www.w3.org/TR/html401/intro/sgmltut.html#didx-element-3">end tag</a> of an
27  * {@linkplain Element element} in a specific {@linkplain Source source} document.
28  * <p>
29  * An end tag always has a {@linkplain #getTagType() type} that is a subclass of {@link EndTagType}, meaning it
30  * always starts with the characters '<code>&lt;/</code>'.
31  * <p>
32  * <code>EndTag</code> instances are obtained using one of the following methods:
33  * <ul>
34  *  <li>{@link Element#getEndTag()}
35  *  <li>{@link Tag#getNextTag()}
36  *  <li>{@link Tag#getPreviousTag()}
37  *  <li>{@link Source#getPreviousEndTag(int pos)}
38  *  <li>{@link Source#getPreviousEndTag(int pos, String name)}
39  *  <li>{@link Source#getPreviousTag(int pos)}
40  *  <li>{@link Source#getPreviousTag(int pos, TagType)}
41  *  <li>{@link Source#getNextEndTag(int pos)}
42  *  <li>{@link Source#getNextEndTag(int pos, String name)}
43  *  <li>{@link Source#getNextEndTag(int pos, String name, EndTagType)}
44  *  <li>{@link Source#getNextTag(int pos)}
45  *  <li>{@link Source#getNextTag(int pos, TagType)}
46  *  <li>{@link Source#getEnclosingTag(int pos)}
47  *  <li>{@link Source#getEnclosingTag(int pos, TagType)}
48  *  <li>{@link Source#getTagAt(int pos)}
49  *  <li>{@link Segment#getAllTags()}
50  *  <li>{@link Segment#getAllTags(TagType)}
51  * </ul>
52  * <p>
53  * The {@link Tag} superclass defines the {@link Tag#getName() getName()} method used to get the name of this end tag.
54  * <p>
55  * See also the XML 1.0 specification for <a target="_blank" href="http://www.w3.org/TR/REC-xml#dt-etag">end tags</a>.
56  *
57  * @see Tag
58  * @see StartTag
59  * @see Element
60  */
61 public final class EndTag extends Tag {
62 	private final EndTagType endTagType;
63 
64 	/**
65 	 * Constructs a new <code>EndTag</code>.
66 	 *
67 	 * @param source  the {@link Source} document.
68 	 * @param begin  the character position in the source document where this tag {@linkplain Segment#getBegin() begins}.
69 	 * @param end  the character position in the source document where this tag {@linkplain Segment#getEnd() ends}.
70 	 * @param endTagType  the {@linkplain #getEndTagType() type} of the end tag.
71 	 * @param name  the {@linkplain Tag#getName() name} of the tag.
72 	 */
EndTag(final Source source, final int begin, final int end, final EndTagType endTagType, final String name)73 	EndTag(final Source source, final int begin, final int end, final EndTagType endTagType, final String name) {
74 		super(source,begin,end,name);
75 		this.endTagType=endTagType;
76 	}
77 
78 	/**
79 	 * Returns the {@linkplain Element element} that is ended by this end tag.
80 	 * <p>
81 	 * Returns <code>null</code> if this end tag is not properly matched to any {@linkplain StartTag start tag} in the source document.
82 	 * <p>
83 	 * This method is much less efficient than the {@link StartTag#getElement()} method.
84 	 * <p>
85 	 * IMPLEMENTATION NOTE: The explanation for why this method is relatively inefficient lies in the fact that more than one
86 	 * {@linkplain StartTagType start tag type} can have the same
87 	 * {@linkplain StartTagType#getCorrespondingEndTagType() corresponding end tag type}, so it is not possible to know for certain
88 	 * which type of start tag this end tag is matched to (see {@link EndTagType#getCorrespondingStartTagType()} for more explanation).
89 	 * Because of this uncertainty, the implementation of this method must check every start tag preceding this end tag, calling its
90 	 * {@link StartTag#getElement()} method to see whether it is terminated by this end tag.
91 	 *
92 	 * @return the {@linkplain Element element} that is ended by this end tag.
93 	 */
getElement()94 	public Element getElement() {
95 		if (element!=Element.NOT_CACHED) return element;
96 		int pos=begin;
97 		while (pos!=0) {
98 			StartTag startTag=source.getPreviousStartTag(pos-1);
99 			if (startTag==null) break;
100 			Element foundElement=startTag.getElement(); // this automatically sets foundElement.getEndTag().element cache
101 			if (foundElement.getEndTag()==this) return foundElement; // no need to set element as it was already done in previous statement
102 			pos=startTag.begin;
103 		}
104 		return element=null;
105 	}
106 
107 	/**
108 	 * Returns the {@linkplain EndTagType type} of this end tag.
109 	 * <p>
110 	 * This is equivalent to <code>(EndTagType)</code>{@link #getTagType()}.
111 	 *
112 	 * @return the {@linkplain EndTagType type} of this end tag.
113 	 */
getEndTagType()114 	public EndTagType getEndTagType() {
115 		return endTagType;
116 	}
117 
118 	// Documentation inherited from Tag
getTagType()119 	public TagType getTagType() {
120 		return endTagType;
121 	}
122 
123 	// Documentation inherited from Tag
isUnregistered()124 	public boolean isUnregistered() {
125 		return endTagType==EndTagType.UNREGISTERED;
126 	}
127 
128 	/**
129 	 * Returns an XML representation of this end tag.
130 	 * <p>
131 	 * This method is included for symmetry with the {@link StartTag#tidy()} method and simply
132 	 * returns the {@linkplain Segment#toString() source text} of the tag.
133 	 *
134 	 * @return an XML representation of this end tag.
135 	 */
tidy()136 	public String tidy() {
137 		return toString();
138 	}
139 
140 	/**
141 	 * Generates the HTML text of a {@linkplain EndTagType#NORMAL normal} end tag with the specified tag {@linkplain #getName() name}.
142 	 * <p>
143 	 * <dl>
144 	 *  <dt>Example:</dt>
145 	 *  <dd>
146 	 *   <p>
147 	 *   The following method call:
148 	 *   <blockquote class="code">
149 	 *    <code>EndTag.generateHTML("INPUT")</code>
150 	 *   </blockquote>
151 	 *   returns the following output:
152 	 *   <blockquote class="code">
153 	 *    <code>&lt;/INPUT&gt;</code>
154 	 *   </blockquote>
155 	 *  </dd>
156 	 * </dl>
157 	 *
158 	 * @param tagName  the {@linkplain #getName() name} of the end tag.
159 	 * @return the HTML text of a {@linkplain EndTagType#NORMAL normal} end tag with the specified tag {@linkplain #getName() name}.
160 	 * @see StartTag#generateHTML(String tagName, Map attributesMap, boolean emptyElementTag)
161 	 */
generateHTML(final String tagName)162 	public static String generateHTML(final String tagName) {
163 		return EndTagType.NORMAL.generateHTML(tagName);
164 	}
165 
getDebugInfo()166 	public String getDebugInfo() {
167 		final StringBuilder sb=new StringBuilder();
168 		sb.append(this).append(' ');
169 		if (endTagType!=EndTagType.NORMAL) sb.append('(').append(endTagType.getDescription()).append(") ");
170 		sb.append(super.getDebugInfo());
171 		return sb.toString();
172 	}
173 
174 	/**
175 	 * Returns the previous end tag matching the specified {@linkplain #getName() name} and {@linkplain EndTagType type}, starting at the specified position.
176 	 * <p>
177 	 * Called from {@link Source#getPreviousEndTag(int pos, String name)}.
178 	 *
179 	 * @param source  the {@link Source} document.
180 	 * @param pos  the position to search from.
181 	 * @param name  the {@linkplain #getName() name} of the tag including its {@linkplain TagType#getNamePrefix() prefix} (must be lower case, may be null).
182 	 * @param endTagType the {@linkplain EndTagType type} of end tag to search for.
183 	 * @return the previous end tag matching the specified {@linkplain #getName() name} and {@linkplain EndTagType type}, starting at the specified position, or null if none is found.
184 	 */
getPrevious(final Source source, final int pos, final String name, final EndTagType endTagType)185 	static EndTag getPrevious(final Source source, final int pos, final String name, final EndTagType endTagType) {
186 		if (name==null) return (EndTag)Tag.getPreviousTag(source,pos,endTagType);
187 		if (name.length()==0) throw new IllegalArgumentException("name argument must not be zero length");
188 		final String searchString=endTagType.START_DELIMITER_PREFIX+name;
189 		try {
190 			final ParseText parseText=source.getParseText();
191 			int begin=pos;
192 			do {
193 				begin=parseText.lastIndexOf(searchString,begin);
194 				if (begin==-1) return null;
195 				final EndTag endTag=(EndTag)source.getTagAt(begin);
196 				if (endTag!=null && endTag.getEndTagType()==endTagType && name.equals(endTag.getName())) return endTag;
197 			} while ((begin-=1)>=0);
198 		} catch (IndexOutOfBoundsException ex) {
199 			// this should never happen during a get previous operation so rethrow it:
200 			throw ex;
201 		}
202 		return null;
203 	}
204 
205 	/**
206 	 * Returns the next end tag matching the specified {@linkplain #getName() name} and {@linkplain EndTagType type}, starting at the specified position.
207 	 * <p>
208 	 * Called from {@link Source#getNextEndTag(int pos, String name, EndTagType endTagType)}.
209 	 *
210 	 * @param source  the {@link Source} document.
211 	 * @param pos  the position to search from.
212 	 * @param name  the {@linkplain #getName() name} of the tag including its {@linkplain TagType#getNamePrefix() prefix} (must be lower case, may be null).
213 	 * @param endTagType the {@linkplain EndTagType type} of end tag to search for.
214 	 * @return the next end tag matching the specified {@linkplain #getName() name} and {@linkplain EndTagType type}, starting at the specified position, or null if none is found.
215 	 */
getNext(final Source source, final int pos, final String name, final EndTagType endTagType)216 	static EndTag getNext(final Source source, final int pos, final String name, final EndTagType endTagType) {
217 		if (name==null) return (EndTag)Tag.getNextTag(source,pos,endTagType);
218 		if (name.length()==0) throw new IllegalArgumentException("name argument must not be zero length");
219 		final String searchString=endTagType.START_DELIMITER_PREFIX+name;
220 		try {
221 			final ParseText parseText=source.getParseText();
222 			int begin=pos;
223 			do {
224 				begin=parseText.indexOf(searchString,begin);
225 				if (begin==-1) return null;
226 				final EndTag endTag=(EndTag)source.getTagAt(begin);
227 				if (endTag!=null && endTag.getEndTagType()==endTagType && name.equals(endTag.getName())) return endTag;
228 			} while ((begin+=1)<source.end);
229 		} catch (IndexOutOfBoundsException ex) {
230 			// this should only happen when the end of file is reached in the middle of a tag.
231 			// we don't have to do anything to handle it as there will be no more tags anyway.
232 		}
233 		return null;
234 	}
235 
getPrevious(final Source source, int pos)236 	static EndTag getPrevious(final Source source, int pos) {
237 		while (true) {
238 			final Tag tag=Tag.getPreviousTag(source,pos);
239 			if (tag==null) return null;
240 			if (tag instanceof EndTag) return (EndTag)tag;
241 			pos-=1;
242 		}
243 	}
244 
getNext(final Source source, int pos)245 	static EndTag getNext(final Source source, int pos) {
246 		while (true) {
247 			final Tag tag=Tag.getNextTag(source,pos);
248 			if (tag==null) return null;
249 			if (tag instanceof EndTag) return (EndTag)tag;
250 			pos+=1;
251 		}
252 	}
253 }
254 
255