1 // Jericho HTML Parser - Java based library for analysing and manipulating HTML
2 // Version 3.2
3 // Copyright (C) 2004-2009 Martin Jericho
4 // http://jericho.htmlparser.net/
5 //
6 // This library is free software; you can redistribute it and/or
7 // modify it under the terms of either one of the following licences:
8 //
9 // 1. The Eclipse Public License (EPL) version 1.0,
10 // included in this distribution in the file licence-epl-1.0.html
11 // or available at http://www.eclipse.org/legal/epl-v10.html
12 //
13 // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
14 // included in this distribution in the file licence-lgpl-2.1.txt
15 // or available at http://www.gnu.org/licenses/lgpl.txt
16 //
17 // This library is distributed on an "AS IS" basis,
18 // WITHOUT WARRANTY OF ANY KIND, either express or implied.
19 // See the individual licence texts for more details.
20 
21 package net.htmlparser.jericho;
22 
23 import java.util.*;
24 
25 /**
26  * Iterates over the "nodes" in a segment.
27  * <p>
28  * Every object returned is a Segment.  All tags found with the Segment.getAllTags() method are included, as well as segments representing the plain text in between them,
29  * and character references within the plain text are also included as separate nodes.
30  */
31 class NodeIterator implements Iterator<Segment> {
32 	private final Segment segment;
33 	private final Source source;
34 	private int pos;
35 	private Tag nextTag;
36 	private CharacterReference characterReferenceAtCurrentPosition=null;
37 
38 	private final boolean legacyIteratorCompatabilityMode=Source.LegacyIteratorCompatabilityMode;
39 
NodeIterator(final Segment segment)40 	public NodeIterator(final Segment segment) {
41 		this.segment=segment;
42 		source=segment.source;
43 		if (segment==source) source.fullSequentialParse();
44 		pos=segment.begin;
45 		nextTag=source.getNextTag(pos);
46 		if (nextTag!=null && nextTag.begin>=segment.end) nextTag=null;
47 	}
48 
hasNext()49 	public boolean hasNext() {
50 		return pos<segment.end || nextTag!=null;
51 	}
52 
next()53 	public Segment next() {
54 		final int oldPos=pos;
55 		if (nextTag!=null) {
56 			if (oldPos<nextTag.begin) return nextNonTagSegment(oldPos,nextTag.begin);
57 			final Tag tag=nextTag;
58 			nextTag=nextTag.getNextTag();
59 			if (nextTag!=null && nextTag.begin>=segment.end) nextTag=null;
60 			if (pos<tag.end) pos=tag.end;
61 			return tag;
62 		} else {
63 			if (!hasNext()) throw new NoSuchElementException();
64 			return nextNonTagSegment(oldPos,segment.end);
65 		}
66 	}
67 
nextNonTagSegment(final int begin, final int end)68 	private Segment nextNonTagSegment(final int begin, final int end) {
69 		if (!legacyIteratorCompatabilityMode) {
70 			final CharacterReference characterReference=characterReferenceAtCurrentPosition;
71 			if (characterReference!=null) {
72 				characterReferenceAtCurrentPosition=null;
73 				pos=characterReference.end;
74 				return characterReference;
75 			}
76 			final ParseText parseText=source.getParseText();
77 			int potentialCharacterReferenceBegin=parseText.indexOf('&',begin,end);
78 			while (potentialCharacterReferenceBegin!=-1) {
79 				final CharacterReference nextCharacterReference=CharacterReference.construct(source,potentialCharacterReferenceBegin,Config.UnterminatedCharacterReferenceSettings.ACCEPT_ALL);
80 				if (nextCharacterReference!=null) {
81 					if (potentialCharacterReferenceBegin==begin) {
82 						pos=nextCharacterReference.end;
83 						return nextCharacterReference;
84 					} else {
85 						pos=nextCharacterReference.begin;
86 						characterReferenceAtCurrentPosition=nextCharacterReference;
87 						return new Segment(source,begin,pos);
88 					}
89 				}
90 				potentialCharacterReferenceBegin=parseText.indexOf('&',potentialCharacterReferenceBegin+1,end);
91 			}
92 		}
93 		return new Segment(source,begin,pos=end);
94 	}
95 
skipToPos(final int pos)96 	public void skipToPos(final int pos) {
97 		if (pos<this.pos) return; // can't go backwards
98 		this.pos=pos;
99 		nextTag=source.getNextTag(pos);
100 	}
101 
remove()102 	public void remove() {
103 		throw new UnsupportedOperationException();
104 	}
105 }
106