// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 3.2
// Copyright (C) 2004-2009 Martin Jericho
// http://jericho.htmlparser.net/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of either one of the following licences:
//
// 1. The Eclipse Public License (EPL) version 1.0,
// included in this distribution in the file licence-epl-1.0.html
// or available at http://www.eclipse.org/legal/epl-v10.html
//
// 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
// included in this distribution in the file licence-lgpl-2.1.txt
// or available at http://www.gnu.org/licenses/lgpl.txt
//
// This library is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the individual licence texts for more details.

package net.htmlparser.jericho;

import java.util.*;

/**
 * Iterates over the "nodes" in a segment.
 * <p>
 * Every object returned is a Segment. All tags found with the Segment.getAllTags() method are included, as well as segments representing the plain text in between them,
 * and character references within the plain text are also included as separate nodes.
 * <p>
 * Character references are only split out as separate nodes when the value of
 * <code>Source.LegacyIteratorCompatabilityMode</code> read at construction time is <code>false</code>;
 * otherwise each run of non-tag text is returned as a single plain segment.
 * <p>
 * The iterator is read-only: {@link #remove()} always throws {@link UnsupportedOperationException}.
 */
class NodeIterator implements Iterator<Segment> {
	/** The segment whose nodes are being iterated. */
	private final Segment segment;

	/** The source document that {@link #segment} belongs to. */
	private final Source source;

	/** Position in the source up to which nodes have already been returned or skipped. */
	private int pos;

	/** The next tag that has not yet been returned, or <code>null</code> if no further tag begins before the end of the segment. */
	private Tag nextTag;

	/**
	 * A character reference that has already been parsed and begins exactly at {@link #pos}.
	 * It is set when the plain text preceding it is returned, so that the following call can return it without parsing it again.
	 */
	private CharacterReference characterReferenceAtCurrentPosition=null;

	/** Snapshot of <code>Source.LegacyIteratorCompatabilityMode</code> taken when this iterator was created. */
	private final boolean legacyIteratorCompatabilityMode=Source.LegacyIteratorCompatabilityMode;

	/**
	 * Constructs an iterator over the nodes of the specified segment.
	 * <p>
	 * If the segment is the source document itself, a full sequential parse of the source is requested first.
	 *
	 * @param segment  the segment to iterate over.
	 */
	public NodeIterator(final Segment segment) {
		this.segment=segment;
		source=segment.source;
		if (segment==source) source.fullSequentialParse();
		pos=segment.begin;
		nextTag=findNextTagInSegment(pos);
	}

	/**
	 * Indicates whether there are more nodes to return.
	 *
	 * @return <code>true</code> if the current position is before the end of the segment or a tag is still pending.
	 */
	public boolean hasNext() {
		return pos<segment.end || nextTag!=null;
	}

	/**
	 * Returns the next node: a tag, a character reference, or a segment of plain text.
	 *
	 * @return the next node.
	 * @throws NoSuchElementException if there are no more nodes.
	 */
	public Segment next() {
		final int oldPos=pos;
		if (nextTag==null) {
			if (!hasNext()) throw new NoSuchElementException();
			// No more tags: everything up to the end of the segment is non-tag content.
			return nextNonTagSegment(oldPos,segment.end);
		}
		// Non-tag content lying before the pending tag is returned first.
		if (oldPos<nextTag.begin) return nextNonTagSegment(oldPos,nextTag.begin);
		final Tag tag=nextTag;
		nextTag=nextTag.getNextTag();
		// A tag that begins at or beyond the end of the segment is not part of this iteration.
		if (nextTag!=null && nextTag.begin>=segment.end) nextTag=null;
		// Only ever move forwards; pos may already be past the end of this tag.
		if (pos<tag.end) pos=tag.end;
		return tag;
	}

	/**
	 * Returns the next node from a range of the source that lies before the next pending tag.
	 * <p>
	 * Outside of legacy mode the result is, in order of precedence:
	 * the character reference cached by the previous call;
	 * a character reference starting exactly at <code>begin</code>;
	 * the plain text preceding the first character reference found in the range (which is then cached for the following call);
	 * or the whole range as plain text if it contains no character reference.
	 * In legacy mode the whole range is always returned as a single plain segment.
	 * <p>
	 * {@link #pos} is advanced to the end of the returned node.
	 *
	 * @param begin  the position at which the non-tag content starts.
	 * @param end  the position at which the search for character references stops.
	 * @return the next non-tag node.
	 */
	private Segment nextNonTagSegment(final int begin, final int end) {
		if (!legacyIteratorCompatabilityMode) {
			final CharacterReference characterReference=characterReferenceAtCurrentPosition;
			if (characterReference!=null) {
				characterReferenceAtCurrentPosition=null;
				pos=characterReference.end;
				return characterReference;
			}
			final ParseText parseText=source.getParseText();
			int potentialCharacterReferenceBegin=parseText.indexOf('&',begin,end);
			while (potentialCharacterReferenceBegin!=-1) {
				final CharacterReference nextCharacterReference=CharacterReference.construct(source,potentialCharacterReferenceBegin,Config.UnterminatedCharacterReferenceSettings.ACCEPT_ALL);
				if (nextCharacterReference!=null) {
					if (potentialCharacterReferenceBegin==begin) {
						pos=nextCharacterReference.end;
						return nextCharacterReference;
					}
					// Return the text in front of the reference now and remember the reference for the next call.
					pos=nextCharacterReference.begin;
					characterReferenceAtCurrentPosition=nextCharacterReference;
					return new Segment(source,begin,pos);
				}
				// This ampersand did not start a character reference; look for the next one.
				potentialCharacterReferenceBegin=parseText.indexOf('&',potentialCharacterReferenceBegin+1,end);
			}
		}
		pos=end;
		return new Segment(source,begin,end);
	}

	/**
	 * Moves the iterator forward so that iteration resumes at the specified position.
	 * <p>
	 * A position before the current position is ignored, as the iterator can not go backwards.
	 * As in the constructor and {@link #next()}, a tag beginning at or beyond the end of the segment is not
	 * considered part of the iteration, and any character reference cached for the old position is discarded
	 * when the position actually changes so that it can not be returned out of order.
	 *
	 * @param pos  the position in the source at which to resume iteration.
	 */
	public void skipToPos(final int pos) {
		if (pos<this.pos) return; // can't go backwards
		// The cached character reference begins at the old position, so it is stale once we move past it.
		if (pos>this.pos) characterReferenceAtCurrentPosition=null;
		this.pos=pos;
		nextTag=findNextTagInSegment(pos);
	}

	/**
	 * This operation is not supported.
	 *
	 * @throws UnsupportedOperationException always.
	 */
	public void remove() {
		throw new UnsupportedOperationException();
	}

	/**
	 * Finds the first tag at or after the specified position that begins before the end of the segment.
	 *
	 * @param pos  the position in the source from which to search.
	 * @return the tag found, or <code>null</code> if the source has no further tag or the next one begins at or beyond the end of the segment.
	 */
	private Tag findNextTagInSegment(final int pos) {
		final Tag tag=source.getNextTag(pos);
		return (tag!=null && tag.begin<segment.end) ? tag : null;
	}
}