1<?php
2    // $Id: parser_test.php,v 1.1 2005/11/09 23:41:18 gsmet Exp $
3
4    require_once(dirname(__FILE__) . '/../parser.php');
5
6    Mock::generate('SimpleSaxParser');
7
8    class TestOfParallelRegex extends UnitTestCase {
9
10        function testNoPatterns() {
11            $regex = &new ParallelRegex(false);
12            $this->assertFalse($regex->match("Hello", $match));
13            $this->assertEqual($match, "");
14        }
15
16        function testNoSubject() {
17            $regex = &new ParallelRegex(false);
18            $regex->addPattern(".*");
19            $this->assertTrue($regex->match("", $match));
20            $this->assertEqual($match, "");
21        }
22
23        function testMatchAll() {
24            $regex = &new ParallelRegex(false);
25            $regex->addPattern(".*");
26            $this->assertTrue($regex->match("Hello", $match));
27            $this->assertEqual($match, "Hello");
28        }
29
30        function testCaseSensitive() {
31            $regex = &new ParallelRegex(true);
32            $regex->addPattern("abc");
33            $this->assertTrue($regex->match("abcdef", $match));
34            $this->assertEqual($match, "abc");
35            $this->assertTrue($regex->match("AAABCabcdef", $match));
36            $this->assertEqual($match, "abc");
37        }
38
39        function testCaseInsensitive() {
40            $regex = &new ParallelRegex(false);
41            $regex->addPattern("abc");
42            $this->assertTrue($regex->match("abcdef", $match));
43            $this->assertEqual($match, "abc");
44            $this->assertTrue($regex->match("AAABCabcdef", $match));
45            $this->assertEqual($match, "ABC");
46        }
47
48        function testMatchMultiple() {
49            $regex = &new ParallelRegex(true);
50            $regex->addPattern("abc");
51            $regex->addPattern("ABC");
52            $this->assertTrue($regex->match("abcdef", $match));
53            $this->assertEqual($match, "abc");
54            $this->assertTrue($regex->match("AAABCabcdef", $match));
55            $this->assertEqual($match, "ABC");
56            $this->assertFalse($regex->match("Hello", $match));
57        }
58
59        function testPatternLabels() {
60            $regex = &new ParallelRegex(false);
61            $regex->addPattern("abc", "letter");
62            $regex->addPattern("123", "number");
63            $this->assertIdentical($regex->match("abcdef", $match), "letter");
64            $this->assertEqual($match, "abc");
65            $this->assertIdentical($regex->match("0123456789", $match), "number");
66            $this->assertEqual($match, "123");
67        }
68    }
69
70    class TestOfStateStack extends UnitTestCase {
71
72        function testStartState() {
73            $stack = &new SimpleStateStack("one");
74            $this->assertEqual($stack->getCurrent(), "one");
75        }
76
77        function testExhaustion() {
78            $stack = &new SimpleStateStack("one");
79            $this->assertFalse($stack->leave());
80        }
81
82        function testStateMoves() {
83            $stack = &new SimpleStateStack("one");
84            $stack->enter("two");
85            $this->assertEqual($stack->getCurrent(), "two");
86            $stack->enter("three");
87            $this->assertEqual($stack->getCurrent(), "three");
88            $this->assertTrue($stack->leave());
89            $this->assertEqual($stack->getCurrent(), "two");
90            $stack->enter("third");
91            $this->assertEqual($stack->getCurrent(), "third");
92            $this->assertTrue($stack->leave());
93            $this->assertTrue($stack->leave());
94            $this->assertEqual($stack->getCurrent(), "one");
95        }
96    }
97
98    class TestParser {
99
100        function accept() {
101        }
102
103        function a() {
104        }
105
106        function b() {
107        }
108    }
109    Mock::generate('TestParser');
110
111    class TestOfLexer extends UnitTestCase {
112
113        function testEmptyPage() {
114            $handler = &new MockTestParser($this);
115            $handler->expectNever("accept");
116            $handler->setReturnValue("accept", true);
117            $handler->expectNever("accept");
118            $handler->setReturnValue("accept", true);
119            $lexer = &new SimpleLexer($handler);
120            $lexer->addPattern("a+");
121            $this->assertTrue($lexer->parse(""));
122        }
123
124        function testSinglePattern() {
125            $handler = &new MockTestParser($this);
126            $handler->expectArgumentsAt(0, "accept", array("aaa", LEXER_MATCHED));
127            $handler->expectArgumentsAt(1, "accept", array("x", LEXER_UNMATCHED));
128            $handler->expectArgumentsAt(2, "accept", array("a", LEXER_MATCHED));
129            $handler->expectArgumentsAt(3, "accept", array("yyy", LEXER_UNMATCHED));
130            $handler->expectArgumentsAt(4, "accept", array("a", LEXER_MATCHED));
131            $handler->expectArgumentsAt(5, "accept", array("x", LEXER_UNMATCHED));
132            $handler->expectArgumentsAt(6, "accept", array("aaa", LEXER_MATCHED));
133            $handler->expectArgumentsAt(7, "accept", array("z", LEXER_UNMATCHED));
134            $handler->expectCallCount("accept", 8);
135            $handler->setReturnValue("accept", true);
136            $lexer = &new SimpleLexer($handler);
137            $lexer->addPattern("a+");
138            $this->assertTrue($lexer->parse("aaaxayyyaxaaaz"));
139            $handler->tally();
140        }
141
142        function testMultiplePattern() {
143            $handler = &new MockTestParser($this);
144            $target = array("a", "b", "a", "bb", "x", "b", "a", "xxxxxx", "a", "x");
145            for ($i = 0; $i < count($target); $i++) {
146                $handler->expectArgumentsAt($i, "accept", array($target[$i], '*'));
147            }
148            $handler->expectCallCount("accept", count($target));
149            $handler->setReturnValue("accept", true);
150            $lexer = &new SimpleLexer($handler);
151            $lexer->addPattern("a+");
152            $lexer->addPattern("b+");
153            $this->assertTrue($lexer->parse("ababbxbaxxxxxxax"));
154            $handler->tally();
155        }
156    }
157
158    class TestOfLexerModes extends UnitTestCase {
159
160        function testIsolatedPattern() {
161            $handler = &new MockTestParser($this);
162            $handler->expectArgumentsAt(0, "a", array("a", LEXER_MATCHED));
163            $handler->expectArgumentsAt(1, "a", array("b", LEXER_UNMATCHED));
164            $handler->expectArgumentsAt(2, "a", array("aa", LEXER_MATCHED));
165            $handler->expectArgumentsAt(3, "a", array("bxb", LEXER_UNMATCHED));
166            $handler->expectArgumentsAt(4, "a", array("aaa", LEXER_MATCHED));
167            $handler->expectArgumentsAt(5, "a", array("x", LEXER_UNMATCHED));
168            $handler->expectArgumentsAt(6, "a", array("aaaa", LEXER_MATCHED));
169            $handler->expectArgumentsAt(7, "a", array("x", LEXER_UNMATCHED));
170            $handler->expectCallCount("a", 8);
171            $handler->setReturnValue("a", true);
172            $lexer = &new SimpleLexer($handler, "a");
173            $lexer->addPattern("a+", "a");
174            $lexer->addPattern("b+", "b");
175            $this->assertTrue($lexer->parse("abaabxbaaaxaaaax"));
176            $handler->tally();
177        }
178
179        function testModeChange() {
180            $handler = &new MockTestParser($this);
181            $handler->expectArgumentsAt(0, "a", array("a", LEXER_MATCHED));
182            $handler->expectArgumentsAt(1, "a", array("b", LEXER_UNMATCHED));
183            $handler->expectArgumentsAt(2, "a", array("aa", LEXER_MATCHED));
184            $handler->expectArgumentsAt(3, "a", array("b", LEXER_UNMATCHED));
185            $handler->expectArgumentsAt(4, "a", array("aaa", LEXER_MATCHED));
186            $handler->expectArgumentsAt(0, "b", array(":", LEXER_ENTER));
187            $handler->expectArgumentsAt(1, "b", array("a", LEXER_UNMATCHED));
188            $handler->expectArgumentsAt(2, "b", array("b", LEXER_MATCHED));
189            $handler->expectArgumentsAt(3, "b", array("a", LEXER_UNMATCHED));
190            $handler->expectArgumentsAt(4, "b", array("bb", LEXER_MATCHED));
191            $handler->expectArgumentsAt(5, "b", array("a", LEXER_UNMATCHED));
192            $handler->expectArgumentsAt(6, "b", array("bbb", LEXER_MATCHED));
193            $handler->expectArgumentsAt(7, "b", array("a", LEXER_UNMATCHED));
194            $handler->expectCallCount("a", 5);
195            $handler->expectCallCount("b", 8);
196            $handler->setReturnValue("a", true);
197            $handler->setReturnValue("b", true);
198            $lexer = &new SimpleLexer($handler, "a");
199            $lexer->addPattern("a+", "a");
200            $lexer->addEntryPattern(":", "a", "b");
201            $lexer->addPattern("b+", "b");
202            $this->assertTrue($lexer->parse("abaabaaa:ababbabbba"));
203            $handler->tally();
204        }
205
206        function testNesting() {
207            $handler = &new MockTestParser($this);
208            $handler->setReturnValue("a", true);
209            $handler->setReturnValue("b", true);
210            $handler->expectArgumentsAt(0, "a", array("aa", LEXER_MATCHED));
211            $handler->expectArgumentsAt(1, "a", array("b", LEXER_UNMATCHED));
212            $handler->expectArgumentsAt(2, "a", array("aa", LEXER_MATCHED));
213            $handler->expectArgumentsAt(3, "a", array("b", LEXER_UNMATCHED));
214            $handler->expectArgumentsAt(0, "b", array("(", LEXER_ENTER));
215            $handler->expectArgumentsAt(1, "b", array("bb", LEXER_MATCHED));
216            $handler->expectArgumentsAt(2, "b", array("a", LEXER_UNMATCHED));
217            $handler->expectArgumentsAt(3, "b", array("bb", LEXER_MATCHED));
218            $handler->expectArgumentsAt(4, "b", array(")", LEXER_EXIT));
219            $handler->expectArgumentsAt(4, "a", array("aa", LEXER_MATCHED));
220            $handler->expectArgumentsAt(5, "a", array("b", LEXER_UNMATCHED));
221            $handler->expectCallCount("a", 6);
222            $handler->expectCallCount("b", 5);
223            $lexer = &new SimpleLexer($handler, "a");
224            $lexer->addPattern("a+", "a");
225            $lexer->addEntryPattern("(", "a", "b");
226            $lexer->addPattern("b+", "b");
227            $lexer->addExitPattern(")", "b");
228            $this->assertTrue($lexer->parse("aabaab(bbabb)aab"));
229            $handler->tally();
230        }
231
232        function testSingular() {
233            $handler = &new MockTestParser($this);
234            $handler->setReturnValue("a", true);
235            $handler->setReturnValue("b", true);
236            $handler->expectArgumentsAt(0, "a", array("aa", LEXER_MATCHED));
237            $handler->expectArgumentsAt(1, "a", array("aa", LEXER_MATCHED));
238            $handler->expectArgumentsAt(2, "a", array("xx", LEXER_UNMATCHED));
239            $handler->expectArgumentsAt(3, "a", array("xx", LEXER_UNMATCHED));
240            $handler->expectArgumentsAt(0, "b", array("b", LEXER_SPECIAL));
241            $handler->expectArgumentsAt(1, "b", array("bbb", LEXER_SPECIAL));
242            $handler->expectCallCount("a", 4);
243            $handler->expectCallCount("b", 2);
244            $lexer = &new SimpleLexer($handler, "a");
245            $lexer->addPattern("a+", "a");
246            $lexer->addSpecialPattern("b+", "a", "b");
247            $this->assertTrue($lexer->parse("aabaaxxbbbxx"));
248            $handler->tally();
249        }
250
251        function testUnwindTooFar() {
252            $handler = &new MockTestParser($this);
253            $handler->setReturnValue("a", true);
254            $handler->expectArgumentsAt(0, "a", array("aa", LEXER_MATCHED));
255            $handler->expectArgumentsAt(1, "a", array(")", LEXER_EXIT));
256            $handler->expectCallCount("a", 2);
257            $lexer = &new SimpleLexer($handler, "a");
258            $lexer->addPattern("a+", "a");
259            $lexer->addExitPattern(")", "a");
260            $this->assertFalse($lexer->parse("aa)aa"));
261            $handler->tally();
262        }
263    }
264
265    class TestOfLexerHandlers extends UnitTestCase {
266
267        function testModeMapping() {
268            $handler = &new MockTestParser($this);
269            $handler->setReturnValue("a", true);
270            $handler->expectArgumentsAt(0, "a", array("aa", LEXER_MATCHED));
271            $handler->expectArgumentsAt(1, "a", array("(", LEXER_ENTER));
272            $handler->expectArgumentsAt(2, "a", array("bb", LEXER_MATCHED));
273            $handler->expectArgumentsAt(3, "a", array("a", LEXER_UNMATCHED));
274            $handler->expectArgumentsAt(4, "a", array("bb", LEXER_MATCHED));
275            $handler->expectArgumentsAt(5, "a", array(")", LEXER_EXIT));
276            $handler->expectArgumentsAt(6, "a", array("b", LEXER_UNMATCHED));
277            $handler->expectCallCount("a", 7);
278            $lexer = &new SimpleLexer($handler, "mode_a");
279            $lexer->addPattern("a+", "mode_a");
280            $lexer->addEntryPattern("(", "mode_a", "mode_b");
281            $lexer->addPattern("b+", "mode_b");
282            $lexer->addExitPattern(")", "mode_b");
283            $lexer->mapHandler("mode_a", "a");
284            $lexer->mapHandler("mode_b", "a");
285            $this->assertTrue($lexer->parse("aa(bbabb)b"));
286            $handler->tally();
287        }
288    }
289
290    Mock::generate("HtmlSaxParser");
291
292    class TestOfHtmlLexer extends UnitTestCase {
293        var $_handler;
294        var $_lexer;
295
296        function setUp() {
297            $this->_handler = &new MockSimpleSaxParser($this);
298            $this->_handler->setReturnValue("acceptStartToken", true);
299            $this->_handler->setReturnValue("acceptEndToken", true);
300            $this->_handler->setReturnValue("acceptAttributeToken", true);
301            $this->_handler->setReturnValue("acceptEntityToken", true);
302            $this->_handler->setReturnValue("acceptTextToken", true);
303            $this->_handler->setReturnValue("ignore", true);
304            $this->_lexer = &SimpleSaxParser::createLexer($this->_handler);
305        }
306
307        function tearDown() {
308            $this->_handler->tally();
309        }
310
311        function testUninteresting() {
312            $this->_handler->expectOnce("acceptTextToken", array("<html></html>", "*"));
313            $this->assertTrue($this->_lexer->parse("<html></html>"));
314        }
315
316        function testSkipCss() {
317            $this->_handler->expectMaximumCallCount("acceptTextToken", 0);
318            $this->_handler->expectAtLeastOnce("ignore");
319            $this->assertTrue($this->_lexer->parse("<style>Lot's of styles</style>"));
320        }
321
322        function testSkipJavaScript() {
323            $this->_handler->expectMaximumCallCount("acceptTextToken", 0);
324            $this->_handler->expectAtLeastOnce("ignore");
325            $this->assertTrue($this->_lexer->parse("<SCRIPT>Javascript code {';:^%^%�$'@\"*(}</SCRIPT>"));
326        }
327
328        function testSkipComments() {
329            $this->_handler->expectMaximumCallCount("acceptTextToken", 0);
330            $this->_handler->expectAtLeastOnce("ignore");
331            $this->assertTrue($this->_lexer->parse("<!-- <style>Lot's of styles</style> -->"));
332        }
333
334        function testTitleTag() {
335            $this->_handler->expectArgumentsAt(0, "acceptStartToken", array("<title", "*"));
336            $this->_handler->expectArgumentsAt(1, "acceptStartToken", array(">", "*"));
337            $this->_handler->expectCallCount("acceptStartToken", 2);
338            $this->_handler->expectOnce("acceptTextToken", array("Hello", "*"));
339            $this->_handler->expectOnce("acceptEndToken", array("</title>", "*"));
340            $this->assertTrue($this->_lexer->parse("<title>Hello</title>"));
341        }
342
343        function testFramesetTag() {
344            $this->_handler->expectArgumentsAt(0, "acceptStartToken", array("<frameset", "*"));
345            $this->_handler->expectArgumentsAt(1, "acceptStartToken", array(">", "*"));
346            $this->_handler->expectCallCount("acceptStartToken", 2);
347            $this->_handler->expectOnce("acceptTextToken", array("Frames", "*"));
348            $this->_handler->expectOnce("acceptEndToken", array("</frameset>", "*"));
349            $this->assertTrue($this->_lexer->parse("<frameset>Frames</frameset>"));
350        }
351
352        function testInputTag() {
353            $this->_handler->expectArgumentsAt(0, "acceptStartToken", array("<input", "*"));
354            $this->_handler->expectArgumentsAt(1, "acceptStartToken", array("name", "*"));
355            $this->_handler->expectArgumentsAt(2, "acceptStartToken", array("value", "*"));
356            $this->_handler->expectArgumentsAt(3, "acceptStartToken", array(">", "*"));
357            $this->_handler->expectArgumentsAt(0, "acceptAttributeToken", array("=a.b.c", "*"));
358            $this->_handler->expectArgumentsAt(1, "acceptAttributeToken", array("= d", "*"));
359            $this->assertTrue($this->_lexer->parse("<input name=a.b.c value = d>"));
360        }
361
362        function testEmptyLink() {
363            $this->_handler->expectArgumentsAt(0, "acceptStartToken", array("<a", "*"));
364            $this->_handler->expectArgumentsAt(1, "acceptStartToken", array(">", "*"));
365            $this->_handler->expectCallCount("acceptStartToken", 2);
366            $this->_handler->expectOnce("acceptEndToken", array("</a>", "*"));
367            $this->assertTrue($this->_lexer->parse("<html><a></a></html>"));
368        }
369
370        function testLabelledLink() {
371            $this->_handler->expectArgumentsAt(0, "acceptStartToken", array("<a", "*"));
372            $this->_handler->expectArgumentsAt(1, "acceptStartToken", array(">", "*"));
373            $this->_handler->expectCallCount("acceptStartToken", 2);
374            $this->_handler->expectOnce("acceptEndToken", array("</a>", "*"));
375            $this->_handler->expectArgumentsAt(0, "acceptTextToken", array("<html>", "*"));
376            $this->_handler->expectArgumentsAt(1, "acceptTextToken", array("label", "*"));
377            $this->_handler->expectArgumentsAt(2, "acceptTextToken", array("</html>", "*"));
378            $this->_handler->expectCallCount("acceptTextToken", 3);
379            $this->assertTrue($this->_lexer->parse("<html><a>label</a></html>"));
380        }
381
382        function testLinkAddress() {
383            $this->_handler->expectArgumentsAt(0, "acceptTextToken", array("<html>", "*"));
384            $this->_handler->expectArgumentsAt(1, "acceptTextToken", array("label", "*"));
385            $this->_handler->expectArgumentsAt(2, "acceptTextToken", array("</html>", "*"));
386            $this->_handler->expectCallCount("acceptTextToken", 3);
387            $this->_handler->expectArgumentsAt(0, "acceptStartToken", array("<a", "*"));
388            $this->_handler->expectArgumentsAt(1, "acceptStartToken", array("href", "*"));
389            $this->_handler->expectArgumentsAt(2, "acceptStartToken", array(">", "*"));
390            $this->_handler->expectCallCount("acceptStartToken", 3);
391            $this->_handler->expectArgumentsAt(0, "acceptAttributeToken", array("= '", "*"));
392            $this->_handler->expectArgumentsAt(1, "acceptAttributeToken", array("here.html", "*"));
393            $this->_handler->expectArgumentsAt(2, "acceptAttributeToken", array("'", "*"));
394            $this->_handler->expectCallCount("acceptAttributeToken", 3);
395            $this->assertTrue($this->_lexer->parse("<html><a href = 'here.html'>label</a></html>"));
396        }
397
398        function testEncodedLinkAddress() {
399            $this->_handler->expectArgumentsAt(0, "acceptTextToken", array("<html>", "*"));
400            $this->_handler->expectArgumentsAt(1, "acceptTextToken", array("label", "*"));
401            $this->_handler->expectArgumentsAt(2, "acceptTextToken", array("</html>", "*"));
402            $this->_handler->expectCallCount("acceptTextToken", 3);
403            $this->_handler->expectArgumentsAt(0, "acceptStartToken", array("<a", "*"));
404            $this->_handler->expectArgumentsAt(1, "acceptStartToken", array("href", "*"));
405            $this->_handler->expectArgumentsAt(2, "acceptStartToken", array(">", "*"));
406            $this->_handler->expectCallCount("acceptStartToken", 3);
407            $this->_handler->expectArgumentsAt(0, "acceptAttributeToken", array("= '", "*"));
408            $this->_handler->expectArgumentsAt(1, "acceptAttributeToken", array("here&amp;there.html", "*"));
409            $this->_handler->expectArgumentsAt(2, "acceptAttributeToken", array("'", "*"));
410            $this->_handler->expectCallCount("acceptAttributeToken", 3);
411            $this->assertTrue($this->_lexer->parse("<html><a href = 'here&amp;there.html'>label</a></html>"));
412        }
413
414        function testEmptyLinkWithId() {
415            $this->_handler->expectArgumentsAt(0, "acceptTextToken", array("<html>", "*"));
416            $this->_handler->expectArgumentsAt(1, "acceptTextToken", array("label", "*"));
417            $this->_handler->expectArgumentsAt(2, "acceptTextToken", array("</html>", "*"));
418            $this->_handler->expectCallCount("acceptTextToken", 3);
419            $this->_handler->expectArgumentsAt(0, "acceptStartToken", array("<a", "*"));
420            $this->_handler->expectArgumentsAt(1, "acceptStartToken", array("id", "*"));
421            $this->_handler->expectArgumentsAt(2, "acceptStartToken", array(">", "*"));
422            $this->_handler->expectCallCount("acceptStartToken", 3);
423            $this->_handler->expectArgumentsAt(0, "acceptAttributeToken", array("=\"", "*"));
424            $this->_handler->expectArgumentsAt(1, "acceptAttributeToken", array("0", "*"));
425            $this->_handler->expectArgumentsAt(2, "acceptAttributeToken", array("\"", "*"));
426            $this->_handler->expectCallCount("acceptAttributeToken", 3);
427            $this->assertTrue($this->_lexer->parse("<html><a id=\"0\">label</a></html>"));
428        }
429
430        function testComplexLink() {
431            $this->_handler->expectArgumentsAt(0, "acceptStartToken", array("<a", LEXER_ENTER));
432            $this->_handler->expectArgumentsAt(1, "acceptStartToken", array("HREF", "*"));
433            $this->_handler->expectArgumentsAt(2, "acceptStartToken", array("bool", "*"));
434            $this->_handler->expectArgumentsAt(3, "acceptStartToken", array("Style", "*"));
435            $this->_handler->expectArgumentsAt(4, "acceptStartToken", array(">", LEXER_EXIT));
436            $this->_handler->expectCallCount("acceptStartToken", 5);
437            $this->_handler->expectArgumentsAt(0, "acceptAttributeToken", array("= '", "*"));
438            $this->_handler->expectArgumentsAt(1, "acceptAttributeToken", array("here.html", LEXER_UNMATCHED));
439            $this->_handler->expectArgumentsAt(2, "acceptAttributeToken", array("'", "*"));
440            $this->_handler->expectArgumentsAt(3, "acceptAttributeToken", array("=\"", "*"));
441            $this->_handler->expectArgumentsAt(4, "acceptAttributeToken", array("'coo", "*"));
442            $this->_handler->expectArgumentsAt(5, "acceptAttributeToken", array('\"', "*"));
443            $this->_handler->expectArgumentsAt(6, "acceptAttributeToken", array("l'", "*"));
444            $this->_handler->expectArgumentsAt(7, "acceptAttributeToken", array("\"", "*"));
445            $this->_handler->expectCallCount("acceptAttributeToken", 8);
446            $this->assertTrue($this->_lexer->parse("<HTML><a HREF = 'here.html' bool Style=\"'coo\\\"l'\">label</A></Html>"));
447        }
448
449        function testSubmit() {
450            $this->_handler->expectArgumentsAt(0, "acceptStartToken", array("<input", LEXER_ENTER));
451            $this->_handler->expectArgumentsAt(1, "acceptStartToken", array("type", "*"));
452            $this->_handler->expectArgumentsAt(2, "acceptStartToken", array("name", "*"));
453            $this->_handler->expectArgumentsAt(3, "acceptStartToken", array("value", "*"));
454            $this->_handler->expectArgumentsAt(4, "acceptStartToken", array("/", "*"));
455            $this->_handler->expectArgumentsAt(5, "acceptStartToken", array(">", LEXER_EXIT));
456            $this->_handler->expectCallCount("acceptStartToken", 6);
457            $this->assertTrue($this->_lexer->parse('<input type="submit" name="N" value="V" />'));
458        }
459
460        function testFramesParsedWithoutError() {
461            $this->assertTrue($this->_lexer->parse(
462                    '<frameset><frame src="frame.html"></frameset>'));
463            $this->assertTrue($this->_lexer->parse(
464                    '<frameset><frame src="frame.html"><noframes>Hello</noframes></frameset>'));
465        }
466    }
467
468    class TestOfTextExtraction extends UnitTestCase {
469
470        function testSpaceNormalisation() {
471            $this->assertEqual(
472                    SimpleSaxParser::normalise("\nOne\tTwo   \nThree\t"),
473                    'One Two Three');
474        }
475
476        function testTagSuppression() {
477            $this->assertEqual(
478                    SimpleSaxParser::normalise('<b>Hello</b>'),
479                    'Hello');
480        }
481
482        function testAdjoiningTagSuppression() {
483            $this->assertEqual(
484                    SimpleSaxParser::normalise('<b>Hello</b><em>Goodbye</em>'),
485                    'HelloGoodbye');
486        }
487
488        function testExtractImageAltTextWithDifferentQuotes() {
489            $this->assertEqual(
490                    SimpleSaxParser::normalise('<img alt="One"><img alt=\'Two\'><img alt=Three>'),
491                    'One Two Three');
492        }
493
494        function testExtractImageAltTextMultipleTimes() {
495            $this->assertEqual(
496                    SimpleSaxParser::normalise('<img alt="One"><img alt="Two"><img alt="Three">'),
497                    'One Two Three');
498        }
499
500        function testHtmlEntityTranslation() {
501            $this->assertEqual(
502                    SimpleSaxParser::normalise('&lt;&gt;&quot;&amp;'),
503                    '<>"&');
504        }
505    }
506
507    class TestSimpleSaxParser extends SimpleSaxParser {
508        var $_lexer;
509
510        function TestSimpleSaxParser(&$listener, &$lexer) {
511            $this->_lexer = &$lexer;
512            $this->SimpleSaxParser($listener);
513        }
514
515        function &createLexer() {
516            return $this->_lexer;
517        }
518    }
519
520    Mock::generate("SimpleSaxListener");
521    Mock::generate("SimpleLexer");
522
523    class TestOfSaxGeneration extends UnitTestCase {
524        var $_listener;
525        var $_lexer;
526
527        function setUp() {
528            $this->_listener = &new MockSimpleSaxListener($this);
529            $this->_lexer = &new MockSimpleLexer($this);
530            $this->_parser = &new TestSimpleSaxParser($this->_listener, $this->_lexer);
531        }
532
533        function tearDown() {
534            $this->_listener->tally();
535            $this->_lexer->tally();
536        }
537
538        function testLexerFailure() {
539            $this->_lexer->setReturnValue("parse", false);
540            $this->assertFalse($this->_parser->parse("<html></html>"));
541        }
542
543        function testLexerSuccess() {
544            $this->_lexer->setReturnValue("parse", true);
545            $this->assertTrue($this->_parser->parse("<html></html>"));
546        }
547
548        function testSimpleLinkStart() {
549            $this->_parser->parse("");
550            $this->_listener->expectOnce("startElement", array("a", array()));
551            $this->_listener->setReturnValue("startElement", true);
552            $this->assertTrue($this->_parser->acceptStartToken("<a", LEXER_ENTER));
553            $this->assertTrue($this->_parser->acceptStartToken(">", LEXER_EXIT));
554        }
555
556        function testSimpleTitleStart() {
557            $this->_parser->parse("");
558            $this->_listener->expectOnce("startElement", array("title", array()));
559            $this->_listener->setReturnValue("startElement", true);
560            $this->assertTrue($this->_parser->acceptStartToken("<title", LEXER_ENTER));
561            $this->assertTrue($this->_parser->acceptStartToken(">", LEXER_EXIT));
562        }
563
564        function testLinkStart() {
565            $this->_parser->parse("");
566            $this->_listener->expectOnce("startElement", array("a", array("href" => "here.html")));
567            $this->_listener->setReturnValue("startElement", true);
568            $this->assertTrue($this->_parser->acceptStartToken("<a", LEXER_ENTER));
569            $this->assertTrue($this->_parser->acceptStartToken("href", LEXER_MATCHED));
570            $this->assertTrue($this->_parser->acceptAttributeToken("=\"", LEXER_ENTER));
571            $this->assertTrue($this->_parser->acceptAttributeToken("here.html", LEXER_UNMATCHED));
572            $this->assertTrue($this->_parser->acceptAttributeToken("\"", LEXER_EXIT));
573            $this->assertTrue($this->_parser->acceptStartToken(">", LEXER_EXIT));
574        }
575
576        function testLinkStartWithEncodedUrl() {
577            $this->_parser->parse("");
578            $this->_listener->expectOnce(
579                    "startElement",
580                    array("a", array("href" => "here&there.html")));
581            $this->_listener->setReturnValue("startElement", true);
582            $this->assertTrue($this->_parser->acceptStartToken("<a", LEXER_ENTER));
583            $this->assertTrue($this->_parser->acceptStartToken("href", LEXER_MATCHED));
584            $this->assertTrue($this->_parser->acceptAttributeToken("=\"", LEXER_ENTER));
585            $this->assertTrue($this->_parser->acceptAttributeToken("here&amp;there.html", LEXER_UNMATCHED));
586            $this->assertTrue($this->_parser->acceptAttributeToken("\"", LEXER_EXIT));
587            $this->assertTrue($this->_parser->acceptStartToken(">", LEXER_EXIT));
588        }
589
590        function testLinkStartWithId() {
591            $this->_parser->parse("");
592            $this->_listener->expectOnce(
593                    "startElement",
594                    array("a", array("id" => "0")));
595            $this->_listener->setReturnValue("startElement", true);
596            $this->assertTrue($this->_parser->acceptStartToken("<a", LEXER_ENTER));
597            $this->assertTrue($this->_parser->acceptStartToken("id", LEXER_MATCHED));
598            $this->assertTrue($this->_parser->acceptAttributeToken("= \"", LEXER_ENTER));
599            $this->assertTrue($this->_parser->acceptAttributeToken("0", LEXER_UNMATCHED));
600            $this->assertTrue($this->_parser->acceptAttributeToken("\"", LEXER_EXIT));
601            $this->assertTrue($this->_parser->acceptStartToken(">", LEXER_EXIT));
602        }
603
604        function testLinkEnd() {
605            $this->_parser->parse("");
606            $this->_listener->expectOnce("endElement", array("a"));
607            $this->_listener->setReturnValue("endElement", true);
608            $this->assertTrue($this->_parser->acceptEndToken("</a>", LEXER_SPECIAL));
609        }
610
611        function testInput() {
612            $this->_parser->parse("");
613            $this->_listener->expectOnce(
614                    "startElement",
615                    array("input", array("name" => "a")));
616            $this->_listener->setReturnValue("startElement", true);
617            $this->assertTrue($this->_parser->acceptStartToken("<input", LEXER_ENTER));
618            $this->assertTrue($this->_parser->acceptStartToken("name", LEXER_MATCHED));
619            $this->assertTrue($this->_parser->acceptAttributeToken("= a", LEXER_SPECIAL));
620            $this->assertTrue($this->_parser->acceptStartToken(">", LEXER_EXIT));
621        }
622
623        function testButton() {
624            $this->_parser->parse("");
625            $this->_listener->expectOnce(
626                    "startElement",
627                    array("button", array("name" => "a")));
628            $this->_listener->setReturnValue("startElement", true);
629            $this->assertTrue($this->_parser->acceptStartToken("<button", LEXER_ENTER));
630            $this->assertTrue($this->_parser->acceptStartToken("name", LEXER_MATCHED));
631            $this->assertTrue($this->_parser->acceptAttributeToken("= a", LEXER_SPECIAL));
632            $this->assertTrue($this->_parser->acceptStartToken(">", LEXER_EXIT));
633        }
634
635        function testContent() {
636            $this->_parser->parse("");
637            $this->_listener->expectOnce("addContent", array("stuff"));
638            $this->_listener->setReturnValue("addContent", true);
639            $this->assertTrue($this->_parser->acceptTextToken("stuff", LEXER_UNMATCHED));
640        }
641
642        function testIgnore() {
643            $this->_parser->parse("");
644            $this->_listener->expectNever("addContent");
645            $this->assertTrue($this->_parser->ignore("stuff", LEXER_UNMATCHED));
646        }
647    }
648?>