<?php
// $Id: parser_test.php,v 1.1 2005/11/09 23:41:18 gsmet Exp $

require_once(dirname(__FILE__) . '/../parser.php');

Mock::generate('SimpleSaxParser');

/**
 *    Tests ParallelRegex::match() with zero, one and several
 *    registered patterns. The constructor flag selects case
 *    sensitive matching (true) or case insensitive matching (false),
 *    as demonstrated by testCaseSensitive()/testCaseInsensitive().
 */
class TestOfParallelRegex extends UnitTestCase {

    /** With no patterns registered nothing matches and $match is empty. */
    function testNoPatterns() {
        $regex = &new ParallelRegex(false);
        $this->assertFalse($regex->match("Hello", $match));
        $this->assertEqual($match, "");
    }

    /** A pattern that can match nothing still succeeds on an empty subject. */
    function testNoSubject() {
        $regex = &new ParallelRegex(false);
        $regex->addPattern(".*");
        $this->assertTrue($regex->match("", $match));
        $this->assertEqual($match, "");
    }

    /** A catch-all pattern returns the whole subject as the match. */
    function testMatchAll() {
        $regex = &new ParallelRegex(false);
        $regex->addPattern(".*");
        $this->assertTrue($regex->match("Hello", $match));
        $this->assertEqual($match, "Hello");
    }

    /** Case sensitive mode skips "ABC" and finds the later "abc". */
    function testCaseSensitive() {
        $regex = &new ParallelRegex(true);
        $regex->addPattern("abc");
        $this->assertTrue($regex->match("abcdef", $match));
        $this->assertEqual($match, "abc");
        $this->assertTrue($regex->match("AAABCabcdef", $match));
        $this->assertEqual($match, "abc");
    }

    /** Case insensitive mode matches the earlier "ABC" with its original case. */
    function testCaseInsensitive() {
        $regex = &new ParallelRegex(false);
        $regex->addPattern("abc");
        $this->assertTrue($regex->match("abcdef", $match));
        $this->assertEqual($match, "abc");
        $this->assertTrue($regex->match("AAABCabcdef", $match));
        $this->assertEqual($match, "ABC");
    }

    /** Several patterns are tried together; the first hit in the subject wins. */
    function testMatchMultiple() {
        $regex = &new ParallelRegex(true);
        $regex->addPattern("abc");
        $regex->addPattern("ABC");
        $this->assertTrue($regex->match("abcdef", $match));
        $this->assertEqual($match, "abc");
        $this->assertTrue($regex->match("AAABCabcdef", $match));
        $this->assertEqual($match, "ABC");
        $this->assertFalse($regex->match("Hello", $match));
    }

    /** When a pattern carries a label, match() returns that label. */
    function testPatternLabels() {
        $regex = &new ParallelRegex(false);
        $regex->addPattern("abc", "letter");
        $regex->addPattern("123", "number");
        $this->assertIdentical($regex->match("abcdef", $match), "letter");
        $this->assertEqual($match, "abc");
        $this->assertIdentical($regex->match("0123456789", $match), "number");
        $this->assertEqual($match, "123");
    }
}

/**
 *    Tests SimpleStateStack: the initial state, entering and
 *    leaving states, and refusing to leave the last state.
 */
class TestOfStateStack extends UnitTestCase {

    /** The state given to the constructor is the current state. */
    function testStartState() {
        $stack = &new SimpleStateStack("one");
        $this->assertEqual($stack->getCurrent(), "one");
    }

    /** leave() reports failure when only the start state remains. */
    function testExhaustion() {
        $stack = &new SimpleStateStack("one");
        $this->assertFalse($stack->leave());
    }

    /** enter()/leave() behave as push/pop on the current state. */
    function testStateMoves() {
        $stack = &new SimpleStateStack("one");
        $stack->enter("two");
        $this->assertEqual($stack->getCurrent(), "two");
        $stack->enter("three");
        $this->assertEqual($stack->getCurrent(), "three");
        $this->assertTrue($stack->leave());
        $this->assertEqual($stack->getCurrent(), "two");
        $stack->enter("third");
        $this->assertEqual($stack->getCurrent(), "third");
        $this->assertTrue($stack->leave());
        $this->assertTrue($stack->leave());
        $this->assertEqual($stack->getCurrent(), "one");
    }
}

/**
 *    Empty handler used only as a template for MockTestParser.
 *    The method names double as lexer mode/handler names in the
 *    lexer tests below.
 */
class TestParser {

    function accept() {
    }

    function a() {
    }

    function b() {
    }
}
Mock::generate('TestParser');

/**
 *    Tests SimpleLexer in its default mode, where every token is
 *    delivered to the handler's accept() method.
 */
class TestOfLexer extends UnitTestCase {

    /** Parsing an empty string succeeds without calling the handler. */
    function testEmptyPage() {
        $handler = &new MockTestParser($this);
        // The original set this expectation and return value twice;
        // once is sufficient.
        $handler->expectNever("accept");
        $handler->setReturnValue("accept", true);
        $lexer = &new SimpleLexer($handler);
        $lexer->addPattern("a+");
        $this->assertTrue($lexer->parse(""));
    }

    /** Matched and unmatched runs are delivered alternately, in order. */
    function testSinglePattern() {
        $handler = &new MockTestParser($this);
        $handler->expectArgumentsAt(0, "accept", array("aaa", LEXER_MATCHED));
        $handler->expectArgumentsAt(1, "accept", array("x", LEXER_UNMATCHED));
        $handler->expectArgumentsAt(2, "accept", array("a", LEXER_MATCHED));
        $handler->expectArgumentsAt(3, "accept", array("yyy", LEXER_UNMATCHED));
        $handler->expectArgumentsAt(4, "accept", array("a", LEXER_MATCHED));
        $handler->expectArgumentsAt(5, "accept", array("x", LEXER_UNMATCHED));
        $handler->expectArgumentsAt(6, "accept", array("aaa", LEXER_MATCHED));
        $handler->expectArgumentsAt(7, "accept", array("z", LEXER_UNMATCHED));
        $handler->expectCallCount("accept", 8);
        $handler->setReturnValue("accept", true);
        $lexer = &new SimpleLexer($handler);
        $lexer->addPattern("a+");
        $this->assertTrue($lexer->parse("aaaxayyyaxaaaz"));
        $handler->tally();
    }

    /** Two patterns in the same mode split the input into the listed tokens. */
    function testMultiplePattern() {
        $handler = &new MockTestParser($this);
        $target = array("a", "b", "a", "bb", "x", "b", "a", "xxxxxx", "a", "x");
        // Only the token text is checked; '*' is a wildcard for the match type.
        foreach ($target as $i => $token) {
            $handler->expectArgumentsAt($i, "accept", array($token, '*'));
        }
        $handler->expectCallCount("accept", count($target));
        $handler->setReturnValue("accept", true);
        $lexer = &new SimpleLexer($handler);
        $lexer->addPattern("a+");
        $lexer->addPattern("b+");
        $this->assertTrue($lexer->parse("ababbxbaxxxxxxax"));
        $handler->tally();
    }
}

/**
 *    Tests SimpleLexer mode handling: patterns bound to a mode,
 *    entry/exit/special patterns and unwinding past the start mode.
 *    Tokens are delivered to the handler method named after the mode.
 */
class TestOfLexerModes extends UnitTestCase {

    /** Patterns registered for mode "b" are ignored while in mode "a". */
    function testIsolatedPattern() {
        $handler = &new MockTestParser($this);
        $handler->expectArgumentsAt(0, "a", array("a", LEXER_MATCHED));
        $handler->expectArgumentsAt(1, "a", array("b", LEXER_UNMATCHED));
        $handler->expectArgumentsAt(2, "a", array("aa", LEXER_MATCHED));
        $handler->expectArgumentsAt(3, "a", array("bxb", LEXER_UNMATCHED));
        $handler->expectArgumentsAt(4, "a", array("aaa", LEXER_MATCHED));
        $handler->expectArgumentsAt(5, "a", array("x", LEXER_UNMATCHED));
        $handler->expectArgumentsAt(6, "a", array("aaaa", LEXER_MATCHED));
        $handler->expectArgumentsAt(7, "a", array("x", LEXER_UNMATCHED));
        $handler->expectCallCount("a", 8);
        $handler->setReturnValue("a", true);
        $lexer = &new SimpleLexer($handler, "a");
        $lexer->addPattern("a+", "a");
        $lexer->addPattern("b+", "b");
        $this->assertTrue($lexer->parse("abaabxbaaaxaaaax"));
        $handler->tally();
    }

    /** An entry pattern switches mode; the entry token goes to the new mode. */
    function testModeChange() {
        $handler = &new MockTestParser($this);
        $handler->expectArgumentsAt(0, "a", array("a", LEXER_MATCHED));
        $handler->expectArgumentsAt(1, "a", array("b", LEXER_UNMATCHED));
        $handler->expectArgumentsAt(2, "a", array("aa", LEXER_MATCHED));
        $handler->expectArgumentsAt(3, "a", array("b", LEXER_UNMATCHED));
        $handler->expectArgumentsAt(4, "a", array("aaa", LEXER_MATCHED));
        $handler->expectArgumentsAt(0, "b", array(":", LEXER_ENTER));
        $handler->expectArgumentsAt(1, "b", array("a", LEXER_UNMATCHED));
        $handler->expectArgumentsAt(2, "b", array("b", LEXER_MATCHED));
        $handler->expectArgumentsAt(3, "b", array("a", LEXER_UNMATCHED));
        $handler->expectArgumentsAt(4, "b", array("bb", LEXER_MATCHED));
        $handler->expectArgumentsAt(5, "b", array("a", LEXER_UNMATCHED));
        $handler->expectArgumentsAt(6, "b", array("bbb", LEXER_MATCHED));
        $handler->expectArgumentsAt(7, "b", array("a", LEXER_UNMATCHED));
        $handler->expectCallCount("a", 5);
        $handler->expectCallCount("b", 8);
        $handler->setReturnValue("a", true);
        $handler->setReturnValue("b", true);
        $lexer = &new SimpleLexer($handler, "a");
        $lexer->addPattern("a+", "a");
        $lexer->addEntryPattern(":", "a", "b");
        $lexer->addPattern("b+", "b");
        $this->assertTrue($lexer->parse("abaabaaa:ababbabbba"));
        $handler->tally();
    }

    /** An exit pattern returns to the enclosing mode after a nested one. */
    function testNesting() {
        $handler = &new MockTestParser($this);
        $handler->setReturnValue("a", true);
        $handler->setReturnValue("b", true);
        $handler->expectArgumentsAt(0, "a", array("aa", LEXER_MATCHED));
        $handler->expectArgumentsAt(1, "a", array("b", LEXER_UNMATCHED));
        $handler->expectArgumentsAt(2, "a", array("aa", LEXER_MATCHED));
        $handler->expectArgumentsAt(3, "a", array("b", LEXER_UNMATCHED));
        $handler->expectArgumentsAt(0, "b", array("(", LEXER_ENTER));
        $handler->expectArgumentsAt(1, "b", array("bb", LEXER_MATCHED));
        $handler->expectArgumentsAt(2, "b", array("a", LEXER_UNMATCHED));
        $handler->expectArgumentsAt(3, "b", array("bb", LEXER_MATCHED));
        $handler->expectArgumentsAt(4, "b", array(")", LEXER_EXIT));
        $handler->expectArgumentsAt(4, "a", array("aa", LEXER_MATCHED));
        $handler->expectArgumentsAt(5, "a", array("b", LEXER_UNMATCHED));
        $handler->expectCallCount("a", 6);
        $handler->expectCallCount("b", 5);
        $lexer = &new SimpleLexer($handler, "a");
        $lexer->addPattern("a+", "a");
        $lexer->addEntryPattern("(", "a", "b");
        $lexer->addPattern("b+", "b");
        $lexer->addExitPattern(")", "b");
        $this->assertTrue($lexer->parse("aabaab(bbabb)aab"));
        $handler->tally();
    }

    /** A special pattern sends a single token to another mode's handler. */
    function testSingular() {
        $handler = &new MockTestParser($this);
        $handler->setReturnValue("a", true);
        $handler->setReturnValue("b", true);
        $handler->expectArgumentsAt(0, "a", array("aa", LEXER_MATCHED));
        $handler->expectArgumentsAt(1, "a", array("aa", LEXER_MATCHED));
        $handler->expectArgumentsAt(2, "a", array("xx", LEXER_UNMATCHED));
        $handler->expectArgumentsAt(3, "a", array("xx", LEXER_UNMATCHED));
        $handler->expectArgumentsAt(0, "b", array("b", LEXER_SPECIAL));
        $handler->expectArgumentsAt(1, "b", array("bbb", LEXER_SPECIAL));
        $handler->expectCallCount("a", 4);
        $handler->expectCallCount("b", 2);
        $lexer = &new SimpleLexer($handler, "a");
        $lexer->addPattern("a+", "a");
        $lexer->addSpecialPattern("b+", "a", "b");
        $this->assertTrue($lexer->parse("aabaaxxbbbxx"));
        $handler->tally();
    }

    /** Exiting from the start mode makes parse() fail after the exit token. */
    function testUnwindTooFar() {
        $handler = &new MockTestParser($this);
        $handler->setReturnValue("a", true);
        $handler->expectArgumentsAt(0, "a", array("aa", LEXER_MATCHED));
        $handler->expectArgumentsAt(1, "a", array(")", LEXER_EXIT));
        $handler->expectCallCount("a", 2);
        $lexer = &new SimpleLexer($handler, "a");
        $lexer->addPattern("a+", "a");
        $lexer->addExitPattern(")", "a");
        $this->assertFalse($lexer->parse("aa)aa"));
        $handler->tally();
    }
}

/**
 *    Tests SimpleLexer::mapHandler(), which routes the tokens of a
 *    mode to a handler method with a different name.
 */
class TestOfLexerHandlers extends UnitTestCase {

    /** Both "mode_a" and "mode_b" are mapped onto the handler's a() method. */
    function testModeMapping() {
        $handler = &new MockTestParser($this);
        $handler->setReturnValue("a", true);
        $handler->expectArgumentsAt(0, "a", array("aa", LEXER_MATCHED));
        $handler->expectArgumentsAt(1, "a", array("(", LEXER_ENTER));
        $handler->expectArgumentsAt(2, "a", array("bb", LEXER_MATCHED));
        $handler->expectArgumentsAt(3, "a", array("a", LEXER_UNMATCHED));
        $handler->expectArgumentsAt(4, "a", array("bb", LEXER_MATCHED));
        $handler->expectArgumentsAt(5, "a", array(")", LEXER_EXIT));
        $handler->expectArgumentsAt(6, "a", array("b", LEXER_UNMATCHED));
        $handler->expectCallCount("a", 7);
        $lexer = &new SimpleLexer($handler, "mode_a");
        $lexer->addPattern("a+", "mode_a");
        $lexer->addEntryPattern("(", "mode_a", "mode_b");
        $lexer->addPattern("b+", "mode_b");
        $lexer->addExitPattern(")", "mode_b");
        $lexer->mapHandler("mode_a", "a");
        $lexer->mapHandler("mode_b", "a");
        $this->assertTrue($lexer->parse("aa(bbabb)b"));
        $handler->tally();
    }
}

// NOTE(review): no MockHtmlSaxParser is used anywhere in this file (the
// tests below use MockSimpleSaxParser, generated at the top) - confirm
// whether this call is still needed.
Mock::generate("HtmlSaxParser");

/**
 *    Tests the lexer built by SimpleSaxParser::createLexer() against
 *    a mocked SimpleSaxParser acting as the token handler. Each test
 *    sets expectations on the handler; tearDown() tallies them.
 */
class TestOfHtmlLexer extends UnitTestCase {
    var $_handler;
    var $_lexer;

    /** Builds a permissive mock handler (every callback returns true) and its lexer. */
    function setUp() {
        $this->_handler = &new MockSimpleSaxParser($this);
        $this->_handler->setReturnValue("acceptStartToken", true);
        $this->_handler->setReturnValue("acceptEndToken", true);
        $this->_handler->setReturnValue("acceptAttributeToken", true);
        $this->_handler->setReturnValue("acceptEntityToken", true);
        $this->_handler->setReturnValue("acceptTextToken", true);
        $this->_handler->setReturnValue("ignore", true);
        $this->_lexer = &SimpleSaxParser::createLexer($this->_handler);
    }

    /** Verifies the call-count expectations set by the test just run. */
    function tearDown() {
        $this->_handler->tally();
    }

    /** Tags the lexer has no pattern for arrive as one text token. */
    function testUninteresting() {
        $this->_handler->expectOnce("acceptTextToken", array("<html></html>", "*"));
        $this->assertTrue($this->_lexer->parse("<html></html>"));
    }

    /** Style blocks go to ignore() and never to acceptTextToken(). */
    function testSkipCss() {
        $this->_handler->expectMaximumCallCount("acceptTextToken", 0);
        $this->_handler->expectAtLeastOnce("ignore");
        $this->assertTrue($this->_lexer->parse("<style>Lot's of styles</style>"));
    }

    /** Script blocks go to ignore() and never to acceptTextToken(). */
    function testSkipJavaScript() {
        $this->_handler->expectMaximumCallCount("acceptTextToken", 0);
        $this->_handler->expectAtLeastOnce("ignore");
        // NOTE(review): the replacement character in this literal looks like
        // an encoding casualty; it is kept byte-for-byte - confirm upstream.
        $this->assertTrue($this->_lexer->parse("<SCRIPT>Javascript code {';:^%^%�$'@\"*(}</SCRIPT>"));
    }

    /** HTML comments go to ignore() and never to acceptTextToken(). */
    function testSkipComments() {
        $this->_handler->expectMaximumCallCount("acceptTextToken", 0);
        $this->_handler->expectAtLeastOnce("ignore");
        $this->assertTrue($this->_lexer->parse("<!-- <style>Lot's of styles</style> -->"));
    }

    /** A title element yields start tokens, a text token and an end token. */
    function testTitleTag() {
        $this->_handler->expectArgumentsAt(0, "acceptStartToken", array("<title", "*"));
        $this->_handler->expectArgumentsAt(1, "acceptStartToken", array(">", "*"));
        $this->_handler->expectCallCount("acceptStartToken", 2);
        $this->_handler->expectOnce("acceptTextToken", array("Hello", "*"));
        $this->_handler->expectOnce("acceptEndToken", array("</title>", "*"));
        $this->assertTrue($this->_lexer->parse("<title>Hello</title>"));
    }

    /** A frameset element is tokenised like the title element above. */
    function testFramesetTag() {
        $this->_handler->expectArgumentsAt(0, "acceptStartToken", array("<frameset", "*"));
        $this->_handler->expectArgumentsAt(1, "acceptStartToken", array(">", "*"));
        $this->_handler->expectCallCount("acceptStartToken", 2);
        $this->_handler->expectOnce("acceptTextToken", array("Frames", "*"));
        $this->_handler->expectOnce("acceptEndToken", array("</frameset>", "*"));
        $this->assertTrue($this->_lexer->parse("<frameset>Frames</frameset>"));
    }

    /** Unquoted attribute values arrive as a single attribute token each. */
    function testInputTag() {
        $this->_handler->expectArgumentsAt(0, "acceptStartToken", array("<input", "*"));
        $this->_handler->expectArgumentsAt(1, "acceptStartToken", array("name", "*"));
        $this->_handler->expectArgumentsAt(2, "acceptStartToken", array("value", "*"));
        $this->_handler->expectArgumentsAt(3, "acceptStartToken", array(">", "*"));
        $this->_handler->expectArgumentsAt(0, "acceptAttributeToken", array("=a.b.c", "*"));
        $this->_handler->expectArgumentsAt(1, "acceptAttributeToken", array("= d", "*"));
        $this->assertTrue($this->_lexer->parse("<input name=a.b.c value = d>"));
    }

    /** An anchor without content yields only start and end tokens for the tag. */
    function testEmptyLink() {
        $this->_handler->expectArgumentsAt(0, "acceptStartToken", array("<a", "*"));
        $this->_handler->expectArgumentsAt(1, "acceptStartToken", array(">", "*"));
        $this->_handler->expectCallCount("acceptStartToken", 2);
        $this->_handler->expectOnce("acceptEndToken", array("</a>", "*"));
        $this->assertTrue($this->_lexer->parse("<html><a></a></html>"));
    }

    /** The anchor label and the surrounding html tags arrive as text tokens. */
    function testLabelledLink() {
        $this->_handler->expectArgumentsAt(0, "acceptStartToken", array("<a", "*"));
        $this->_handler->expectArgumentsAt(1, "acceptStartToken", array(">", "*"));
        $this->_handler->expectCallCount("acceptStartToken", 2);
        $this->_handler->expectOnce("acceptEndToken", array("</a>", "*"));
        $this->_handler->expectArgumentsAt(0, "acceptTextToken", array("<html>", "*"));
        $this->_handler->expectArgumentsAt(1, "acceptTextToken", array("label", "*"));
        $this->_handler->expectArgumentsAt(2, "acceptTextToken", array("</html>", "*"));
        $this->_handler->expectCallCount("acceptTextToken", 3);
        $this->assertTrue($this->_lexer->parse("<html><a>label</a></html>"));
    }

    /** A single-quoted href is split into opening quote, value and closing quote. */
    function testLinkAddress() {
        $this->_handler->expectArgumentsAt(0, "acceptTextToken", array("<html>", "*"));
        $this->_handler->expectArgumentsAt(1, "acceptTextToken", array("label", "*"));
        $this->_handler->expectArgumentsAt(2, "acceptTextToken", array("</html>", "*"));
        $this->_handler->expectCallCount("acceptTextToken", 3);
        $this->_handler->expectArgumentsAt(0, "acceptStartToken", array("<a", "*"));
        $this->_handler->expectArgumentsAt(1, "acceptStartToken", array("href", "*"));
        $this->_handler->expectArgumentsAt(2, "acceptStartToken", array(">", "*"));
        $this->_handler->expectCallCount("acceptStartToken", 3);
        $this->_handler->expectArgumentsAt(0, "acceptAttributeToken", array("= '", "*"));
        $this->_handler->expectArgumentsAt(1, "acceptAttributeToken", array("here.html", "*"));
        $this->_handler->expectArgumentsAt(2, "acceptAttributeToken", array("'", "*"));
        $this->_handler->expectCallCount("acceptAttributeToken", 3);
        $this->assertTrue($this->_lexer->parse("<html><a href = 'here.html'>label</a></html>"));
    }

    /**
     *    The lexer passes the attribute value through unchanged.
     *    NOTE(review): despite the test name the address holds a bare
     *    "&"; an entity may have been lost from these literals -
     *    confirm against the upstream test.
     */
    function testEncodedLinkAddress() {
        $this->_handler->expectArgumentsAt(0, "acceptTextToken", array("<html>", "*"));
        $this->_handler->expectArgumentsAt(1, "acceptTextToken", array("label", "*"));
        $this->_handler->expectArgumentsAt(2, "acceptTextToken", array("</html>", "*"));
        $this->_handler->expectCallCount("acceptTextToken", 3);
        $this->_handler->expectArgumentsAt(0, "acceptStartToken", array("<a", "*"));
        $this->_handler->expectArgumentsAt(1, "acceptStartToken", array("href", "*"));
        $this->_handler->expectArgumentsAt(2, "acceptStartToken", array(">", "*"));
        $this->_handler->expectCallCount("acceptStartToken", 3);
        $this->_handler->expectArgumentsAt(0, "acceptAttributeToken", array("= '", "*"));
        $this->_handler->expectArgumentsAt(1, "acceptAttributeToken", array("here&there.html", "*"));
        $this->_handler->expectArgumentsAt(2, "acceptAttributeToken", array("'", "*"));
        $this->_handler->expectCallCount("acceptAttributeToken", 3);
        $this->assertTrue($this->_lexer->parse("<html><a href = 'here&there.html'>label</a></html>"));
    }

    /** A double-quoted attribute value of "0" is still delivered as a token. */
    function testEmptyLinkWithId() {
        $this->_handler->expectArgumentsAt(0, "acceptTextToken", array("<html>", "*"));
        $this->_handler->expectArgumentsAt(1, "acceptTextToken", array("label", "*"));
        $this->_handler->expectArgumentsAt(2, "acceptTextToken", array("</html>", "*"));
        $this->_handler->expectCallCount("acceptTextToken", 3);
        $this->_handler->expectArgumentsAt(0, "acceptStartToken", array("<a", "*"));
        $this->_handler->expectArgumentsAt(1, "acceptStartToken", array("id", "*"));
        $this->_handler->expectArgumentsAt(2, "acceptStartToken", array(">", "*"));
        $this->_handler->expectCallCount("acceptStartToken", 3);
        $this->_handler->expectArgumentsAt(0, "acceptAttributeToken", array("=\"", "*"));
        $this->_handler->expectArgumentsAt(1, "acceptAttributeToken", array("0", "*"));
        $this->_handler->expectArgumentsAt(2, "acceptAttributeToken", array("\"", "*"));
        $this->_handler->expectCallCount("acceptAttributeToken", 3);
        $this->assertTrue($this->_lexer->parse("<html><a id=\"0\">label</a></html>"));
    }

    /**
     *    Mixed-case tag and attribute names, a valueless attribute and
     *    an escaped quote inside a double-quoted value.
     */
    function testComplexLink() {
        $this->_handler->expectArgumentsAt(0, "acceptStartToken", array("<a", LEXER_ENTER));
        $this->_handler->expectArgumentsAt(1, "acceptStartToken", array("HREF", "*"));
        $this->_handler->expectArgumentsAt(2, "acceptStartToken", array("bool", "*"));
        $this->_handler->expectArgumentsAt(3, "acceptStartToken", array("Style", "*"));
        $this->_handler->expectArgumentsAt(4, "acceptStartToken", array(">", LEXER_EXIT));
        $this->_handler->expectCallCount("acceptStartToken", 5);
        $this->_handler->expectArgumentsAt(0, "acceptAttributeToken", array("= '", "*"));
        $this->_handler->expectArgumentsAt(1, "acceptAttributeToken", array("here.html", LEXER_UNMATCHED));
        $this->_handler->expectArgumentsAt(2, "acceptAttributeToken", array("'", "*"));
        $this->_handler->expectArgumentsAt(3, "acceptAttributeToken", array("=\"", "*"));
        $this->_handler->expectArgumentsAt(4, "acceptAttributeToken", array("'coo", "*"));
        // Single-quoted on purpose: the expected token is a backslash
        // followed by a double quote.
        $this->_handler->expectArgumentsAt(5, "acceptAttributeToken", array('\"', "*"));
        $this->_handler->expectArgumentsAt(6, "acceptAttributeToken", array("l'", "*"));
        $this->_handler->expectArgumentsAt(7, "acceptAttributeToken", array("\"", "*"));
        $this->_handler->expectCallCount("acceptAttributeToken", 8);
        $this->assertTrue($this->_lexer->parse("<HTML><a HREF = 'here.html' bool Style=\"'coo\\\"l'\">label</A></Html>"));
    }

    /** The "/" of a self-closing tag is delivered as its own start token. */
    function testSubmit() {
        $this->_handler->expectArgumentsAt(0, "acceptStartToken", array("<input", LEXER_ENTER));
        $this->_handler->expectArgumentsAt(1, "acceptStartToken", array("type", "*"));
        $this->_handler->expectArgumentsAt(2, "acceptStartToken", array("name", "*"));
        $this->_handler->expectArgumentsAt(3, "acceptStartToken", array("value", "*"));
        $this->_handler->expectArgumentsAt(4, "acceptStartToken", array("/", "*"));
        $this->_handler->expectArgumentsAt(5, "acceptStartToken", array(">", LEXER_EXIT));
        $this->_handler->expectCallCount("acceptStartToken", 6);
        $this->assertTrue($this->_lexer->parse('<input type="submit" name="N" value="V" />'));
    }

    /** Frame markup, with and without a noframes section, parses successfully. */
    function testFramesParsedWithoutError() {
        $this->assertTrue($this->_lexer->parse(
                '<frameset><frame src="frame.html"></frameset>'));
        $this->assertTrue($this->_lexer->parse(
                '<frameset><frame src="frame.html"><noframes>Hello</noframes></frameset>'));
    }
}

/**
 *    Tests SimpleSaxParser::normalise(), called statically, which
 *    reduces markup to plain text.
 */
class TestOfTextExtraction extends UnitTestCase {

    /** Runs of whitespace collapse to single spaces and the ends are trimmed. */
    function testSpaceNormalisation() {
        $this->assertEqual(
                SimpleSaxParser::normalise("\nOne\tTwo \nThree\t"),
                'One Two Three');
    }

    /** Tags are removed, leaving their content. */
    function testTagSuppression() {
        $this->assertEqual(
                SimpleSaxParser::normalise('<b>Hello</b>'),
                'Hello');
    }

    /** Removing adjoining tags does not insert a space between their contents. */
    function testAdjoiningTagSuppression() {
        $this->assertEqual(
                SimpleSaxParser::normalise('<b>Hello</b><em>Goodbye</em>'),
                'HelloGoodbye');
    }

    /** Image alt text is kept for double-quoted, single-quoted and bare values. */
    function testExtractImageAltTextWithDifferentQuotes() {
        $this->assertEqual(
                SimpleSaxParser::normalise('<img alt="One"><img alt=\'Two\'><img alt=Three>'),
                'One Two Three');
    }

    /** Alt text is extracted from every image, not only the first. */
    function testExtractImageAltTextMultipleTimes() {
        $this->assertEqual(
                SimpleSaxParser::normalise('<img alt="One"><img alt="Two"><img alt="Three">'),
                'One Two Three');
    }

    /**
     *    NOTE(review): input and expected value are identical here, so
     *    no entity translation is actually exercised; the input literal
     *    may have lost its entity encoding - confirm against the
     *    upstream test before relying on this case.
     */
    function testHtmlEntityTranslation() {
        $this->assertEqual(
                SimpleSaxParser::normalise('<>"&'),
                '<>"&');
    }
}

/**
 *    SimpleSaxParser variant that hands back an injected lexer from
 *    createLexer(), so the tests can substitute a mock lexer.
 */
class TestSimpleSaxParser extends SimpleSaxParser {
    var $_lexer;

    /**
     *    Stores the lexer before chaining to the parent constructor.
     *    @param object $listener    Passed on to SimpleSaxParser.
     *    @param object $lexer       Lexer returned by createLexer().
     */
    function TestSimpleSaxParser(&$listener, &$lexer) {
        $this->_lexer = &$lexer;
        $this->SimpleSaxParser($listener);
    }

    /** Returns the injected lexer. */
    function &createLexer() {
        return $this->_lexer;
    }
}

Mock::generate("SimpleSaxListener");
Mock::generate("SimpleLexer");

/**
 *    Tests that SimpleSaxParser turns lexer tokens into listener
 *    events, using a mock listener and a mock lexer. The tests feed
 *    tokens directly to the parser's accept*() methods.
 */
class TestOfSaxGeneration extends UnitTestCase {
    var $_listener;
    var $_lexer;
    var $_parser;

    /** Wires a parser to a fresh mock listener and mock lexer. */
    function setUp() {
        $this->_listener = &new MockSimpleSaxListener($this);
        $this->_lexer = &new MockSimpleLexer($this);
        $this->_parser = &new TestSimpleSaxParser($this->_listener, $this->_lexer);
    }

    /** Verifies the call-count expectations on both mocks. */
    function tearDown() {
        $this->_listener->tally();
        $this->_lexer->tally();
    }

    /** parse() reports failure when the lexer does. */
    function testLexerFailure() {
        $this->_lexer->setReturnValue("parse", false);
        $this->assertFalse($this->_parser->parse("<html></html>"));
    }

    /** parse() reports success when the lexer does. */
    function testLexerSuccess() {
        $this->_lexer->setReturnValue("parse", true);
        $this->assertTrue($this->_parser->parse("<html></html>"));
    }

    /** An anchor without attributes raises startElement("a", array()). */
    function testSimpleLinkStart() {
        $this->_parser->parse("");
        $this->_listener->expectOnce("startElement", array("a", array()));
        $this->_listener->setReturnValue("startElement", true);
        $this->assertTrue($this->_parser->acceptStartToken("<a", LEXER_ENTER));
        $this->assertTrue($this->_parser->acceptStartToken(">", LEXER_EXIT));
    }

    /** A title tag without attributes raises startElement("title", array()). */
    function testSimpleTitleStart() {
        $this->_parser->parse("");
        $this->_listener->expectOnce("startElement", array("title", array()));
        $this->_listener->setReturnValue("startElement", true);
        $this->assertTrue($this->_parser->acceptStartToken("<title", LEXER_ENTER));
        $this->assertTrue($this->_parser->acceptStartToken(">", LEXER_EXIT));
    }

    /** A quoted href is collected into the attribute array. */
    function testLinkStart() {
        $this->_parser->parse("");
        $this->_listener->expectOnce("startElement", array("a", array("href" => "here.html")));
        $this->_listener->setReturnValue("startElement", true);
        $this->assertTrue($this->_parser->acceptStartToken("<a", LEXER_ENTER));
        $this->assertTrue($this->_parser->acceptStartToken("href", LEXER_MATCHED));
        $this->assertTrue($this->_parser->acceptAttributeToken("=\"", LEXER_ENTER));
        $this->assertTrue($this->_parser->acceptAttributeToken("here.html", LEXER_UNMATCHED));
        $this->assertTrue($this->_parser->acceptAttributeToken("\"", LEXER_EXIT));
        $this->assertTrue($this->_parser->acceptStartToken(">", LEXER_EXIT));
    }

    /**
     *    NOTE(review): the token fed in and the expected href are the
     *    same string, so no decoding is exercised despite the test
     *    name; an entity may have been lost from the input literal -
     *    confirm against the upstream test.
     */
    function testLinkStartWithEncodedUrl() {
        $this->_parser->parse("");
        $this->_listener->expectOnce(
                "startElement",
                array("a", array("href" => "here&there.html")));
        $this->_listener->setReturnValue("startElement", true);
        $this->assertTrue($this->_parser->acceptStartToken("<a", LEXER_ENTER));
        $this->assertTrue($this->_parser->acceptStartToken("href", LEXER_MATCHED));
        $this->assertTrue($this->_parser->acceptAttributeToken("=\"", LEXER_ENTER));
        $this->assertTrue($this->_parser->acceptAttributeToken("here&there.html", LEXER_UNMATCHED));
        $this->assertTrue($this->_parser->acceptAttributeToken("\"", LEXER_EXIT));
        $this->assertTrue($this->_parser->acceptStartToken(">", LEXER_EXIT));
    }

    /** An attribute value of "0" is kept rather than dropped. */
    function testLinkStartWithId() {
        $this->_parser->parse("");
        $this->_listener->expectOnce(
                "startElement",
                array("a", array("id" => "0")));
        $this->_listener->setReturnValue("startElement", true);
        $this->assertTrue($this->_parser->acceptStartToken("<a", LEXER_ENTER));
        $this->assertTrue($this->_parser->acceptStartToken("id", LEXER_MATCHED));
        $this->assertTrue($this->_parser->acceptAttributeToken("= \"", LEXER_ENTER));
        $this->assertTrue($this->_parser->acceptAttributeToken("0", LEXER_UNMATCHED));
        $this->assertTrue($this->_parser->acceptAttributeToken("\"", LEXER_EXIT));
        $this->assertTrue($this->_parser->acceptStartToken(">", LEXER_EXIT));
    }

    /** A closing tag token raises endElement() with the bare tag name. */
    function testLinkEnd() {
        $this->_parser->parse("");
        $this->_listener->expectOnce("endElement", array("a"));
        $this->_listener->setReturnValue("endElement", true);
        $this->assertTrue($this->_parser->acceptEndToken("</a>", LEXER_SPECIAL));
    }

    /** An unquoted "= a" attribute token yields the value "a". */
    function testInput() {
        $this->_parser->parse("");
        $this->_listener->expectOnce(
                "startElement",
                array("input", array("name" => "a")));
        $this->_listener->setReturnValue("startElement", true);
        $this->assertTrue($this->_parser->acceptStartToken("<input", LEXER_ENTER));
        $this->assertTrue($this->_parser->acceptStartToken("name", LEXER_MATCHED));
        $this->assertTrue($this->_parser->acceptAttributeToken("= a", LEXER_SPECIAL));
        $this->assertTrue($this->_parser->acceptStartToken(">", LEXER_EXIT));
    }

    /** Same as testInput(), for a button tag. */
    function testButton() {
        $this->_parser->parse("");
        $this->_listener->expectOnce(
                "startElement",
                array("button", array("name" => "a")));
        $this->_listener->setReturnValue("startElement", true);
        $this->assertTrue($this->_parser->acceptStartToken("<button", LEXER_ENTER));
        $this->assertTrue($this->_parser->acceptStartToken("name", LEXER_MATCHED));
        $this->assertTrue($this->_parser->acceptAttributeToken("= a", LEXER_SPECIAL));
        $this->assertTrue($this->_parser->acceptStartToken(">", LEXER_EXIT));
    }

    /** A text token is forwarded to the listener's addContent(). */
    function testContent() {
        $this->_parser->parse("");
        $this->_listener->expectOnce("addContent", array("stuff"));
        $this->_listener->setReturnValue("addContent", true);
        $this->assertTrue($this->_parser->acceptTextToken("stuff", LEXER_UNMATCHED));
    }

    /** ignore() accepts the token without passing content to the listener. */
    function testIgnore() {
        $this->_parser->parse("");
        $this->_listener->expectNever("addContent");
        $this->assertTrue($this->_parser->ignore("stuff", LEXER_UNMATCHED));
    }
}
?>