<?php

/**
 * Data-driven unit test for the XHPAST parser.
 *
 * Each `*.test` file in the `data/` directory next to this file is split on
 * lines made of four or more `~` characters into: the source to parse, an
 * options string, and (optionally) the expected readable AST.
 */
final class PHPASTParserTestCase extends PhutilTestCase {

  /**
   * Run every `*.test` file found in the `data/` directory.
   *
   * If the xhpast binary is not available, a build is attempted first; the
   * test is reported as skipped when that build throws.
   *
   * @return void
   */
  public function testParser() {
    if (!PhutilXHPASTBinary::isAvailable()) {
      try {
        PhutilXHPASTBinary::build();
      } catch (Exception $ex) {
        $this->assertSkipped(
          pht('%s is not built or not up to date.', 'xhpast'));
      }
    }

    $dir = dirname(__FILE__).'/data/';
    foreach (Filesystem::listDirectory($dir) as $file) {
      if (preg_match('/\.test$/', $file)) {
        $this->executeParserTest($file, $dir.$file);
      }
    }
  }

  /**
   * Execute a single parser test file and assert on the parser's result.
   *
   * Recognized options are exactly one outcome ("pass", "fail-syntax" or
   * "fail-parse"), plus "comment" (ignored) and "rtrim" (strips trailing
   * whitespace from the source before parsing).
   *
   * @param string Test name, used in assertion and exception messages.
   * @param string Path to the test file on disk.
   * @return void
   * @throws Exception If the file has no separator, names an unknown option,
   *   names more than one outcome, or names no outcome at all.
   */
  private function executeParserTest($name, $file) {
    $contents = Filesystem::readFile($file);
    $contents = preg_split('/^~{4,}\n/m', $contents);

    if (count($contents) < 2) {
      throw new Exception(
        pht(
          "Expected '%s' separating test case and results.",
          '~~~~~~~~~~'));
    }

    // Pad with a trailing null so a file with only two sections (no
    // expected output) still destructures cleanly.
    list($data, $options, $expect) = array_merge($contents, array(null));

    $options = id(new PhutilSimpleOptions())->parse($options);

    $type = null;
    foreach ($options as $key => $value) {
      switch ($key) {
        case 'pass':
        case 'fail-syntax':
        case 'fail-parse':
          if ($type !== null) {
            throw new Exception(
              pht(
                'Test file "%s" unexpectedly specifies multiple expected '.
                'test outcomes.',
                $name));
          }
          $type = $key;
          break;
        case 'comment':
          // Human readable comment providing test case information.
          break;
        case 'rtrim':
          // Allows construction of tests which rely on EOF without newlines.
          $data = rtrim($data);
          break;
        default:
          throw new Exception(
            pht(
              'Test file "%s" has unknown option "%s" in its options '.
              'string.',
              $name,
              $key));
      }
    }

    if ($type === null) {
      throw new Exception(
        pht(
          'Test file "%s" does not specify a test result (like "pass") in '.
          'its options string.',
          $name));
    }

    $future = PhutilXHPASTBinary::getParserFuture($data);
    list($err, $stdout, $stderr) = $future->resolve();

    switch ($type) {
      case 'pass':
        $this->assertEqual(0, $err, pht('Exit code for "%s".', $name));

        // "$expect" is null when the file has no third section; check for
        // that explicitly rather than passing null to strlen().
        if ($expect === null || !strlen($expect)) {
          // If there's no "expect" data in the test case, that's OK.
          break;
        }

        try {
          $stdout = phutil_json_decode($stdout);
        } catch (PhutilJSONParserException $ex) {
          throw new PhutilProxyException(
            pht(
              'Output for test file "%s" is not valid JSON.',
              $name),
            $ex);
        }

        $stdout_nice = $this->newReadableAST($stdout, $data);

        $this->assertEqual(
          $expect,
          $stdout_nice,
          pht('Parser output for "%s".', $name));
        break;
      case 'fail-syntax':
        $this->assertEqual(1, $err, pht('Exit code for "%s".', $name));
        $this->assertTrue(
          (bool)preg_match('/syntax error/', $stderr),
          pht('Expect "syntax error" in stderr for "%s".', $name));
        break;
      default:
        // NOTE(review): "fail-parse" is accepted as an outcome above but has
        // no case here, so a test using it ends up in this branch. Confirm
        // what it should assert before adding a case for it.
        throw new Exception(
          pht(
            'Unknown PHPAST parser test type "%s"!',
            $type));
    }
  }

  /**
   * Build a human-readable, stable, relatively diff-able string representing
   * an AST (both the node tree itself and the accompanying token stream) for
   * use in unit tests.
   *
   * @param map<string, wild> Decoded parser output; the "tree" and "stream"
   *   keys are read.
   * @param string Source text that was handed to the parser.
   * @return string Rendered tree, one or more lines per node and token.
   */
  private function newReadableAST(array $data, $source) {
    $tree = new XHPASTTree($data['tree'], $data['stream'], $source);

    $root = $tree->getRootNode();

    $depth = 0;
    $list = $this->newReadableTreeLines($root, $depth);

    return implode('', $list);
  }

  /**
   * Render a node, its child nodes and the tokens it owns directly as a
   * list of text blocks, recursing into children at `$depth + 1`.
   *
   * @param AASTNode Node to render.
   * @param int Nesting depth of the node; the root is rendered at 0.
   * @return list<string> Blocks produced by @{method:newBlock}, in output
   *   order.
   */
  private function newReadableTreeLines(AASTNode $node, $depth) {
    $out = array();

    try {
      $type_name = $node->getTypeName();
    } catch (Exception $ex) {
      $type_name = sprintf('<INVALID TYPE "%s">', $node->getTypeID());
    }

    $out[] = $this->newBlock($depth, '*', $type_name);

    $tokens = $node->getTokens();

    // "$l" is the token cursor (next token key not yet emitted) and "$r" is
    // the key of the node's last token.
    if ($tokens) {
      $l = head_key($tokens);
      $r = last_key($tokens);
    } else {
      $l = null;
      $r = null;
    }

    // Mixed list of AASTNode children and tokens, in output order.
    $items = array();

    // Token IDs which belong to some child and must not be printed here.
    $child_token_map = array();

    $children = $node->getChildren();
    foreach ($children as $child) {
      $child_tokens = $child->getTokens();

      if ($child_tokens) {
        $child_l = head_key($child_tokens);
        $child_r = last_key($child_tokens);
      } else {
        $child_l = null;
        $child_r = null;
      }

      // Emit the tokens which sit between the cursor and this child.
      if ($l !== null) {
        for ($ii = $l; $ii < $child_l; $ii++) {
          $items[] = $tokens[$ii];
        }
      }

      $items[] = $child;

      if ($child_r !== null) {
        // NOTE: In some cases, child nodes do not appear in token order.
        // That is, the 4th child of a node may use tokens that appear
        // between children 2 and 3. Ideally, we wouldn't have cases of
        // this and wouldn't have a positional AST.

        // Work around this by: never moving the token cursor backwards; and
        // explicitly preventing tokens appearing in any child from being
        // printed at top level.

        for ($ii = $child_l; $ii <= $child_r; $ii++) {
          if (!isset($tokens[$ii])) {
            continue;
          }
          $child_token_map[$tokens[$ii]->getTokenID()] = true;
        }

        $l = max($l, $child_r + 1);
      } else {
        $l = null;
      }
    }

    // Emit whatever tokens remain after the last child.
    if ($l !== null) {
      for ($ii = $l; $ii <= $r; $ii++) {
        $items[] = $tokens[$ii];
      }
    }

    // See above. If we have tokens in the list which are part of a
    // child node that appears later, remove them now.
    foreach ($items as $key => $item) {
      if ($item instanceof AASTToken) {
        $token = $item;
        $token_id = $token->getTokenID();

        if (isset($child_token_map[$token_id])) {
          unset($items[$key]);
        }
      }
    }

    foreach ($items as $item) {
      if ($item instanceof AASTNode) {
        $lines = $this->newReadableTreeLines($item, $depth + 1);
        foreach ($lines as $line) {
          $out[] = $line;
        }
      } else {
        $token_value = $item->getValue();

        $out[] = $this->newBlock($depth + 1, '>', $token_value);
      }
    }

    return $out;
  }

  /**
   * Format one node or token as one or more newline-terminated output lines.
   *
   * Each line is `$depth` spaces, a one-character marker, a space, and the
   * text. The first line uses `$type` as its marker and continuation lines
   * use ".". Text containing a space, backslash, CR, LF, tab or double
   * quote, or text too long for one line, is wrapped in double quotes and
   * split across lines.
   *
   * @param int Nesting depth, used as the indent width.
   * @param string Marker for the first line ("*" for nodes, ">" for tokens).
   * @param string Text to render.
   * @return string Formatted line or lines, each ending in a newline.
   */
  private function newBlock($depth, $type, $text) {
    $output_width = 80;

    // Every line spends "$depth" columns on indent plus two on the marker
    // and the space that follows it.
    $usable_width = ($output_width - $depth - 2);

    $must_escape = false;

    // We must escape the text if it isn't just simple printable characters.
    if (preg_match('/[ \\\\\\r\\n\\t\\"]/', $text)) {
      $must_escape = true;
    }

    // We must escape the text if it has trailing whitespace.
    if (preg_match('/ \z/', $text)) {
      $must_escape = true;
    }

    // We must escape the text if it won't fit on a single line.
    if (strlen($text) > $usable_width) {
      $must_escape = true;
    }

    if (!$must_escape) {
      $lines = array($text);
    } else {
      $vector = phutil_utf8v_combined($text);

      // NOTE(review): the backslash entry maps a backslash to a single
      // backslash, so backslashes pass through unchanged. Left as-is
      // because the expected output in existing test data is compared
      // against this exact rendering.
      $escape_map = array(
        "\r" => '\\r',
        "\n" => '\\n',
        "\t" => '\\t',
        '"' => '\\"',
        '\\' => '\\',
      );

      foreach ($vector as $key => $word) {
        if (isset($escape_map[$word])) {
          $vector[$key] = $escape_map[$word];
        }
      }

      $line_l = '"';
      $line_r = '"';

      $max_width = ($usable_width - strlen($line_l) - strlen($line_r));

      $line = '';
      $len = 0;

      // Greedily pack elements onto the current line, starting a new quoted
      // line whenever the next element would exceed the width.
      $lines = array();
      foreach ($vector as $word) {
        $word_length = phutil_utf8_console_strlen($word);

        if ($len + $word_length > $max_width) {
          $lines[] = $line_l.$line.$line_r;

          $line = '';
          $len = 0;
        }

        $line .= $word;
        $len += $word_length;
      }

      $lines[] = $line_l.$line.$line_r;
    }

    $is_first = true;
    $indent = str_repeat(' ', $depth);

    $output = array();
    foreach ($lines as $line) {
      if ($is_first) {
        $marker = $type;
        $is_first = false;
      } else {
        $marker = '.';
      }

      $output[] = sprintf(
        "%s%s %s\n",
        $indent,
        $marker,
        $line);
    }

    return implode('', $output);
  }

}