1<?php
2
/**
 * Runs the xhpast parser over the `*.test` fixture files stored in the
 * `data/` directory next to this file and checks the parser exit code and,
 * when the fixture provides one, a human-readable rendering of the AST.
 */
final class PHPASTParserTestCase extends PhutilTestCase {

  /**
   * Execute every `*.test` file found in the `data/` directory.
   *
   * If the xhpast binary is not available, a build is attempted first; when
   * the build throws, the failure is handed to `assertSkipped()`.
   */
  public function testParser() {
    if (!PhutilXHPASTBinary::isAvailable()) {
      try {
        PhutilXHPASTBinary::build();
      } catch (Exception $ex) {
        $this->assertSkipped(
          pht('%s is not built or not up to date.', 'xhpast'));
      }
    }

    $dir = dirname(__FILE__).'/data/';
    foreach (Filesystem::listDirectory($dir) as $file) {
      if (preg_match('/\.test$/', $file)) {
        $this->executeParserTest($file, $dir.$file);
      }
    }
  }

  /**
   * Run a single parser test file.
   *
   * The file is split on lines made of four or more "~" characters into the
   * source to parse, an options string, and an optional expected rendering.
   * The options string must name exactly one expected outcome.
   *
   * @param string $name File name, used only in messages.
   * @param string $file Path of the test file to read.
   * @return void
   * @throws Exception When the file is malformed, specifies zero or several
   *   outcomes, uses an unknown option, or names an unhandled outcome.
   */
  private function executeParserTest($name, $file) {
    $contents = Filesystem::readFile($file);
    $contents = preg_split('/^~{4,}\n/m', $contents);

    if (count($contents) < 2) {
      throw new Exception(
        pht(
          "Expected '%s' separating test case and results.",
          '~~~~~~~~~~'));
    }

    // Pad with a trailing null so that a file without an expectation section
    // still destructures cleanly; in that case $expect is null.
    list($data, $options, $expect) = array_merge($contents, array(null));

    $options = id(new PhutilSimpleOptions())->parse($options);

    $type = null;
    foreach ($options as $key => $value) {
      switch ($key) {
        case 'pass':
        case 'fail-syntax':
        case 'fail-parse':
          // NOTE: "fail-parse" is accepted here, but the outcome switch below
          // has no handler for it, so it ends in the "Unknown ... test type"
          // exception.
          if ($type !== null) {
            throw new Exception(
              pht(
                'Test file "%s" unexpectedly specifies multiple expected '.
                'test outcomes.',
                $name));
          }
          $type = $key;
          break;
        case 'comment':
          // Human readable comment providing test case information.
          break;
        case 'rtrim':
          // Allows construction of tests which rely on EOF without newlines.
          $data = rtrim($data);
          break;
        default:
          throw new Exception(
            pht(
              'Test file "%s" has unknown option "%s" in its options '.
              'string.',
              $name,
              $key));
      }
    }

    if ($type === null) {
      throw new Exception(
        pht(
          'Test file "%s" does not specify a test result (like "pass") in '.
          'its options string.',
          $name));
    }

    $future = PhutilXHPASTBinary::getParserFuture($data);
    list($err, $stdout, $stderr) = $future->resolve();

    switch ($type) {
      case 'pass':
        $this->assertEqual(0, $err, pht('Exit code for "%s".', $name));

        // $expect is null when the file has no expectation section. Check
        // for null explicitly: passing null to strlen() is deprecated as of
        // PHP 8.1.
        if ($expect === null || !strlen($expect)) {
          // If there's no "expect" data in the test case, that's OK.
          break;
        }

        try {
          $stdout = phutil_json_decode($stdout);
        } catch (PhutilJSONParserException $ex) {
          throw new PhutilProxyException(
            pht(
              'Output for test file "%s" is not valid JSON.',
              $name),
            $ex);
        }

        $stdout_nice = $this->newReadableAST($stdout, $data);

        $this->assertEqual(
          $expect,
          $stdout_nice,
          pht('Parser output for "%s".', $name));
        break;
      case 'fail-syntax':
        $this->assertEqual(1, $err, pht('Exit code for "%s".', $name));
        $this->assertTrue(
          (bool)preg_match('/syntax error/', $stderr),
          pht('Expect "syntax error" in stderr for "%s".', $name));
        break;
      default:
        throw new Exception(
          pht(
            'Unknown PHPAST parser test type "%s"!',
            $type));
    }
  }

  /**
   * Build a human-readable, stable, relatively diff-able string representing
   * an AST (both the node tree itself and the accompanying token stream) for
   * use in unit tests.
   *
   * @param array $data Decoded parser output; the "tree" and "stream" keys
   *   are read.
   * @param string $source Source text that was handed to the parser.
   * @return string Concatenation of all rendered lines.
   */
  private function newReadableAST(array $data, $source) {
    $tree = new XHPASTTree($data['tree'], $data['stream'], $source);

    $root = $tree->getRootNode();

    $depth = 0;
    $list = $this->newReadableTreeLines($root, $depth);

    return implode('', $list);
  }

  /**
   * Render a node, its own tokens and its children, recursively.
   *
   * The node's type name is emitted first with a "*" marker. Tokens that
   * belong to the node but to none of its children are emitted with a ">"
   * marker, interleaved with the children's own renderings one level deeper.
   *
   * @param AASTNode $node Node to render.
   * @param int $depth Nesting depth, used as the indentation width.
   * @return list<string> Rendered blocks, each terminated by a newline.
   */
  private function newReadableTreeLines(AASTNode $node, $depth) {
    $out = array();

    try {
      $type_name = $node->getTypeName();
    } catch (Exception $ex) {
      $type_name = sprintf('<INVALID TYPE "%s">', $node->getTypeID());
    }

    $out[] = $this->newBlock($depth, '*', $type_name);

    $tokens = $node->getTokens();

    // $l is the cursor into this node's tokens; $r is the last token key.
    // Both are null when the node has no tokens.
    if ($tokens) {
      $l = head_key($tokens);
      $r = last_key($tokens);
    } else {
      $l = null;
      $r = null;
    }

    // Mixed, ordered list of tokens and child nodes to print.
    $items = array();

    // Token IDs that are covered by some child's token range.
    $child_token_map = array();

    $children = $node->getChildren();
    foreach ($children as $child) {
      $child_tokens = $child->getTokens();

      if ($child_tokens) {
        $child_l = head_key($child_tokens);
        $child_r = last_key($child_tokens);
      } else {
        $child_l = null;
        $child_r = null;
      }

      // Emit this node's tokens that precede the child.
      if ($l !== null) {
        for ($ii = $l; $ii < $child_l; $ii++) {
          $items[] = $tokens[$ii];
        }
      }

      $items[] = $child;

      if ($child_r !== null) {
        // NOTE: In some cases, child nodes do not appear in token order.
        // That is, the 4th child of a node may use tokens that appear
        // between children 2 and 3. Ideally, we wouldn't have cases of
        // this and wouldn't have a positional AST.

        // Work around this by: never moving the token cursor backwards; and
        // explicitly preventing tokens appearing in any child from being
        // printed at top level.

        for ($ii = $child_l; $ii <= $child_r; $ii++) {
          if (!isset($tokens[$ii])) {
            continue;
          }
          $child_token_map[$tokens[$ii]->getTokenID()] = true;
        }

        $l = max($l, $child_r + 1);
      } else {
        $l = null;
      }
    }

    // Emit any of this node's tokens that follow the last child.
    if ($l !== null) {
      for ($ii = $l; $ii <= $r; $ii++) {
        $items[] = $tokens[$ii];
      }
    }

    // See above. If we have tokens in the list which are part of a
    // child node that appears later, remove them now.
    foreach ($items as $key => $item) {
      if ($item instanceof AASTToken) {
        $token = $item;
        $token_id = $token->getTokenID();

        if (isset($child_token_map[$token_id])) {
          unset($items[$key]);
        }
      }
    }

    foreach ($items as $item) {
      if ($item instanceof AASTNode) {
        $lines = $this->newReadableTreeLines($item, $depth + 1);
        foreach ($lines as $line) {
          $out[] = $line;
        }
      } else {
        $token_value = $item->getValue();

        $out[] = $this->newBlock($depth + 1, '>', $token_value);
      }
    }

    return $out;
  }

  /**
   * Format one piece of text as an indented block of one or more lines.
   *
   * Simple text is printed verbatim. Text containing spaces, backslashes,
   * carriage returns, newlines, tabs or double quotes, or text too long for
   * one line, is wrapped in double quotes and split over several lines. The
   * first line carries the given marker; continuation lines carry ".".
   *
   * @param int $depth Number of spaces of indentation.
   * @param string $type Marker printed on the first line.
   * @param string $text Text to render.
   * @return string Rendered block; every line ends with a newline.
   */
  private function newBlock($depth, $type, $text) {
    $output_width = 80;
    $usable_width = ($output_width - $depth - 2);

    $must_escape = false;

    // We must escape the text if it isn't just simple printable characters.
    if (preg_match('/[ \\\\\\r\\n\\t\\"]/', $text)) {
      $must_escape = true;
    }

    // We must escape the text if it has trailing whitespace.
    if (preg_match('/ \z/', $text)) {
      $must_escape = true;
    }

    // We must escape the text if it won't fit on a single line.
    if (strlen($text) > $usable_width) {
      $must_escape = true;
    }

    if (!$must_escape) {
      $lines = array($text);
    } else {
      $vector = phutil_utf8v_combined($text);

      // NOTE(review): a backslash maps to itself here, so backslashes are
      // emitted unescaped. Left as-is because this rendering is compared
      // against the expectations stored in the test files.
      $escape_map = array(
        "\r" => '\\r',
        "\n" => '\\n',
        "\t" => '\\t',
        '"' => '\\"',
        '\\' => '\\',
      );

      foreach ($vector as $key => $word) {
        if (isset($escape_map[$word])) {
          $vector[$key] = $escape_map[$word];
        }
      }

      $line_l = '"';
      $line_r = '"';

      // Width left for content once the surrounding quotes are accounted for.
      $max_width = ($usable_width - strlen($line_l) - strlen($line_r));

      $line = '';
      $len = 0;

      $lines = array();
      foreach ($vector as $word) {
        $word_length = phutil_utf8_console_strlen($word);

        // Start a new quoted line when this word would overflow the current
        // one.
        if ($len + $word_length > $max_width) {
          $lines[] = $line_l.$line.$line_r;

          $line = '';
          $len = 0;
        }

        $line .= $word;
        $len += $word_length;
      }

      $lines[] = $line_l.$line.$line_r;
    }

    $is_first = true;
    $indent = str_repeat(' ', $depth);

    $output = array();
    foreach ($lines as $line) {
      if ($is_first) {
        $marker = $type;
        $is_first = false;
      } else {
        $marker = '.';
      }

      $output[] = sprintf(
        "%s%s %s\n",
        $indent,
        $marker,
        $line);
    }

    return implode('', $output);
  }

}
331