1#!/usr/bin/env python
2# Copyright 2016 The PDFium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6import functools
7import multiprocessing
8import optparse
9import os
10import re
11import shutil
12import subprocess
13import sys
14
15# pylint: disable=relative-import
16import common
17import gold
18import pngdiffer
19import suppressor
20
# Fixed timestamp, in seconds since the epoch, passed to pdfium_test through
# --time so that tests which depend on the current time give stable results.
# The value happens to be the time of the first commit to the repo,
# 2014/5/9 17:48:50.
TEST_SEED_TIME = '1399672130'

# Test types that are checked with text comparison rather than pixel
# comparison.
TEXT_TESTS = ['javascript']
28
29
class KeyboardInterruptError(Exception):
  """Regular exception raised in place of KeyboardInterrupt.

  TestOneFileParallel() raises this when it catches a KeyboardInterrupt, so
  the interrupt leaves the worker function as an ordinary Exception subclass.
  """
32
33
34# Nomenclature:
35#   x_root - "x"
36#   x_filename - "x.ext"
37#   x_path - "path/to/a/b/c/x.ext"
38#   c_dir - "path/to/a/b/c"
39
40
def TestOneFileParallel(this, test_case):
  """Runs one test case through this.GenerateAndTest().

  Args:
    this: object providing GenerateAndTest(input_filename, source_dir).
    test_case: an (input_filename, source_dir) pair.

  Returns:
    A (result, input_filename, source_dir) tuple, where result is whatever
    GenerateAndTest() returned.

  Raises:
    KeyboardInterruptError: if a KeyboardInterrupt arrives while the test
      case is running.
  """
  try:
    name, directory = test_case
    return (this.GenerateAndTest(name, directory), name, directory)
  except KeyboardInterrupt:
    # Re-raise as a plain Exception subclass.
    raise KeyboardInterruptError()
49
50
def DeleteFiles(files):
  """Removes every path in |files| that exists; missing paths are skipped."""
  for path in files:
    if not os.path.exists(path):
      continue
    os.remove(path)
56
57
58class TestRunner:
59
60  def __init__(self, dirname):
61    # Currently the only used directories are corpus, javascript, and pixel,
62    # which all correspond directly to the type for the test being run. In the
63    # future if there are tests that don't have this clean correspondence, then
64    # an argument for the type will need to be added.
65    self.test_dir = dirname
66    self.test_type = dirname
67    self.delete_output_on_success = False
68    self.enforce_expected_images = False
69    self.oneshot_renderer = False
70
71  # GenerateAndTest returns a tuple <success, outputfiles> where
72  # success is a boolean indicating whether the tests passed comparison
  # tests and outputfiles is a list of tuples:
74  #          (path_to_image, md5_hash_of_pixelbuffer)
75  def GenerateAndTest(self, input_filename, source_dir):
76    input_root, _ = os.path.splitext(input_filename)
77    pdf_path = os.path.join(self.working_dir, input_root + '.pdf')
78
79    # Remove any existing generated images from previous runs.
80    actual_images = self.image_differ.GetActualFiles(input_filename, source_dir,
81                                                     self.working_dir)
82    DeleteFiles(actual_images)
83
84    sys.stdout.flush()
85
86    raised_exception = self.Generate(source_dir, input_filename, input_root,
87                                     pdf_path)
88
89    if raised_exception is not None:
90      print 'FAILURE: %s; %s' % (input_filename, raised_exception)
91      return False, []
92
93    results = []
94    if self.test_type in TEXT_TESTS:
95      expected_txt_path = os.path.join(source_dir, input_root + '_expected.txt')
96      raised_exception = self.TestText(input_filename, input_root,
97                                       expected_txt_path, pdf_path)
98    else:
99      use_ahem = 'use_ahem' in source_dir
100      raised_exception, results = self.TestPixel(pdf_path, use_ahem)
101
102    if raised_exception is not None:
103      print 'FAILURE: %s; %s' % (input_filename, raised_exception)
104      return False, results
105
106    if actual_images:
107      if self.image_differ.HasDifferences(input_filename, source_dir,
108                                          self.working_dir):
109        self.RegenerateIfNeeded_(input_filename, source_dir)
110        return False, results
111    else:
112      if (self.enforce_expected_images and
113          not self.test_suppressor.IsImageDiffSuppressed(input_filename)):
114        self.RegenerateIfNeeded_(input_filename, source_dir)
115        print 'FAILURE: %s; Missing expected images' % input_filename
116        return False, results
117
118    if self.delete_output_on_success:
119      DeleteFiles(actual_images)
120    return True, results
121
122  def RegenerateIfNeeded_(self, input_filename, source_dir):
123    if (not self.options.regenerate_expected or
124        self.test_suppressor.IsResultSuppressed(input_filename) or
125        self.test_suppressor.IsImageDiffSuppressed(input_filename)):
126      return
127
128    platform_only = (self.options.regenerate_expected == 'platform')
129    self.image_differ.Regenerate(input_filename, source_dir, self.working_dir,
130                                 platform_only)
131
132  def Generate(self, source_dir, input_filename, input_root, pdf_path):
133    original_path = os.path.join(source_dir, input_filename)
134    input_path = os.path.join(source_dir, input_root + '.in')
135
136    input_event_path = os.path.join(source_dir, input_root + '.evt')
137    if os.path.exists(input_event_path):
138      output_event_path = os.path.splitext(pdf_path)[0] + '.evt'
139      shutil.copyfile(input_event_path, output_event_path)
140
141    if not os.path.exists(input_path):
142      if os.path.exists(original_path):
143        shutil.copyfile(original_path, pdf_path)
144      return None
145
146    sys.stdout.flush()
147
148    return common.RunCommand([
149        sys.executable, self.fixup_path, '--output-dir=' + self.working_dir,
150        input_path
151    ])
152
153  def TestText(self, input_filename, input_root, expected_txt_path, pdf_path):
154    txt_path = os.path.join(self.working_dir, input_root + '.txt')
155
156    with open(txt_path, 'w') as outfile:
157      cmd_to_run = [
158          self.pdfium_test_path, '--send-events', '--time=' + TEST_SEED_TIME
159      ]
160
161      if self.options.disable_javascript:
162        cmd_to_run.append('--disable-javascript')
163
164      if self.options.disable_xfa:
165        cmd_to_run.append('--disable-xfa')
166
167      cmd_to_run.append(pdf_path)
168      subprocess.check_call(cmd_to_run, stdout=outfile)
169
170    # If the expected file does not exist, the output is expected to be empty.
171    if not os.path.exists(expected_txt_path):
172      return self._VerifyEmptyText(txt_path)
173
174    # If JavaScript is disabled, the output should be empty.
175    # However, if the test is suppressed and JavaScript is disabled, do not
176    # verify that the text is empty so the suppressed test does not surprise.
177    if (self.options.disable_javascript and
178        not self.test_suppressor.IsResultSuppressed(input_filename)):
179      return self._VerifyEmptyText(txt_path)
180
181    cmd = [sys.executable, self.text_diff_path, expected_txt_path, txt_path]
182    return common.RunCommand(cmd)
183
184  def _VerifyEmptyText(self, txt_path):
185    try:
186      with open(txt_path, "r") as txt_file:
187        txt_data = txt_file.readlines()
188      if not len(txt_data):
189        return None
190      sys.stdout.write('Unexpected output:\n')
191      for line in txt_data:
192        sys.stdout.write(line)
193      raise Exception('%s should be empty.' % txt_path)
194    except Exception as e:
195      return e
196
197  def TestPixel(self, pdf_path, use_ahem):
198    cmd_to_run = [
199        self.pdfium_test_path, '--send-events', '--png', '--md5',
200        '--time=' + TEST_SEED_TIME
201    ]
202
203    if self.oneshot_renderer:
204      cmd_to_run.append('--render-oneshot')
205
206    if use_ahem:
207      cmd_to_run.append('--font-dir=%s' % self.font_dir)
208
209    if self.options.disable_javascript:
210      cmd_to_run.append('--disable-javascript')
211
212    if self.options.disable_xfa:
213      cmd_to_run.append('--disable-xfa')
214
215    if self.options.reverse_byte_order:
216      cmd_to_run.append('--reverse-byte-order')
217
218    cmd_to_run.append(pdf_path)
219    return common.RunCommandExtractHashedFiles(cmd_to_run)
220
221  def HandleResult(self, input_filename, input_path, result):
222    success, image_paths = result
223
224    if image_paths:
225      for img_path, md5_hash in image_paths:
226        # The output filename without image extension becomes the test name.
227        # For example, "/path/to/.../testing/corpus/example_005.pdf.0.png"
228        # becomes "example_005.pdf.0".
229        test_name = os.path.splitext(os.path.split(img_path)[1])[0]
230
231        matched = "suppressed"
232        if not self.test_suppressor.IsResultSuppressed(input_filename):
233          matched = self.gold_baseline.MatchLocalResult(test_name, md5_hash)
234          if matched == gold.GoldBaseline.MISMATCH:
235            print 'Skia Gold hash mismatch for test case: %s' % test_name
236          elif matched == gold.GoldBaseline.NO_BASELINE:
237            print 'No Skia Gold baseline found for test case: %s' % test_name
238
239        if self.gold_results:
240          self.gold_results.AddTestResult(test_name, md5_hash, img_path,
241                                          matched)
242
243    if self.test_suppressor.IsResultSuppressed(input_filename):
244      self.result_suppressed_cases.append(input_filename)
245      if success:
246        self.surprises.append(input_path)
247    else:
248      if not success:
249        self.failures.append(input_path)
250
251  def Run(self):
252    # Running a test defines a number of attributes on the fly.
253    # pylint: disable=attribute-defined-outside-init
254
255    parser = optparse.OptionParser()
256
257    parser.add_option(
258        '--build-dir',
259        default=os.path.join('out', 'Debug'),
260        help='relative path from the base source directory')
261
262    parser.add_option(
263        '-j',
264        default=multiprocessing.cpu_count(),
265        dest='num_workers',
266        type='int',
267        help='run NUM_WORKERS jobs in parallel')
268
269    parser.add_option(
270        '--disable-javascript',
271        action="store_true",
272        dest="disable_javascript",
273        help='Prevents JavaScript from executing in PDF files.')
274
275    parser.add_option(
276        '--disable-xfa',
277        action="store_true",
278        dest="disable_xfa",
279        help='Prevents processing XFA forms.')
280
281    parser.add_option(
282        '--gold_properties',
283        default='',
284        dest="gold_properties",
285        help='Key value pairs that are written to the top level '
286        'of the JSON file that is ingested by Gold.')
287
288    parser.add_option(
289        '--gold_key',
290        default='',
291        dest="gold_key",
292        help='Key value pairs that are added to the "key" field '
293        'of the JSON file that is ingested by Gold.')
294
295    parser.add_option(
296        '--gold_output_dir',
297        default='',
298        dest="gold_output_dir",
299        help='Path of where to write the JSON output to be '
300        'uploaded to Gold.')
301
302    parser.add_option(
303        '--gold_ignore_hashes',
304        default='',
305        dest="gold_ignore_hashes",
306        help='Path to a file with MD5 hashes we wish to ignore.')
307
308    parser.add_option(
309        '--regenerate_expected',
310        default='',
311        dest="regenerate_expected",
312        help='Regenerates expected images. Valid values are '
313        '"all" to regenerate all expected pngs, and '
314        '"platform" to regenerate only platform-specific '
315        'expected pngs.')
316
317    parser.add_option(
318        '--reverse-byte-order',
319        action='store_true',
320        dest="reverse_byte_order",
321        help='Run image-based tests using --reverse-byte-order.')
322
323    parser.add_option(
324        '--ignore_errors',
325        action="store_true",
326        dest="ignore_errors",
327        help='Prevents the return value from being non-zero '
328        'when image comparison fails.')
329
330    self.options, self.args = parser.parse_args()
331
332    if (self.options.regenerate_expected and
333        self.options.regenerate_expected not in ['all', 'platform']):
334      print 'FAILURE: --regenerate_expected must be "all" or "platform"'
335      return 1
336
337    finder = common.DirectoryFinder(self.options.build_dir)
338    self.fixup_path = finder.ScriptPath('fixup_pdf_template.py')
339    self.text_diff_path = finder.ScriptPath('text_diff.py')
340    self.font_dir = os.path.join(finder.TestingDir(), 'resources', 'fonts')
341
342    self.source_dir = finder.TestingDir()
343    if self.test_dir != 'corpus':
344      test_dir = finder.TestingDir(os.path.join('resources', self.test_dir))
345    else:
346      test_dir = finder.TestingDir(self.test_dir)
347
348    self.pdfium_test_path = finder.ExecutablePath('pdfium_test')
349    if not os.path.exists(self.pdfium_test_path):
350      print "FAILURE: Can't find test executable '%s'" % self.pdfium_test_path
351      print 'Use --build-dir to specify its location.'
352      return 1
353
354    self.working_dir = finder.WorkingDir(os.path.join('testing', self.test_dir))
355    shutil.rmtree(self.working_dir, ignore_errors=True)
356    os.makedirs(self.working_dir)
357
358    self.features = subprocess.check_output(
359        [self.pdfium_test_path, '--show-config']).strip().split(',')
360    self.test_suppressor = suppressor.Suppressor(
361        finder, self.features, self.options.disable_javascript,
362        self.options.disable_xfa)
363    self.image_differ = pngdiffer.PNGDiffer(finder,
364                                            self.options.reverse_byte_order)
365    error_message = self.image_differ.CheckMissingTools(
366        self.options.regenerate_expected)
367    if error_message:
368      print "FAILURE: %s" % error_message
369      return 1
370
371    self.gold_baseline = gold.GoldBaseline(self.options.gold_properties)
372
373    walk_from_dir = finder.TestingDir(test_dir)
374
375    self.test_cases = []
376    self.execution_suppressed_cases = []
377    input_file_re = re.compile('^.+[.](in|pdf)$')
378    if self.args:
379      for file_name in self.args:
380        file_name.replace('.pdf', '.in')
381        input_path = os.path.join(walk_from_dir, file_name)
382        if not os.path.isfile(input_path):
383          print "Can't find test file '%s'" % file_name
384          return 1
385
386        self.test_cases.append((os.path.basename(input_path),
387                                os.path.dirname(input_path)))
388    else:
389      for file_dir, _, filename_list in os.walk(walk_from_dir):
390        for input_filename in filename_list:
391          if input_file_re.match(input_filename):
392            input_path = os.path.join(file_dir, input_filename)
393            if self.test_suppressor.IsExecutionSuppressed(input_path):
394              self.execution_suppressed_cases.append(input_path)
395            else:
396              if os.path.isfile(input_path):
397                self.test_cases.append((input_filename, file_dir))
398
399    self.test_cases.sort()
400    self.failures = []
401    self.surprises = []
402    self.result_suppressed_cases = []
403
404    # Collect Gold results if an output directory was named.
405    self.gold_results = None
406    if self.options.gold_output_dir:
407      self.gold_results = gold.GoldResults(
408          self.test_type, self.options.gold_output_dir,
409          self.options.gold_properties, self.options.gold_key,
410          self.options.gold_ignore_hashes)
411
412    if self.options.num_workers > 1 and len(self.test_cases) > 1:
413      try:
414        pool = multiprocessing.Pool(self.options.num_workers)
415        worker_func = functools.partial(TestOneFileParallel, self)
416
417        worker_results = pool.imap(worker_func, self.test_cases)
418        for worker_result in worker_results:
419          result, input_filename, source_dir = worker_result
420          input_path = os.path.join(source_dir, input_filename)
421
422          self.HandleResult(input_filename, input_path, result)
423
424      except KeyboardInterrupt:
425        pool.terminate()
426      finally:
427        pool.close()
428        pool.join()
429    else:
430      for test_case in self.test_cases:
431        input_filename, input_file_dir = test_case
432        result = self.GenerateAndTest(input_filename, input_file_dir)
433        self.HandleResult(input_filename,
434                          os.path.join(input_file_dir, input_filename), result)
435
436    if self.gold_results:
437      self.gold_results.WriteResults()
438
439    if self.surprises:
440      self.surprises.sort()
441      print '\n\nUnexpected Successes:'
442      for surprise in self.surprises:
443        print surprise
444
445    if self.failures:
446      self.failures.sort()
447      print '\n\nSummary of Failures:'
448      for failure in self.failures:
449        print failure
450
451    self._PrintSummary()
452
453    if self.failures:
454      if not self.options.ignore_errors:
455        return 1
456
457    return 0
458
459  def _PrintSummary(self):
460    number_test_cases = len(self.test_cases)
461    number_failures = len(self.failures)
462    number_suppressed = len(self.result_suppressed_cases)
463    number_successes = number_test_cases - number_failures - number_suppressed
464    number_surprises = len(self.surprises)
465    print
466    print 'Test cases executed: %d' % number_test_cases
467    print '  Successes: %d' % number_successes
468    print '  Suppressed: %d' % number_suppressed
469    print '    Surprises: %d' % number_surprises
470    print '  Failures: %d' % number_failures
471    print
472    print 'Test cases not executed: %d' % len(self.execution_suppressed_cases)
473
474  def SetDeleteOutputOnSuccess(self, new_value):
475    """Set whether to delete generated output if the test passes."""
476    self.delete_output_on_success = new_value
477
478  def SetEnforceExpectedImages(self, new_value):
479    """Set whether to enforce that each test case provide an expected image."""
480    self.enforce_expected_images = new_value
481
482  def SetOneShotRenderer(self, new_value):
483    """Set whether to use the oneshot renderer. """
484    self.oneshot_renderer = new_value
485