1#!/usr/bin/python
2#
3# Copyright (c) 2012 The Chromium Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6"""Creates a GN include file for building FFmpeg from source.
7
8The way this works is a bit silly but it's easier than reverse engineering
9FFmpeg's configure scripts and Makefiles and manually maintaining chromium
10build files. It scans through build directories for object files then does a
11reverse lookup against the FFmpeg source tree to find the corresponding C or
12assembly file.
13
14Running build_ffmpeg.py on each supported platform for all architectures is
15required prior to running this script.  See build_ffmpeg.py for details as well
16as the documentation at:
17
18https://docs.google.com/document/d/14bqZ9NISsyEO3948wehhJ7wc9deTIz-yHUhF1MQp7Po/edit
19
20Once you've built all platforms and architectures you may run this script.
21"""
22
23__author__ = 'scherkus@chromium.org (Andrew Scherkus)'
24
25import collections
26import copy
27import datetime
28from enum import enum
29import fnmatch
30import credits_updater
31import itertools
32import optparse
33import os
34import re
35import shutil
36import string
37import subprocess
38import sys
39
# License header prepended to the generated GN file; the current year is
# substituted in at import time.
COPYRIGHT = """# Copyright %d The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# NOTE: this file is autogenerated by ffmpeg/chromium/scripts/generate_gn.py

""" % (
    datetime.datetime.now().year)

# Fixed GN preamble: imports, empty source-list declarations, and the
# |use_linux_config| helper referenced by the generated stanzas.
GN_HEADER = """import("//build/config/arm.gni")
import("ffmpeg_options.gni")

# Declare empty versions of each variable for easier +=ing later.
ffmpeg_c_sources = []
ffmpeg_gas_sources = []
ffmpeg_asm_sources = []

use_linux_config = is_linux || is_fuchsia

"""
# Templates used by SourceSet.GenerateGnStanza; the %s placeholders receive
# the GN condition expression or a source file path respectively.
GN_CONDITION_BEGIN = """if (%s) {
"""
GN_CONDITION_END = """}

"""
GN_C_SOURCES_BEGIN = """ffmpeg_c_sources += [
"""
GN_GAS_SOURCES_BEGIN = """ffmpeg_gas_sources += [
"""
GN_NASM_SOURCES_BEGIN = """ffmpeg_asm_sources += [
"""
GN_SOURCE_ITEM = """  "%s",
"""
GN_SOURCE_END = """]
"""
75
# Controls conditional stanza generation.
# NOTE(review): `enum` comes from `from enum import enum` above. The stdlib
# enum module exposes `Enum`, not lowercase `enum`, so this presumably
# resolves to a local helper module yielding string-valued, iterable
# attributes (the code below iterates `Attr` and uses values as namedtuple
# field names) -- confirm.
Attr = enum('ARCHITECTURE', 'TARGET', 'PLATFORM')
# Full universe of supported values per attribute; a '*' wildcard in a
# SourceListCondition expands to these (see GetAttributeValuesRange).
SUPPORT_MATRIX = {
    Attr.ARCHITECTURE:
        set(['ia32', 'x64', 'arm', 'arm64', 'arm-neon', 'mipsel', 'mips64el']),
    Attr.TARGET:
        set(['Chromium', 'Chrome', 'ChromeOS']),
    Attr.PLATFORM:
        set(['android', 'linux', 'win', 'mac'])
}
86
87
def NormalizeFilename(name):
  """Removes leading path separators in an attempt to normalize paths.

  Args:
    name: A (possibly sep-prefixed) path string.

  Returns:
    |name| without any leading os.sep characters.
  """
  # Use the str method directly; string.lstrip() was deprecated for years in
  # Python 2 and removed outright in Python 3.
  return name.lstrip(os.sep)
91
92
def CleanObjectFiles(object_files):
  """Removes unneeded object files due to linker errors, binary size, etc...

  Args:
    object_files: List of object files that needs cleaning. Mutated in place.

  Returns:
    The same list instance after blacklisted entries are dropped.
  """
  blacklist = [
      'libavcodec/inverse.o',  # Includes libavutil/inverse.c
      'libavcodec/file_open.o',  # Includes libavutil/file_open.c
      'libavcodec/log2_tab.o',  # Includes libavutil/log2_tab.c
      'libavformat/golomb_tab.o',  # Includes libavcodec/golomb.c
      'libavformat/log2_tab.o',  # Includes libavutil/log2_tab.c
      'libavformat/file_open.o',  # Includes libavutil/file_open.c

      # These codecs are not supported by Chromium and allowing ogg to parse
      # them can lead to issues. See http://crbug.com/654612 for an example.
      'libavformat/oggparsecelt.o',
      'libavformat/oggparsedaala.o',
      'libavformat/oggparsedirac.o',
      'libavformat/oggparsespeex.o',

      # The following files are removed to trim down on binary size.
      # TODO(ihf): Warning, it is *easy* right now to remove more files
      # than is healthy and end up with a library that the linker does
      # not complain about but that can't be loaded. Add some verification!
      'libavcodec/audioconvert.o',
      'libavcodec/resample.o',
      'libavcodec/resample2.o',
      'libavcodec/x86/dnxhd_mmx.o',
      'libavformat/sdp.o',
      'libavutil/adler32.o',
      'libavutil/audio_fifo.o',
      'libavutil/blowfish.o',
      'libavutil/cast5.o',
      'libavutil/des.o',
      'libavutil/file.o',
      'libavutil/hash.o',
      'libavutil/hmac.o',
      'libavutil/lls.o',
      'libavutil/murmur3.o',
      'libavutil/rc4.o',
      'libavutil/ripemd.o',
      'libavutil/sha512.o',
      'libavutil/tree.o',
      'libavutil/xtea.o',
      'libavutil/xga_font_data.o',
  ]
  for blacklisted in blacklist:
    # Blacklist entries use '/'; convert to the platform separator before
    # comparing against the walked object paths.
    platform_path = blacklisted.replace('/', os.sep)
    if platform_path in object_files:
      object_files.remove(platform_path)
  return object_files
145
146
def IsAssemblyFile(f):
  """Returns True when |f| has an assembly extension (.S or .asm)."""
  return os.path.splitext(f)[1] in ('.S', '.asm')
150
151
def IsGasFile(f):
  """Returns True when |f| is a GNU assembler source (.S, case-sensitive)."""
  return os.path.splitext(f)[1] == '.S'
155
156
def IsNasmFile(f):
  """Returns True when |f| is a NASM assembly source (.asm)."""
  return os.path.splitext(f)[1] == '.asm'
160
161
def IsCFile(f):
  """Returns True when |f| is a C source file (.c)."""
  return os.path.splitext(f)[1] == '.c'
165
166
def IsSourceFile(f):
  """Returns True when |f| is any buildable source: C or assembly."""
  return IsCFile(f) or IsAssemblyFile(f)
169
170
def GetSourceFiles(source_dir):
  """Returns a list of source files for the given source directory.

  Args:
    source_dir: Path to build a source mapping for.

  Returns:
    A python list of source file paths (relative, sep-stripped).
  """

  def IsSourceDir(d):
    return d != '.git'

  source_files = []
  for root, dirs, files in os.walk(source_dir):
    # Prune ignored directories *in place*. Rebinding the local name
    # (`dirs = filter(...)`) has no effect on os.walk's traversal, so the
    # .git tree would still be descended and its files collected.
    dirs[:] = filter(IsSourceDir, dirs)
    files = filter(IsSourceFile, files)

    # Strip leading source_dir from root.
    root = root[len(source_dir):]
    source_files.extend(
        [NormalizeFilename(os.path.join(root, name)) for name in files])
  return source_files
194
195
def GetObjectFiles(build_dir):
  """Returns a list of object files for the given build directory.

  Args:
    build_dir: Path to build an object file list for.

  Returns:
    A python list of object files paths, after CleanObjectFiles() pruning.
  """
  object_files = []
  for root, _, files in os.walk(build_dir):
    # Make paths relative to the build directory.
    relative_root = root[len(build_dir):]
    for filename in files:
      if os.path.splitext(filename)[1] == '.o':
        object_files.append(
            NormalizeFilename(os.path.join(relative_root, filename)))
  CleanObjectFiles(object_files)
  return object_files
217
218
def GetObjectToSourceMapping(source_files):
  """Returns a map of object file paths to source file paths.

  Args:
    source_files: List of source file paths.

  Returns:
    Map with object file paths as keys and source file paths as values.
  """
  # Each source maps to an object with the same stem and a '.o' extension.
  return {
      os.path.splitext(name)[0] + '.o': name for name in source_files
  }
234
235
def GetSourceFileSet(object_to_sources, object_files):
  """Determines set of source files given object files.

  Args:
    object_to_sources: A dictionary of object to source file paths.
    object_files: A list of object file paths.

  Returns:
    A python set of source files required to build said objects.

  Raises:
    KeyError: If an object file has no known source. That is deliberate:
      a failed lookup means the source and object lists are out of sync
      and should fail loudly.
  """
  return set(object_to_sources[name] for name in object_files)
252
253
# One (architecture, target, platform) combination. Field names come from the
# Attr values, and any field may hold the wildcard '*'.
SourceListCondition = collections.namedtuple(
    'SourceListCondition', [Attr.ARCHITECTURE, Attr.TARGET, Attr.PLATFORM])
256
257
class SourceSet(object):
  """A SourceSet represents a set of source files that are built on each of the
  given set of SourceListConditions.
  """

  def __init__(self, sources, conditions):
    """Creates a SourceSet.

    Args:
      sources: a python set of source files
      conditions: a python set of SourceListConditions where the given sources
        are to be used.
    """
    self.sources = sources
    self.conditions = conditions

  def __repr__(self):
    return '{%s, %s}' % (self.sources, self.conditions)

  def __eq__(self, other):
    return (self.sources == other.sources and
            self.conditions == other.conditions)

  def __hash__(self):
    # Hash frozen copies since mutable sets are unhashable.
    return hash((frozenset(self.sources), frozenset(self.conditions)))

  def Intersect(self, other):
    """Return a new SourceSet containing the set of source files common to both
    this and the other SourceSet.

    The resulting SourceSet represents the union of the architectures and
    targets of this and the other SourceSet.
    """
    return SourceSet(self.sources & other.sources,
                     self.conditions | other.conditions)

  def Difference(self, other):
    """Return a new SourceSet containing the set of source files not present in
    the other SourceSet.

    The resulting SourceSet represents the intersection of the
    SourceListConditions from this and the other SourceSet.
    """
    return SourceSet(self.sources - other.sources,
                     self.conditions & other.conditions)

  def IsEmpty(self):
    """An empty SourceSet is defined as containing no source files or no
    conditions (i.e., a set of files that aren't built on anywhere).
    """
    return (len(self.sources) == 0 or len(self.conditions) == 0)

  def GenerateGnStanza(self):
    """Generates a gn conditional stanza representing this source set.

    Each condition becomes one conjunction of platform/arch/target clauses;
    the conjunctions are OR'ed together to form the stanza's `if` condition.
    A '*' attribute contributes no clause, and a condition whose attributes
    are all '*' yields an unconditional (unwrapped) stanza.

    NOTE(review): the truthiness tests on filter() results below assume
    Python 2 list semantics (this file uses Python 2 print statements
    throughout); under Python 3, filter() returns an always-truthy iterator.
    """

    conjunctions = []
    for condition in self.conditions:
      if condition.ARCHITECTURE == '*':
        arch_condition = None
      elif condition.ARCHITECTURE == 'arm-neon':
        arch_condition = 'current_cpu == "arm" && arm_use_neon'
      elif condition.ARCHITECTURE == 'ia32':
        # GN spells 32-bit x86 as "x86", not "ia32".
        arch_condition = 'current_cpu == "x86"'
      else:
        arch_condition = 'current_cpu == "%s"' % condition.ARCHITECTURE

      # Branding conditions look like:
      #   ffmpeg_branding == "Chrome"
      if condition.TARGET == '*':
        target_condition = None
      else:
        target_condition = 'ffmpeg_branding == "%s"' % condition.TARGET

      # Platform conditions look like: is_mac .
      # Linux configuration is also used on Fuchsia, for linux config we use
      # |use_linux_config| flag.
      if condition.PLATFORM == '*':
        platform_condition = None
      elif condition.PLATFORM == 'linux':
        platform_condition = 'use_linux_config'
      else:
        platform_condition = 'is_%s' % condition.PLATFORM

      # Drop the None placeholders for wildcard attributes.
      conjunction_parts = filter(
          None, [platform_condition, arch_condition, target_condition])
      conjunctions.append(' && '.join(conjunction_parts))

    # If there is more than one clause, wrap various conditions in parens
    # before joining.
    if len(conjunctions) > 1:
      conjunctions = ['(%s)' % x for x in conjunctions]

    # Sort conjunctions to make order deterministic.
    joined_conjuctions = ' || '.join(sorted(conjunctions))

    stanza = ''
    # Output a conditional wrapper around stanzas if necessary.
    if joined_conjuctions:
      stanza += GN_CONDITION_BEGIN % joined_conjuctions

      def indent(s):
        return '  %s' % s
    else:

      def indent(s):
        return s

    # GN paths always use forward slashes, even on Windows.
    sources = sorted(n.replace('\\', '/') for n in self.sources)

    # Write out all C sources.
    c_sources = filter(IsCFile, sources)
    if c_sources:
      stanza += indent(GN_C_SOURCES_BEGIN)
      for name in c_sources:
        stanza += indent(GN_SOURCE_ITEM % (name))
      stanza += indent(GN_SOURCE_END)

    # Write out all GNU assembler (gas) sources.
    gas_sources = filter(IsGasFile, sources)
    if gas_sources:
      stanza += indent(GN_GAS_SOURCES_BEGIN)
      for name in gas_sources:
        stanza += indent(GN_SOURCE_ITEM % (name))
      stanza += indent(GN_SOURCE_END)

    # Write out all NASM assembly sources.
    nasm_sources = filter(IsNasmFile, sources)
    if nasm_sources:
      stanza += indent(GN_NASM_SOURCES_BEGIN)
      for name in nasm_sources:
        stanza += indent(GN_SOURCE_ITEM % (name))
      stanza += indent(GN_SOURCE_END)

    # Close the conditional if necessary.
    if joined_conjuctions:
      stanza += GN_CONDITION_END
    else:
      stanza += '\n'  # Make up the spacing for the removed conditional.
    return stanza
398
399
def CreatePairwiseDisjointSets(sets):
  """Given a list of SourceSet objects, returns the pairwise disjoint sets.

  Repeatedly intersects pairs of sets: each non-empty intersection becomes a
  new set, and the two originals are replaced by their differences (or
  removed when empty). Terminates when a full pass finds no overlapping pair.

  NOTE: This isn't the most efficient algorithm, but given how infrequent we
  need to run this and how small the input size is we'll leave it as is.
  """

  disjoint_sets = list(sets)

  new_sets = True
  while new_sets:
    new_sets = False
    for pair in itertools.combinations(disjoint_sets, 2):
      intersection = pair[0].Intersect(pair[1])

      # Both pairs are already disjoint, nothing to do.
      if intersection.IsEmpty():
        continue

      # Add the resulting intersection set.
      new_sets = True
      disjoint_sets.append(intersection)

      # Calculate the resulting differences for this pair of sets.
      #
      # If the differences are an empty set, remove them from the list of sets,
      # otherwise update the set itself.
      for p in pair:
        i = disjoint_sets.index(p)
        difference = p.Difference(intersection)
        if difference.IsEmpty():
          del disjoint_sets[i]
        else:
          disjoint_sets[i] = difference

      # Restart the calculation since the list of disjoint sets has changed,
      # invalidating the combinations being iterated above.
      break

  return disjoint_sets
439
440
def GetAllMatchingConditions(conditions, condition_to_match):
  """Given a set of conditions, find those that match the condition_to_match.

  A condition matches when every non-wildcard attribute of condition_to_match
  has the same value in the condition; wildcard ('*') attributes accept any
  value.
  """

  def accepts_all_values(attribute):
    return getattr(condition_to_match, attribute) == '*'

  # Only non-wildcard attributes constrain the match.
  attributes_to_check = [a for a in Attr if not accepts_all_values(a)]

  # If all attributes allow wildcard, all conditions are considered matching.
  if not attributes_to_check:
    return conditions

  # Accumulate every condition agreeing on all constrained attributes.
  return set(
      condition for condition in conditions
      if all(
          getattr(condition, attribute) == getattr(condition_to_match,
                                                   attribute)
          for attribute in attributes_to_check))
472
473
def GetAttributeValuesRange(attribute, condition):
  """Returns the set of values |attribute| can take under |condition|.

  A wildcard ('*') attribute expands to every value in SUPPORT_MATRIX;
  otherwise the range is just the attribute's concrete value. Values that
  cannot occur on the condition's platform are then removed.
  """
  attribute_value = getattr(condition, attribute)
  if attribute_value == '*':
    # Copy so the filtering below never mutates SUPPORT_MATRIX itself.
    values = set(SUPPORT_MATRIX[attribute])
  else:
    values = set([attribute_value])

  # Filter out impossible values given condition platform. This is admittedly
  # fragile to changes in our supported platforms. Fortunately, these platforms
  # don't change often. Refactor if we run into trouble.
  platform = condition.PLATFORM
  if attribute == Attr.TARGET and platform not in ('*', 'linux'):
    values.difference_update(['ChromeOS'])
  if attribute == Attr.ARCHITECTURE and platform == 'win':
    values.intersection_update(['ia32', 'x64', 'arm64'])
  if attribute == Attr.ARCHITECTURE and platform == 'mac':
    values.intersection_update(['x64'])

  return values
494
495
def GenerateConditionExpansion(condition):
  """Expands wildcards in |condition| into all possible concrete conditions."""
  combos = itertools.product(
      GetAttributeValuesRange(Attr.ARCHITECTURE, condition),
      GetAttributeValuesRange(Attr.TARGET, condition),
      GetAttributeValuesRange(Attr.PLATFORM, condition))
  return set(SourceListCondition(*combo) for combo in combos)
505
506
def ReduceConditionalLogic(source_set):
  """Reduces the conditions for the given SourceSet, in place.

  The reduction leverages what we know about the space of possible combinations,
  finding cases where conditions span all values possible of a given attribute.
  In such cases, these conditions can be flattened into a single condition with
  the spanned attribute removed.

  There is room for further reduction (e.g. Quine-McCluskey), not implemented
  at this time."""

  # A candidate wildcarded condition plus the concrete conditions it covers.
  ConditionReduction = collections.namedtuple('ConditionReduction',
                                              'condition, matches')
  reduced_conditions = set()

  for condition in source_set.conditions:
    condition_dict = condition._asdict()

    for attribute in Attr:
      # Set attribute value to wildcard and find matching attributes.
      original_attribute_value = condition_dict[attribute]
      condition_dict[attribute] = '*'
      new_condition = SourceListCondition(**condition_dict)

      # Conditions with wildcards can replace existing conditions iff the
      # source set contains conditions covering all possible expansions
      # of the wildcarded values.
      matches = GetAllMatchingConditions(source_set.conditions, new_condition)
      if matches == GenerateConditionExpansion(new_condition):
        # Keep the wildcard in condition_dict so later attributes can compound
        # on it (e.g. '*', '*', platform).
        reduced_conditions.add(
            ConditionReduction(new_condition, frozenset(matches)))
      else:
        # This wildcard won't work, restore the original value.
        condition_dict[attribute] = original_attribute_value

  # Finally, find the most efficient reductions. Do a pairwise comparison of all
  # reductions to de-dup and remove those that are covered by more inclusive
  # conditions.
  did_work = True
  while did_work:
    did_work = False
    for reduction_pair in itertools.combinations(reduced_conditions, 2):
      if reduction_pair[0].matches.issubset(reduction_pair[1].matches):
        reduced_conditions.remove(reduction_pair[0])
        did_work = True
        break
      elif reduction_pair[1].matches.issubset(reduction_pair[0].matches):
        reduced_conditions.remove(reduction_pair[1])
        did_work = True
        break

  # Apply the reductions to the source_set.
  for reduction in reduced_conditions:
    source_set.conditions.difference_update(reduction.matches)
    source_set.conditions.add(reduction.condition)
562
563
def ParseOptions():
  """Parses the options and terminates program if they are not sane.

  Returns:
    The pair (optparse.OptionValues, [string]), that is the output of
    a successful call to parser.parse_args().
  """
  parser = optparse.OptionParser(usage='usage: %prog [options] DIR')

  parser.add_option(
      '-s',
      '--source_dir',
      dest='source_dir',
      default='.',
      metavar='DIR',
      help='FFmpeg source directory.')

  parser.add_option(
      '-b',
      '--build_dir',
      dest='build_dir',
      default='.',
      metavar='DIR',
      help='Build root containing build.x64.linux, etc...')

  parser.add_option(
      '-p',
      '--print_licenses',
      dest='print_licenses',
      default=False,
      action='store_true',
      help='Print all licenses to console.')

  # NOTE(review): no action is given, so this option consumes a value (the
  # output filename) despite the False default; False doubles as the
  # "disabled" sentinel. A metavar would make --help clearer -- confirm
  # intended usage before changing.
  parser.add_option(
      '-i',
      '--output_git_commands',
      dest='output_git_commands',
      default=False,
      help='Write git commands for renames to a file.')

  options, args = parser.parse_args()

  # Both directories default to '.', so the "not specified" branches guard
  # only against explicitly empty values.
  if not options.source_dir:
    parser.error('No FFmpeg source directory specified')
  elif not os.path.exists(options.source_dir):
    parser.error('FFmpeg source directory does not exist')

  if not options.build_dir:
    parser.error('No build root directory specified')
  elif not os.path.exists(options.build_dir):
    parser.error('FFmpeg build directory does not exist')

  return options, args
617
618
def WriteGn(fd, disjoint_sets):
  """Writes the complete generated GN file to |fd|.

  Emits the copyright banner, the fixed header, then one conditional stanza
  per disjoint source set (in reverse order).
  """
  fd.write(COPYRIGHT)
  fd.write(GN_HEADER)

  for source_set in reversed(disjoint_sets):
    fd.write(source_set.GenerateGnStanza())
625    fd.write(s.GenerateGnStanza())
626
627
# Lists of files that are exempt from searching in GetIncludedSources.
# Entries are paths relative to the FFmpeg source root, built with
# os.path.join so comparisons work on any platform.
IGNORED_INCLUDE_FILES = [
    # Chromium generated files
    'config.h',
    os.path.join('libavcodec', 'bsf_list.c'),
    os.path.join('libavcodec', 'codec_list.c'),
    os.path.join('libavcodec', 'parser_list.c'),
    os.path.join('libavformat', 'demuxer_list.c'),
    os.path.join('libavformat', 'muxer_list.c'),
    os.path.join('libavformat', 'protocol_list.c'),
    os.path.join('libavutil', 'avconfig.h'),
    os.path.join('libavutil', 'ffversion.h'),

    # Current configure values are set such that we don't include these (because
    # of various defines) and we also don't generate them at all, so we will
    # fail to find these because they don't exist in our repository.
    os.path.join('libavcodec', 'aacps_tables.h'),
    os.path.join('libavcodec', 'aacps_fixed_tables.h'),
    os.path.join('libavcodec', 'aacsbr_tables.h'),
    os.path.join('libavcodec', 'aac_tables.h'),
    os.path.join('libavcodec', 'cabac_tables.h'),
    os.path.join('libavcodec', 'cbrt_tables.h'),
    os.path.join('libavcodec', 'cbrt_fixed_tables.h'),
    os.path.join('libavcodec', 'mpegaudio_tables.h'),
    os.path.join('libavcodec', 'pcm_tables.h'),
    os.path.join('libavcodec', 'sinewin_tables.h'),
    os.path.join('libavcodec', 'sinewin_fixed_tables.h'),
]
656
# Known licenses that are acceptable for static linking
# DO NOT ADD TO THIS LIST without first confirming with lawyers that the
# licenses are okay to add.
LICENSE_WHITELIST = [
    'BSD (3 clause) LGPL (v2.1 or later)',
    'BSL (v1) LGPL (v2.1 or later)',
    'ISC GENERATED FILE',
    'LGPL (v2.1 or later)',
    'LGPL (v2.1 or later) GENERATED FILE',
    'MIT/X11 (BSD like)',
    'Public domain LGPL (v2.1 or later)',
]

# Files permitted to report an UNKNOWN license. All files mentioned here should
# give the full path from the source_dir to avoid ambiguity.
# DO NOT ADD TO THIS LIST without first confirming with lawyers that the files
# you're adding have acceptable licenses.
UNKNOWN_WHITELIST = [
    # From the Independent JPEG Group. No named license, but usage is allowed.
    os.path.join('libavcodec', 'jrevdct.c'),
    os.path.join('libavcodec', 'jfdctfst.c'),
    os.path.join('libavcodec', 'jfdctint_template.c'),
]
680
# Regex to find lines matching #include "some_dir\some_file.h".
# Raw strings keep the \s escapes out of Python's string-escape processing
# (non-raw '\s' raises a SyntaxWarning on modern Python).
INCLUDE_REGEX = re.compile(r'#\s*include\s+"([^"]+)"')

# Regex to find whacky includes that we might be overlooking (e.g. using macros
# or defines).
EXOTIC_INCLUDE_REGEX = re.compile(r'#\s*include\s+[^"<\s].+')

# Prefix added to renamed files as part of the basename-collision workaround
# (see FixObjectBasenameCollisions).
RENAME_PREFIX = 'autorename'

# Match an absolute path to a generated autorename_ file.
RENAME_REGEX = re.compile('.*' + RENAME_PREFIX + '_.+')

# Content for the rename file. #includes the original file to ensure the two
# files stay in sync.
RENAME_CONTENT = """{0} File automatically generated. See crbug.com/495833.
{1}include "{2}"
"""
699
700
def GetIncludedSources(file_path, source_dir, include_set):
  """Recurse over include tree, accumulating absolute paths to all included
  files (including the seed file) in include_set.

  Pass in the set returned from previous calls to avoid re-walking parts of the
  tree. Given file_path may be relative (to options.src_dir) or absolute.

  NOTE: This algorithm is greedy. It does not know which includes may be
  excluded due to compile-time defines, so it considers any mentioned include.

  NOTE: This algorithm makes hard assumptions about the include search paths.
  Paths are checked in the order:
  1. Directory of the file containing the #include directive
  2. Directory specified by source_dir

  NOTE: Files listed in IGNORED_INCLUDE_FILES will be ignored if not found. See
  reasons at definition for IGNORED_INCLUDE_FILES.

  Results are accumulated by mutating include_set; the return value is not
  meaningful (the early-exit path returns the set, the normal path returns
  None), so callers should rely on the mutation.
  """
  # Use options.source_dir to correctly resolve relative file path. Use only
  # absolute paths in the set to avoid same-name-errors.
  if not os.path.isabs(file_path):
    file_path = os.path.abspath(os.path.join(source_dir, file_path))
  file_path = os.path.normpath(file_path)

  current_dir = os.path.dirname(file_path)

  # Already processed this file, bail out.
  if file_path in include_set:
    return include_set

  include_set.add(file_path)

  for line in open(file_path):
    include_match = INCLUDE_REGEX.search(line)

    if not include_match:
      if EXOTIC_INCLUDE_REGEX.search(line):
        print 'WARNING: Investigate whacky include line:', line
      continue

    include_file_path = include_match.group(1)

    # These may or may not be where the file lives. Just storing temps here
    # and we'll check their validity below.
    include_path_in_current_dir = os.path.join(current_dir, include_file_path)
    include_path_in_source_dir = os.path.join(source_dir, include_file_path)
    resolved_include_path = ''

    # Check if file is in current directory.
    if os.path.isfile(include_path_in_current_dir):
      resolved_include_path = include_path_in_current_dir
    # Else, check source_dir (should be FFmpeg root).
    elif os.path.isfile(include_path_in_source_dir):
      resolved_include_path = include_path_in_source_dir
    # Else, we couldn't find it :(.
    elif include_file_path in IGNORED_INCLUDE_FILES:
      continue
    else:
      exit('Failed to find file ' + include_file_path)

    # At this point we've found the file. Check if its in our ignore list which
    # means that the list should be updated to no longer mention this file.
    if include_file_path in IGNORED_INCLUDE_FILES:
      print('Found %s in IGNORED_INCLUDE_FILES. Consider updating the list '
            'to remove this file.' % str(include_file_path))

    # Recurse into the resolved include; include_set doubles as the
    # visited-set, so cycles terminate at the bail-out check above.
    GetIncludedSources(resolved_include_path, source_dir, include_set)
768
769
def CheckLicensesForSources(sources, source_dir, print_licenses):
  """Returns True iff every file in |sources| has a whitelisted license.

  Runs third_party/devscripts/licensecheck.pl over |sources| and compares
  each reported license against LICENSE_WHITELIST (or UNKNOWN_WHITELIST for
  files reporting UNKNOWN). Prints the first offending file and returns
  False on failure; terminates the process if licensecheck.pl is missing.

  Args:
    sources: Iterable of source file paths; resolved to absolute paths.
    source_dir: FFmpeg source directory, used to relativize reported paths.
    print_licenses: When True, print the license of each passing file too.
  """
  # Assumed to be two back from source_dir (e.g. third_party/ffmpeg/../..).
  source_root = os.path.abspath(
      os.path.join(source_dir, os.path.pardir, os.path.pardir))

  licensecheck_path = os.path.abspath(
      os.path.join(source_root, 'third_party', 'devscripts', 'licensecheck.pl'))
  if not os.path.exists(licensecheck_path):
    exit('Could not find licensecheck.pl: ' + str(licensecheck_path))

  # -m produces machine-readable (tab-separated) output; -l 100 raises the
  # number of header lines scanned per file.
  check_process = subprocess.Popen(
      [licensecheck_path, '-m', '-l', '100'] +
      [os.path.abspath(s) for s in sources],
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE)
  stdout, _ = check_process.communicate()

  # Get the filename and license out of the stdout. stdout is expected to be
  # "/abspath/to/file: *No copyright* SOME LICENSE".
  for line in stdout.strip().splitlines():
    filename, licensename = line.split('\t', 1)
    licensename = licensename.replace('*No copyright*', '').strip()
    rel_file_path = os.path.relpath(filename, os.path.abspath(source_dir))

    if (licensename in LICENSE_WHITELIST or
        (licensename == 'UNKNOWN' and rel_file_path in UNKNOWN_WHITELIST)):
      if print_licenses:
        print filename, ':', licensename
      continue

    print 'UNEXPECTED LICENSE: %s: %s' % (filename, licensename)
    return False

  return True
804
805
def CheckLicensesForStaticLinking(sources_to_check, source_dir, print_licenses):
  """Returns True iff all |sources_to_check| may be statically linked.

  Thin wrapper over CheckLicensesForSources that announces the check.
  """
  print 'Checking licenses...'
  return CheckLicensesForSources(sources_to_check, source_dir, print_licenses)
809
810
def FixBasenameCollision(old_path, new_path, content):
  """Writes |content| to |new_path|, the renamed stand-in for |old_path|.

  |old_path| is unused here; it is part of the do_rename_cb callback
  signature used by FixObjectBasenameCollisions.
  """
  with open(new_path, 'w') as out_file:
    out_file.write(content)
814
815
def FixObjectBasenameCollisions(disjoint_sets,
                                all_sources,
                                do_rename_cb,
                                log_renames=True):
  """Mac libtool warns needlessly when it encounters two object files with
  the same basename in a given static library. See more at
  https://code.google.com/p/gyp/issues/detail?id=384#c7

  Here we hack around the issue by making a new source file with a different
  base name, and #including the original file.

  If upstream changes the name such that the collision no longer exists, we
  detect the presence of a renamed file in all_sources which is overridden and
  warn that it should be removed.

  This will return a tuple of two sets.  The first is a list of all currently
  renamed files, in their renamed form.  The second is a list of renamed files
  that we have in the working directory, but no longer need.

  Args:
    disjoint_sets: SourceSets to scan; their `sources` are mutated in place
      to point at the renamed files.
    all_sources: Every known source path; scanned for stale renames.
    do_rename_cb: Callback (old_path, new_path, content) that materializes
      the renamed file (e.g. FixBasenameCollision).
    log_renames: When True, print each rename performed.
  """

  SourceRename = collections.namedtuple('SourceRename', 'old_path, new_path')
  # Basenames seen so far across ALL sets -- the first occurrence wins and
  # later duplicates get renamed.
  known_basenames = set()
  all_renames = set()

  # Set of files that have renames, but no longer collide.
  old_renames_to_delete = set()

  for source_set in disjoint_sets:
    # Track needed adjustments to change when we're done with each SourceSet.
    renames = set()

    for source_path in source_set.sources:
      folder, filename = os.path.split(source_path)
      basename, _ = os.path.splitext(filename)

      # Sanity check: source set should not have any renames prior to this step.
      if RENAME_PREFIX in basename:
        exit('Found unexpected renamed file in SourceSet: %s' % source_path)

      # Craft a new unique basename from the path of the colliding file
      if basename in known_basenames:
        name_parts = source_path.split(os.sep)
        name_parts.insert(0, RENAME_PREFIX)
        new_filename = '_'.join(name_parts)
        new_source_path = (
            new_filename
            if folder == '' else os.sep.join([folder, new_filename]))

        renames.add(SourceRename(source_path, new_source_path))
      else:
        known_basenames.add(basename)

    for rename in renames:
      if log_renames:
        print 'Fixing basename collision: %s -> %s' % (rename.old_path,
                                                       rename.new_path)
      _, old_filename = os.path.split(rename.old_path)
      _, file_extension = os.path.splitext(old_filename)
      # .asm (NASM) sources use '%include' and ';' comments; everything else
      # gets C-style '#include' and '//'.
      include_prefix = '%' if (file_extension == '.asm') else '#'
      comment_prefix = ';' if (file_extension == '.asm') else '//'

      do_rename_cb(
          rename.old_path, rename.new_path,
          RENAME_CONTENT.format(comment_prefix, include_prefix, old_filename))

      # Swap the renamed file into the SourceSet (done after iteration of
      # source_set.sources above to avoid mutating it mid-loop).
      source_set.sources.remove(rename.old_path)
      source_set.sources.add(rename.new_path)
      all_renames.add(rename.new_path)

  # Now, with all collisions handled, walk the set of known sources and warn
  # about any renames that were not replaced. This should indicate that an old
  # collision is now resolved by some external/upstream change.
  for source_path in all_sources:
    if RENAME_PREFIX in source_path and source_path not in all_renames:
      old_renames_to_delete.add(source_path)
      print 'WARNING: %s no longer collides. DELETE ME!' % source_path

  return all_renames, old_renames_to_delete
893
894def UpdateCredits(sources_to_check, source_dir):
895  print 'Updating ffmpeg credits...'
896  updater = credits_updater.CreditsUpdater(source_dir)
897  for source_name in sources_to_check:
898    updater.ProcessFile(source_name)
899  updater.PrintStats()
900  updater.WriteCredits()
901
def WriteGitCommands(filename, all_renames, old_renames_to_delete):
  """Write a shell script that will add renames and delete old ones.

  Args:
    filename: Path of the shell script to create (overwritten if present).
    all_renames: Iterable of currently-renamed file paths to `git add`.
    old_renames_to_delete: Iterable of renamed file paths that are no longer
        needed and should be `git rm`ed.
  """
  with open(filename, 'w') as git_file:
    git_file.write("#!/bin/sh\n")
    git_file.write("# Git commands to add all renames and delete old ones.\n")
    git_file.write("# This file is automatically generated by generate_gn.py\n")
    # Sort both collections: callers pass sets, whose iteration order is
    # arbitrary, and the generated script should be reproducible run-to-run.
    for renamed_file in sorted(all_renames):
      git_file.write("git add %s\n" % renamed_file)
    for unrenamed_file in sorted(old_renames_to_delete):
      git_file.write("git rm %s\n" % unrenamed_file)
912
913def main():
914  options, _ = ParseOptions()
915
916  # Generate map of FFmpeg source files.
917  source_dir = options.source_dir
918  source_files = GetSourceFiles(source_dir)
919  object_to_sources = GetObjectToSourceMapping(source_files)
920
921  sets = []
922
923  for arch in SUPPORT_MATRIX[Attr.ARCHITECTURE]:
924    for target in SUPPORT_MATRIX[Attr.TARGET]:
925      for platform in SUPPORT_MATRIX[Attr.PLATFORM]:
926        # Assume build directory is of the form build.$arch.$platform/$target.
927        name = ''.join(['build.', arch, '.', platform])
928        build_dir = os.path.join(options.build_dir, name, target)
929        if not os.path.exists(build_dir):
930          continue
931        print 'Processing build directory: %s' % name
932
933        object_files = GetObjectFiles(build_dir)
934
935        # Generate the set of source files to build said target.
936        s = GetSourceFileSet(object_to_sources, object_files)
937        sets.append(
938            SourceSet(s, set([SourceListCondition(arch, target, platform)])))
939
940  sets = CreatePairwiseDisjointSets(sets)
941
942  for source_set in sets:
943    ReduceConditionalLogic(source_set)
944
945  if not sets:
946    exit('ERROR: failed to find any source sets. ' +
947         'Are build_dir (%s) and/or source_dir (%s) options correct?' %
948         (options.build_dir, options.source_dir))
949
950  all_renames, old_renames_to_delete = FixObjectBasenameCollisions(
951                                         sets,
952                                         source_files,
953                                         FixBasenameCollision)
954  if options.output_git_commands:
955    WriteGitCommands(
956            options.output_git_commands,
957            all_renames,
958            old_renames_to_delete)
959
960  # Build up set of all sources and includes.
961  sources_to_check = set()
962  for source_set in sets:
963    for source in source_set.sources:
964      GetIncludedSources(source, source_dir, sources_to_check)
965
966  # Remove autorename_ files now that we've grabbed their underlying includes.
967  # We generated autorename_ files above and should not consider them for
968  # licensing or credits.
969  sources_to_check = filter(lambda s: not RENAME_REGEX.search(s),
970                            sources_to_check)
971
972  if not CheckLicensesForStaticLinking(sources_to_check, source_dir,
973                                       options.print_licenses):
974    exit('GENERATE FAILED: invalid licenses detected.')
975  print 'License checks passed.'
976  UpdateCredits(sources_to_check, source_dir)
977
978  gn_file_name = os.path.join(options.source_dir, 'ffmpeg_generated.gni')
979  print 'Writing:', gn_file_name
980  with open(gn_file_name, 'w') as fd:
981    WriteGn(fd, sets)
982
983
# Script entry point; importing this module does not trigger generation.
if __name__ == '__main__':
  main()
986