1# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
2#
3# Use of this source code is governed by a BSD-style license
4# that can be found in the LICENSE file in the root of the source
5# tree. An additional intellectual property rights grant can be found
6# in the file PATENTS.  All contributing project authors may
7# be found in the AUTHORS file in the root of the source tree.
8
9"""Imports a filtered subset of the scores and configurations computed
10by apm_quality_assessment.py into a pandas data frame.
11"""
12
13import argparse
14import glob
15import logging
16import os
17import re
18import sys
19
20try:
21  import pandas as pd
22except ImportError:
23  logging.critical('Cannot import the third-party Python package pandas')
24  sys.exit(1)
25
26from . import data_access as data_access
27from . import simulation as sim
28
def _PrefixedRegex(prefix, tail=r'(.+)'):
  """Compiles the regular expression made of prefix followed by tail.

  Args:
    prefix: string placed at the beginning of the pattern.
    tail: pattern appended to prefix; by default, one group capturing the rest.

  Returns:
    The compiled regular expression.
  """
  return re.compile(prefix + tail)


# Compiled regular expressions used to extract score descriptors. Each one is
# a prefix provided by ApmModuleSimulator followed by a group capturing the
# descriptor itself.
RE_CONFIG_NAME = _PrefixedRegex(sim.ApmModuleSimulator.GetPrefixApmConfig())
RE_CAPTURE_NAME = _PrefixedRegex(sim.ApmModuleSimulator.GetPrefixCapture())
RE_RENDER_NAME = _PrefixedRegex(sim.ApmModuleSimulator.GetPrefixRender())
RE_ECHO_SIM_NAME = _PrefixedRegex(
    sim.ApmModuleSimulator.GetPrefixEchoSimulator())
RE_TEST_DATA_GEN_NAME = _PrefixedRegex(
    sim.ApmModuleSimulator.GetPrefixTestDataGenerator())
RE_TEST_DATA_GEN_PARAMS = _PrefixedRegex(
    sim.ApmModuleSimulator.GetPrefixTestDataGeneratorParameters())
# The score pattern has a second group which captures the trailing
# dot-separated part, so that the first group holds the remaining text.
RE_SCORE_NAME = _PrefixedRegex(
    sim.ApmModuleSimulator.GetPrefixScore(), r'(.+)(\..+)')
44
45
def _CompileRegexArgument(pattern):
  """Compiles a regular expression given as a command line argument.

  Meant to be used as the `type` of an argparse argument.

  Args:
    pattern: regular expression string.

  Returns:
    The compiled regular expression.

  Raises:
    argparse.ArgumentTypeError: if pattern is not a valid regular expression.
  """
  try:
    return re.compile(pattern)
  except re.error as error:
    # argparse only reports ArgumentTypeError, TypeError and ValueError as
    # usage errors; re.error is none of them and would otherwise escape from
    # parse_args() as a traceback.
    raise argparse.ArgumentTypeError(
        'invalid regular expression %r: %s' % (pattern, error))


def InstanceArgumentsParser():
  """Arguments parser factory.

  The returned parser requires the output path and accepts one optional
  regular expression per score descriptor. A filter that is not given is
  parsed as None; an invalid regular expression is reported as a usage error.

  Returns:
    An argparse.ArgumentParser instance.
  """
  parser = argparse.ArgumentParser(description=(
      'Override this description in a user script by changing'
      ' `parser.description` of the returned parser.'))

  parser.add_argument('-o', '--output_dir', required=True,
                      help=('the same base path used with the '
                            'apm_quality_assessment tool'))

  # (short flag, long flag, what the regular expression filters).
  filter_options = (
      ('-c', '--config_names', 'APM configuration'),
      ('-i', '--capture_names', 'capture signal'),
      ('-r', '--render_names', 'render signal'),
      ('-e', '--echo_simulator_names', 'echo simulator'),
      ('-t', '--test_data_generators', 'test data generator'),
      ('-s', '--eval_scores', 'evaluation score'),
  )
  for short_flag, long_flag, filtered_item in filter_options:
    parser.add_argument(
        short_flag, long_flag, type=_CompileRegexArgument,
        help='regular expression to filter the %s names' % filtered_item)

  return parser
82
83
def _GetScoreDescriptors(score_filepath):
  """Extracts the score descriptors from the given score file path.

  The last seven components of the path are matched, in order, against the
  module-level RE_* regular expressions; the first captured group of each
  match is the descriptor.

  Args:
    score_filepath: path to the score file.

  Returns:
    A tuple of strings (APM configuration name, capture audio track name,
    render audio track name, echo simulator name, test data generator name,
    test data generator parameters as string, evaluation score name).
  """
  # One pattern per path component, listed in the order of the components.
  component_patterns = (
      RE_CONFIG_NAME,
      RE_CAPTURE_NAME,
      RE_RENDER_NAME,
      RE_ECHO_SIM_NAME,
      RE_TEST_DATA_GEN_NAME,
      RE_TEST_DATA_GEN_PARAMS,
      RE_SCORE_NAME,
  )
  components = score_filepath.split(os.sep)[-len(component_patterns):]
  # Components are indexed explicitly (rather than zipped) so that a path with
  # too few components still fails instead of yielding a shorter tuple.
  return tuple(
      pattern.match(components[position]).group(1)
      for position, pattern in enumerate(component_patterns))
107
108
def _ExcludeScore(config_name, capture_name, render_name, echo_simulator_name,
                  test_data_gen_name, score_name, args):
  """Tells whether a score has to be excluded.

  Each |*_name| descriptor is checked against the corresponding optional
  regular expression in args. The score is excluded as soon as one of the
  regular expressions which are set does not match (from the beginning of the
  string) its descriptor.

  Args:
    config_name: APM configuration name.
    capture_name: capture audio track name.
    render_name: render audio track name.
    echo_simulator_name: echo simulator name.
    test_data_gen_name: test data generator name.
    score_name: evaluation score name.
    args: parsed arguments.

  Returns:
    True if the score must be excluded, False otherwise.
  """
  filters = (
      (args.config_names, config_name),
      (args.capture_names, capture_name),
      (args.render_names, render_name),
      (args.echo_simulator_names, echo_simulator_name),
      (args.test_data_generators, test_data_gen_name),
      (args.eval_scores, score_name),
  )

  # A filter which is not set (None) accepts any value.
  return any(
      pattern is not None and not pattern.match(descriptor)
      for pattern, descriptor in filters)
145
146
def FindScores(src_path, args):
  """Collects the scores found with a search path into a DataFrame object.

  Each path obtained by globbing src_path is split into its descriptors with
  _GetScoreDescriptors. The scores rejected by _ExcludeScore are logged and
  skipped; for the other ones, the audio test data paths are loaded from the
  directory containing the score file and the score from the file itself.

  Args:
    src_path: Search path pattern.
    args: parsed arguments.

  Returns:
    A DataFrame object with one row for each score which is not excluded.
  """
  # Metadata entries copied into each row; the keys are also the column names.
  metadata_columns = (
      'clean_capture_input_filepath',
      'echo_free_capture_filepath',
      'echo_filepath',
      'render_filepath',
      'capture_filepath',
      'apm_output_filepath',
      'apm_reference_filepath',
  )
  # Column names for the descriptors, in the order in which they are returned
  # by _GetScoreDescriptors.
  descriptor_columns = (
      'apm_config',
      'capture',
      'render',
      'echo_simulator',
      'test_data_gen',
      'test_data_gen_params',
      'eval_score_name',
  )

  rows = []
  for score_filepath in glob.iglob(src_path):
    descriptors = _GetScoreDescriptors(score_filepath)
    (config_name, capture_name, render_name, echo_simulator_name,
     test_data_gen_name, _, score_name) = descriptors

    # The test data generator parameters do not take part in the filtering.
    filtered_fields = (config_name, capture_name, render_name,
                       echo_simulator_name, test_data_gen_name, score_name)
    if _ExcludeScore(*(filtered_fields + (args,))):
      logging.info('ignored score: %s %s %s %s %s %s', *filtered_fields)
      continue

    # The metadata is looked up in the directory containing the score file.
    metadata = data_access.Metadata.LoadAudioTestDataPaths(
        os.path.dirname(score_filepath))
    score = data_access.ScoreFile.Load(score_filepath)

    rows.append(
        tuple(metadata[column] for column in metadata_columns) +
        tuple(descriptors) +
        (score,))

  return pd.DataFrame(
      data=rows,
      columns=metadata_columns + descriptor_columns + ('score',))
231
232
def ConstructSrcPath(args):
  """Builds the search path pattern for the scores under the output path.

  The pattern has one component for each ApmModuleSimulator prefix, each one
  followed by a `*` wildcard, in the same order in which _GetScoreDescriptors
  reads the path components.

  Args:
    args: parsed arguments; only output_dir is read.

  Returns:
    A path pattern string rooted at args.output_dir.
  """
  base_path = args.output_dir
  prefixes = (
      sim.ApmModuleSimulator.GetPrefixApmConfig(),
      sim.ApmModuleSimulator.GetPrefixCapture(),
      sim.ApmModuleSimulator.GetPrefixRender(),
      sim.ApmModuleSimulator.GetPrefixEchoSimulator(),
      sim.ApmModuleSimulator.GetPrefixTestDataGenerator(),
      sim.ApmModuleSimulator.GetPrefixTestDataGeneratorParameters(),
      sim.ApmModuleSimulator.GetPrefixScore(),
  )
  wildcard_components = [prefix + '*' for prefix in prefixes]
  return os.path.join(base_path, *wildcard_components)
243