1#!/usr/bin/env python
2# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
3#
4# Use of this source code is governed by a BSD-style license
5# that can be found in the LICENSE file in the root of the source
6# tree. An additional intellectual property rights grant can be found
7# in the file PATENTS.  All contributing project authors may
8# be found in the AUTHORS file in the root of the source tree.
9
10"""Finds the APM configuration that maximizes a provided metric by
parsing the output generated by apm_quality_assessment.py.
12"""
13
14from __future__ import division
15
16import collections
17import logging
18import os
19
20import quality_assessment.data_access as data_access
21import quality_assessment.collect_data as collect_data
22
def _InstanceArgumentsParser():
  """Arguments parser factory.

  Extends the parser created by 'collect_data' with a few extra arguments
  for selecting which parameters to optimize for.

  Returns:
    The parser object returned by collect_data.InstanceArgumentsParser(),
    with its description replaced and the -n/--config_dir, -p/--params and
    -z/--params_not_to_optimize arguments added.
  """
  parser = collect_data.InstanceArgumentsParser()
  # Adjacent string literals are concatenated verbatim, so the separating
  # space has to be part of one of the fragments.
  parser.description = (
      'Rudimentary optimization of a function over different parameter '
      'combinations.')

  parser.add_argument('-n', '--config_dir', required=False,
                      help=('path to the folder with the configuration files'),
                      default='apm_configs')

  parser.add_argument('-p', '--params', required=True, nargs='+',
                      help=('parameters to parse from the config files in '
                            'config_dir'))

  parser.add_argument('-z', '--params_not_to_optimize', required=False,
                      nargs='+', default=[],
                      help=('parameters from `params` not to be optimized for'))

  return parser
45
46
def _ConfigurationAndScores(data_frame, params,
                            params_not_to_optimize, config_dir):
  """Groups the normalized mean scores of every config by its parameters.

  Args:
    data_frame: A pandas data frame with (at least) the columns
                `apm_config`, `eval_score_name` and `score`.
    params: Names of the parameters to read from each config file. Every
            name is looked up in the loaded config with a leading '-'.
    params_not_to_optimize: Names from `params` which shouldn't affect the
                            optimal parameter selection. E.g., fixed
                            settings and not tunable parameters.
    config_dir: Path to the folder with the config files. The file of a
                config is read from <config_dir>/<apm_config>.json.

  Returns:
    Dictionary of the form
    {param_combination: [{params: {param1: value1, ...},
                          scores: {score1: value1, ...}}]}.

    Each key is a `ParamCombination` named tuple holding the values of the
    parameters that are in `params` but not in `params_not_to_optimize`.
    The corresponding value lists, for every config sharing that key, the
    values of its remaining parameters from `params` and its scores. A
    score is the mean over the rows of that config divided by the largest
    value of the same score in the whole data frame.
  """
  unique_configs = data_frame['apm_config'].drop_duplicates().values.tolist()
  unique_scores = (
      data_frame['eval_score_name'].drop_duplicates().values.tolist())

  # Normalization constant of a score: its largest value over all configs.
  score_maxima = {
      name: max(data_frame[data_frame.eval_score_name == name].score)
      for name in unique_scores
  }

  tunable_params = [
      name for name in params if name not in params_not_to_optimize]
  combination_type = collections.namedtuple("ParamCombination",
                                            tunable_params)

  grouped = collections.defaultdict(list)
  for config in unique_configs:
    config_path = os.path.join(config_dir, config + ".json")
    config_values = data_access.AudioProcConfigFile.Load(config_path)
    config_rows = data_frame[data_frame.apm_config == config]

    # Mean of each score over the rows of this config, then normalized.
    normalized_means = {}
    for name in unique_scores:
      values = config_rows[config_rows.eval_score_name == name].score
      mean = sum(values) / len(values)
      normalized_means[name] = mean / score_maxima[name]

    # Tunable values form the grouping key; the others go into the entry.
    fixed_values = {}
    tunable_values = {}
    for name in params:
      destination = tunable_values if name in tunable_params else fixed_values
      destination[name] = config_values['-' + name]

    grouped[combination_type(**tunable_values)].append(
        {'scores': normalized_means, 'params': fixed_values})
  return grouped
112
113
114def _FindOptimalParameter(configs_and_scores, score_weighting):
115  """Finds the config producing the maximal score.
116
117  Args:
118    configs_and_scores: structure of the form returned by
119                        _ConfigurationAndScores
120
121    score_weighting: a function to weight together all score values of
122                     the form [{params: {param1: value1, ...}, scores:
123                                {score1: value1, ...}}] into a numeric
124                     value
125  Returns:
126    the config that has the largest values of |score_weighting| applied
127    to its scores.
128  """
129
130  min_score = float('+inf')
131  best_params = None
132  for config in configs_and_scores:
133    scores_and_params = configs_and_scores[config]
134    current_score = score_weighting(scores_and_params)
135    if current_score < min_score:
136      min_score = current_score
137      best_params = config
138      logging.debug("Score: %f", current_score)
139      logging.debug("Config: %s", str(config))
140  return best_params
141
142
143def _ExampleWeighting(scores_and_configs):
144  """Example argument to `_FindOptimalParameter`
145  Args:
146    scores_and_configs: a list of configs and scores, in the form
147                        described in _FindOptimalParameter
148  Returns:
149    numeric value, the sum of all scores
150  """
151  res = 0
152  for score_config in scores_and_configs:
153    res += sum(score_config['scores'].values())
154  return res
155
156
def main():
  """Parses the command line, collects the scores and logs the result.

  The parameter combination selected by `_FindOptimalParameter` with
  `_ExampleWeighting`, and the entries stored for it, are logged at INFO
  level.
  """
  # Init.
  # TODO(alessiob): INFO once debugged.
  logging.basicConfig(level=logging.DEBUG)
  args = _InstanceArgumentsParser().parse_args()

  # Get the scores.
  scores_root = collect_data.ConstructSrcPath(args)
  logging.debug('Src path <%s>', scores_root)
  configs_and_scores = _ConfigurationAndScores(
      collect_data.FindScores(scores_root, args),
      args.params,
      args.params_not_to_optimize,
      args.config_dir)

  best_combination = _FindOptimalParameter(configs_and_scores,
                                           _ExampleWeighting)

  logging.info('Optimal parameter combination: <%s>', best_combination)
  logging.info("It's score values: <%s>",
               configs_and_scores[best_combination])


# Run the optimization only when this file is executed as a script.
if __name__ == "__main__":
  main()
180