# Copyright (c) 2015-2017 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

import argparse
import builtins
import collections
import os
import sys
import textwrap

import xml.etree.ElementTree as et

# Maps an expanded equation hash to the symbol of the C function that was
# first emitted for it, so identical equations are emitted only once.
hashed_funcs = {}

# Output stream for the generated C file and its current indentation (in
# spaces).  Both are module globals shared by the c*() helpers.
c_file = None
_c_indent = 0

def c(*args):
    """Write one or more lines of generated C code to c_file.

    The arguments are stringified and joined with single spaces.  Every
    resulting line is prefixed with the current indentation, stripped of
    trailing whitespace and terminated with a newline.  An empty string
    writes nothing; a lone "\\n" writes one blank line.
    """
    pad = ' ' * _c_indent
    joined = ' '.join(str(arg) for arg in args)
    for raw_line in joined.splitlines():
        c_file.write((pad + raw_line).rstrip() + "\n")

# indented, but no trailing newline...
def c_line_start(code):
    """Write `code` to the C file at the current indentation, without a newline."""
    c_file.write(' ' * _c_indent + code)
def c_raw(code):
    """Write `code` to the C file verbatim (no indentation, no newline)."""
    c_file.write(code)

def c_indent(n):
    """Increase the C output indentation by `n` spaces."""
    global _c_indent
    _c_indent = _c_indent + n
def c_outdent(n):
    """Decrease the C output indentation by `n` spaces."""
    global _c_indent
    _c_indent = _c_indent - n

# Output stream for the generated header and its current indentation.
header_file = None
_h_indent = 0

def h(*args):
    """Write one or more lines to header_file.

    Arguments are stringified and joined with spaces; each resulting line is
    indented, stripped of trailing whitespace and newline-terminated.
    """
    code = ' '.join(map(str, args))
    for line in code.splitlines():
        text = ' ' * _h_indent + line
        header_file.write(text.rstrip() + "\n")

def h_indent(n):
    """Increase the header output indentation by `n` spaces."""
    # The global declaration must name _h_indent: declaring _c_indent here
    # made the assignment below a local one and raised UnboundLocalError.
    global _h_indent
    _h_indent = _h_indent + n
def h_outdent(n):
    """Decrease the header output indentation by `n` spaces."""
    global _h_indent
    _h_indent = _h_indent - n


# RPN emitters
#
# Every emitter receives the next free temporary id and the operand list in
# pop order (args[0] was on top of the stack, i.e. it is the right-hand
# operand).  It writes the C statement(s) declaring new tmpN variables and
# returns the next free temporary id; the result lives in tmp<returned - 1>.

def emit_fadd(tmp_id, args):
    """Emit a double addition."""
    c("double tmp{0} = {1} + {2};".format(tmp_id, args[1], args[0]))
    return tmp_id + 1

# Be careful to check for divide by zero...
def emit_fdiv(tmp_id, args):
    """Emit a double division that yields 0 when the divisor is zero."""
    c("double tmp{0} = {1};".format(tmp_id, args[1]))
    c("double tmp{0} = {1};".format(tmp_id + 1, args[0]))
    c("double tmp{0} = tmp{1} ? tmp{2} / tmp{1} : 0;".format(tmp_id + 2, tmp_id + 1, tmp_id))
    return tmp_id + 3

def emit_fmax(tmp_id, args):
    """Emit a double MAX() of both operands."""
    c("double tmp{0} = {1};".format(tmp_id, args[1]))
    c("double tmp{0} = {1};".format(tmp_id + 1, args[0]))
    c("double tmp{0} = MAX(tmp{1}, tmp{2});".format(tmp_id + 2, tmp_id, tmp_id + 1))
    return tmp_id + 3

def emit_fmul(tmp_id, args):
    """Emit a double multiplication."""
    c("double tmp{0} = {1} * {2};".format(tmp_id, args[1], args[0]))
    return tmp_id + 1

def emit_fsub(tmp_id, args):
    """Emit a double subtraction."""
    c("double tmp{0} = {1} - {2};".format(tmp_id, args[1], args[0]))
    return tmp_id + 1

def emit_read(tmp_id, args):
    """Emit a read of results->accumulator at query-><type>_offset + index."""
    counter_type = args[1].lower()
    c("uint64_t tmp{0} = results->accumulator[query->{1}_offset + {2}];".format(tmp_id, counter_type, args[0]))
    return tmp_id + 1

def emit_uadd(tmp_id, args):
    """Emit a uint64_t addition."""
    c("uint64_t tmp{0} = {1} + {2};".format(tmp_id, args[1], args[0]))
    return tmp_id + 1

# Be careful to check for divide by zero...
def emit_udiv(tmp_id, args):
    """Emit a uint64_t division.

    A literal divisor must be positive and is divided by directly; any other
    divisor is guarded at runtime so that dividing by zero yields 0.
    """
    c("uint64_t tmp{0} = {1};".format(tmp_id, args[1]))
    c("uint64_t tmp{0} = {1};".format(tmp_id + 1, args[0]))
    if args[0].isdigit():
        assert int(args[0]) > 0
        c("uint64_t tmp{0} = tmp{2} / tmp{1};".format(tmp_id + 2, tmp_id + 1, tmp_id))
    else:
        c("uint64_t tmp{0} = tmp{1} ? tmp{2} / tmp{1} : 0;".format(tmp_id + 2, tmp_id + 1, tmp_id))
    return tmp_id + 3

def emit_umul(tmp_id, args):
    """Emit a uint64_t multiplication."""
    c("uint64_t tmp{0} = {1} * {2};".format(tmp_id, args[1], args[0]))
    return tmp_id + 1

def emit_usub(tmp_id, args):
    """Emit a uint64_t subtraction."""
    c("uint64_t tmp{0} = {1} - {2};".format(tmp_id, args[1], args[0]))
    return tmp_id + 1

def emit_umin(tmp_id, args):
    """Emit a uint64_t MIN() of both operands."""
    c("uint64_t tmp{0} = MIN({1}, {2});".format(tmp_id, args[1], args[0]))
    return tmp_id + 1

def emit_lshft(tmp_id, args):
    """Emit a uint64_t left shift."""
    c("uint64_t tmp{0} = {1} << {2};".format(tmp_id, args[1], args[0]))
    return tmp_id + 1

def emit_rshft(tmp_id, args):
    """Emit a uint64_t right shift."""
    c("uint64_t tmp{0} = {1} >> {2};".format(tmp_id, args[1], args[0]))
    return tmp_id + 1

def emit_and(tmp_id, args):
    """Emit a uint64_t bitwise AND."""
    c("uint64_t tmp{0} = {1} & {2};".format(tmp_id, args[1], args[0]))
    return tmp_id + 1

# RPN operator -> (n operands, emitter)
ops = {
    "FADD": (2, emit_fadd),
    "FDIV": (2, emit_fdiv),
    "FMAX": (2, emit_fmax),
    "FMUL": (2, emit_fmul),
    "FSUB": (2, emit_fsub),
    "READ": (2, emit_read),
    "UADD": (2, emit_uadd),
    "UDIV": (2, emit_udiv),
    "UMUL": (2, emit_umul),
    "USUB": (2, emit_usub),
    "UMIN": (2, emit_umin),
    "<<": (2, emit_lshft),
    ">>": (2, emit_rshft),
    "AND": (2, emit_and),
}

def brkt(subexp):
    """Parenthesize `subexp` if it contains a space (i.e. is compound)."""
    if " " in subexp:
        return "(" + subexp + ")"
    return subexp

# Expression splicers: combine two operands (pop order, args[0] is the
# right-hand side) into a single C expression string.

def splice_bitwise_and(args):
    return brkt(args[1]) + " & " + brkt(args[0])

def splice_logical_and(args):
    return brkt(args[1]) + " && " + brkt(args[0])

def splice_ult(args):
    return brkt(args[1]) + " < " + brkt(args[0])

def splice_ugte(args):
    return brkt(args[1]) + " >= " + brkt(args[0])

# RPN operator -> (n operands, splicer)
exp_ops = {
    "AND": (2, splice_bitwise_and),
    "UGTE": (2, splice_ugte),
    "ULT": (2, splice_ult),
    "&&": (2, splice_logical_and),
}


# XML "$Variable" -> C expression reading it from the perf configuration.
hw_vars = {
    "$EuCoresTotalCount": "perf->sys_vars.n_eus",
    "$EuSlicesTotalCount": "perf->sys_vars.n_eu_slices",
    "$EuSubslicesTotalCount": "perf->sys_vars.n_eu_sub_slices",
    "$EuThreadsCount": "perf->sys_vars.eu_threads_count",
    "$SliceMask": "perf->sys_vars.slice_mask",
    # subslice_mask is interchangeable with subslice/dual-subslice since Gfx12+
    # only has dual subslices which can be assimilated with 16EUs subslices.
    "$SubsliceMask": "perf->sys_vars.subslice_mask",
    "$DualSubsliceMask": "perf->sys_vars.subslice_mask",
    "$GpuTimestampFrequency": "perf->sys_vars.timestamp_frequency",
    "$GpuMinFrequency": "perf->sys_vars.gt_min_freq",
    "$GpuMaxFrequency": "perf->sys_vars.gt_max_freq",
    "$SkuRevisionId": "perf->sys_vars.revision",
    "$QueryMode": "perf->sys_vars.query_mode",
}

def _counter_label(counter):
    """Return a printable name for `counter` (a plain string or an object with get('name'))."""
    if isinstance(counter, str):
        return counter
    return counter.get('name')

def output_rpn_equation_code(set, counter, equation):
    """Emit the C body computing an RPN `equation` and returning its value.

    `$` operands are resolved first against hw_vars and then against the
    counters of `set` (emitted as a call to that counter's read function).

    Raises:
        Exception: if a `$` variable cannot be resolved, or if the equation
            does not reduce to exactly one value.
    """
    c("/* RPN equation: " + equation + " */")
    stack = []
    tmp_id = 0

    for token in equation.split():
        stack.append(token)
        # Reduce as soon as an operator reaches the top of the stack.
        while stack and stack[-1] in ops:
            op = stack.pop()
            argc, callback = ops[op]
            args = []
            for _ in range(argc):
                operand = stack.pop()
                if operand[0] == "$":
                    if operand in hw_vars:
                        operand = hw_vars[operand]
                    elif operand in set.counter_vars:
                        operand = set.read_funcs[operand[1:]] + "(perf, query, results)"
                    else:
                        raise Exception("Failed to resolve variable " + operand + " in equation " + equation + " for " + set.name + " :: " + counter.get('name'))
                args.append(operand)

            tmp_id = callback(tmp_id, args)

            # The emitter left its result in the last temporary it declared.
            stack.append("tmp{0}".format(tmp_id - 1))

    if len(stack) != 1:
        raise Exception("Spurious empty rpn code for " + set.name + " :: " +
                        counter.get('name') + ".\nThis is probably due to some unhandled RPN function, in the equation \"" +
                        equation + "\"")

    value = stack[-1]

    # An equation may consist of a single variable with no operator at all.
    if value in hw_vars:
        value = hw_vars[value]
    if value in set.counter_vars:
        value = set.read_funcs[value[1:]] + "(perf, query, results)"

    c("\nreturn " + value + ";")

def splice_rpn_expression(set, counter, expression):
    """Convert an RPN `expression` into a single infix C expression string.

    Only the operators in exp_ops and the variables in hw_vars are supported.
    `counter` is used for diagnostics only and may be either a plain name or
    an object exposing get('name').

    Raises:
        Exception: if a `$` variable is unknown, or if the expression does
            not reduce to exactly one value.
    """
    stack = []

    for token in expression.split():
        stack.append(token)
        while stack and stack[-1] in exp_ops:
            op = stack.pop()
            argc, callback = exp_ops[op]
            args = []
            for _ in range(argc):
                operand = stack.pop()
                if operand[0] == "$":
                    if operand in hw_vars:
                        operand = hw_vars[operand]
                    else:
                        raise Exception("Failed to resolve variable " + operand + " in expression " + expression + " for " + set.name + " :: " + _counter_label(counter))
                args.append(operand)

            stack.append(callback(args))

    if len(stack) != 1:
        raise Exception("Spurious empty rpn expression for " + set.name + " :: " +
                        _counter_label(counter) + ".\nThis is probably due to some unhandled RPN operation, in the expression \"" +
                        expression + "\"")

    return stack[-1]

def output_counter_read(gen, set, counter):
    """Emit the C read function of `counter`.

    If a function with the same expanded equation (counter.read_hash) was
    already emitted, only a #define aliasing that function is written.
    """
    c("\n")
    c("/* {0} :: {1} */".format(set.name, counter.get('name')))

    if counter.read_hash in hashed_funcs:
        c("#define %s \\" % counter.read_sym)
        c_indent(3)
        c("%s" % hashed_funcs[counter.read_hash])
        c_outdent(3)
    else:
        ret_type = counter.get('data_type')
        if ret_type == "uint64":
            ret_type = "uint64_t"

        read_eq = counter.get('equation')

        c("static " + ret_type)
        c(counter.read_sym + "(UNUSED struct intel_perf_config *perf,\n")
        # Align the remaining parameters with the first one.
        c_indent(len(counter.read_sym) + 1)
        c("const struct intel_perf_query_info *query,\n")
        c("const struct intel_perf_query_result *results)\n")
        c_outdent(len(counter.read_sym) + 1)

        c("{")
        c_indent(3)
        output_rpn_equation_code(set, counter, read_eq)
        c_outdent(3)
        c("}")

        hashed_funcs[counter.read_hash] = counter.read_sym


def output_counter_max(gen, set, counter):
    """Emit the C max-value function of `counter`, if it has one.

    Nothing is written when counter.has_max_func() is false.  Functions with
    an already-emitted expanded equation are aliased with a #define.
    """
    max_eq = counter.get('max_equation')

    if not counter.has_max_func():
        return

    c("\n")
    c("/* {0} :: {1} */".format(set.name, counter.get('name')))

    if counter.max_hash in hashed_funcs:
        c("#define %s \\" % counter.max_sym())
        c_indent(3)
        c("%s" % hashed_funcs[counter.max_hash])
        c_outdent(3)
    else:
        ret_type = counter.get('data_type')
        if ret_type == "uint64":
            ret_type = "uint64_t"

        c("static " + ret_type)
        c(counter.max_sym() + "(struct intel_perf_config *perf)\n")
        c("{")
        c_indent(3)
        output_rpn_equation_code(set, counter, max_eq)
        c_outdent(3)
        c("}")

        hashed_funcs[counter.max_hash] = counter.max_sym()


c_type_sizes = { "uint32_t": 4, "uint64_t": 8, "float": 4, "double": 8, "bool": 4 }
def sizeof(c_type):
    """Return the size in bytes of a supported C type name."""
    return c_type_sizes[c_type]

def pot_align(base, pot_alignment):
    """Round `base` up to a multiple of `pot_alignment` (a power of two)."""
    return (base + pot_alignment - 1) & ~(pot_alignment - 1)

# XML semantic types that are emitted under a different counter type name.
semantic_type_map = {
    "duration": "raw",
    "ratio": "event"
    }

def output_availability(set, availability, counter_name):
    """Emit the opening `if (...) {` for an RPN `availability` expression.

    Top-level `&&` terms are written one per line.  The caller is
    responsible for emitting the closing brace.
    """
    expression = splice_rpn_expression(set, counter_name, availability)
    lines = expression.split(' && ')
    n_lines = len(lines)
    if n_lines == 1:
        c("if (" + lines[0] + ") {")
    else:
        c("if (" + lines[0] + " &&")
        c_indent(4)
        for middle in lines[1:-1]:
            c(middle + " &&")
        c(lines[-1] + ") {")
        c_outdent(4)


def output_units(unit):
    """Return the enum suffix for `unit`: upper-cased, spaces as underscores."""
    return unit.replace(' ', '_').upper()


# should a unit be visible in description?
units_map = {
    "bytes" : True,
    "cycles" : True,
    "eu atomic requests to l3 cache lines" : False,
    "eu bytes per l3 cache line" : False,
    "eu requests to l3 cache lines" : False,
    "eu sends to l3 cache lines" : False,
    "events" : True,
    "hz" : True,
    "messages" : True,
    "ns" : True,
    "number" : False,
    "percent" : True,
    "pixels" : True,
    "texels" : True,
    "threads" : True,
    "us" : True,
    "utilization" : False,
    }


def desc_units(unit):
    """Return the "Unit: <unit>." description suffix, or "" for hidden units.

    Raises:
        Exception: if `unit` is not listed in units_map.
    """
    val = units_map.get(unit)
    if val is None:
        raise Exception("Unknown unit: " + unit)
    if not val:
        return ""
    if unit == 'hz':
        unit = 'Hz'
    return "Unit: " + unit + "."


# Fields that identify a counter for de-duplication in the counters table.
counter_key_tuple = collections.namedtuple(
    'counter_key',
    [
        'name',
        'description',
        'symbol_name',
        'mdapi_group',
        'semantic_type',
        'data_type',
        'units',
    ]
)


def counter_key(counter):
    """Build the counter_key tuple of `counter` from its get() properties."""
    return counter_key_tuple._make([counter.get(field) for field in counter_key_tuple._fields])


def output_counter_struct(set, counter, idx,
                          name_to_idx, desc_to_idx,
                          symbol_name_to_idx, category_to_idx):
    """Emit the designated initializer `[idx] = { ... }` describing `counter`.

    `counter` is read through attributes (name, description, symbol_name,
    mdapi_group, semantic_type, data_type, units), i.e. a counter_key tuple.
    The *_to_idx tables give the offset of each string in its string table.
    """
    data_type_uc = counter.data_type.upper()

    semantic_type = counter.semantic_type
    if semantic_type in semantic_type_map:
        semantic_type = semantic_type_map[semantic_type]

    semantic_type_uc = semantic_type.upper()

    c("[" + str(idx) + "] = {\n")
    c_indent(3)
    c(".name_idx = " + str(name_to_idx[counter.name]) + ",\n")
    c(".desc_idx = " + str(desc_to_idx[counter.description + " " + desc_units(counter.units)]) + ",\n")
    c(".symbol_name_idx = " + str(symbol_name_to_idx[counter.symbol_name]) + ",\n")
    c(".category_idx = " + str(category_to_idx[counter.mdapi_group]) + ",\n")
    c(".type = INTEL_PERF_COUNTER_TYPE_" + semantic_type_uc + ",\n")
    c(".data_type = INTEL_PERF_COUNTER_DATA_TYPE_" + data_type_uc + ",\n")
    c(".units = INTEL_PERF_COUNTER_UNITS_" + output_units(counter.units) + ",\n")
    c_outdent(3)
    c("},\n")


def output_counter_report(set, counter, counter_to_idx, current_offset):
    """Emit the intel_perf_query_add_counter() call registering `counter`.

    The call is wrapped in an availability check when the counter has one.
    `current_offset` is aligned to the counter's C type size.

    Returns:
        The offset immediately following this counter's value.
    """
    c_type = counter.get('data_type')
    if "uint" in c_type:
        c_type = c_type + "_t"

    c("\n")

    availability = counter.get('availability')
    if availability:
        output_availability(set, availability, counter.get('name'))
        c_indent(3)

    key = counter_key(counter)
    idx = str(counter_to_idx[key])

    current_offset = pot_align(current_offset, sizeof(c_type))

    c("intel_perf_query_add_counter(query, " + idx + ", " +
      str(current_offset) + ", " +
      set.max_values[counter.get('symbol_name')] + ", (oa_counter_read_func)" +
      set.read_funcs[counter.get('symbol_name')] + ");\n")

    if availability:
        c_outdent(3)
        c("}")

    return current_offset + sizeof(c_type)


def str_to_idx_table(strs):
    """Map each string to its offset in a table of NUL-terminated strings.

    Strings are laid out in sorted order; each occupies len(s) + 1 slots.
    An empty input yields an empty table.

    NOTE(review): offsets count Python characters, so this presumably
    assumes every character is emitted as a single C char -- confirm for
    non-ASCII or escaped content.
    """
    str_to_idx = collections.OrderedDict()
    offset = 0
    for s in sorted(strs):
        str_to_idx[s] = offset
        offset += len(s) + 1  # +1 for the "\0" terminator

    return str_to_idx


def output_str_table(name: str, str_to_idx):
    """Emit `static const char <name>[]` holding all strings, NUL-separated."""
    c("\n")
    c("static const char " + name + "[] = {\n")
    c_indent(3)
    c("\n".join(f"/* {idx} */ \"{val}\\0\"" for val, idx in str_to_idx.items()))
    c_outdent(3)
    c("};\n")


# XML register_config type -> name of the C array / config field.
register_types = {
    'FLEX': 'flex_regs',
    'NOA': 'mux_regs',
    'OA': 'b_counter_regs',
}

def compute_register_lengths(set):
    """Return {register array name: total number of <register> entries}."""
    register_lengths = {}
    for register_config in set.findall('register_config'):
        t = register_types[register_config.get('type')]
        register_lengths[t] = (register_lengths.get(t, 0) +
                               len(register_config.findall('register')))

    return register_lengths


def generate_register_configs(set):
    """Emit the register programming arrays of `set` and hook them to query->config.

    Each register_config is wrapped in an availability check when it
    declares one.
    """
    for register_config in set.findall('register_config'):
        t = register_types[register_config.get('type')]

        availability = register_config.get('availability')
        if availability:
            output_availability(set, availability, register_config.get('type') + ' register config')
            c_indent(3)

        c("static const struct intel_perf_query_register_prog %s[] = {" % t)
        c_indent(3)
        for register in register_config.findall('register'):
            c("{ .reg = %s, .val = %s }," % (register.get('address'), register.get('value')))
        c_outdent(3)
        c("};")
        c("query->config.%s = %s;" % (t, t))
        c("query->config.n_%s = ARRAY_SIZE(%s);" % (t, t))

        if availability:
            c_outdent(3)
            c("}")
        c("\n")


# Wraps a <counter> element from the oa-*.xml files.
class Counter:
    """Wrapper around one <counter> XML element belonging to a metric set.

    Attributes are plain instance fields:
      xml       -- the wrapped XML element
      set       -- the owning set wrapper
      read_hash -- expanded read equation, filled in by compute_hashes()
      max_hash  -- expanded max equation (or None), filled in by compute_hashes()
      read_sym  -- C symbol of the generated read function
    """

    def __init__(self, set, xml):
        self.xml = xml
        self.set = set
        self.read_hash = None
        self.max_hash = None

        self.read_sym = self._symbol("read")

    def _symbol(self, suffix):
        """Build "<chipset>__<set>__<counter>__<suffix>" for this counter."""
        return "{0}__{1}__{2}__{3}".format(self.set.gen.chipset,
                                          self.set.underscore_name,
                                          self.xml.get('underscore_name'),
                                          suffix)

    def get(self, prop):
        """Return XML attribute `prop` of the wrapped element."""
        return self.xml.get(prop)

    # Compute the hash of a counter's equation by expanding (including all the
    # sub-equations it depends on)
    def compute_hashes(self):
        """Fill in read_hash / max_hash; a no-op once read_hash is set."""
        if self.read_hash is not None:
            return

        siblings = self.set.counter_vars

        def expand(token):
            # Only "$Name" tokens that refer to a counter of the same set are
            # replaced, by that counter's own (recursively expanded) hash.
            if token[0] == "$" and token in siblings:
                dependency = siblings[token]
                dependency.compute_hashes()
                return dependency.read_hash
            return token

        read_eq = self.xml.get('equation')
        self.read_hash = ' '.join(expand(token) for token in read_eq.split())

        max_eq = self.xml.get('max_equation')
        if max_eq:
            self.max_hash = ' '.join(expand(token) for token in max_eq.split())

    def has_max_func(self):
        """Tell whether a C max function must be generated for this counter.

        False when there is no max equation, when it is a plain number, or
        when it references a `$` variable that is not a hardware variable.
        """
        max_eq = self.xml.get('max_equation')
        if not max_eq:
            return False

        try:
            float(max_eq)
        except ValueError:
            pass
        else:
            # A numeric constant needs no function.
            return False

        return not any(token[0] == '$' and token not in hw_vars
                       for token in max_eq.split())

    def max_sym(self):
        """Return the C symbol of the max function (requires has_max_func())."""
        assert self.has_max_func()
        return self._symbol("max")

    def max_value(self):
        """Return the C expression giving this counter's maximum value."""
        max_eq = self.xml.get('max_equation')
        if not max_eq:
            return "0 /* undefined */"

        try:
            return "{0}".format(float(max_eq))
        except ValueError:
            pass

        if any(token[0] == '$' and token not in hw_vars
               for token in max_eq.split()):
            return "0 /* unsupported (varies over time) */"

        return self._symbol("max") + "(perf)"

# Wraps a <set> element from the oa-*.xml files.
class Set:
    """Wrapper around one <set> XML element and its counters.

    Attributes are plain instance fields:
      gen          -- the owning Gen wrapper
      xml          -- the wrapped XML element
      counters     -- Counter wrappers, in document order
      counter_vars -- "$<symbol_name>" -> Counter
      read_funcs   -- symbol_name -> C read function symbol
      max_values   -- symbol_name -> C expression of the maximum value
    """

    def __init__(self, gen, xml):
        self.gen = gen
        self.xml = xml

        self.counter_vars = {}
        self.max_values = {}
        self.read_funcs = {}

        self.counters = [Counter(self, node) for node in self.xml.findall("counter")]
        for wrapped in self.counters:
            symbol = wrapped.get('symbol_name')
            self.counter_vars["$" + symbol] = wrapped
            self.read_funcs[symbol] = wrapped.read_sym
            self.max_values[symbol] = wrapped.max_value()

        # Hashes can only be computed once every counter of the set is known,
        # because equations may refer to sibling counters.
        for wrapped in self.counters:
            wrapped.compute_hashes()

    @property
    def hw_config_guid(self):
        """The set's `hw_config_guid` XML attribute."""
        return self.xml.get('hw_config_guid')

    @property
    def name(self):
        """The set's `name` XML attribute."""
        return self.xml.get('name')

    @property
    def symbol_name(self):
        """The set's `symbol_name` XML attribute."""
        return self.xml.get('symbol_name')

    @property
    def underscore_name(self):
        """The set's `underscore_name` XML attribute."""
        return self.xml.get('underscore_name')

    def findall(self, path):
        """Forward to findall() of the wrapped XML element."""
        return self.xml.findall(path)

    def find(self, path):
        """Forward to find() of the wrapped XML element."""
        return self.xml.find(path)


# Wraps an entire oa-*.xml file.
class Gen:
    """Wrapper around a whole oa-*.xml file.

    Attributes are plain instance fields:
      filename -- path of the parsed XML file
      xml      -- the parsed ElementTree
      chipset  -- lower-cased `chipset` attribute of the first <set>
      sets     -- Set wrappers for every <set>, in document order
    """

    def __init__(self, filename):
        self.filename = filename
        self.xml = et.parse(self.filename)

        # The chipset is taken from the first set; fail with a readable
        # message rather than an AttributeError when the file has none.
        first_set = self.xml.find('.//set')
        if first_set is None:
            raise Exception("No <set> element found in {0}".format(self.filename))
        self.chipset = first_set.get('chipset').lower()

        self.sets = []
        for xml_set in self.xml.findall(".//set"):
            self.sets.append(Set(self, xml_set))


def _generate(args):
    """Write the complete header and C file for the XML files in `args`.

    Expects the c_file and header_file globals to be open for writing.
    """
    gens = [Gen(xml_file) for xml_file in args.xml_files]

    copyright_text = textwrap.dedent("""\
        /* Autogenerated file, DO NOT EDIT manually! generated by {}
         *
         * Copyright (c) 2015 Intel Corporation
         *
         * Permission is hereby granted, free of charge, to any person obtaining a
         * copy of this software and associated documentation files (the "Software"),
         * to deal in the Software without restriction, including without limitation
         * the rights to use, copy, modify, merge, publish, distribute, sublicense,
         * and/or sell copies of the Software, and to permit persons to whom the
         * Software is furnished to do so, subject to the following conditions:
         *
         * The above copyright notice and this permission notice (including the next
         * paragraph) shall be included in all copies or substantial portions of the
         * Software.
         *
         * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
         * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
         * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
         * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
         * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
         * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
         * DEALINGS IN THE SOFTWARE.
         */

        """).format(os.path.basename(__file__))

    h(copyright_text)
    h(textwrap.dedent("""\
        #pragma once

        struct intel_perf_config;

        """))

    c(copyright_text)
    c(textwrap.dedent("""\
        #include <stdint.h>
        #include <stdbool.h>

        #include <drm-uapi/i915_drm.h>

        #include "util/hash_table.h"
        #include "util/ralloc.h"

        """))

    c("#include \"" + os.path.basename(args.header) + "\"")

    c(textwrap.dedent("""\
        #include "perf/intel_perf.h"
        #include "perf/intel_perf_setup.h"
        """))

    # Collect every distinct string so each is stored once in its table.
    names = builtins.set()
    descs = builtins.set()
    symbol_names = builtins.set()
    categories = builtins.set()
    for gen in gens:
        for metric_set in gen.sets:
            for counter in metric_set.counters:
                names.add(counter.get('name'))
                symbol_names.add(counter.get('symbol_name'))
                descs.add(counter.get('description') + " " + desc_units(counter.get('units')))
                categories.add(counter.get('mdapi_group'))

    name_to_idx = str_to_idx_table(names)
    output_str_table("name", name_to_idx)

    desc_to_idx = str_to_idx_table(descs)
    output_str_table("desc", desc_to_idx)

    symbol_name_to_idx = str_to_idx_table(symbol_names)
    output_str_table("symbol_name", symbol_name_to_idx)

    category_to_idx = str_to_idx_table(categories)
    output_str_table("category", category_to_idx)

    # Print out all equation functions.
    for gen in gens:
        for metric_set in gen.sets:
            for counter in metric_set.counters:
                output_counter_read(gen, metric_set, counter)
                output_counter_max(gen, metric_set, counter)

    c("\n")
    c("static const struct intel_perf_query_counter_data counters[] = {\n")
    c_indent(3)

    # Counters sharing the same key are described only once.
    counter_to_idx = collections.OrderedDict()
    idx = 0
    for gen in gens:
        for metric_set in gen.sets:
            for counter in metric_set.counters:
                key = counter_key(counter)
                if key not in counter_to_idx:
                    counter_to_idx[key] = idx
                    output_counter_struct(metric_set, key, idx,
                                          name_to_idx,
                                          desc_to_idx,
                                          symbol_name_to_idx,
                                          category_to_idx)
                    idx += 1

    c_outdent(3)
    c("};\n\n")

    c(textwrap.dedent("""\
        typedef uint64_t (*oa_counter_read_func)(struct intel_perf_config *perf,
                                                 const struct intel_perf_query_info *query,
                                                 const struct intel_perf_query_result *results);
        static void ATTRIBUTE_NOINLINE
        intel_perf_query_add_counter(struct intel_perf_query_info *query,
                                     int counter_idx, size_t offset,
                                     uint64_t raw_max, oa_counter_read_func oa_counter_read_uint64)
        {
           struct intel_perf_query_counter *dest = &query->counters[query->n_counters++];
           const struct intel_perf_query_counter_data *counter = &counters[counter_idx];

           dest->name = &name[counter->name_idx];
           dest->desc = &desc[counter->desc_idx];
           dest->symbol_name = &symbol_name[counter->symbol_name_idx];
           dest->category = &category[counter->category_idx];
           dest->raw_max = raw_max;

           dest->offset = offset;
           dest->type = counter->type;
           dest->data_type = counter->data_type;
           dest->units = counter->units;
           dest->oa_counter_read_uint64 = oa_counter_read_uint64;
        }
        """))

    # Print out all metric sets registration functions for each set in each
    # generation.
    for gen in gens:
        for metric_set in gen.sets:
            counters = metric_set.counters

            c("\n")
            c("\nstatic void\n")
            c("{0}_register_{1}_counter_query(struct intel_perf_config *perf)\n".format(gen.chipset, metric_set.underscore_name))
            c("{\n")
            c_indent(3)

            if gen.chipset == "hsw":
                c("struct intel_perf_query_info *query = hsw_query_alloc(perf, %u);\n" % len(counters))
            else:
                c("struct intel_perf_query_info *query = bdw_query_alloc(perf, %u);\n" % len(counters))
            c("\n")
            c("query->name = \"" + metric_set.name + "\";\n")
            c("query->symbol_name = \"" + metric_set.symbol_name + "\";\n")
            c("query->guid = \"" + metric_set.hw_config_guid + "\";\n")

            c("\n")
            c("struct intel_perf_query_counter *counter = query->counters;\n")

            c("\n")
            c("/* Note: we're assuming there can't be any variation in the definition ")
            c(" * of a query between contexts so it's ok to describe a query within a ")
            c(" * global variable which only needs to be initialized once... */")
            c("\nif (!query->data_size) {")
            c_indent(3)

            generate_register_configs(metric_set)

            offset = 0
            for counter in counters:
                offset = output_counter_report(metric_set, counter, counter_to_idx, offset)

            c("\ncounter = &query->counters[query->n_counters - 1];\n")
            c("query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);\n")

            c_outdent(3)
            c("}")

            c("\n_mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);")

            c_outdent(3)
            c("}\n")

        h("void intel_oa_register_queries_" + gen.chipset + "(struct intel_perf_config *perf);\n")

        c("\nvoid")
        c("intel_oa_register_queries_" + gen.chipset + "(struct intel_perf_config *perf)")
        c("{")
        c_indent(3)

        for metric_set in gen.sets:
            c("{0}_register_{1}_counter_query(perf);".format(gen.chipset, metric_set.underscore_name))

        c_outdent(3)
        c("}")


def main():
    """Parse the command line and generate the header and C file."""
    global c_file
    global header_file

    parser = argparse.ArgumentParser()
    parser.add_argument("--header", help="Header file to write", required=True)
    parser.add_argument("--code", help="C file to write", required=True)
    parser.add_argument("xml_files", nargs='+', help="List of xml metrics files to process")

    args = parser.parse_args()

    # The `as` targets bind the module globals used by c() and h(); the with
    # statement guarantees both files are flushed and closed, even on error.
    with open(args.code, 'w') as c_file, open(args.header, 'w') as header_file:
        _generate(args)


if __name__ == '__main__':
    main()