Source code for reframechecks.common.sphexa.sanity_perftools

# Copyright 2019-2020 Swiss National Supercomputing Centre (CSCS/ETH Zurich)
# HPCTools Project Developers. See the top-level LICENSE file for details.
#
# SPDX-License-Identifier: BSD-3-Clause

import os
import reframe as rfm
import reframe.utility.sanity as sn
import numpy as np
from reframe.core.fields import ScopedDict
import sphexa.sanity as sphs


class PerftoolsBaseTest(rfm.RegressionTest):
    def __init__(self):
        x = 0

    # {{{ sanity patterns
    # {{{ patrun_version
    @rfm.run_before('sanity')
    def patrun_version(self):
        '''Checks tool's version:

        .. code-block::

          > pat_run -V
          CrayPat/X:  Version 20.08.0 Revision 28ef35c9f
        '''
        reference_tool_version = {
            'daint': '20.08.0',
            'dom': '20.08.0',
            'eiger': '20.11.0',
            'pilatus': '20.11.0',
        }
        ref_version = reference_tool_version[self.current_system.name]
        regex = r'^CrayPat/X:\s+Version (?P<toolversion>\S+) Revision'
        res_version = sn.extractsingle(regex, self.version_rpt, 'toolversion')
        # self.sanity_patterns_l.append(sn.assert_eq(res_version, ref_version,
        #                               msg='sanityV failed "{0}"'))
    # }}}
    # }}}

    # {{{ regex functions
    # {{{ patrun: number of compute nodes
    @rfm.run_before('performance')
    def patrun_num_of_compute_nodes(self):
        '''Extract the number of compute nodes to compute averages

        .. code-block::

          > ls 96mpi/sqpatch.exe+8709-4s/xf-files/:
          000004.xf  000005.xf  000006.xf  000007.xf

        Typical output:
          * patrun_cn: 4
        '''
        regex = r'^(?P<cn>\d+.xf)$'
        self.num_cn = sn.count(sn.extractall(regex, self.stdout, 'cn'))
    # }}}

    # {{{ perftools-lite: Memory
    @rfm.run_before('performance')
    def perftools_lite_memory(self):
        '''
        # 20.10.0 / AMD
        High Memory: 85,743.7 MiBytes 669.9 MiBytes per PE
        # More --> pat_report -O himem exe+141047-1002s/index.ap2 > rpt.mem
        '''
        res = {}
        regex = (r'^High Memory:\s+(?P<mem_cn>\S+) MiBytes\s+(?P<mem_c>\S+) '
                 r'MiBytes per PE')
        self.ptl_high_mem = sn.extractsingle(
            regex, self.stdout, 'mem_cn',
            conv=lambda x: int(x.replace(',', '').split('.')[0]))
        self.ptl_high_mem_c = sn.extractsingle(
            regex, self.stdout, 'mem_c',
            conv=lambda x: int(x.replace(',', '').split('.')[0]))
    # }}}

    # {{{ patrun: table Wall Clock Time, Memory
    @rfm.run_before('performance')
    def patrun_walltime_and_memory(self):
        '''This table shows total wall clock time for the ranks with the
        maximum, mean, and minimum time, as well as the average across ranks.

        .. code-block::

          Table 10:  Wall Clock Time, Memory High Water Mark

             Process |   Process | PE=[mmm]
                Time |     HiMem |
                     | (MiBytes) |

           11.389914 |      76.3 | Total    <-- avgt
          |--------------------------------
          | 11.398188 |     57.7 | pe.24    <-- maxt
          | 11.389955 |     98.9 | pe.34
          | 11.365630 |     54.0 | pe.93    <-- mint
          |================================

        Typical output:
          * patrun_wallt_max: 11.3982 s
          * patrun_wallt_avg: 11.3899 s
          * patrun_wallt_min: 11.3656 s
          * patrun_mem_max: 57.7 MiBytes
          * patrun_mem_min: 54.0 MiBytes
        '''
        # TODO: bug avg mem?
        res = {}
        # --- avg
        regex = (r'^Table \d+: Wall Clock Time, Memory High Water Mark\n'
                 r'(.*\n){4}\s+(.*\n)\s+(?P<proct>\S+)\s+\|\s+(?P<mem>\S+)'
                 r' \| Total$')
        res['patrun_wallt_avg'] = sn.extractsingle(regex, self.stdout,
                                                   'proct', float)
        res['patrun_mem_avg'] = sn.extractsingle(regex, self.stdout,
                                                 'mem', float)
        # --- max
        regex = (r'^Table \d+: Wall Clock Time, Memory High Water Mark\n'
                 r'(.*\n){4}\s+(.*\n){2}\|\s+(?P<proct>\S+) \|\s+(?P<mem>\S+)'
                 r'\s+\|\s(?P<pe>\S+)$')
        res['patrun_wallt_max'] = sn.extractsingle(regex, self.stdout,
                                                   'proct', float)
        res['patrun_mem_max'] = sn.extractsingle(regex, self.stdout,
                                                 'mem', float)
        res['patrun_mem_max_pe'] = sn.extractsingle(regex, self.stdout,
                                                    'pe', float)
        # --- min
        regex = (r'^Table \d+: Wall Clock Time, Memory High Water Mark\n'
                 r'(.*\n){4}\s+(.*\n){4}\|\s+(?P<proct>\S+) \|\s+(?P<mem>\S+)'
                 r'\s+\|\s(?P<pe>\S+)$')
        res['patrun_wallt_min'] = sn.extractsingle(regex, self.stdout,
                                                   'proct', float)
        res['patrun_mem_min'] = sn.extractsingle(regex, self.stdout,
                                                 'mem', float)
        res['patrun_mem_min_pe'] = sn.extractsingle(regex, self.stdout,
                                                    'pe', float)
        for kk, vv in res.items():
            if not isinstance(vv, str):
                res[kk] = sn.round(vv, 4)

        self.patrun_perf_d = res
    # }}}

    # {{{ patrun: table Memory Bandwidth by Numanode
    @rfm.run_before('performance')
    def patrun_memory_bw(self):
        '''This table shows memory traffic to local and remote memory for
        numa nodes, taking for each numa node the maximum value across nodes.

        .. code-block::

          Table 9:  Memory Bandwidth by Numanode

            Memory |   Local |    Thread |  Memory |  Memory | Numanode
           Traffic |  Memory |      Time | Traffic | Traffic |  Node Id
            GBytes | Traffic |           |  GBytes |       / |  PE=HIDE
                   |  GBytes |           |   / Sec | Nominal |
                   |         |           |         |    Peak |
          |--------------------------------------------------------------
          |  33.64 |   33.64 | 11.360701 |    2.96 |    4.3% | numanode.0
          ||-------------------------------------------------------------
          ||  33.64 |  33.64 | 11.359413 |    2.96 |    4.3% | nid.4
          ||  33.59 |  33.59 | 11.359451 |    2.96 |    4.3% | nid.6
          ||  33.24 |  33.24 | 11.360701 |    2.93 |    4.3% | nid.5
          ||  28.24 |  28.24 | 11.355006 |    2.49 |    3.6% | nid.7
          |==============================================================

          2 sockets:

          Table 10:  Memory Bandwidth by Numanode

            Memory |   Local |  Remote |   Thread |  Memory |  Memory | Numanode
           Traffic |  Memory |  Memory |     Time | Traffic | Traffic |  Node Id
            GBytes | Traffic | Traffic |          |  GBytes |       / |  PE=HIDE
                   |  GBytes |  GBytes |          |   / Sec | Nominal |
                   |         |         |          |         |    Peak |
          |-------------------------------------------------------------------
          |  11.21 |   10.99 |    0.22 | 3.886926 |    2.88 |    3.8% | numanode.0
          ||------------------------------------------------------------------
          ||  11.21 |  10.99 |    0.22 | 3.886926 |    2.88 |    3.8% | nid.407
          ||  10.47 |  10.27 |    0.20 | 3.886450 |    2.69 |    3.5% | nid.416
          ||==================================================================
          |  11.29 |   11.06 |    0.23 | 3.889932 |    2.90 |    3.8% | numanode.1
          ||------------------------------------------------------------------
          ||  11.29 |  11.06 |    0.23 | 3.889932 |    2.90 |    3.8% | nid.407
          ||  10.09 |   9.88 |    0.20 | 3.885858 |    2.60 |    3.4% | nid.416
          |===================================================================

        Typical output:
          * patrun_memory_traffic_global: 33.64 GB
          * patrun_memory_traffic_local: 33.64 GB
          * %patrun_memory_traffic_peak: 4.3 %
        '''
        res = {}
        regex = (r'^Table \d+:\s+Memory Bandwidth by Numanode\n(.*\n){7}\|\s+'
                 r'(?P<GBytes>\S+)\s+\|\s+(?P<GBytes_localm>\S+)'
                 r'(\s+\|\s+\S+){2,3}\s+\|\s+(?P<peak_pct>\S+)%')
        res['memory_traffic_global'] = sn.extractsingle(regex, self.stdout,
                                                        'GBytes', float)
        res['memory_traffic_local'] = sn.extractsingle(regex, self.stdout,
                                                       'GBytes_localm', float)
        res['memory_traffic_peak'] = sn.extractsingle(regex, self.stdout,
                                                      'peak_pct', float)
        #
        if self.patrun_perf_d:
            self.patrun_perf_d = {**self.patrun_perf_d, **res}
        else:
            self.patrun_perf_d = res
    # }}}

    # {{{ patrun: table HW Performance Counter
    @rfm.run_before('performance')
    def patrun_hwpc(self):
        '''This table shows HW performance counter data for the whole
        program, averaged across ranks or threads, as applicable.

        .. code-block::

          Table 4:  Program HW Performance Counter Data
          ...
          Thread Time                                            11.352817 secs
          UNHALTED_REFERENCE_CYCLES                         28,659,167,096
          CPU_CLK_THREAD_UNHALTED:THREAD_P                  34,170,540,119
          DTLB_LOAD_MISSES:WALK_DURATION                        61,307,848
          INST_RETIRED:ANY_P                                22,152,242,298
          RESOURCE_STALLS:ANY                               19,793,119,676
          OFFCORE_RESPONSE_0:ANY_REQUEST:L3_MISS_LOCAL          20,949,344
          CPU CLK Boost                                               1.19 X
          Resource stall cycles / Cycles              -->            57.9%
          Memory traffic GBytes                       -->   0.118G/sec  1.34 GB
          Local Memory traffic GBytes                       0.118G/sec  1.34 GB
          Memory Traffic / Nominal Peak                                0.2%
          DTLB Miss Ovhd                              61,307,848 cycles  0.2% cycles
          Retired Inst per Clock                      -->             0.65
          ==============================================================================

        Typical output:
          * patrun_memory_traffic: 1.34 GB
          * patrun_ipc: 0.65
          * %patrun_stallcycles: 57.9 %
        '''
        res = {}
        regex = (r'^Table \d+:\s+Program HW Performance Counter Data\n'
                 r'(.*\n){15}.*Resource stall cycles \/ Cycles\s+(?P<pp>\S+)%')
        res['stallcycles'] = sn.extractsingle(regex, self.stdout, 'pp', float)
        #
        regex = (r'^Table \d+:\s+Program HW Performance Counter Data\n'
                 r'(.*\n){16}.*Memory traffic GBytes.*\s+(?P<GB>\S+) GB')
        res['memory_traffic'] = sn.extractsingle(regex, self.stdout, 'GB',
                                                 float)
        #
        regex = (r'^Table \d+:\s+Program HW Performance Counter Data\n'
                 r'(.*\n){20}.*Retired Inst per Clock\s+(?P<ipc>\S+)')
        res['ipc'] = sn.extractsingle(regex, self.stdout, 'ipc', float)
        #
        self.patrun_hwc_d = res
        # if self.patrun_perf_d:
        #     self.patrun_perf_d = {**self.patrun_perf_d, **res}
        # else:
        #     self.patrun_perf_d = res
    # }}}

    # {{{ patrun: table energy and power usage
    @rfm.run_before('performance')
    def patrun_energy_power(self):
        '''This table shows energy and power usage per node (from Cray PM),
        together with the total usage across nodes.

        .. code-block::

          Table 8:  Program energy and power usage (from Cray PM)

             Node |      Node |   Process | Node Id
           Energy |     Power |      Time |  PE=HIDE
              (J) |       (W) |           |

            7,891 |   692.806 | 11.389914 | Total     <---
          |-----------------------------------------
          |  2,076 |  182.356 | 11.384319 | nid.7
          |  1,977 |  173.548 | 11.391657 | nid.4
          |  1,934 |  169.765 | 11.392220 | nid.6
          |  1,904 |  167.143 | 11.391461 | nid.5
          |========================================

        Typical output:
          * patrun_avg_power: 692.806 W
        '''
        res = {}
        # eiger:
        regex = (r'^Table \d+:\s+Program energy and power usage \(from Cray '
                 r'PM\).*\n(.*\n){5}\s+(?P<nrgy>\S+)\s+\|\s+(?P<power>\S+).*'
                 r'(Total|Avg of PE values)$')
        res['energy_avg'] = \
            sn.extractsingle(regex, self.stdout, 'nrgy',
                             conv=lambda x: int(float(x.replace(',', '')))) \
            / self.num_cn
        res['power_avg'] = \
            sn.extractsingle(regex, self.stdout, 'power',
                             conv=lambda x: int(float(x.replace(',', '')))) \
            / self.num_cn
        if self.patrun_perf_d:
            self.patrun_perf_d = {**self.patrun_perf_d, **res}
        else:
            self.patrun_perf_d = res
    # }}}

    # {{{ patrun: table Profile by Function
    @rfm.run_before('performance')
    def patrun_samples(self):
        '''Elapsed time (in samples) reported by the tool:

        .. code-block::

          Table 1:  Profile by Function

            Samp% |    Samp |  Imb. |  Imb. | Group
                  |         |  Samp | Samp% |  Function
                  |         |       |       |   PE=HIDE

           100.0% |   382.8 |    -- |    -- | Total

        TODO:
          Experiment: samp_cs_time
          Sampling interval: 10000 microsecs
        '''
        regex = (r'^Table 1: Profile by Function\n(.*\n){4}\s+100.0%\s+\|\s+'
                 r'(?P<sam>\S+)\s+')
        self.patrun_sample = sn.extractsingle(regex, self.stdout, 'sam', float)
    # }}}

    # {{{ patrun: hotspot1
    @rfm.run_after('sanity')
    def patrun_hotspot1(self):
        '''Reports the hottest function (name and sample percentage) from the
        "Profile by Group, Function, and Line" table of the report.
        '''
        regex = (r'^Table \d+: Profile by Group, Function, and Line.*\n'
                 r'(.*\n){7}\s+.*Total\n(.*\n){3}(\|)+\s+(?P<pct>\S+)%.*\|\s+'
                 r'(?P<fname>(sphexa.*|MPI_.*))$')
        # --- ok:
        rpt = os.path.join(self.stagedir, self.rpt)
        self.patrun_hotspot1_pct = sn.extractsingle(regex, rpt, 'pct', float)
        self.patrun_hotspot1_name = sn.extractsingle(regex, rpt, 'fname')
        # {{{
        # --- ko:
        # self.patrun_hotspot1_pct = \
        #     sn.extractsingle(regex, self.stdout, 'pct', float)
        # self.patrun_hotspot1_name = \
        #     sn.extractsingle(regex, self.stdout, 'fname')
        # --- ko:
        # self.patrun_hotspot1_pct = \
        #     sn.extractsingle(regex, self.rpt, 'pct', float)
        # self.patrun_hotspot1_name = \
        #     sn.extractsingle(regex, self.rpt, 'fname')
        # }}}
    # }}}

    # {{{ patrun: hotspot1 MPI
    @rfm.run_after('sanity')
    def patrun_hotspot1_mpi(self):
        '''
        .. code-block::

          Table 1:  Profile by Function

            Samp% |     Samp |    Imb. |  Imb. | Group
                  |          |    Samp | Samp% |  Function
                  |          |         |       |   PE=HIDE

           100.0% |  1,126.4 |      -- |    -- | Total
          ...
          ||=================================================
          |   9.9% |    111.4 |      -- |    -- | MPI
          ||-------------------------------------------------
          ||  5.2% |     58.2 |   993.8 | 95.5% | MPI_Allreduce   <--
          ||  3.6% |     40.9 |   399.1 | 91.7% | MPI_Recv
        '''
        rpt = os.path.join(self.stagedir, self.rpt)
        regex = (r'^Table 1: Profile by Function(.*\n){10}.*^\|.* '
                 r'(?P<samp_pct>\S+)%.* (?P<imb_pct>\S+)%.*'
                 r'(?P<fname>sphexa\S+|MPI_\S+)')
        res = {}
        res['mpi_h1'] = sn.extractsingle(regex, rpt, 'samp_pct', float)
        res['mpi_h1_imb'] = sn.extractsingle(regex, rpt, 'imb_pct', float)
        res['mpi_h1_name'] = sn.extractsingle(regex, rpt, 'fname')
        #
        self.mpi_h1 = res['mpi_h1']
        self.mpi_h1_imb = res['mpi_h1_imb']
        self.mpi_h1_name = res['mpi_h1_name']
        # if self.patrun_perf_d:
        #     self.patrun_perf_d = {**self.patrun_perf_d, **res}
        # else:
        #     self.patrun_perf_d = res
    # }}}

    # TODO: rpt from sqpatch.exe+5046-0s/rpt-files/RUNTIME.rpt
    # {{{ patrun: imbalance
    @rfm.run_after('sanity')
    def patrun_imbalance(self):
        # {{{
        '''Load imbalance from csv report

        .. code-block::

          Table 1:  Load Balance with MPI Message Stats
        '''
        # }}}
        rpt = os.path.join(self.stagedir, self.csv_rpt)
        if self.num_tasks == 1:
            regex_use = r'^(?P<pe>1),\S+,\s?(?P<samples>\S+),USER$'
            regex_mpi = r'^(?P<pe>1),\S+,\s?(?P<samples>\S+),MPI$'
            regex_etc = r'^(?P<pe>1),\S+,\s?(?P<samples>\S+),ETC$'
        else:
            regex_use = r'^2,\S+,\s?(?P<samples>\S+),USER/pe.(?P<pe>\d+)$'
            regex_mpi = r'^2,\S+,\s?(?P<samples>\S+),MPI/pe.(?P<pe>\d+)$'
            regex_etc = r'^2,\S+,\s?(?P<samples>\S+),ETC/pe.(?P<pe>\d+)$'

        res_user_sm_l = sn.extractall(regex_use, rpt, 'samples', float)
        res_user_pe_l = sn.extractall(regex_use, rpt, 'pe', int)
        # MPI:
        res_mpi_sm_l = sn.extractall(regex_mpi, rpt, 'samples', float)
        res_mpi_pe_l = sn.extractall(regex_mpi, rpt, 'pe', int)
        if not sn.evaluate(res_mpi_sm_l):
            res_mpi_sm_l = [0 for i in sn.evaluate(res_user_sm_l)]
            res_mpi_pe_l = [i for i in sn.evaluate(res_user_pe_l)]

        # ETC:
        res_etc_sm_l = sn.extractall(regex_etc, rpt, 'samples', float)
        res_etc_pe_l = sn.extractall(regex_etc, rpt, 'pe', int)
        # DICT from LISTs: dict(zip(pe,usr))
        # TOTAL = USER+MPI+ETC
        res_total_sm_l = []
        # WARNING: this fails if data is not sorted by pe, use pat_report
        # with: -s sort_by_pe='yes' !!!
        res_total_sm_l = [sum(sam) for sam in zip(res_user_sm_l,
                                                  res_mpi_sm_l,
                                                  res_etc_sm_l)]
        # USER pes
        # {{{ slowest pe (USER)
        # slowest = max(max(res_user_sm_l),
        #               max(res_mpi_sm_l),
        #               max(res_etc_sm_l))
        slowest = max(res_user_sm_l)
        user_slowest_pe = -1
        index = -1
        if slowest in res_user_sm_l:
            for sam in res_user_sm_l:
                index += 1
                if sam == slowest:
                    user_slowest_pe = index

        if user_slowest_pe == -1:
            user_slowest_pe = 0
        # }}}
        # {{{ fastest pe (USER)
        fastest = min(res_user_sm_l)
        user_fastest_pe = -1
        index = -1
        for sam in res_user_sm_l:
            index += 1
            if sam == fastest:
                user_fastest_pe = index

        if user_fastest_pe == -1:
            user_fastest_pe = 0
        # }}}

        # MPI pes
        # {{{ slowest pe (MPI)
        slowest = max(res_mpi_sm_l)
        # try:
        #     slowest = max(res_mpi_sm_l)
        # except ValueError:
        #     slowest = 0
        mpi_slowest_pe = -1
        index = -1
        if slowest in res_mpi_sm_l:
            for sam in res_mpi_sm_l:
                index += 1
                if sam == slowest:
                    mpi_slowest_pe = index

        if mpi_slowest_pe == -1:
            mpi_slowest_pe = 0
        # }}}
        # {{{ fastest pe (MPI)
        fastest = min(res_mpi_sm_l)
        # try:
        #     fastest = min(res_mpi_sm_l)
        # except ValueError:
        #     fastest = 0
        mpi_fastest_pe = -1
        index = -1
        for sam in res_mpi_sm_l:
            index += 1
            if sam == fastest:
                mpi_fastest_pe = index

        if mpi_fastest_pe == -1:
            mpi_fastest_pe = 0
        # }}}

        # ETC pes
        # {{{ slowest pe (ETC)
        slowest = max(res_etc_sm_l)
        etc_slowest_pe = -1
        index = -1
        if slowest in res_etc_sm_l:
            for sam in res_etc_sm_l:
                index += 1
                if sam == slowest:
                    etc_slowest_pe = index

        if etc_slowest_pe == -1:
            etc_slowest_pe = 0
        # }}}
        # {{{ fastest pe (ETC)
        fastest = min(res_etc_sm_l)
        etc_fastest_pe = -1
        index = -1
        for sam in res_etc_sm_l:
            index += 1
            if sam == fastest:
                etc_fastest_pe = index

        if etc_fastest_pe == -1:
            etc_fastest_pe = 0
        # }}}

        # TOTAL pes
        # {{{ slowest pe (TOTAL)
        slowest = max(res_total_sm_l)
        # try:
        #     slowest = max(res_total_sm_l)
        # except ValueError:
        #     slowest = 0
        total_slowest_pe = -1
        index = -1
        if slowest in res_total_sm_l:
            for sam in res_total_sm_l:
                index += 1
                if sam == slowest:
                    total_slowest_pe = index

        if total_slowest_pe == -1:
            total_slowest_pe = 0
        # }}}
        # {{{ fastest pe (TOTAL)
        fastest = min(res_total_sm_l)
        # try:
        #     fastest = min(res_total_sm_l)
        # except ValueError:
        #     fastest = 0
        total_fastest_pe = -1
        index = -1
        for sam in res_total_sm_l:
            index += 1
            if sam == fastest:
                total_fastest_pe = index

        if total_fastest_pe == -1:
            total_fastest_pe = 0
        # }}}

        # {{{ res dict
        res = {}
        # min/(mean=average)/median/max
        res['user_samples_min'] = sn.round(sn.min(res_user_sm_l), 0)
        res['mpi_samples_min'] = sn.round(sn.min(res_mpi_sm_l), 0)
        res['etc_samples_min'] = sn.round(sn.min(res_etc_sm_l), 0)
        res['total_samples_min'] = sn.round(sn.min(res_total_sm_l), 0)
        #
        res['user_samples_mean'] = sn.round(sn.avg(res_user_sm_l), 1)
        res['mpi_samples_mean'] = sn.round(sn.avg(res_mpi_sm_l), 1)
        res['etc_samples_mean'] = sn.round(sn.avg(res_etc_sm_l), 1)
        res['total_samples_mean'] = sn.round(sn.avg(res_total_sm_l), 1)
        #
        res['user_samples_median'] = \
            sn.sanity_function(np.median)(res_user_sm_l)
        res['mpi_samples_median'] = sn.sanity_function(np.median)(res_mpi_sm_l)
        res['etc_samples_median'] = sn.sanity_function(np.median)(res_etc_sm_l)
        res['total_samples_median'] = \
            sn.sanity_function(np.median)(res_total_sm_l)
        #
        res['user_samples_max'] = sn.round(sn.max(res_user_sm_l), 0)
        res['mpi_samples_max'] = sn.round(sn.max(res_mpi_sm_l), 0)
        res['etc_samples_max'] = sn.round(sn.max(res_etc_sm_l), 0)
        res['total_samples_max'] = sn.round(sn.max(res_total_sm_l), 0)
        #
        res['%user_samples'] = sn.round(100 * res['user_samples_mean'] /
                                        res['total_samples_mean'], 1)
        res['%mpi_samples'] = sn.round(100 * res['mpi_samples_mean'] /
                                       res['total_samples_mean'], 1)
        res['%etc_samples'] = sn.round(100 * res['etc_samples_mean'] /
                                       res['total_samples_mean'], 1)
        # slowest pes
        res['user_slowest_pe'] = user_slowest_pe
        res['mpi_slowest_pe'] = mpi_slowest_pe
        res['etc_slowest_pe'] = etc_slowest_pe
        res['total_slowest_pe'] = total_slowest_pe
        # --- debug with:
        # print("> res_user_sm_l", sn.evaluate(res_user_sm_l))
        # print("> res_user_pe_l", sn.evaluate(res_user_pe_l))
        # print("> res_mpi_sm_l", sn.evaluate(res_mpi_sm_l))
        # print("> res_mpi_pe_l", sn.evaluate(res_mpi_pe_l))
        # print("> res_etc_sm_l", sn.evaluate(res_etc_sm_l))
        # print("> res_etc_pe_l", sn.evaluate(res_etc_pe_l))
        try:
            res['%user_slowest'] = \
                sn.round(100 * res_user_sm_l[user_slowest_pe] /
                         res_total_sm_l[user_slowest_pe], 1)
        except ValueError:
            res['%user_slowest'] = 0

        try:
            res['%mpi_slowest'] = \
                sn.round(100 * res_mpi_sm_l[user_slowest_pe] /
                         res_total_sm_l[user_slowest_pe], 1)
        except ValueError:
            res['%mpi_slowest'] = 0

        try:
            res['%etc_slowest'] = \
                sn.round(100 * res_etc_sm_l[user_slowest_pe] /
                         res_total_sm_l[user_slowest_pe], 1)
        except ValueError:
            res['%etc_slowest'] = 0

        # fastest pes
        res['user_fastest_pe'] = user_fastest_pe
        res['mpi_fastest_pe'] = mpi_fastest_pe
        res['etc_fastest_pe'] = etc_fastest_pe
        res['total_fastest_pe'] = total_fastest_pe
        try:
            res['%user_fastest'] = \
                sn.round(100 * res_user_sm_l[user_fastest_pe] /
                         res_total_sm_l[user_fastest_pe], 1)
        except ValueError:
            res['%user_fastest'] = 0

        try:
            res['%mpi_fastest'] = \
                sn.round(100 * res_mpi_sm_l[user_fastest_pe] /
                         res_total_sm_l[user_fastest_pe], 1)
        except ValueError:
            res['%mpi_fastest'] = 0

        try:
            res['%etc_fastest'] = \
                sn.round(100 * res_etc_sm_l[user_fastest_pe] /
                         res_total_sm_l[user_fastest_pe], 1)
        except ValueError:
            res['%etc_fastest'] = 0
        # }}}

        self.patrun_stats_d = res
    # }}}

    # {{{ rpt_path_stdout
    # @rfm.run_before('sanity')
    # def rpt_path_stdout(self):
    #     '''Get path to the report dir from stdout:
    #
    #     .. code-block::
    #
    #       Experiment data directory written:
    #       .../sqpatch.exe+19625-2s
    #     '''
    #     regex = r'^Experiment data directory written:\n(?P<rpt_path>.*)$'
    #     self.rpt_path = sn.extractsingle(regex, self.stdout, 'rpt_path')
    # }}}
    # }}}

    # {{{ performance patterns
    # --- 1
    # @rfm.run_before('performance')
    # def set_basic_perf_patterns(self):
    #     '''A set of basic perf_patterns shared between the tests
    #     '''
    #     self.perf_patterns = sn.evaluate(sphs.basic_perf_patterns(self))

    # {{{ --- 2
    @rfm.run_before('performance')
    def set_tool_perf_patterns(self):
        '''More perf_patterns for the tool

        Typical performance reporting:

        .. literalinclude:: ../../reframechecks/perftools/patrun.res
          :lines: 141-169

        '''
        regex = r'^\|\s+(?P<pct>\S+)%\s+\|\s+(?P<sam>\S+).*USER$'
        usr_pct = sn.extractsingle(regex, self.stdout, 'pct', float)
        regex = r'^\|\s+(?P<pct>\S+)%\s+\|\s+(?P<sam>\S+).*MPI$'
        mpi_pct = sn.extractsingle(regex, self.stdout, 'pct', float)
        etc_pct = sn.round(100 - usr_pct - mpi_pct, 1)
        self.patrun_stats_d['%total_samples'] = sn.round(
            self.patrun_stats_d['%user_samples'] +
            self.patrun_stats_d['%mpi_samples'] +
            self.patrun_stats_d['%etc_samples'], 1)
        perf_pattern = {
            'patrun_cn': self.num_cn,
            # 'patrun_wallt_max': self.patrun_perf_d['patrun_wallt_max'],
            # 'patrun_wallt_avg': self.patrun_perf_d['patrun_wallt_avg'],
            # 'patrun_wallt_min': self.patrun_perf_d['patrun_wallt_min'],
            # #
            # 'patrun_mem_max': self.patrun_perf_d['patrun_mem_max'],
            # # 'patrun_mem_avg': self.patrun_perf_d['patrun_mem_avg'],
            # 'patrun_mem_min': self.patrun_perf_d['patrun_mem_min'],
            # #
            # 'patrun_memory_traffic_global':
            #     self.patrun_perf_d['memory_traffic_global'],
            # 'patrun_memory_traffic_local':
            #     self.patrun_perf_d['memory_traffic_local'],
            # '%patrun_memory_traffic_peak':
            #     self.patrun_perf_d['memory_traffic_peak'],
            # #
            # 'patrun_memory_traffic': self.patrun_hwc_d['memory_traffic'],
            # 'patrun_ipc': self.patrun_hwc_d['ipc'],
            # '%patrun_stallcycles': self.patrun_hwc_d['stallcycles'],
            # #
            # 'ptl_high_mem': self.ptl_high_mem,
            # 'ptl_high_mem_c': self.ptl_high_mem_c,
            # %
            '%patrun_user': self.patrun_stats_d['%user_samples'],
            '%patrun_mpi': self.patrun_stats_d['%mpi_samples'],
            '%patrun_etc': self.patrun_stats_d['%etc_samples'],
            '%patrun_total': self.patrun_stats_d['%total_samples'],
            # #
            # '%patrun_user_slowest': self.patrun_stats_d['%user_slowest'],
            # '%patrun_mpi_slowest': self.patrun_stats_d['%mpi_slowest'],
            # '%patrun_etc_slowest': self.patrun_stats_d['%etc_slowest'],
            # #
            # '%patrun_user_fastest': self.patrun_stats_d['%user_fastest'],
            # '%patrun_mpi_fastest': self.patrun_stats_d['%mpi_fastest'],
            # '%patrun_etc_fastest': self.patrun_stats_d['%etc_fastest'],
            # #
            # '%patrun_avg_usr_reported': usr_pct,
            # '%patrun_avg_mpi_reported': mpi_pct,
            # '%patrun_avg_etc_reported': etc_pct,
            # '%patrun_hotspot1': self.patrun_hotspot1_pct,
            # #
            # '%patrun_mpi_h1': self.mpi_h1,
            # '%patrun_mpi_h1_imb': self.mpi_h1_imb,
            # # ko:
            # # '%patrun_mpi_h1': self.patrun_perf_d['mpi_h1'],
            # # '%patrun_mpi_h1_imb': self.patrun_perf_d['mpi_h1_imb'],
            # #
            # 'patrun_avg_energy': self.patrun_perf_d['energy_avg'],
            # 'patrun_avg_power': self.patrun_perf_d['power_avg'],
        }
        if self.perf_patterns:
            self.perf_patterns = {**self.perf_patterns, **perf_pattern}
        else:
            self.perf_patterns = perf_pattern
    # }}}
    # }}}

    # {{{ performance reference
    # --- 1
    # @rfm.run_before('performance')
    # def set_basic_reference(self):
    #     self.reference = sn.evaluate(sphs.basic_reference_scoped_d(self))

    # {{{ --- 2
    @rfm.run_before('performance')
    def set_tool_reference(self):
        ref = ScopedDict()
        # first, copy the existing self.reference (if any):
        if self.reference:
            for kk in self.reference:
                ref[kk] = self.reference['*:%s' % kk]

        # then add more:
        myzero = (0, None, None, '')
        myzero_s = (0, None, None, 's')
        myzero_j = (0, None, None, 'J')
        myzero_w = (0, None, None, 'W')
        myzero_p = (0, None, None, '%')
        myzero_mb = (0, None, None, 'MiBytes')
        myzero_gb = (0, None, None, 'GB')
        myzero_sam = (0, None, None, 'samples')
        # -----------------------------------------------------------
        # h1_name = '%% (%s)' % self.patrun_hotspot1_name
        # myzero_h1 = (0, None, None, h1_name)
        # # mpi_h1_name = '%% (%s)' % self.patrun_perf_d['mpi_h1_name']
        # mpi_h1_name = '%% (%s)' % self.mpi_h1_name
        # myzero_mpi_h1 = (0, None, None, mpi_h1_name)
        # # -----------------------------------------------------------
        # user_slowest_pe = '%% (pe.%s)' %
        #     self.patrun_stats_d['user_slowest_pe']
        # myzero_slowest = (0, None, None, user_slowest_pe)
        # # -----------------------------------------------------------
        # user_fastest_pe =
        #     '%% (pe.%s)' % self.patrun_stats_d['user_fastest_pe']
        # myzero_fastest = (0, None, None, user_fastest_pe)
        # # -----------------------------------------------------------
        # %patrun_user: 76.4 % (slowest:1015.0 [pe71] / mean:950.2 /
        #               median:985.0 / fastest:20.0 [pe94])
        user_stats = ('%% (slow: %s samp [pe%s] / mean:%s median:%s / '
                      'fast:%s [pe%s])') \
            % (self.patrun_stats_d['user_samples_max'],
               self.patrun_stats_d['user_slowest_pe'],
               self.patrun_stats_d['user_samples_mean'],
               self.patrun_stats_d['user_samples_median'],
               self.patrun_stats_d['user_samples_min'],
               self.patrun_stats_d['user_fastest_pe'])
        myzero_user = (0, None, None, user_stats)
        # -----------------------------------------------------------
        # %patrun_mpi: 18.2 % (slowest:1178.0 [pe95] / mean:226.8 /
        #              median:191.5 / fastest:150.0 [pe20])
        mpi_stats = ('%% (slow: %s samp [pe%s] / mean:%s median:%s / '
                     'fast:%s [pe%s])') \
            % (self.patrun_stats_d['mpi_samples_max'],
               # 'xx',
               self.patrun_stats_d['mpi_slowest_pe'],
               self.patrun_stats_d['mpi_samples_mean'],
               self.patrun_stats_d['mpi_samples_median'],
               self.patrun_stats_d['mpi_samples_min'],
               # 'xx')
               self.patrun_stats_d['mpi_fastest_pe'])
        myzero_mpi = (0, None, None, mpi_stats)
        # -----------------------------------------------------------
        # %patrun_etc: 5.4 % (slowest:83.0 [pe21] / mean:67.3 /
        #              median:67.5 / fastest:41.0 [pe93])
        etc_stats = ('%% (slow: %s samp [pe%s] / mean:%s median:%s / '
                     'fast:%s [pe%s])') \
            % (self.patrun_stats_d['etc_samples_max'],
               # 'xx',
               self.patrun_stats_d['etc_slowest_pe'],
               self.patrun_stats_d['etc_samples_mean'],
               self.patrun_stats_d['etc_samples_median'],
               self.patrun_stats_d['etc_samples_min'],
               # 'xx')
               self.patrun_stats_d['etc_fastest_pe'])
        myzero_etc = (0, None, None, etc_stats)
        # -----------------------------------------------------------
        # %patrun_total: 100% (slowest:1250.0 [pe33] / mean:1244.3 /
        #                median:1245.0 / fastest:1234.0 [pe20])
        total_stats = ('%% (slow: %s samp [pe%s] / mean:%s median:%s / '
                       'fast:%s [pe%s])') \
            % (self.patrun_stats_d['total_samples_max'],
               # 'xx',
               self.patrun_stats_d['total_slowest_pe'],
               self.patrun_stats_d['total_samples_mean'],
               self.patrun_stats_d['total_samples_median'],
               self.patrun_stats_d['total_samples_min'],
               # 'xx')
               self.patrun_stats_d['total_fastest_pe'])
        myzero_total = (0, None, None, total_stats)
        # -----------------------------------------------------------
        ref['patrun_cn'] = myzero
        # {{{
        # ref['patrun_wallt_max'] = myzero_s
        # ref['patrun_wallt_avg'] = myzero_s
        # ref['patrun_wallt_min'] = myzero_s
        # #
        # ref['patrun_mem_max'] = myzero_mb
        # # ref['patrun_mem_avg'] = myzero_mb
        # ref['patrun_mem_min'] = myzero_mb
        # #
        # ref['patrun_memory_traffic_global'] = myzero_gb
        # ref['patrun_memory_traffic_local'] = myzero_gb
        # ref['%patrun_memory_traffic_peak'] = myzero_p
        # #
        # ref['patrun_memory_traffic'] = myzero_gb
        # ref['patrun_ipc'] = myzero
        # ref['%patrun_stallcycles'] = myzero_p
        # #
        # ref['ptl_high_mem'] = myzero_mb
        # ref['ptl_high_mem_c'] = myzero_mb
        # #
        ref['%patrun_user'] = myzero_user
        ref['%patrun_mpi'] = myzero_mpi
        ref['%patrun_etc'] = myzero_etc
        ref['%patrun_total'] = myzero_total
        # #
        # ref['%patrun_user_slowest'] = myzero_slowest
        # ref['%patrun_mpi_slowest'] = myzero_slowest
        # ref['%patrun_etc_slowest'] = myzero_slowest
        # #
        # ref['%patrun_user_fastest'] = myzero_fastest
        # ref['%patrun_mpi_fastest'] = myzero_fastest
        # ref['%patrun_etc_fastest'] = myzero_fastest
        # #
        # ref['%patrun_avg_usr_reported'] = myzero_p
        # ref['%patrun_avg_mpi_reported'] = myzero_p
        # ref['%patrun_avg_etc_reported'] = myzero_p
        # ref['%patrun_hotspot1'] = myzero_h1
        # #
        # ref['%patrun_mpi_h1'] = myzero_mpi_h1
        # ref['%patrun_mpi_h1_imb'] = myzero_mpi_h1
        # #
        # ref['patrun_avg_power'] = myzero_w
        # ref['patrun_avg_energy'] = myzero_j
        # }}}
        # final reference:
        self.reference = ref
    # }}}
    # }}}

    # {{{ TODO: perftools-lite
    # }}}
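
# --------------------------------------------------------------------------
# Hypothetical usage sketch (not part of this module): it only illustrates
# how a concrete ReFrame check could reuse the PerftoolsBaseTest hooks
# defined above. The class name, system/partition, programming environment,
# executable options and report file names below are illustrative
# assumptions; the project's real checks live under reframechecks/perftools.
@rfm.simple_test
class PatRunUsageSketch(PerftoolsBaseTest):
    def __init__(self):
        super().__init__()
        self.descr = 'sketch: run sqpatch.exe under pat_run'
        self.valid_systems = ['daint:mc']                # assumption
        self.valid_prog_environs = ['PrgEnv-gnu']        # assumption
        self.modules = ['perftools-preload']             # assumption
        self.num_tasks = 96
        self.executable = 'pat_run'
        self.executable_opts = ['-r', './sqpatch.exe']   # assumption
        # report files read by the base-class hooks (names are assumptions):
        self.version_rpt = 'version.rpt'
        self.rpt = 'pat_report.txt'
        self.csv_rpt = 'pat_report.csv'
        self.prerun_cmds = ['pat_run -V &> %s' % self.version_rpt]
        self.sanity_patterns = sn.assert_found(
            r'Experiment data directory written:', self.stdout)
# --------------------------------------------------------------------------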