Source code for reframechecks.common.sphexa.sanity_perftools
# Copyright 2019-2020 Swiss National Supercomputing Centre (CSCS/ETH Zurich)
# HPCTools Project Developers. See the top-level LICENSE file for details.
#
# SPDX-License-Identifier: BSD-3-Clause
import os
import reframe as rfm
import reframe.utility.sanity as sn
import numpy as np
from reframe.core.fields import ScopedDict
import sphexa.sanity as sphs
[docs]class PerftoolsBaseTest(rfm.RegressionTest):
def __init__(self):
x = 0
# {{{ sanity patterns
# {{{ patrun_version
@rfm.run_before('sanity')
def patrun_version(self):
'''Checks tool's version:
.. code-block::
> pat_run -V
CrayPat/X: Version 20.08.0 Revision 28ef35c9f
'''
reference_tool_version = {
'daint': '20.08.0',
'dom': '20.08.0',
'eiger': '20.11.0',
'pilatus': '20.11.0',
}
ref_version = reference_tool_version[self.current_system.name]
regex = r'^CrayPat/X:\s+Version (?P<toolversion>\S+) Revision'
res_version = sn.extractsingle(regex, self.version_rpt, 'toolversion')
# self.sanity_patterns_l.append(sn.assert_eq(res_version, ref_version,
# msg='sanityV failed "{0}"'))
# }}}
# }}}
# {{{ regex functions
# {{{ patrun: number of compute nodes
@rfm.run_before('performance')
def patrun_num_of_compute_nodes(self):
'''Extract the number of compute nodes to compute averages
.. code-block::
> ls 96mpi/sqpatch.exe+8709-4s/xf-files/:
000004.xf
000005.xf
000006.xf
000007.xf
Typical output:
* patrun_cn: 4
'''
regex = r'^(?P<cn>\d+.xf)$'
self.num_cn = sn.count(sn.extractall(regex, self.stdout, 'cn'))
# }}}
# {{{ perftools-lite: Memory
@rfm.run_before('performance')
def perftools_lite_memory(self):
'''
# 20.10.0 / AMD
High Memory: 85,743.7 MiBytes 669.9 MiBytes per PE
# More --> pat_report -O himem exe+141047-1002s/index.ap2 > rpt.mem
'''
res = {}
regex = (r'^High Memory:\s+(?P<mem_cn>\S+) MiBytes\s+(?P<mem_c>\S+) '
r'MiBytes per PE')
self.ptl_high_mem = sn.extractsingle(
regex, self.stdout, 'mem_cn',
conv=lambda x: int(x.replace(',', '').split('.')[0]))
self.ptl_high_mem_c = sn.extractsingle(
regex, self.stdout, 'mem_c',
conv=lambda x: int(x.replace(',', '').split('.')[0]))
# }}}
# {{{ patrun: table Wall Clock Time, Memory
@rfm.run_before('performance')
def patrun_walltime_and_memory(self):
'''This table shows total wall clock time for the ranks with the
maximum, mean, and minimum time, as well as the average across ranks.
.. code-block::
Table 10: Wall Clock Time, Memory High Water Mark
Process | Process | PE=[mmm]
Time | HiMem |
| (MiBytes) |
11.389914 | 76.3 | Total <-- avgt
|--------------------------------
| 11.398188 | 57.7 | pe.24 <-- maxt
| 11.389955 | 98.9 | pe.34
| 11.365630 | 54.0 | pe.93 <-- mint
|================================
Typical output:
* patrun_wallt_max: 11.3982 s
* patrun_wallt_avg: 11.3899 s
* patrun_wallt_min: 11.3656 s
* patrun_mem_max: 57.7 MiBytes
* patrun_mem_min: 54.0 MiBytes
'''
# TODO: bug avg mem?
res = {}
# --- avg
regex = (r'^Table \d+: Wall Clock Time, Memory High Water Mark\n'
r'(.*\n){4}\s+(.*\n)\s+(?P<proct>\S+)\s+\|\s+(?P<mem>\S+)'
r' \| Total$')
res['patrun_wallt_avg'] = sn.extractsingle(regex, self.stdout, 'proct',
float)
res['patrun_mem_avg'] = sn.extractsingle(regex, self.stdout, 'mem',
float)
# --- max
regex = (r'^Table \d+: Wall Clock Time, Memory High Water Mark\n'
r'(.*\n){4}\s+(.*\n){2}\|\s+(?P<proct>\S+) \|\s+(?P<mem>\S+)'
r'\s+\|\s(?P<pe>\S+)$')
res['patrun_wallt_max'] = sn.extractsingle(regex, self.stdout, 'proct',
float)
res['patrun_mem_max'] = sn.extractsingle(regex, self.stdout, 'mem',
float)
res['patrun_mem_max_pe'] = sn.extractsingle(regex, self.stdout, 'pe',
float)
# --- min
regex = (r'^Table \d+: Wall Clock Time, Memory High Water Mark\n'
r'(.*\n){4}\s+(.*\n){4}\|\s+(?P<proct>\S+) \|\s+(?P<mem>\S+)'
r'\s+\|\s(?P<pe>\S+)$')
res['patrun_wallt_min'] = sn.extractsingle(regex, self.stdout, 'proct',
float)
res['patrun_mem_min'] = sn.extractsingle(regex, self.stdout, 'mem',
float)
res['patrun_mem_min_pe'] = sn.extractsingle(regex, self.stdout, 'pe',
float)
for kk, vv in res.items():
if not isinstance(vv, str):
res[kk] = sn.round(vv, 4)
self.patrun_perf_d = res
# }}}
# {{{ patrun: table Memory Bandwidth by Numanode
@rfm.run_before('performance')
def patrun_memory_bw(self):
'''This table shows memory traffic to local and remote memory for numa
nodes, taking for each numa node the maximum value across nodes.
.. code-block::
Table 9: Memory Bandwidth by Numanode
Memory | Local | Thread | Memory | Memory | Numanode
Traffic | Memory | Time | Traffic | Traffic | Node Id
GBytes | Traffic | | GBytes | / | PE=HIDE
| GBytes | | / Sec | Nominal |
| | | | Peak |
|--------------------------------------------------------------
| 33.64 | 33.64 | 11.360701 | 2.96 | 4.3% | numanode.0
||-------------------------------------------------------------
|| 33.64 | 33.64 | 11.359413 | 2.96 | 4.3% | nid.4
|| 33.59 | 33.59 | 11.359451 | 2.96 | 4.3% | nid.6
|| 33.24 | 33.24 | 11.360701 | 2.93 | 4.3% | nid.5
|| 28.24 | 28.24 | 11.355006 | 2.49 | 3.6% | nid.7
|==============================================================
2 sockets:
Table 10: Memory Bandwidth by Numanode
Memory | Local | Remote | Thread | Memory | Memory | Numanode
Traffic | Memory | Memory | Time | Traffic | Traffic | Node Id
GBytes | Traffic | Traffic | | GBytes | / | PE=HIDE
| GBytes | GBytes | | / Sec | Nominal |
| | | | | Peak |
|-------------------------------------------------------------------
| 11.21 | 10.99 | 0.22 | 3.886926 | 2.88 | 3.8% | numanode.0
||------------------------------------------------------------------
|| 11.21 | 10.99 | 0.22 | 3.886926 | 2.88 |3.8% | nid.407
|| 10.47 | 10.27 | 0.20 | 3.886450 | 2.69 |3.5% | nid.416
||==================================================================
| 11.29 | 11.06 | 0.23 | 3.889932 | 2.90 | 3.8% | numanode.1
||------------------------------------------------------------------
|| 11.29 | 11.06 | 0.23 | 3.889932 | 2.90 |3.8% | nid.407
|| 10.09 | 9.88 | 0.20 | 3.885858 | 2.60 |3.4% | nid.416
|===================================================================
Typical output:
* patrun_memory_traffic_global: 33.64 GB
* patrun_memory_traffic_local: 33.64 GB
* %patrun_memory_traffic_peak: 4.3 %
'''
res = {}
regex = (r'^Table \d+:\s+Memory Bandwidth by Numanode\n(.*\n){7}\|\s+'
r'(?P<GBytes>\S+)\s+\|\s+(?P<GBytes_localm>\S+)'
r'(\s+\|\s+\S+){2,3}\s+\|\s+(?P<peak_pct>\S+)%')
res['memory_traffic_global'] = sn.extractsingle(regex, self.stdout,
'GBytes', float)
res['memory_traffic_local'] = sn.extractsingle(regex, self.stdout,
'GBytes_localm', float)
res['memory_traffic_peak'] = sn.extractsingle(regex, self.stdout,
'peak_pct', float)
#
if self.patrun_perf_d:
self.patrun_perf_d = {**self.patrun_perf_d, **res}
else:
self.patrun_perf_d = res
# }}}
# {{{ patrun: table HW Performance Counter
@rfm.run_before('performance')
def patrun_hwpc(self):
'''This table shows HW performance counter data for the whole program,
averaged across ranks or threads, as applicable.
.. code-block::
Table 4: Program HW Performance Counter Data
...
Thread Time 11.352817 secs
UNHALTED_REFERENCE_CYCLES 28,659,167,096
CPU_CLK_THREAD_UNHALTED:THREAD_P 34,170,540,119
DTLB_LOAD_MISSES:WALK_DURATION 61,307,848
INST_RETIRED:ANY_P 22,152,242,298
RESOURCE_STALLS:ANY 19,793,119,676
OFFCORE_RESPONSE_0:ANY_REQUEST:L3_MISS_LOCAL 20,949,344
CPU CLK Boost 1.19 X
Resource stall cycles / Cycles --> 57.9%
Memory traffic GBytes --> 0.118G/sec 1.34 GB
Local Memory traffic GBytes 0.118G/sec 1.34 GB
Memory Traffic / Nominal Peak 0.2%
DTLB Miss Ovhd 61,307,848 cycles 0.2% cycles
Retired Inst per Clock --> 0.65
==============================================================================
Typical output:
* patrun_memory_traffic: 1.34 GB
* patrun_ipc: 0.65
* %patrun_stallcycles: 57.9 %
'''
res = {}
regex = (r'^Table \d+:\s+Program HW Performance Counter Data\n'
r'(.*\n){15}.*Resource stall cycles \/ Cycles\s+(?P<pp>\S+)%')
res['stallcycles'] = sn.extractsingle(regex, self.stdout, 'pp', float)
#
regex = (r'^Table \d+:\s+Program HW Performance Counter Data\n'
r'(.*\n){16}.*Memory traffic GBytes.*\s+(?P<GB>\S+) GB')
res['memory_traffic'] = sn.extractsingle(regex, self.stdout, 'GB',
float)
#
regex = (r'^Table \d+:\s+Program HW Performance Counter Data\n'
r'(.*\n){20}.*Retired Inst per Clock\s+(?P<ipc>\S+)')
res['ipc'] = sn.extractsingle(regex, self.stdout, 'ipc', float)
#
self.patrun_hwc_d = res
# if self.patrun_perf_d:
# self.patrun_perf_d = {**self.patrun_perf_d, **res}
# else:
# self.patrun_perf_d = res
# }}}
# {{{ patrun: table energy and power usage
@rfm.run_before('performance')
def patrun_energy_power(self):
'''This table shows HW performance counter data for the whole program,
averaged across ranks or threads, as applicable.
.. code-block::
Table 8: Program energy and power usage (from Cray PM)
Node | Node | Process | Node Id
Energy | Power | Time | PE=HIDE
(J) | (W) | |
7,891 | 692.806 | 11.389914 | Total <---
|-- --------------------------------------
| 2,076 | 182.356 | 11.384319 | nid.7
| 1,977 | 173.548 | 11.391657 | nid.4
| 1,934 | 169.765 | 11.392220 | nid.6
| 1,904 | 167.143 | 11.391461 | nid.5
|========================================
Typical output:
* patrun_avg_power: 692.806 W
'''
res = {}
# eiger:
regex = (r'^Table \d+:\s+Program energy and power usage \(from Cray '
r'PM\).*\n(.*\n){5}\s+(?P<nrgy>\S+)\s+\|\s+(?P<power>\S+).*'
r'(Total|Avg of PE values)$')
res['energy_avg'] = \
sn.extractsingle(regex, self.stdout, 'nrgy',
conv=lambda x: int(float(x.replace(',', '')))) \
/ self.num_cn
res['power_avg'] = \
sn.extractsingle(regex, self.stdout, 'power',
conv=lambda x: int(float(x.replace(',', '')))) \
/ self.num_cn
if self.patrun_perf_d:
self.patrun_perf_d = {**self.patrun_perf_d, **res}
else:
self.patrun_perf_d = res
# }}}
# {{{ patrun: table Profile by Function
@rfm.run_before('performance')
def patrun_samples(self):
'''Elapsed time (in samples) reported by the tool:
.. code-block::
Table 1: Profile by Function
Samp% | Samp | Imb. | Imb. | Group
| | Samp | Samp% | Function
| | | | PE=HIDE
100.0% | 382.8 | -- | -- | Total
TODO:
Experiment: samp_cs_time
Sampling interval: 10000 microsecs
'''
regex = (r'^Table 1: Profile by Function\n(.*\n){4}\s+100.0%\s+\|\s+'
r'(?P<sam>\S+)\s+')
self.patrun_sample = sn.extractsingle(regex, self.stdout, 'sam', float)
# }}}
# {{{ patrun: hotspot1
@rfm.run_after('sanity')
def patrun_hotspot1(self):
regex = (r'^Table \d+: Profile by Group, Function, and Line.*\n'
r'(.*\n){7}\s+.*Total\n(.*\n){3}(\|)+\s+(?P<pct>\S+)%.*\|\s+'
r'(?P<fname>(sphexa.*|MPI_.*))$')
# --- ok:
rpt = os.path.join(self.stagedir, self.rpt)
self.patrun_hotspot1_pct = sn.extractsingle(regex, rpt, 'pct', float)
self.patrun_hotspot1_name = sn.extractsingle(regex, rpt, 'fname')
# {{{ # --- ko:
# self.patrun_hotspot1_pct = \
# sn.extractsingle(regex, self.stdout, 'pct', float)
# self.patrun_hotspot1_name = \
# sn.extractsingle(regex, self.stdout, 'fname')
# --- ko:
# self.patrun_hotspot1_pct = \
# sn.extractsingle(regex, self.rpt, 'pct', float)
# self.patrun_hotspot1_name = \
# sn.extractsingle(regex, self.rpt, 'fname')
# }}}
# }}}
# {{{ patrun: hotspot1 MPI
@rfm.run_after('sanity')
def patrun_hotspot1_mpi(self):
'''
.. code-block::
Table 1: Profile by Function
Samp% | Samp | Imb. | Imb. | Group
| | Samp | Samp% | Function
| | | | PE=HIDE
100.0% | 1,126.4 | -- | -- | Total
...
||=================================================
| 9.9% | 111.4 | -- | -- | MPI
||-------------------------------------------------
|| 5.2% | 58.2 | 993.8 | 95.5% | MPI_Allreduce <--
|| 3.6% | 40.9 | 399.1 | 91.7% | MPI_Recv
'''
rpt = os.path.join(self.stagedir, self.rpt)
regex = (r'^Table 1: Profile by Function(.*\n){10}.*^\|.* '
r'(?P<samp_pct>\S+)%.* (?P<imb_pct>\S+)%.*'
r'(?P<fname>sphexa\S+|MPI_\S+)')
res = {}
res['mpi_h1'] = sn.extractsingle(regex, rpt, 'samp_pct', float)
res['mpi_h1_imb'] = sn.extractsingle(regex, rpt, 'imb_pct', float)
res['mpi_h1_name'] = sn.extractsingle(regex, rpt, 'fname')
#
self.mpi_h1 = res['mpi_h1']
self.mpi_h1_imb = res['mpi_h1_imb']
self.mpi_h1_name = res['mpi_h1_name']
# if self.patrun_perf_d:
# self.patrun_perf_d = {**self.patrun_perf_d, **res}
# else:
# self.patrun_perf_d = res
# }}}
# TODO: rpt from sqpatch.exe+5046-0s/rpt-files/RUNTIME.rpt
# {{{ patrun: imbalance
@rfm.run_after('sanity')
def patrun_imbalance(self):
# {{{
'''Load imbalance from csv report
.. code-block::
Table 1: load Balance with MPI Message Stats
'''
# }}}
rpt = os.path.join(self.stagedir, self.csv_rpt)
if self.num_tasks == 1:
regex_use = r'^(?P<pe>1),\S+,\s?(?P<samples>\S+),USER$'
regex_mpi = r'^(?P<pe>1),\S+,\s?(?P<samples>\S+),MPI$'
regex_etc = r'^(?P<pe>1),\S+,\s?(?P<samples>\S+),ETC$'
else:
regex_use = r'^2,\S+,\s?(?P<samples>\S+),USER/pe.(?P<pe>\d+)$'
regex_mpi = r'^2,\S+,\s?(?P<samples>\S+),MPI/pe.(?P<pe>\d+)$'
regex_etc = r'^2,\S+,\s?(?P<samples>\S+),ETC/pe.(?P<pe>\d+)$'
res_user_sm_l = sn.extractall(regex_use, rpt, 'samples', float)
res_user_pe_l = sn.extractall(regex_use, rpt, 'pe', int)
# MPI:
res_mpi_sm_l = sn.extractall(regex_mpi, rpt, 'samples', float)
res_mpi_pe_l = sn.extractall(regex_mpi, rpt, 'pe', int)
if not sn.evaluate(res_mpi_sm_l):
res_mpi_sm_l = [0 for i in sn.evaluate(res_user_sm_l)]
res_mpi_pe_l = [i for i in sn.evaluate(res_user_pe_l)]
# ETC:
res_etc_sm_l = sn.extractall(regex_etc, rpt, 'samples', float)
res_etc_pe_l = sn.extractall(regex_etc, rpt, 'pe', int)
# DICT from LISTs: dict(zip(pe,usr))
# TOTAL = USER+MPI+ETC
res_total_sm_l = []
# WARNING: this fails if data is not sorted by pe, use pat_report with:
# -s sort_by_pe='yes' !!!
res_total_sm_l = [sum(sam) for sam in zip(res_user_sm_l, res_mpi_sm_l,
res_etc_sm_l)]
# USER pes
# {{{ slowest pe (USER)
# slowest = max(max(res_user_sm_l),
# max(res_mpi_sm_l),
# max(res_etc_sm_l))
slowest = max(res_user_sm_l)
user_slowest_pe = -1
index = -1
if slowest in res_user_sm_l:
for sam in res_user_sm_l:
index += 1
if sam == slowest:
user_slowest_pe = index
if user_slowest_pe == -1:
user_slowest_pe = 0
# }}}
# {{{ fastest pe (USER)
fastest = min(res_user_sm_l)
user_fastest_pe = -1
index = -1
for sam in res_user_sm_l:
index += 1
if sam == fastest:
user_fastest_pe = index
if user_fastest_pe == -1:
user_fastest_pe = 0
# }}}
# MPI pes
# {{{ slowest pe (MPI)
slowest = max(res_mpi_sm_l)
# try:
# slowest = max(res_mpi_sm_l)
# except ValueError:
# slowest = 0
mpi_slowest_pe = -1
index = -1
if slowest in res_mpi_sm_l:
for sam in res_mpi_sm_l:
index += 1
if sam == slowest:
mpi_slowest_pe = index
if mpi_slowest_pe == -1:
mpi_slowest_pe = 0
# }}}
# {{{ fastest pe (MPI)
fastest = min(res_mpi_sm_l)
# try:
# fastest = min(res_mpi_sm_l)
# except ValueError:
# fastest = 0
mpi_fastest_pe = -1
index = -1
for sam in res_mpi_sm_l:
index += 1
if sam == fastest:
mpi_fastest_pe = index
if mpi_fastest_pe == -1:
mpi_fastest_pe = 0
# }}}
# ETC pes
# {{{ slowest pe (ETC)
slowest = max(res_etc_sm_l)
etc_slowest_pe = -1
index = -1
if slowest in res_etc_sm_l:
for sam in res_etc_sm_l:
index += 1
if sam == slowest:
etc_slowest_pe = index
if etc_slowest_pe == -1:
etc_slowest_pe = 0
# }}}
# {{{ fastest pe (ETC)
fastest = min(res_etc_sm_l)
etc_fastest_pe = -1
index = -1
for sam in res_etc_sm_l:
index += 1
if sam == fastest:
etc_fastest_pe = index
if etc_fastest_pe == -1:
etc_fastest_pe = 0
# }}}
# TOTAL pes
# {{{ slowest pe (TOTAL)
slowest = max(res_total_sm_l)
# try:
# slowest = max(res_total_sm_l)
# except ValueError:
# slowest = 0
total_slowest_pe = -1
index = -1
if slowest in res_total_sm_l:
for sam in res_total_sm_l:
index += 1
if sam == slowest:
total_slowest_pe = index
if total_slowest_pe == -1:
total_slowest_pe = 0
# }}}
# {{{ fastest pe (TOTAL)
fastest = min(res_total_sm_l)
# try:
# fastest = min(res_total_sm_l)
# except ValueError:
# fastest = 0
total_fastest_pe = -1
index = -1
for sam in res_total_sm_l:
index += 1
if sam == fastest:
total_fastest_pe = index
if total_fastest_pe == -1:
total_fastest_pe = 0
# }}}
# {{{ res dict
res = {}
# min/(mean=average)/median/max
res['user_samples_min'] = sn.round(sn.min(res_user_sm_l), 0)
res['mpi_samples_min'] = sn.round(sn.min(res_mpi_sm_l), 0)
res['etc_samples_min'] = sn.round(sn.min(res_etc_sm_l), 0)
res['total_samples_min'] = sn.round(sn.min(res_total_sm_l), 0)
#
res['user_samples_mean'] = sn.round(sn.avg(res_user_sm_l), 1)
res['mpi_samples_mean'] = sn.round(sn.avg(res_mpi_sm_l), 1)
res['etc_samples_mean'] = sn.round(sn.avg(res_etc_sm_l), 1)
res['total_samples_mean'] = sn.round(sn.avg(res_total_sm_l), 1)
#
res['user_samples_median'] = \
sn.sanity_function(np.median)(res_user_sm_l)
res['mpi_samples_median'] = sn.sanity_function(np.median)(res_mpi_sm_l)
res['etc_samples_median'] = sn.sanity_function(np.median)(res_etc_sm_l)
res['total_samples_median'] = \
sn.sanity_function(np.median)(res_total_sm_l)
#
res['user_samples_max'] = sn.round(sn.max(res_user_sm_l), 0)
res['mpi_samples_max'] = sn.round(sn.max(res_mpi_sm_l), 0)
res['etc_samples_max'] = sn.round(sn.max(res_etc_sm_l), 0)
res['total_samples_max'] = sn.round(sn.max(res_total_sm_l), 0)
#
res['%user_samples'] = sn.round(100 * res['user_samples_mean']
/ res['total_samples_mean'], 1)
res['%mpi_samples'] = sn.round(100 * res['mpi_samples_mean']
/ res['total_samples_mean'], 1)
res['%etc_samples'] = sn.round(100 * res['etc_samples_mean']
/ res['total_samples_mean'], 1)
# slowest pes
res['user_slowest_pe'] = user_slowest_pe
res['mpi_slowest_pe'] = mpi_slowest_pe
res['etc_slowest_pe'] = etc_slowest_pe
res['total_slowest_pe'] = total_slowest_pe
# --- debug with:
# print("> res_user_sm_l", sn.evaluate(res_user_sm_l))
# print("> res_user_pe_l", sn.evaluate(res_user_pe_l))
# print("> res_mpi_sm_l", sn.evaluate(res_mpi_sm_l))
# print("> res_mpi_pe_l", sn.evaluate(res_mpi_pe_l))
# print("> res_etc_sm_l", sn.evaluate(res_etc_sm_l))
# print("> res_etc_pe_l", sn.evaluate(res_etc_pe_l))
try:
res['%user_slowest'] = \
sn.round(100 * res_user_sm_l[user_slowest_pe] /
res_total_sm_l[user_slowest_pe], 1)
except ValueError:
res['%user_slowest'] = 0
try:
res['%mpi_slowest'] = \
sn.round(100 * res_mpi_sm_l[user_slowest_pe] /
res_total_sm_l[user_slowest_pe], 1)
except ValueError:
res['%mpi_slowest'] = 0
try:
res['%etc_slowest'] = \
sn.round(100 * res_etc_sm_l[user_slowest_pe] /
res_total_sm_l[user_slowest_pe], 1)
except ValueError:
res['%etc_slowest'] = 0
# fastest pes
res['user_fastest_pe'] = user_fastest_pe
res['mpi_fastest_pe'] = mpi_fastest_pe
res['etc_fastest_pe'] = etc_fastest_pe
res['total_fastest_pe'] = total_fastest_pe
try:
res['%user_fastest'] = \
sn.round(100 * res_user_sm_l[user_fastest_pe] /
res_total_sm_l[user_fastest_pe], 1)
except ValueError:
res['%user_fastest'] = 0
try:
res['%mpi_fastest'] = \
sn.round(100 * res_mpi_sm_l[user_fastest_pe] /
res_total_sm_l[user_fastest_pe], 1)
except ValueError:
res['%mpi_fastest'] = 0
try:
res['%etc_fastest'] = \
sn.round(100 * res_etc_sm_l[user_fastest_pe] /
res_total_sm_l[user_fastest_pe], 1)
except ValueError:
res['%etc_fastest'] = 0
# }}}
self.patrun_stats_d = res
# }}}
# {{{ rpt_path_stdout
# @rfm.run_before('sanity')
# def rpt_path_stdout(self):
# '''Get path to the report dir from stdout:
#
# .. code-block::
#
# Experiment data directory written:
# .../sqpatch.exe+19625-2s
# '''
# regex = r'^Experiment data directory written:\n(?P<rpt_path>.*)$'
# self.rpt_path = sn.extractsingle(regex, self.stdout, 'rpt_path')
# }}}
# }}}
# {{{ performance patterns
# --- 1
# @rfm.run_before('performance')
# def set_basic_perf_patterns(self):
# '''A set of basic perf_patterns shared between the tests
# '''
# self.perf_patterns = sn.evaluate(sphs.basic_perf_patterns(self))
# {{{ --- 2
@rfm.run_before('performance')
def set_tool_perf_patterns(self):
'''More perf_patterns for the tool
Typical performance reporting:
.. literalinclude:: ../../reframechecks/perftools/patrun.res
:lines: 141-169
'''
regex = r'^\|\s+(?P<pct>\S+)%\s+\|\s+(?P<sam>\S+).*USER$'
usr_pct = sn.extractsingle(regex, self.stdout, 'pct', float)
regex = r'^\|\s+(?P<pct>\S+)%\s+\|\s+(?P<sam>\S+).*MPI$'
mpi_pct = sn.extractsingle(regex, self.stdout, 'pct', float)
etc_pct = sn.round(100 - usr_pct - mpi_pct, 1)
self.patrun_stats_d['%total_samples'] = sn.round(
self.patrun_stats_d['%user_samples'] +
self.patrun_stats_d['%mpi_samples'] +
self.patrun_stats_d['%etc_samples'], 1)
perf_pattern = {
'patrun_cn': self.num_cn,
# 'patrun_wallt_max': self.patrun_perf_d['patrun_wallt_max'],
# 'patrun_wallt_avg': self.patrun_perf_d['patrun_wallt_avg'],
# 'patrun_wallt_min': self.patrun_perf_d['patrun_wallt_min'],
# #
# 'patrun_mem_max': self.patrun_perf_d['patrun_mem_max'],
# # 'patrun_mem_avg': self.patrun_perf_d['patrun_mem_avg'],
# 'patrun_mem_min': self.patrun_perf_d['patrun_mem_min'],
# #
# 'patrun_memory_traffic_global':
# self.patrun_perf_d['memory_traffic_global'],
# 'patrun_memory_traffic_local':
# self.patrun_perf_d['memory_traffic_local'],
# '%patrun_memory_traffic_peak':
# self.patrun_perf_d['memory_traffic_peak'],
# #
# 'patrun_memory_traffic': self.patrun_hwc_d['memory_traffic'],
# 'patrun_ipc': self.patrun_hwc_d['ipc'],
# '%patrun_stallcycles': self.patrun_hwc_d['stallcycles'],
# #
# 'ptl_high_mem': self.ptl_high_mem,
# 'ptl_high_mem_c': self.ptl_high_mem_c,
# %
'%patrun_user': self.patrun_stats_d['%user_samples'],
'%patrun_mpi': self.patrun_stats_d['%mpi_samples'],
'%patrun_etc': self.patrun_stats_d['%etc_samples'],
'%patrun_total': self.patrun_stats_d['%total_samples'],
# #
# '%patrun_user_slowest': self.patrun_stats_d['%user_slowest'],
# '%patrun_mpi_slowest': self.patrun_stats_d['%mpi_slowest'],
# '%patrun_etc_slowest': self.patrun_stats_d['%etc_slowest'],
# #
# '%patrun_user_fastest': self.patrun_stats_d['%user_fastest'],
# '%patrun_mpi_fastest': self.patrun_stats_d['%mpi_fastest'],
# '%patrun_etc_fastest': self.patrun_stats_d['%etc_fastest'],
# #
# '%patrun_avg_usr_reported': usr_pct,
# '%patrun_avg_mpi_reported': mpi_pct,
# '%patrun_avg_etc_reported': etc_pct,
# '%patrun_hotspot1': self.patrun_hotspot1_pct,
# #
# '%patrun_mpi_h1': self.mpi_h1,
# '%patrun_mpi_h1_imb': self.mpi_h1_imb,
# # ko:
# # '%patrun_mpi_h1': self.patrun_perf_d['mpi_h1'],
# # '%patrun_mpi_h1_imb': self.patrun_perf_d['mpi_h1_imb'],
# #
# 'patrun_avg_energy': self.patrun_perf_d['energy_avg'],
# 'patrun_avg_power': self.patrun_perf_d['power_avg'],
}
if self.perf_patterns:
self.perf_patterns = {**self.perf_patterns, **perf_pattern}
else:
self.perf_patterns = perf_pattern
# }}}
# }}}
# {{{ performance reference
# --- 1
# @rfm.run_before('performance')
# def set_basic_reference(self):
# self.reference = sn.evaluate(sphs.basic_reference_scoped_d(self))
# {{{ --- 2
@rfm.run_before('performance')
def set_tool_reference(self):
ref = ScopedDict()
# first, copy the existing self.reference (if any):
if self.reference:
for kk in self.reference:
ref[kk] = self.reference['*:%s' % kk]
# then add more:
myzero = (0, None, None, '')
myzero_s = (0, None, None, 's')
myzero_j = (0, None, None, 'J')
myzero_w = (0, None, None, 'W')
myzero_p = (0, None, None, '%')
myzero_mb = (0, None, None, 'MiBytes')
myzero_gb = (0, None, None, 'GB')
myzero_sam = (0, None, None, 'samples')
# -----------------------------------------------------------
# h1_name = '%% (%s)' % self.patrun_hotspot1_name
# myzero_h1 = (0, None, None, h1_name)
# # mpi_h1_name = '%% (%s)' % self.patrun_perf_d['mpi_h1_name']
# mpi_h1_name = '%% (%s)' % self.mpi_h1_name
# myzero_mpi_h1 = (0, None, None, mpi_h1_name)
# # -----------------------------------------------------------
# user_slowest_pe = '%% (pe.%s)' %
# self.patrun_stats_d['user_slowest_pe']
# myzero_slowest = (0, None, None, user_slowest_pe)
# # -----------------------------------------------------------
# user_fastest_pe =
# '%% (pe.%s)' % self.patrun_stats_d['user_fastest_pe']
# myzero_fastest = (0, None, None, user_fastest_pe)
# # -----------------------------------------------------------
# %patrun_user: 76.4 % (slowest:1015.0 [pe71] / mean:950.2 /
# median:985.0 / fastest:20.0 [pe94])
user_stats = ('%% (slow: %s samp [pe%s] / mean:%s median:%s / '
'fast:%s [pe%s])') \
% (self.patrun_stats_d['user_samples_max'],
self.patrun_stats_d['user_slowest_pe'],
self.patrun_stats_d['user_samples_mean'],
self.patrun_stats_d['user_samples_median'],
self.patrun_stats_d['user_samples_min'],
self.patrun_stats_d['user_fastest_pe'])
myzero_user = (0, None, None, user_stats)
# -----------------------------------------------------------
# %patrun_mpi: 18.2 % (slowest:1178.0 [pe95] / mean:226.8 /
# median:191.5 / fastest:150.0 [pe20])
mpi_stats = ('%% (slow: %s samp [pe%s] / mean:%s median:%s / '
'fast:%s [pe%s])') \
% (self.patrun_stats_d['mpi_samples_max'],
# 'xx',
self.patrun_stats_d['mpi_slowest_pe'],
self.patrun_stats_d['mpi_samples_mean'],
self.patrun_stats_d['mpi_samples_median'],
self.patrun_stats_d['mpi_samples_min'],
# 'xx')
self.patrun_stats_d['mpi_fastest_pe'])
myzero_mpi = (0, None, None, mpi_stats)
# -----------------------------------------------------------
# %patrun_etc: 5.4 % (slowest:83.0 [pe21] / mean:67.3 /
# median:67.5 / fastest:41.0 [pe93])
etc_stats = ('%% (slow: %s samp [pe%s] / mean:%s median:%s / '
'fast:%s [pe%s])') \
% (self.patrun_stats_d['etc_samples_max'],
# 'xx',
self.patrun_stats_d['etc_slowest_pe'],
self.patrun_stats_d['etc_samples_mean'],
self.patrun_stats_d['etc_samples_median'],
self.patrun_stats_d['etc_samples_min'],
# 'xx')
self.patrun_stats_d['etc_fastest_pe'])
myzero_etc = (0, None, None, etc_stats)
# -----------------------------------------------------------
# %patrun_total: 100% (slowest:1250.0 [pe33] / mean:1244.3 /
# median:1245.0 / fastest:1234.0 [pe20])
total_stats = ('%% (slow: %s samp [pe%s] / mean:%s median:%s / '
'fast:%s [pe%s])') \
% (self.patrun_stats_d['total_samples_max'],
# 'xx',
self.patrun_stats_d['total_slowest_pe'],
self.patrun_stats_d['total_samples_mean'],
self.patrun_stats_d['total_samples_median'],
self.patrun_stats_d['total_samples_min'],
# 'xx')
self.patrun_stats_d['total_fastest_pe'])
myzero_total = (0, None, None, total_stats)
# -----------------------------------------------------------
ref['patrun_cn'] = myzero
# {{{
# ref['patrun_wallt_max'] = myzero_s
# ref['patrun_wallt_avg'] = myzero_s
# ref['patrun_wallt_min'] = myzero_s
# #
# ref['patrun_mem_max'] = myzero_mb
# # ref['patrun_mem_avg'] = myzero_mb
# ref['patrun_mem_min'] = myzero_mb
# #
# ref['patrun_memory_traffic_global'] = myzero_gb
# ref['patrun_memory_traffic_local'] = myzero_gb
# ref['%patrun_memory_traffic_peak'] = myzero_p
# #
# ref['patrun_memory_traffic'] = myzero_gb
# ref['patrun_ipc'] = myzero
# ref['%patrun_stallcycles'] = myzero_p
# #
# ref['ptl_high_mem'] = myzero_mb
# ref['ptl_high_mem_c'] = myzero_mb
# #
ref['%patrun_user'] = myzero_user
ref['%patrun_mpi'] = myzero_mpi
ref['%patrun_etc'] = myzero_etc
ref['%patrun_total'] = myzero_total
# #
# ref['%patrun_user_slowest'] = myzero_slowest
# ref['%patrun_mpi_slowest'] = myzero_slowest
# ref['%patrun_etc_slowest'] = myzero_slowest
# #
# ref['%patrun_user_fastest'] = myzero_fastest
# ref['%patrun_mpi_fastest'] = myzero_fastest
# ref['%patrun_etc_fastest'] = myzero_fastest
# #
# ref['%patrun_avg_usr_reported'] = myzero_p
# ref['%patrun_avg_mpi_reported'] = myzero_p
# ref['%patrun_avg_etc_reported'] = myzero_p
# ref['%patrun_hotspot1'] = myzero_h1
# #
# ref['%patrun_mpi_h1'] = myzero_mpi_h1
# ref['%patrun_mpi_h1_imb'] = myzero_mpi_h1
# #
# ref['patrun_avg_power'] = myzero_w
# ref['patrun_avg_energy'] = myzero_j
# }}}
# final reference:
self.reference = ref
# }}}
# }}}
# {{{ TODO: perftools-lite
# }}}