pm-graph v5.10

sleepgraph:
- add -wifitrace argument for tracing all the way to wifi reconnect
- include more data in ftrace to mark the end of kernel resume
- add async_synchronize_full to the list of funcs to chart
- add thermal zone info to the log data
- include a check for s0ix support (s2idle is the default mem_sleep)
- if s2idle does not support s0ix, remove the SYS%LPI turbostat var
- fix -dev crash when kprobe caller is just an address (not a symbol)
- fix the cpuexec data in -proc to display in resume

sleepgraph.8:
- add -wifitrace documentation

README:
- change links from 01.org to developer.intel.com

Signed-off-by: Todd Brandt <todd.e.brandt@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
This commit is contained in:
Todd Brandt 2022-10-20 02:23:10 -07:00 committed by Rafael J. Wysocki
parent 247f34f7b8
commit 9bfb09774e
3 changed files with 118 additions and 122 deletions

View file

@ -6,22 +6,22 @@
|_| |___/ |_|
pm-graph: suspend/resume/boot timing analysis tools
Version: 5.9
Version: 5.10
Author: Todd Brandt <todd.e.brandt@intel.com>
Home Page: https://01.org/pm-graph
Home Page: https://www.intel.com/content/www/us/en/developer/topic-technology/open/pm-graph/overview.html
Report bugs/issues at bugzilla.kernel.org Tools/pm-graph
- https://bugzilla.kernel.org/buglist.cgi?component=pm-graph&product=Tools
Full documentation available online & in man pages
- Getting Started:
https://01.org/pm-graph/documentation/getting-started
https://www.intel.com/content/www/us/en/developer/articles/technical/usage.html
- Config File Format:
https://01.org/pm-graph/documentation/3-config-file-format
- Feature Summary:
https://www.intel.com/content/www/us/en/developer/topic-technology/open/pm-graph/features.html
- upstream version in git:
https://github.com/intel/pm-graph/
git clone https://github.com/intel/pm-graph/
Table of Contents
- Overview

View file

@ -78,6 +78,9 @@ This helps maintain the consistency of test data for better comparison.
If a wifi connection is available, check that it reconnects after resume. Include
the reconnect time in the total resume time calculation and treat wifi timeouts
as resume failures.
.TP
\fB-wifitrace\fR
Trace through the wifi reconnect time and include it in the timeline.
.SS "advanced"
.TP

View file

@ -86,7 +86,7 @@ def ascii(text):
# store system values and test parameters
class SystemValues:
title = 'SleepGraph'
version = '5.9'
version = '5.10'
ansi = False
rs = 0
display = ''
@ -100,6 +100,7 @@ class SystemValues:
ftracelog = False
acpidebug = True
tstat = True
wifitrace = False
mindevlen = 0.0001
mincglen = 0.0
cgphase = ''
@ -124,6 +125,7 @@ class SystemValues:
epath = '/sys/kernel/debug/tracing/events/power/'
pmdpath = '/sys/power/pm_debug_messages'
s0ixpath = '/sys/module/intel_pmc_core/parameters/warn_on_s0ix_failures'
s0ixres = '/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us'
acpipath='/sys/module/acpi/parameters/debug_level'
traceevents = [
'suspend_resume',
@ -180,6 +182,7 @@ class SystemValues:
tmstart = 'SUSPEND START %Y%m%d-%H:%M:%S.%f'
tmend = 'RESUME COMPLETE %Y%m%d-%H:%M:%S.%f'
tracefuncs = {
'async_synchronize_full': {},
'sys_sync': {},
'ksys_sync': {},
'__pm_notifier_call_chain': {},
@ -304,6 +307,7 @@ class SystemValues:
[2, 'suspendstats', 'sh', '-c', 'grep -v invalid /sys/power/suspend_stats/*'],
[2, 'cpuidle', 'sh', '-c', 'grep -v invalid /sys/devices/system/cpu/cpu*/cpuidle/state*/s2idle/*'],
[2, 'battery', 'sh', '-c', 'grep -v invalid /sys/class/power_supply/*/*'],
[2, 'thermal', 'sh', '-c', 'grep . /sys/class/thermal/thermal_zone*/temp'],
]
cgblacklist = []
kprobes = dict()
@ -777,7 +781,7 @@ class SystemValues:
return
if not quiet:
sysvals.printSystemInfo(False)
pprint('INITIALIZING FTRACE...')
pprint('INITIALIZING FTRACE')
# turn trace off
self.fsetVal('0', 'tracing_on')
self.cleanupFtrace()
@ -841,7 +845,7 @@ class SystemValues:
for name in self.dev_tracefuncs:
self.defaultKprobe(name, self.dev_tracefuncs[name])
if not quiet:
pprint('INITIALIZING KPROBES...')
pprint('INITIALIZING KPROBES')
self.addKprobes(self.verbose)
if(self.usetraceevents):
# turn trace events on
@ -1133,6 +1137,15 @@ class SystemValues:
self.cfgdef[file] = fp.read().strip()
fp.write(value)
fp.close()
def s0ixSupport(self):
if not os.path.exists(self.s0ixres) or not os.path.exists(self.mempowerfile):
return False
fp = open(sysvals.mempowerfile, 'r')
data = fp.read().strip()
fp.close()
if '[s2idle]' in data:
return True
return False
def haveTurbostat(self):
if not self.tstat:
return False
@ -1146,7 +1159,7 @@ class SystemValues:
self.vprint(out)
return True
return False
def turbostat(self):
def turbostat(self, s0ixready):
cmd = self.getExec('turbostat')
rawout = keyline = valline = ''
fullcmd = '%s -q -S echo freeze > %s' % (cmd, self.powerfile)
@ -1173,6 +1186,8 @@ class SystemValues:
for key in keyline:
idx = keyline.index(key)
val = valline[idx]
if key == 'SYS%LPI' and not s0ixready and re.match('^[0\.]*$', val):
continue
out.append('%s=%s' % (key, val))
return '|'.join(out)
def netfixon(self, net='both'):
@ -1183,14 +1198,6 @@ class SystemValues:
out = ascii(fp.read()).strip()
fp.close()
return out
def wifiRepair(self):
out = self.netfixon('wifi')
if not out or 'error' in out.lower():
return ''
m = re.match('WIFI \S* ONLINE (?P<action>\S*)', out)
if not m:
return 'dead'
return m.group('action')
def wifiDetails(self, dev):
try:
info = open('/sys/class/net/%s/device/uevent' % dev, 'r').read().strip()
@ -1220,11 +1227,6 @@ class SystemValues:
return '%s reconnected %.2f' % \
(self.wifiDetails(dev), max(0, time.time() - start))
time.sleep(0.01)
if self.netfix:
res = self.wifiRepair()
if res:
timeout = max(0, time.time() - start)
return '%s %s %d' % (self.wifiDetails(dev), res, timeout)
return '%s timeout %d' % (self.wifiDetails(dev), timeout)
def errorSummary(self, errinfo, msg):
found = False
@ -1346,6 +1348,20 @@ class SystemValues:
for i in self.rslist:
self.setVal(self.rstgt, i)
pprint('runtime suspend settings restored on %d devices' % len(self.rslist))
def start(self, pm):
if self.useftrace:
self.dlog('start ftrace tracing')
self.fsetVal('1', 'tracing_on')
if self.useprocmon:
self.dlog('start the process monitor')
pm.start()
def stop(self, pm):
if self.useftrace:
if self.useprocmon:
self.dlog('stop the process monitor')
pm.stop()
self.dlog('stop ftrace tracing')
self.fsetVal('0', 'tracing_on')
sysvals = SystemValues()
switchvalues = ['enable', 'disable', 'on', 'off', 'true', 'false', '1', '0']
@ -1643,19 +1659,20 @@ class Data:
ubiquitous = False
if kprobename in dtf and 'ub' in dtf[kprobename]:
ubiquitous = True
title = cdata+' '+rdata
mstr = '\(.*\) *(?P<args>.*) *\((?P<caller>.*)\+.* arg1=(?P<ret>.*)'
m = re.match(mstr, title)
if m:
c = m.group('caller')
a = m.group('args').strip()
r = m.group('ret')
mc = re.match('\(.*\) *(?P<args>.*)', cdata)
mr = re.match('\((?P<caller>\S*).* arg1=(?P<ret>.*)', rdata)
if mc and mr:
c = mr.group('caller').split('+')[0]
a = mc.group('args').strip()
r = mr.group('ret')
if len(r) > 6:
r = ''
else:
r = 'ret=%s ' % r
if ubiquitous and c in dtf and 'ub' in dtf[c]:
return False
else:
return False
color = sysvals.kprobeColor(kprobename)
e = DevFunction(displayname, a, c, r, start, end, ubiquitous, proc, pid, color)
tgtdev['src'].append(e)
@ -1772,6 +1789,14 @@ class Data:
e.time = self.trimTimeVal(e.time, t0, dT, left)
e.end = self.trimTimeVal(e.end, t0, dT, left)
e.length = e.end - e.time
if('cpuexec' in d):
cpuexec = dict()
for e in d['cpuexec']:
c0, cN = e
c0 = self.trimTimeVal(c0, t0, dT, left)
cN = self.trimTimeVal(cN, t0, dT, left)
cpuexec[(c0, cN)] = d['cpuexec'][e]
d['cpuexec'] = cpuexec
for dir in ['suspend', 'resume']:
list = []
for e in self.errorinfo[dir]:
@ -2086,75 +2111,43 @@ class Data:
return d
def addProcessUsageEvent(self, name, times):
# get the start and end times for this process
maxC = 0
tlast = 0
start = -1
end = -1
cpuexec = dict()
tlast = start = end = -1
for t in sorted(times):
if tlast == 0:
if tlast < 0:
tlast = t
continue
if name in self.pstl[t]:
if start == -1 or tlast < start:
if name in self.pstl[t] and self.pstl[t][name] > 0:
if start < 0:
start = tlast
if end == -1 or t > end:
end = t
end, key = t, (tlast, t)
maxj = (t - tlast) * 1024.0
cpuexec[key] = min(1.0, float(self.pstl[t][name]) / maxj)
tlast = t
if start == -1 or end == -1:
return 0
if start < 0 or end < 0:
return
# add a new action for this process and get the object
out = self.newActionGlobal(name, start, end, -3)
if not out:
return 0
phase, devname = out
dev = self.dmesg[phase]['list'][devname]
# get the cpu exec data
tlast = 0
clast = 0
cpuexec = dict()
for t in sorted(times):
if tlast == 0 or t <= start or t > end:
tlast = t
continue
list = self.pstl[t]
c = 0
if name in list:
c = list[name]
if c > maxC:
maxC = c
if c != clast:
key = (tlast, t)
cpuexec[key] = c
tlast = t
clast = c
dev['cpuexec'] = cpuexec
return maxC
if out:
phase, devname = out
dev = self.dmesg[phase]['list'][devname]
dev['cpuexec'] = cpuexec
def createProcessUsageEvents(self):
# get an array of process names
proclist = []
# get an array of process names and times
proclist = {'sus': dict(), 'res': dict()}
tdata = {'sus': [], 'res': []}
for t in sorted(self.pstl):
pslist = self.pstl[t]
for ps in sorted(pslist):
if ps not in proclist:
proclist.append(ps)
# get a list of data points for suspend and resume
tsus = []
tres = []
for t in sorted(self.pstl):
if t < self.tSuspended:
tsus.append(t)
else:
tres.append(t)
dir = 'sus' if t < self.tSuspended else 'res'
for ps in sorted(self.pstl[t]):
if ps not in proclist[dir]:
proclist[dir][ps] = 0
tdata[dir].append(t)
# process the events for suspend and resume
if len(proclist) > 0:
if len(proclist['sus']) > 0 or len(proclist['res']) > 0:
sysvals.vprint('Process Execution:')
for ps in proclist:
c = self.addProcessUsageEvent(ps, tsus)
if c > 0:
sysvals.vprint('%25s (sus): %d' % (ps, c))
c = self.addProcessUsageEvent(ps, tres)
if c > 0:
sysvals.vprint('%25s (res): %d' % (ps, c))
for dir in ['sus', 'res']:
for ps in sorted(proclist[dir]):
self.addProcessUsageEvent(ps, tdata[dir])
def handleEndMarker(self, time, msg=''):
dm = self.dmesg
self.setEnd(time, msg)
@ -3218,7 +3211,7 @@ class ProcessMonitor:
# markers, and/or kprobes required for primary parsing.
def doesTraceLogHaveTraceEvents():
kpcheck = ['_cal: (', '_ret: (']
techeck = ['suspend_resume', 'device_pm_callback']
techeck = ['suspend_resume', 'device_pm_callback', 'tracing_mark_write']
tmcheck = ['SUSPEND START', 'RESUME COMPLETE']
sysvals.usekprobes = False
fp = sysvals.openlog(sysvals.ftracefile, 'r')
@ -3241,7 +3234,7 @@ def doesTraceLogHaveTraceEvents():
check.remove(i)
tmcheck = check
fp.close()
sysvals.usetraceevents = True if len(techeck) < 2 else False
sysvals.usetraceevents = True if len(techeck) < 3 else False
sysvals.usetracemarkers = True if len(tmcheck) == 0 else False
# Function: appendIncompleteTraceLog
@ -3456,6 +3449,8 @@ def parseTraceLog(live=False):
continue
# process cpu exec line
if t.type == 'tracing_mark_write':
if t.name == 'CMD COMPLETE' and data.tKernRes == 0:
data.tKernRes = t.time
m = re.match(tp.procexecfmt, t.name)
if(m):
parts, msg = 1, m.group('ps')
@ -3674,6 +3669,9 @@ def parseTraceLog(live=False):
e = next((x for x in reversed(tp.ktemp[key]) if x['end'] < 0), 0)
if not e:
continue
if (t.time - e['begin']) * 1000 < sysvals.mindevlen:
tp.ktemp[key].pop()
continue
e['end'] = t.time
e['rdata'] = kprobedata
# end of kernel resume
@ -4213,6 +4211,8 @@ def callgraphHTML(sv, hf, num, cg, title, color, devid):
fmt = '<n>(%.3f ms @ '+sv.timeformat+')</n>'
flen = fmt % (line.length*1000, line.time)
if line.isLeaf():
if line.length * 1000 < sv.mincglen:
continue
hf.write(html_func_leaf.format(line.name, flen))
elif line.freturn:
hf.write(html_func_end)
@ -4827,14 +4827,11 @@ def createHTML(testruns, testfail):
if('cpuexec' in dev):
for t in sorted(dev['cpuexec']):
start, end = t
j = float(dev['cpuexec'][t]) / 5
if j > 1.0:
j = 1.0
height = '%.3f' % (rowheight/3)
top = '%.3f' % (rowtop + devtl.scaleH + 2*rowheight/3)
left = '%f' % (((start-m0)*100)/mTotal)
width = '%f' % ((end-start)*100/mTotal)
color = 'rgba(255, 0, 0, %f)' % j
color = 'rgba(255, 0, 0, %f)' % dev['cpuexec'][t]
devtl.html += \
html_cpuexec.format(left, top, height, width, color)
if('src' not in dev):
@ -5453,17 +5450,9 @@ def executeSuspend(quiet=False):
call('sync', shell=True)
sv.dlog('read dmesg')
sv.initdmesg()
# start ftrace
if sv.useftrace:
if not quiet:
pprint('START TRACING')
sv.dlog('start ftrace tracing')
sv.fsetVal('1', 'tracing_on')
if sv.useprocmon:
sv.dlog('start the process monitor')
pm.start()
sv.dlog('run the cmdinfo list before')
sv.dlog('cmdinfo before')
sv.cmdinfo(True)
sv.start(pm)
# execute however many s/r runs requested
for count in range(1,sv.execcount+1):
# x2delay in between test runs
@ -5500,6 +5489,7 @@ def executeSuspend(quiet=False):
if res != 0:
tdata['error'] = 'cmd returned %d' % res
else:
s0ixready = sv.s0ixSupport()
mode = sv.suspendmode
if sv.memmode and os.path.exists(sv.mempowerfile):
mode = 'mem'
@ -5509,9 +5499,10 @@ def executeSuspend(quiet=False):
sv.testVal(sv.diskpowerfile, 'radio', sv.diskmode)
if sv.acpidebug:
sv.testVal(sv.acpipath, 'acpi', '0xe')
if mode == 'freeze' and sv.haveTurbostat():
if ((mode == 'freeze') or (sv.memmode == 's2idle')) \
and sv.haveTurbostat():
# execution will pause here
turbo = sv.turbostat()
turbo = sv.turbostat(s0ixready)
if turbo:
tdata['turbo'] = turbo
else:
@ -5522,7 +5513,8 @@ def executeSuspend(quiet=False):
pf.close()
except Exception as e:
tdata['error'] = str(e)
sv.dlog('system returned from resume')
sv.fsetVal('CMD COMPLETE', 'trace_marker')
sv.dlog('system returned')
# reset everything
sv.testVal('restoreall')
if(sv.rtcwake):
@ -5535,33 +5527,29 @@ def executeSuspend(quiet=False):
sv.fsetVal('WAIT END', 'trace_marker')
# return from suspend
pprint('RESUME COMPLETE')
sv.fsetVal(datetime.now().strftime(sv.tmend), 'trace_marker')
if(count < sv.execcount):
sv.fsetVal(datetime.now().strftime(sv.tmend), 'trace_marker')
elif(not sv.wifitrace):
sv.fsetVal(datetime.now().strftime(sv.tmend), 'trace_marker')
sv.stop(pm)
if sv.wifi and wifi:
tdata['wifi'] = sv.pollWifi(wifi)
sv.dlog('wifi check, %s' % tdata['wifi'])
if sv.netfix:
netfixout = sv.netfixon('wired')
elif sv.netfix:
netfixout = sv.netfixon()
if sv.netfix and netfixout:
tdata['netfix'] = netfixout
if(count == sv.execcount and sv.wifitrace):
sv.fsetVal(datetime.now().strftime(sv.tmend), 'trace_marker')
sv.stop(pm)
if sv.netfix:
tdata['netfix'] = sv.netfixon()
sv.dlog('netfix, %s' % tdata['netfix'])
if(sv.suspendmode == 'mem' or sv.suspendmode == 'command'):
sv.dlog('read the ACPI FPDT')
tdata['fw'] = getFPDT(False)
testdata.append(tdata)
sv.dlog('run the cmdinfo list after')
sv.dlog('cmdinfo after')
cmdafter = sv.cmdinfo(False)
# stop ftrace
if sv.useftrace:
if sv.useprocmon:
sv.dlog('stop the process monitor')
pm.stop()
sv.fsetVal('0', 'tracing_on')
# grab a copy of the dmesg output
if not quiet:
pprint('CAPTURING DMESG')
sysvals.dlog('EXECUTION TRACE END')
sv.getdmesg(testdata)
# grab a copy of the ftrace output
if sv.useftrace:
@ -6350,6 +6338,8 @@ def data_from_html(file, outpath, issues, fulldetail=False):
if not m:
continue
name, time, phase = m.group('n'), m.group('t'), m.group('p')
if name == 'async_synchronize_full':
continue
if ' async' in name or ' sync' in name:
name = ' '.join(name.split(' ')[:-1])
if phase.startswith('suspend'):
@ -6701,6 +6691,7 @@ def printHelp():
' -skiphtml Run the test and capture the trace logs, but skip the timeline (default: disabled)\n'\
' -result fn Export a results table to a text file for parsing.\n'\
' -wifi If a wifi connection is available, check that it reconnects after resume.\n'\
' -wifitrace Trace kernel execution through wifi reconnect.\n'\
' -netfix Use netfix to reset the network in the event it fails to resume.\n'\
' [testprep]\n'\
' -sync Sync the filesystems before starting the test\n'\
@ -6828,6 +6819,8 @@ if __name__ == '__main__':
sysvals.sync = True
elif(arg == '-wifi'):
sysvals.wifi = True
elif(arg == '-wifitrace'):
sysvals.wifitrace = True
elif(arg == '-netfix'):
sysvals.netfix = True
elif(arg == '-gzip'):