Source code for pysys.perf.reporters

#!/usr/bin/env python
# PySys System Test Framework, Copyright (C) 2006-2022 M.B. Grieve

# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.

# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.

# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""
Contains the built-in reporter classes. 
"""

import collections, threading, time, math, sys, os
import io
import logging
import json
import glob

from pysys.perf.api import *
from pysys.constants import *
from pysys.utils.logutils import BaseLogFormatter
from pysys.utils.fileutils import mkdir, toLongPathSafe
from pysys.utils.pycompat import *

log = logging.getLogger('pysys.perfreporter')


[docs]class CSVPerformanceReporter(BasePerformanceReporter):
	"""Performance reporter which writes to a CSV file.
	
	This reporter writes to a UTF-8 file of 
	comma-separated values that is both machine and human readable and 
	easy to view and use in any spreadsheet program, and after the columns containing 
	the information for each result, contains comma-separated metadata containing 
	key=value information about the entire run (e.g. hostname, date/time, etc), 
	and (optionally) associated with each individual test result (e.g. test mode etc). 
	The per-run and per-result metadata is not arranged in columns since the structure 
	differs from row to row.

	After tests have run, the summary file is published with category ``CSVPerformanceReport`` 
	using the `pysys.writer.api.ArtifactPublisher` interface. 

	The following properties can be set in the project configuration for this reporter:		
	
	"""
	
	summaryFile = ''
	"""
	The filename pattern used for the summary file(s); see `DEFAULT_SUMMARY_FILE`. 
	
	For compatibility purposes, if not specified explicitly, the summary file for the CSVPerformanceReporter can be 
	configured with the project property ``csvPerformanceReporterSummaryFile``, however this is deprecated. 
	This property can also be accessed and configured under the alternative capitalization ``summaryfile``, however this 
	is discouraged as of PySys 2.1+, where ``summaryFile`` is the preferred name. 

	"""

	aggregateCycles = False
	"""
	Enable this if you want PySys to rewrite the summary file at the end of a multi-cycle test with an aggregated 
	file containing the mean and standard deviation for all the cycles, rather than a separate line for each cycle. 
	This may be easier to consume when triaging performance results and looking for regressions. 
	
	.. versionadded:: 2.1
	"""

	publishArtifactCategory = 'CSVPerformanceReport' 
	"""
	If specified, the output file will be published as an artifact using the specified category name. 

	.. versionadded:: 2.1
	"""

	DEFAULT_SUMMARY_FILE = '__pysys_performance/${outDirName}_${hostname}/perf_${startDate}_${startTime}.${outDirName}.csv'
	"""The default summary file if not overridden by the ``csvPerformanceReporterSummaryFile`` project property, or 
	the ``summaryFile=`` attribute. See `getRunSummaryFile()`. This is relative to the runner output+'/..' directory 
	(typically testRootDir, unless ``--outdir`` is overridden).
	"""

	def setup(self, **kwargs):
		"""Resolve the summary file pattern and initialise the record of summary files written.

		Calls the superclass ``setup()`` (``kwargs`` are accepted but not forwarded), then stores in 
		``self.summaryfile`` the first non-empty value from, in priority order: the existing ``summaryfile`` 
		attribute, the ``summaryFile`` attribute, the ``csvPerformanceReporterSummaryFile`` project property, 
		and finally `DEFAULT_SUMMARY_FILE`. 
		"""
		super().setup()

		# for backwards compat, several differently-named sources are consulted in priority order
		pattern = self.summaryfile
		if not pattern: pattern = self.summaryFile
		if not pattern: pattern = getattr(self.project, 'csvPerformanceReporterSummaryFile', '')
		if not pattern: pattern = self.DEFAULT_SUMMARY_FILE
		self.summaryfile = pattern

		# paths of the run summary files written so far; populated by recordResult(), consumed by cleanup()
		self.__summaryFilesWritten = set()

	def getRunHeader(self, testobj=None, **kwargs):
		"""Build the header text that is written at the start of each CSV file.
		
		There should usually be no reason to override this method. 
		
		:meta private:

		:param testobj: the test object passed to ``getRunDetails``; if that call raises, ``getRunDetails`` 
			is retried with no arguments
		:return: the value returned by ``CSVPerformanceFile.makeCSVHeaderLine`` for the run details
		"""
		try:
			details = self.getRunDetails(testobj)
		except Exception: # for pre-2.0 signature
			details = self.getRunDetails()
		return CSVPerformanceFile.makeCSVHeaderLine(details)

	def cleanup(self):
		"""Finalize each summary file written during this run, then log and publish it.

		While holding the reporter lock, visits every recorded summary file in sorted order. If the runner's 
		``cycle`` value is greater than 1 and `aggregateCycles` is set, the file is loaded, aggregated and 
		rewritten in place; a failure to do so is logged but is not fatal. The file location is then logged, 
		and the file is published as an artifact if `publishArtifactCategory` is set. 
		"""
		with self._lock:
			if self.runner is None or not self.__summaryFilesWritten: return

			for summaryPath in sorted(self.__summaryFilesWritten):

				# NOTE(review): this condition reads runner.cycle whereas the log message below reads 
				# runner.cycles - confirm that both attributes exist on the runner
				if self.runner.cycle > 1 and self.aggregateCycles:
					try:
						aggregated = CSVPerformanceFile.aggregate([CSVPerformanceFile.load(summaryPath)])
					except Exception: # pragma: no cover
						log.exception('Failed to read and aggregate performance information for %s: '%summaryPath)
						# Don't make it fatal, more useful to go ahead and publish it as best we can
					else:
						log.info('Rewriting CSV to aggregate results across all %d cycles'%self.runner.cycles)
						aggregated.dump(summaryPath)

				log.info('Performance results were written to: %s', os.path.normpath(summaryPath)) # absolute path is easiest to deal with
				log.info('  (add the above path to env %s to show a comparison against that baseline on future test runs)', PrintSummaryPerformanceReporter.BASELINES_ENV_VAR)

				if self.publishArtifactCategory:
					self.runner.publishArtifact(summaryPath, self.publishArtifactCategory)

	def reportResult(self, testobj, value, resultKey, unit, toleranceStdDevs=None, resultDetails=None):
		"""Format the specified result using ``formatResult`` and pass it to ``recordResult`` to be written.

		:param testobj: the test case instance registering the value
		:param value: the value to be reported
		:param resultKey: a unique string that fully identifies what was measured
		:param unit: identifies the unit the value is measured in
		:param toleranceStdDevs: indicates how many standard deviations away from the mean for a regression
		:param resultDetails: a dictionary of detailed information that should be recorded together with the result
		"""
		self.recordResult(
			self.formatResult(testobj, value, resultKey, unit, toleranceStdDevs, resultDetails),
			testobj)

	def formatResult(self, testobj, value, resultKey, unit, toleranceStdDevs, resultDetails):
		"""Convert the specified arguments into the CSV line that recordResult will write to the performance file(s).

		:meta private:

		:param testobj: the test case instance registering the value
		:param value: the value to be reported
		:param resultKey: a unique string that fully identifies what was measured
		:param unit: identifies the unit the value is measured in
		:param toleranceStdDevs: indicates how many standard deviations away from the mean for a regression; 
			any false value (including None or 0) is written as an empty string
		:param resultDetails: a dictionary of detailed information that should be recorded together with the result
		:return: the string produced by ``CSVPerformanceFile.toCSVLine`` with a newline character appended

		"""
		row = {}
		row['resultKey'] = resultKey
		row['testId'] = testobj.descriptor.id
		row['value'] = str(value)
		row['unit'] = str(unit)
		row['biggerIsBetter'] = str(unit.biggerIsBetter).upper()
		if toleranceStdDevs:
			row['toleranceStdDevs'] = str(toleranceStdDevs)
		else:
			row['toleranceStdDevs'] = ''
		# a freshly reported result always represents a single sample, hence there is no deviation
		row['samples'] = '1'
		row['stdDev'] = '0'
		row['resultDetails'] = resultDetails

		csvLine = CSVPerformanceFile.toCSVLine(row)
		return csvLine+'\n'

	def recordResult(self, formatted, testobj):
		"""Record results to the performance summary file.

		The formatted line is appended to two files: ``performance_results.csv`` in the test's own output 
		directory (for convenience when triaging), and the run summary file returned by ``getRunSummaryFile``. 
		Whichever of these does not yet exist is created with the run header as its first line. The summary 
		file is updated while holding the reporter lock, and its path is remembered for ``cleanup()``. 

		:meta private:

		:param formatted: the formatted string to write
		:param testobj: object reference to the calling test

		"""
		encoding = 'utf-8'
		
		def callGetRunHeader():
			try:
				return self.getRunHeader(testobj)
			except Exception: # pragma: no cover - for pre-2.0 signature 
				return self.getRunHeader()
		
		def appendLine(safePath, onCreate=None):
			# Append the formatted line, first writing the header if the file is new. The existence check 
			# deliberately uses the very same converted path that is opened; checking the unconverted path 
			# could disagree with the open call and lead to an existing file being treated as new. 
			alreadyexists = os.path.exists(safePath)
			with io.open(safePath, 'a', encoding=encoding) as f:
				if not alreadyexists: 
					if onCreate is not None: onCreate()
					f.write(callGetRunHeader())
				f.write(formatted)

		# generate a file in the test output directory for convenience/triaging, plus add to the global summary
		appendLine(toLongPathSafe(testobj.output+'/performance_results.csv'))
		
		# now the global one
		path = self.getRunSummaryFile(testobj)
		mkdir(os.path.dirname(path))
		with self._lock:
			appendLine(toLongPathSafe(path), 
				onCreate=lambda: testobj.log.info('Creating performance summary log file at: %s', os.path.normpath(path)))
			self.__summaryFilesWritten.add(path)
	
	@staticmethod
	def tryDeserializePerformanceFile(path):
		"""Load the specified file using ``CSVPerformanceFile.load`` if its name ends with ``.csv``.

		:param path: the path of the file to load
		:return: the loaded performance file object, or None if the path does not end with ``.csv``
		"""
		if path.endswith('.csv'):
			return CSVPerformanceFile.load(path)
		return None

[docs]class JSONPerformanceReporter(BasePerformanceReporter):
	"""Performance reporter which writes to a JSON file.
	
	After tests have run, the summary file is published with category ``JSONPerformanceReport`` 
	using the `pysys.writer.api.ArtifactPublisher` interface. 

	.. versionadded:: 2.1

	The following properties can be set in the project configuration for this reporter:		
	
	"""
	
	summaryFile = ''
	"""
	The ``.json`` filename pattern used for the summary file(s); see `DEFAULT_SUMMARY_FILE`. 
	"""

	publishArtifactCategory = 'JSONPerformanceReport' 
	"""
	If specified, the output file will be published as an artifact using the specified category name. 
	"""

	DEFAULT_SUMMARY_FILE = '__pysys_performance/${outDirName}_${hostname}/perf_${startDate}_${startTime}.${outDirName}.json'
	"""The default summary file if not overridden by the ``summaryFile=`` attribute. See `getRunSummaryFile()`. 
	This is relative to the runner output+'/..' directory (typically testRootDir, unless ``--outdir`` is overridden).
	"""

	def setup(self, **kwargs):
		"""Call the superclass ``setup()`` and initialise the set of summary file paths written by this reporter.

		Any ``kwargs`` passed in are accepted but are not forwarded to the superclass. 
		"""
		super().setup()
		# paths are added by reportResult() and consumed by cleanup()
		self.__summaryFilesWritten = set()
		
	def reportResult(self, testobj, value, resultKey, unit, toleranceStdDevs=None, resultDetails=None):
		"""Append the specified result to the JSON run summary file.

		While holding the reporter lock, the file returned by ``getRunSummaryFile`` is opened for appending. 
		If it did not previously exist, the opening of the JSON document (the run details and the start of the 
		``results`` list) is written first; otherwise a separator is written before the new result. The closing 
		of the document is not written here; see ``cleanup()``. 

		:param testobj: the test case instance registering the value
		:param value: the value to be reported
		:param resultKey: a unique string that fully identifies what was measured
		:param unit: identifies the unit the value is measured in
		:param toleranceStdDevs: indicates how many standard deviations away from the mean for a regression
		:param resultDetails: a dictionary of detailed information that should be recorded together with the 
			result; an empty dictionary is written if this is None or empty
		"""
		summaryPath = self.getRunSummaryFile(testobj)
		mkdir(os.path.dirname(summaryPath))
		with self._lock:
			isNewFile = not os.path.exists(toLongPathSafe(summaryPath))
			with io.open(toLongPathSafe(summaryPath), 'a', encoding='utf-8') as out:
				if isNewFile:
					testobj.log.info('Creating performance summary log file at: %s', os.path.normpath(summaryPath))
					out.write('{"runDetails": ')
					json.dump(self.getRunDetails(testobj), out)
					out.write(', "results":[\n')
				else:
					# every result after the first is preceded by a list separator
					out.write(',\n')

				record = dict(
					resultKey=resultKey,
					value=value,
					unit=str(unit),
					biggerIsBetter=unit.biggerIsBetter,
					samples=1,
					stdDev=0,
					toleranceStdDevs=toleranceStdDevs,
					testId=testobj.descriptor.id,
					resultDetails=resultDetails or {},
				)
				json.dump(record, out)
			self.__summaryFilesWritten.add(summaryPath)

	def cleanup(self):
		"""Complete each JSON summary file written during this run, then log and publish it.

		While holding the reporter lock, visits every recorded summary file in sorted order, appending the text 
		that closes the ``results`` list and the enclosing JSON object. The file location is logged with the 
		project's testRootDir removed from the path, and the file is published as an artifact if 
		`publishArtifactCategory` is set. 
		"""
		with self._lock:
			for summaryPath in sorted(self.__summaryFilesWritten):
				with io.open(toLongPathSafe(summaryPath), 'a', encoding='utf-8') as out:
					out.write('\n]}\n')

				displayPath = os.path.normpath(summaryPath)
				displayPath = displayPath.replace(os.path.normpath(self.project.testRootDir), '')
				displayPath = displayPath.lstrip('/\\')
				log.info('Performance results were written to: %s', displayPath)

				if self.publishArtifactCategory:
					self.runner.publishArtifact(summaryPath, self.publishArtifactCategory)

	@staticmethod
	def tryDeserializePerformanceFile(path):
		"""Load the specified file as a ``PerformanceRunData`` object if its name ends with ``.json``.

		:param path: the path of the file to load, which is read as UTF-8 encoded JSON
		:return: a ``PerformanceRunData`` constructed from the path and the document's ``runDetails`` and 
			``results`` values, or None if the path does not end with ``.json``
		"""
		if not path.endswith('.json'):
			return None

		with io.open(toLongPathSafe(path), encoding='utf-8') as jsonFile:
			parsed = json.load(jsonFile)
		return PerformanceRunData(path, parsed['runDetails'], parsed['results'])

[docs]class PrintSummaryPerformanceReporter(BasePerformanceReporter):
	"""Performance reporter which logs a human-friendly summary of all performance results to the console at the end of 
	the test run. 

	By setting the `BASELINES_ENV_VAR` environment variable, this reporter will also print out an 
	automatic comparison from the named baseline file(s) to the results from the current test run. 
	This feature is very useful when comparing different strategies for optimizing your application. 
	
	.. versionadded:: 2.1

	"""

	BASELINES_ENV_VAR = 'PYSYS_PERFORMANCE_BASELINES'
	"""
	Set this environment variable to a comma-separated list of performance (e.g. ``.csv``) files to print out an 
	automatic comparison from the baseline file(s) to the results from the current test run. 
	
	This feature is very useful when comparing different strategies for optimizing your application. 
	
	For best results, use multiple cycles for all test runs so that standard deviation can be calculated. 
	
	The filenames can be absolute paths, glob paths using ``*`` and ``**``, or relative to the testRootDir. For example::
	
		export PYSYS_PERFORMANCE_BASELINES=__pysys_performance/mybaseline*/**/*.csv,__pysys_performance/optimization1*/**/*.csv
		
	"""

	def setup(self, **kwargs):
		"""Initialise the list of results and load any baseline files named by the `BASELINES_ENV_VAR` environment variable.

		Calls the superclass ``setup()`` (``kwargs`` are accepted but not forwarded), creates a 
		``PerformanceComparisonGenerator`` for the runner's performance reporters, and asks it to load the 
		comma-separated paths from the environment variable, using the project's testRootDir as the base 
		directory. If anything was loaded, the number and names of the baselines are logged. 
		"""
		super().setup()
		self.results = []

		# imported here rather than at the top of the module
		import pysys.perf.perfreportstool
		generator = pysys.perf.perfreportstool.PerformanceComparisonGenerator(reporters=self.runner.performanceReporters)
		self.comparisonGenerator = generator
		self.baselines = generator.loadFiles(
			baselineBaseDir=self.project.testRootDir,
			paths=os.getenv(self.BASELINES_ENV_VAR,'').split(','), 
			)
		if not self.baselines: return

		baselineNames = ', '.join(b.name for b in self.baselines)
		log.info('Successfully loaded performance comparison data from %d files: %s', len(self.baselines), baselineNames)

	def reportResult(self, testobj, value, resultKey, unit, toleranceStdDevs=None, resultDetails=None):
		"""Add the specified result to the in-memory list that is summarized by ``cleanup()``.

		The result is stored (while holding the reporter lock) as a dictionary recording a single sample with 
		a standard deviation of zero. 

		:param testobj: the test case instance registering the value
		:param value: the value to be reported
		:param resultKey: a unique string that fully identifies what was measured
		:param unit: identifies the unit the value is measured in
		:param toleranceStdDevs: indicates how many standard deviations away from the mean for a regression
		:param resultDetails: a dictionary of detailed information that should be recorded together with the 
			result; an empty dictionary is stored if this is None or empty
		"""
		with self._lock:
			entry = dict(
				resultKey=resultKey,
				value=value,
				unit=str(unit),
				biggerIsBetter=unit.biggerIsBetter,
				samples=1,
				stdDev=0,
				toleranceStdDevs=toleranceStdDevs,
				testId=testobj.descriptor.id,
				resultDetails=resultDetails or {},
			)
			self.results.append(entry)

	def isEnabled(self, **kwargs):
		"""Return whether this reporter should print its summary; this implementation always returns True.

		``cleanup()`` does nothing if this returns a false value. 
		"""
		return True
	
	def cleanup(self):
		"""Log a summary of all the results reported during this run.

		Does nothing if ``isEnabled()`` returns a false value or if no results were reported. Otherwise the 
		results are aggregated (in case there are multiple cycles) and, while holding the reporter lock, either:

		- passed to the comparison generator together with the loaded baselines, if there are any; or 
		- logged one line per result. The standard deviation is included only when the result has more than 
		  one sample and a non-zero value, since it is expressed as a percentage of the mean and that is 
		  undefined when the mean is zero. 
		"""
		if not self.isEnabled(): return
		if not self.results: return
		
		with self._lock:
			logmethod = logging.getLogger('pysys.perfreporter.summary').info

			# perform aggregation in case there are multiple cycles
			p = PerformanceRunData.aggregate(PerformanceRunData('this', self.getRunDetails(), self.results))

			if self.baselines:
				logmethod('\n')
				self.comparisonGenerator.logComparisons(self.baselines+[p])
				return

			logmethod('Performance results summary:')
			# simple formatting when there's no comparisons to be done
			for r in p.results:
				if r['samples']==1 or r['value'] == 0:
					# nothing meaningful to show for a single sample, and a zero mean would otherwise 
					# result in a division by zero when calculating the percentage
					stdDevInfo = ''
				else:
					stdDevInfo = f", stdDev={self.valueToDisplayString(r['stdDev'])} ({100.0*r['stdDev']/r['value']:0.1f}% of mean)"

				logmethod("  %s = %s %s (%s)%s %s", r['resultKey'], self.valueToDisplayString(r['value']), r['unit'],
					'bigger is better' if r['biggerIsBetter'] else 'smaller is better',
					stdDevInfo,
					r['testId'],
					extra = BaseLogFormatter.tag(LOG_TEST_PERFORMANCE, [0,1], suppress_prefix=True))