Example analysis of performance and power data for the Stream benchmark collected with WattProf (developed by RNet Technologies), focusing on the Stream triad function.
%matplotlib inline
import pandas as pd
import numpy as np
from numpy import nan as NA
import matplotlib.pyplot as plt
# Some general font settings for plots
font = {'family' : 'sans-serif',  # 'normal' is not a valid font family name
        'weight' : 'normal',      # or 'bold'
        'size'   : 10}
import matplotlib
matplotlib.rc('font', **font)
First, load the summary data for experiments on 11 different problem sizes.
summaryData = pd.read_csv('data/all.csv')
summaryData
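A quick sanity check that the file really contains the 11 problem sizes and no missing values (a minimal sketch; Problem_size is the column name used later in the notebook):
# Sketch: confirm the number of distinct problem sizes and look for missing values
print(summaryData['Problem_size'].nunique(), "distinct problem sizes")
print(summaryData.isnull().sum())   # NaN count per column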
A simple scatter matrix provides a quick visual check of pairwise correlations between the variables.
#colHeaders = list(summaryData.columns.values)
summary = summaryData[["Problem_size","C3_Power","C3_Energy","TIME","PAPI_L2_DCM","PAPI_L1_DCM"]]
_ = pd.plotting.scatter_matrix(summary, alpha=0.5, figsize=(12, 12), diagonal='hist')
Correlations between the variables of interest. We don't include instruction counts because the hardware counter values on the test architecture are unreliable.
summary.corr()
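The corr() matrix already hints at strong multicollinearity among the predictors. Variance inflation factors make this explicit; a minimal sketch using statsmodels (VIF values well above ~10 mean a predictor is largely explained by the others):
# Sketch: variance inflation factors for the candidate predictors
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor
X = sm.add_constant(summary[["Problem_size", "TIME", "PAPI_L2_DCM", "PAPI_L1_DCM"]].astype(float))
for i, col in enumerate(X.columns):
    if col != 'const':
        print(col, variance_inflation_factor(X.values, i))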
Selecting energy on one of the cores as the modeling target, we perform a linear fit for C3_Energy. We fit one independent variable at a time; including multiple independent variables in the same model is not a good idea here because many of the candidate predictors are highly correlated with each other.
import statsmodels.formula.api as smf

indepcols = ["Problem_size", "TIME", "PAPI_L2_DCM", "PAPI_L1_DCM"]
depvar = 'C3_Energy'
count = 1
for indepvar in indepcols:
    print('+' * 40 + '\n' + "Independent variable(s): " + str(indepvar))
    # Input points for evaluating the fitted model
    xx = pd.DataFrame({indepvar: np.linspace(summary[indepvar].min(), summary[indepvar].max(), 10)})
    # 1st-order polynomial
    poly_1 = smf.ols(formula='%s ~ 1 + %s' % (depvar, indepvar), data=summary).fit()
    print("\n1st order polynomial r-squared: %f\n" % poly_1.rsquared)
    fig = plt.figure(count)
    plt.scatter(summary[indepvar], summary[depvar], alpha=0.3)  # Plot the raw data
    plt.plot(xx[indepvar], poly_1.predict(xx),
             'g-', label='Poly n=1 $R^2$=%.2f' % poly_1.rsquared, alpha=0.9)
    plt.xlabel(indepvar); plt.ylabel(depvar)
    plt.legend(loc='best')
    plt.show()
    count += 1
Second-order polynomial fit. Note that in a patsy formula the quadratic term must be wrapped in I(...) so that ** is treated as arithmetic rather than as the formula interaction operator.
indepcols = ["Problem_size","TIME","PAPI_L2_DCM","PAPI_L1_DCM"]
depvar = 'C3_Energy'
count = 1
for indepvar in indepcols:
print '+'*40 + '\n' + "Independent variable(s): " + str(indepvar)
# input points for testing predictions
xx = pd.DataFrame({indepvar: np.linspace(summary[indepvar].min(), summary[indepvar].max(), 10)})
# 1st order polynomial
poly_2 = smf.ols(formula='%s ~ 1 + %s + (%s) ** 2' % (depvar, indepvar, indepvar), data=summary).fit()
print "\n2nd order polynomial r-squared: %f\n" % poly_2.rsquared
fig = plt.figure(count)
plt.scatter(summary[indepvar], summary[depvar], alpha=0.3) # Plot the raw data
plt.plot(xx, poly_2.predict(xx),
'g-', label='Poly n=2 $R^2$=%.2f' % poly_2.rsquared, alpha=0.9)
plt.xlabel(indepvar); plt.ylabel(depvar)
plt.show()
count += 1
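To decide between the first- and second-order fits, the statsmodels results also expose AIC and adjusted R², which penalize the extra term. A minimal sketch for a single predictor (Problem_size chosen only for illustration):
# Sketch: compare 1st- and 2nd-order fits for one predictor by AIC and adjusted R^2.
# Lower AIC / higher adjusted R^2 indicates a better fit-complexity trade-off.
indepvar = 'Problem_size'
fit1 = smf.ols('C3_Energy ~ 1 + %s' % indepvar, data=summary).fit()
fit2 = smf.ols('C3_Energy ~ 1 + %s + I(%s ** 2)' % (indepvar, indepvar), data=summary).fit()
for name, fit in [('n=1', fit1), ('n=2', fit2)]:
    print(name, 'AIC=%.1f' % fit.aic, 'adj. R^2=%.3f' % fit.rsquared_adj)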
# Using statsmodels sm.OLS with a hand-built design matrix
import statsmodels.api as sm
import numpy as np
import itertools
from statsmodels.sandbox.regression.predstd import wls_prediction_std

depvar = 'C3_Energy'
indepcols = ["Problem_size", "TIME", "PAPI_L2_DCM", "PAPI_L1_DCM"]
indepvars = summary[indepcols]
models = []
#for numvars in range(1,len(indepcols)):
#    for indeps in itertools.combinations(indepvars, numvars):
for indepvar in indepcols:
    print('+' * 40 + '\n' + "Independent variable(s): ", indepvar, "; Dependent Variable: ", depvar)
    nsample = summary[indepvar].shape[0]
    x = summary[indepvar]
    y = summary[depvar]
    # Design matrix: linear, sine, and cubic terms plus a constant column
    X = np.column_stack((x, np.sin(x), x**3, np.ones(nsample)))
    #X = np.column_stack((x, x**3, np.ones(nsample)))
    # Categorical terms are not appropriate here, but kept as a future example
    #dummy = sm.categorical(np.array(summary[indepvar]), drop=True)
    #X = np.column_stack((x, dummy[:,1:]))
    #X = sm.add_constant(X, prepend=False)
    model = sm.OLS(y, X)
    f = model.fit()
    print(f.summary())
    print('Parameters: ', f.params)
    print('Standard errors: ', f.bse)
    print('Predicted values: ', f.predict())
    # Plot the data against the OLS predictions.
    # Confidence intervals around the predictions are built with wls_prediction_std.
    prstd, iv_l, iv_u = wls_prediction_std(f)
    fig, ax = plt.subplots(figsize=(8, 6))
    x_hat = np.linspace(summary[indepvar].min(), summary[indepvar].max(), nsample)
    ax.plot(summary[indepvar], y, 'o', label="data")
    #ax.plot(x, y_true, 'b-', label="True")
    ax.plot(x, f.fittedvalues, 'g--.', label="OLS")
    ax.plot(x, iv_u, 'r--')
    ax.plot(x, iv_l, 'r--')
    ax.legend(loc='best')
    plt.xlabel(indepvar)
    plt.ylabel(depvar)
    plt.show()
    #models.append((model, x, y, f))
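Note that x_hat is built inside the loop above but never used; to evaluate a fit on such a grid of new inputs, the same basis columns (x, sin(x), x**3, constant) have to be rebuilt for the new points. A minimal sketch, assuming the indepvar and fit f left over from the last loop iteration:
# Sketch: evaluate the last sm.OLS fit on a grid of new input values.
# The design matrix for new points must use the same columns as in training.
x_new = np.linspace(summary[indepvar].min(), summary[indepvar].max(), 50)
X_new = np.column_stack((x_new, np.sin(x_new), x_new**3, np.ones(len(x_new))))
print(f.predict(X_new)[:5])   # first few predicted C3_Energy values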
# Using numpy polynomial fits (Chebyshev and Legendre bases)
depvar = 'C3_Energy'
indepcols = ["Problem_size", "TIME", "PAPI_L2_DCM", "PAPI_L1_DCM"]
indepvars = summary[indepcols]
for indepvar in indepcols:
    print('+' * 40 + '\n' + "Independent variable(s): ", indepvar, "; Dependent Variable: ", depvar)
    nsample = summary[indepvar].shape[0]
    x = summary[indepvar]
    y = summary[depvar]
    # Grid of new inputs for evaluating the fitted models
    x_hat = np.linspace(summary[indepvar].min(), summary[indepvar].max(), nsample)
    #model = np.poly1d(np.polyfit(x, y, 3))
    #f = model(x_hat)  # for poly1d only
    # Polynomial degree chosen per variable
    if indepvar in ['Problem_size', 'TIME']: degree = 4
    elif indepvar == 'PAPI_L1_DCM': degree = 8
    else: degree = 6  # PAPI_L2_DCM
    model_cheb = np.polynomial.chebyshev.chebfit(x, y, degree)
    print('Chebyshev:', model_cheb)
    f_cheb = np.polynomial.chebyshev.chebval(x_hat, model_cheb)
    print(f_cheb)
    model_leg = np.polynomial.legendre.legfit(x, y, degree)
    print('Legendre:', model_leg)
    f_legendre = np.polynomial.legendre.legval(x_hat, model_leg)
    print(f_legendre)
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.plot(summary[indepvar], y, 'o', label="Data")
    #ax.plot(x, y_true, 'b-', label="True")
    ax.plot(x_hat, f_cheb, 'r:', label="Chebyshev(%d)" % degree)
    ax.plot(x_hat, f_legendre, 'g--.', label="Legendre(%d)" % degree)
    ax.legend(loc='best')
    plt.xlabel(indepvar)
    plt.ylabel(depvar)
    # Save before show(): with the inline backend the figure is cleared after show()
    plt.savefig('figures/%s_%s.pdf' % (depvar, indepvar), bbox_inches='tight')
    plt.show()
    #models.append((model, x, y, f))
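The low-level chebfit/legfit calls fit against the raw x values, which can be poorly conditioned at high degree when the counters take large values. A minimal alternative sketch, assuming the x, y, degree, and x_hat values left over from the last loop iteration, using numpy's convenience classes that rescale x into the basis's natural domain before fitting:
# Sketch: the same fits via numpy's convenience classes, which map x into the
# polynomials' natural domain before fitting (usually better conditioned)
cheb_model = np.polynomial.Chebyshev.fit(x, y, degree)
leg_model = np.polynomial.Legendre.fit(x, y, degree)
print(cheb_model(x_hat)[:5])   # Chebyshev predictions at the first few grid points
print(leg_model(x_hat)[:5])    # Legendre predictions at the same points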