White Hot Chocolate, Mathematics and Computer Science: analyzedata.py


import datetime as dt
import numpy as np
import scipy as sp
import scipy.stats as sps
import scipy.optimize as spo
import matplotlib as mp
#mp.use('Agg') # If the graphs are to be redirected to a file 
               # instead of displayed on-screen
import matplotlib.pyplot as mpp
import matplotlib.cm as mpc
import matplotlib.dates as mpdates

def readFile(filename):
    f=open(filename,'r')
    tempH=[]
    tempL=[]
    dateH=[]
    dateL=[]
    for i in f:
        year=int(i[11:15])
        if (year==2015):
         continue
        month=int(i[15:17])
        if i[17:21] == 'TMAX':
            destT=tempH
            destD=dateH
        elif i[17:21] == 'TMIN':
            destT=tempL
            destD=dateL
        else:
            continue
        for j in range(0,31):
            index=j*8+21
            value=i[index:index+5]
            if  value != "-9999":
                destT.append(float(value)/10.0)
                destD.append(dt.date(year,month,j+1))
    return (np.array(dateH),np.array(tempH),np.array(dateL),np.array(tempL))

def readFile2(filename):
    f=open(filename,'r')
    tempH=[]
    tempL=[]
    dateH=[]
    dateL=[]
    for i in f:
        year=int(i[11:15])
        if (year!=2015):
         continue
        month=int(i[15:17])
        if i[17:21] == 'TMAX':
            destT=tempH
            destD=dateH
        elif i[17:21] == 'TMIN':
            destT=tempL
            destD=dateL
        else:
            continue
        for j in range(0,31):
            index=j*8+21
            value=i[index:index+5]
            if  value != "-9999":
                destT.append(float(value)/10.0)
                destD.append(dt.date(year,month,j+1))
    return (np.array(dateH),np.array(tempH),np.array(dateL),np.array(tempL))




def slidingAverage(data,n):
    convVector=np.array([1.0/(n*1.0)]*n)
    return sp.convolve(data,convVector,mode='valid')
    
# Read the file into the tuple
DATA=readFile("USW00094728.dly") # Central Park New York, NY
DATA2=readFile2("USW00094728.dly") # Central Park New York, NY
#DATA=readFile("USC00119029.dly") # Waukegan, IL
#DATA=readFile("USC00085660.dly") # Miami Beach, FL - not enough results
#DATA=readFile("USW00094846.dly") # Chicago O'Hare, IL
# Let's create the first plot
fig=mpp.figure()
fig.subplots_adjust(hspace=.5)
sp1=fig.add_subplot(211)
sp2=fig.add_subplot(212)

sp1.plot(DATA[0],DATA[1])
sp2.plot(DATA[2],DATA[3])
sp1.set_title("Temperature, Maximum")
sp1.set_ylabel("Temperature [C]")
sp1.set_xlabel("Date")
sp2.set_title("Temperature, Minimum")
sp2.set_ylabel("Temperature [C]")
sp2.set_xlabel("Date")
fig.savefig("tmaxtmin.png")
mpp.close(fig)

# Sliding average
# We will create the average for 7,30, 365 and 3650 days
# In order to do that, we will use a convolution operation between 
# two vectors: the vector of data and the "mask"

tempsPlot=[]
for slidingWindow in [7,30,182,365,3650]:
    tempsPlot.append(slidingAverage(DATA[1],slidingWindow))
    tempsPlot.append(slidingAverage(DATA[3],slidingWindow))
    
fig=mpp.figure(figsize=(8,12),dpi=600)
fig.subplots_adjust(hspace=.5)

sp=[]
for spIndex in range(1,11):
    sp.append(fig.add_subplot(5,2,spIndex))

spTitles=["7 days sliding average","30 days sliding average","182 days sliding average","365 days sliding average","3650 days sliding average"]

for spIndex in range(0,10,2):
    sp[spIndex].set_title("High temperatures\n"+spTitles[spIndex/2])
    sp[spIndex].plot(DATA[0][0:len(tempsPlot[spIndex])],tempsPlot[spIndex])
    sp[spIndex+1].set_title("Low temperatures\n"+spTitles[spIndex/2])
    sp[spIndex+1].plot(DATA[0][0:len(tempsPlot[spIndex+1])],tempsPlot[spIndex+1])
    
fig.savefig("slidingaverageT.png")
mpp.close(fig)

# Linear regression

htSlope, htInterc, htRValue, htPValue, htStdErr = sps.linregress(np.array([x.toordinal() for x in DATA[0]]),DATA[1])
ltSlope, ltInterc, ltRValue, ltPValue, ltStdErr = sps.linregress(np.array([x.toordinal() for x in DATA[2]]),DATA[3])

htLRvalues=[(htSlope, htInterc, htRValue)]
ltLRvalues=[(ltSlope, ltInterc, ltRValue)]

for dataSetIndex in range(0,10,2):
        dataSet=tempsPlot[dataSetIndex]
        htSlope, htInterc, htRValue, htPValue, htStdErr = sps.linregress(np.array([x.toordinal() for x in DATA[0][0:len(dataSet)]]),dataSet)
        dataSet=tempsPlot[dataSetIndex+1]
        ltSlope, ltInterc, ltRValue, ltPValue, ltStdErr = sps.linregress(np.array([x.toordinal() for x in DATA[2][0:len(dataSet)]]),dataSet)
        htLRvalues.append((htSlope, htInterc, htRValue))
        ltLRvalues.append((ltSlope, ltInterc, ltRValue))

# Print a table with the various values. 
dataSource=["Raw data set","7 days sliding average","30 days sliding average","182 days sliding average","365 days sliding average","3650 days sliding average"]
print "\n\nLinear regression for the sliding averages"
print "------------------------------------------\n\n"
print "{0:^32} {1:^16} {2:^16} {3:^16} {4:^16} {5:^16} {6:^16}".format("Data set source","Slope (High)","Intercept (High)","R-value (High)","Slope (low)","Intercept (low)","R-value (low)")
for dsIndex in range(0,6):
    # Usage of format to present the table nicely
    print "{0:<32>16.6e} {2:>16.6e} {3:>16.6e} {4:>16.6e} {5:>16.6e} {6:>16.6e}".format(dataSource[dsIndex],htLRvalues[dsIndex][0],htLRvalues[dsIndex][1],htLRvalues[dsIndex][2], ltLRvalues[dsIndex][0],ltLRvalues[dsIndex][1],ltLRvalues[dsIndex][2]) 

# Let's get the slope and intercept for the raw data source - for later

htdtSlope,htdtIntercept,_=htLRvalues[0]
ltdtSlope,ltdtIntercept,_=ltLRvalues[0]

# Let's consider now specific dates in the year, to avoid averaging winter, spring, summer and fall. 

DHjan, THjan = zip(*[ (x,y) for x,y in zip(DATA[0],DATA[1]) if x.month == 1 and x.day == 1 ])
DHapr, THapr = zip(*[ (x,y) for x,y in zip(DATA[0],DATA[1]) if x.month == 4 and x.day == 1 ])
DHjul, THjul = zip(*[ (x,y) for x,y in zip(DATA[0],DATA[1]) if x.month == 7 and x.day == 1 ])
DHoct, THoct = zip(*[ (x,y) for x,y in zip(DATA[0],DATA[1]) if x.month == 10 and x.day == 1 ])

DLjan, TLjan = zip(*[ (x,y) for x,y in zip(DATA[2],DATA[3]) if x.month == 1 and x.day == 1 ])
DLapr, TLapr = zip(*[ (x,y) for x,y in zip(DATA[2],DATA[3]) if x.month == 4 and x.day == 1 ])
DLjul, TLjul = zip(*[ (x,y) for x,y in zip(DATA[2],DATA[3]) if x.month == 7 and x.day == 1 ])
DLoct, TLoct = zip(*[ (x,y) for x,y in zip(DATA[2],DATA[3]) if x.month == 10 and x.day == 1 ])

fig=mpp.figure(figsize=(8,12),dpi=600)
sp1=fig.add_subplot(211)
sp2=fig.add_subplot(212)
ax1,=sp1.plot(DHjan,THjan)
ax2,=sp1.plot(DHapr,THapr)
ax3,=sp1.plot(DHjul,THjul)
ax4,=sp1.plot(DHoct,THoct)
sp1.legend([ax1,ax2,ax3,ax4],["January 1st","April 1st","July 1st","October 1st"],prop={'size':6})
sp1.set_title("High Temperatures",fontsize=8)
ax1,=sp2.plot(DLjan,TLjan)
ax2,=sp2.plot(DLapr,TLapr)
ax3,=sp2.plot(DLjul,TLjul)
ax4,=sp2.plot(DLoct,TLoct)
sp2.legend([ax1,ax2,ax3,ax4],["January 1st","April 1st","July 1st","October 1st"],prop={'size':6})
sp2.set_title("Low Temperatures",fontsize=8)
fig.savefig("specific-t.png")
mpp.close(fig)

Xdata=[DHjan,DLjan,DHapr,DLapr,DHjul,DLjul,DHoct,DLoct]
Ydata=[THjan,TLjan,THapr,TLapr,THjul,TLjul,THoct,TLoct]
Titles=["January 1st","April 1st","July 1st","October 1st"]
print "\n\nTrends for specific days in the year"
print "------------------------------------\n\n"
print "{0:^32} {1:^16} {2:^16} {3:^16} {4:^16} {5:^16} {6:^16}".format("Date in year","Slope (High)","Intercept (High)","R-value (High)","Slope (low)","Intercept (low)","R-value (low)")
for index in range(0,8,2):
    x,y,t = Xdata[index],Ydata[index],Titles[index/2]
    htSlope,htIntercept,htRvalue,_,_ = sps.linregress(np.array([xd.toordinal() for xd in x]),y)
    x,y,t = Xdata[index+1],Ydata[index+1],Titles[index/2]
    ltSlope,ltIntercept,ltRvalue,_,_ = sps.linregress(np.array([xd.toordinal() for xd in x]),y)
    print "{0:<32>16.6e} {2:>16.6e} {3:>16.6e} {4:>16.6e} {5:>16.6e} {6:>16.6e}".format(t,htSlope,htIntercept,htRvalue, ltSlope,ltIntercept,ltRvalue)

# Let's detrend the data sets and plot them.

htDT=DATA[1]-(htdtSlope*np.array([x.toordinal() for x in DATA[0]])+htdtIntercept)
ltDT=DATA[3]-(ltdtSlope*np.array([x.toordinal() for x in DATA[2]])+ltdtIntercept)
fig=mpp.figure(figsize=(8,12),dpi=600)
sp1=fig.add_subplot(211)
sp2=fig.add_subplot(212)
sp1.plot(DATA[0],htDT)
sp1.set_title("High Temperatures\nTrend removed",fontsize=8)
sp2.plot(DATA[2],ltDT)
sp2.set_title("Low Temperatures\nTrend removed",fontsize=8)
fig.savefig("temp-notrend.png")
mpp.close(fig)

htYearsum=np.zeros((367))
htYearcount=np.zeros((367))
ltYearsum=np.zeros((367))
ltYearcount=np.zeros((367))

for index in range(0,len(DATA[0])):
    doy=DATA[0][index].timetuple().tm_yday    
    htYearsum[doy]+=htDT[index]
    htYearcount[doy]+=1

for index in range(0,len(DATA[2])):
    doy=DATA[2][index].timetuple().tm_yday    
    ltYearsum[doy]+=ltDT[index]
    ltYearcount[doy]+=1

# Let's calculate a sinusoidal fit for the high temperature and the low temperature
avHT=sum(DATA[1])/(1.0*len(DATA[1]))
avLT=sum(DATA[3])/(1.0*len(DATA[3]))
t=np.array(range(1,367))
sin_fit=lambda x: x[0]*np.sin((2.0*np.pi*t/366.0)+x[2])+x[1] - (avHT+htYearsum[1:367]/htYearcount[1:367])
ht_amp,ht_offset,ht_phase=spo.leastsq(sin_fit,[max(htYearsum)-min(htYearsum), avHT, 0])[0]
sin_fit=lambda x: x[0]*np.sin((2.0*np.pi*t/366.0)+x[2])+x[1] - (avLT+ltYearsum[1:367]/ltYearcount[1:367])
lt_amp,lt_offset,lt_phase=spo.leastsq(sin_fit,[max(ltYearsum)-min(ltYearsum), avLT, 0])[0]
fig=mpp.figure(figsize=(8,12),dpi=600)
sp1=fig.add_subplot(111)
p1=sp1.scatter(range(1,367),avHT+htYearsum[1:367]/htYearcount[1:367],color=mpc.rainbow(0.0))
p2=sp1.scatter(range(1,367),avLT+ltYearsum[1:367]/ltYearcount[1:367],color=mpc.rainbow(0.25))
p3=sp1.scatter(range(1,367),(avHT+avLT+htYearsum[1:367]/htYearcount[1:367]+ltYearsum[1:367]/ltYearcount[1:367])/2.0,color=mpc.rainbow(0.5))
p4,=sp1.plot(t,ht_amp*np.sin((2.0*np.pi*t/366.0)+ht_phase)+ht_offset,color="black")
p5,=sp1.plot(t,lt_amp*np.sin((2.0*np.pi*t/366.0)+lt_phase)+lt_offset,color=mpc.rainbow(1.0))
sp1.grid()
sp1.legend([p1,p2,p3,p4,p5],["High temperature","Low temperature","Average temperature","Sinusoidal fit (high temp)","Sinusoidal fit (low temp)"],prop={'size':11})
sp1.set_title("Year-round averages",fontsize=18)
fig.savefig("temp-yearround-notrend.png")
mpp.close(fig)
print "\n\nSinusoidal fit"
print "--------------\n\n"
print "{0:^12} {1:^12} {2:^12}".format("Variable","High Temp","Low Temp")
print "{0:^12} {1:>12.6e} {2:>12.6e}".format("Offset",ht_offset,lt_offset)
print "{0:^12} {1:>12.6e} {2:>12.6e}".format("Amplitude",ht_amp,lt_amp)
print "{0:^12} {1:>12.6e} {2:>12.6e}".format("Phase",ht_phase,lt_phase)

# Let's get the values for the same doy as before - I don't add the offset in the sinusoidal fitting - it is already present in the linear regression

# January 1st - day of year: 1
HTJanNorm = ht_amp*np.sin((2.0*np.pi*0/366.0)+ht_phase)
HTJanDiff = [ x-HTJanNorm - (y.toordinal()*htdtSlope+htdtIntercept) for x,y in zip(DATA[1],DATA[0]) if y.day == 1 and y.month ==1]
LTJanNorm = lt_amp*np.sin((2.0*np.pi*0/366.0)+lt_phase)
LTJanDiff = [ x-LTJanNorm - (y.toordinal()*ltdtSlope+ltdtIntercept) for x,y in zip(DATA[3],DATA[2]) if y.day == 1 and y.month ==1]

# April 1st - day of year: 91
HTAprNorm = ht_amp*np.sin((2.0*np.pi*91/366.0)+ht_phase)
HTAprDiff = [ x-HTAprNorm - (y.toordinal()*htdtSlope+htdtIntercept) for x,y in zip(DATA[1],DATA[0]) if y.day == 1 and y.month ==4]
LTAprNorm = lt_amp*np.sin((2.0*np.pi*91/366.0)+lt_phase)
LTAprDiff = [ x-LTAprNorm - (y.toordinal()*ltdtSlope+ltdtIntercept) for x,y in zip(DATA[3],DATA[2]) if y.day == 1 and y.month ==4]

# July 1st - day of year: 182
HTJulNorm = ht_amp*np.sin((2.0*np.pi*182/366.0)+ht_phase)
HTJulDiff = [ x-HTJulNorm - (y.toordinal()*htdtSlope+htdtIntercept) for x,y in zip(DATA[1],DATA[0]) if y.day == 1 and y.month ==7]
LTJulNorm = lt_amp*np.sin((2.0*np.pi*182/366.0)+lt_phase)
LTJulDiff = [ x-LTJulNorm - (y.toordinal()*ltdtSlope+ltdtIntercept) for x,y in zip(DATA[3],DATA[2]) if y.day == 1 and y.month ==7]

# October 1st - day of year: 274
HTOctNorm = ht_amp*np.sin((2.0*np.pi*274/366.0)+ht_phase)
HTOctDiff = [ x-HTOctNorm - (y.toordinal()*htdtSlope+htdtIntercept) for x,y in zip(DATA[1],DATA[0]) if y.day == 1 and y.month ==10]
LTOctNorm = lt_amp*np.sin((2.0*np.pi*274/366.0)+lt_phase)
LTOctDiff = [ x-LTOctNorm - (y.toordinal()*ltdtSlope+ltdtIntercept) for x,y in zip(DATA[3],DATA[2]) if y.day == 1 and y.month ==10]


fig=mpp.figure(figsize=(8,12),dpi=600)
sp1=fig.add_subplot(111)
n, bins, patches=sp1.hist([HTJanDiff,LTJanDiff,HTAprDiff,LTAprDiff,HTJulDiff,LTJulDiff,HTOctDiff,LTOctDiff],bins=20,label=["January High","January Low","April High","April Low","July High","July Low","October High","October Low"])
sp1.legend()
fig.savefig("temp-diff-corrected.png")
mpp.close(fig)

fig=mpp.figure(figsize=(8,12),dpi=600)
sp1=fig.add_subplot(111)
n, bins, patches=sp1.hist([HTJanDiff,LTJanDiff,HTAprDiff,LTAprDiff,HTJulDiff,LTJulDiff,HTOctDiff,LTOctDiff],bins=20,label=["January High","January Low","April High","April Low","July High","July Low","October High","October Low"], histtype="barstacked")
sp1.legend()
fig.savefig("temp-diff-corrected-stacked.png")
mpp.close(fig)

nItems, Range, Mean, Variance, t1, t2 = sps.describe(np.concatenate((HTJanDiff,LTJanDiff,HTAprDiff,LTAprDiff,HTJulDiff,LTJulDiff,HTOctDiff,LTOctDiff)))

print "\n\n"
print "Distribution around the trend+cycle"
print "-----------------------------------\n\n"
print "Average: {0:>16.6e}    Std Deviation: {1:>16.6e}".format(Mean, np.sqrt(Variance))


y=1/(np.sqrt(Variance*2 * np.pi))*np.exp( - (bins - Mean)**2 / (2 * Variance))
fig=mpp.figure(figsize=(8,12),dpi=600)
sp1=fig.add_subplot(111)
n, bins, patches=sp1.hist([HTJanDiff,LTJanDiff,HTAprDiff,LTAprDiff,HTJulDiff,LTJulDiff,HTOctDiff,LTOctDiff],bins=20,label=["January High","January Low","April High","April Low","July High","July Low","October High","October Low"], histtype="barstacked", normed=True)
sp1.plot(bins,y,'k--', linewidth=1.5)
sp1.legend()
fig.savefig("temp-diff-corrected-stacked-cdf.png")
mpp.close(fig)


nItems1, Range1, Mean1, Variance1, t11, t21 = sps.describe(np.concatenate((HTJanDiff,HTAprDiff,HTJulDiff,HTOctDiff)))
nItems2, Range2, Mean2, Variance2, t12, t22 = sps.describe(np.concatenate((LTJanDiff,LTAprDiff,LTJulDiff,LTOctDiff)))


# Let's predict what the weather will look like with 95% Probability (2 sigmas)
offDay=dt.date(2014,1,1).toordinal()
dayX=np.array(range(offDay,offDay+365))
doyX=np.array(range(0,365))
predHTh=htdtSlope*dayX+htdtIntercept+ht_amp*np.sin((2.0*np.pi*doyX/366.0)+ht_phase)+2*np.sqrt(Variance1)
predHTm=htdtSlope*dayX+htdtIntercept+ht_amp*np.sin((2.0*np.pi*doyX/366.0)+ht_phase)
predHTl=htdtSlope*dayX+htdtIntercept+ht_amp*np.sin((2.0*np.pi*doyX/366.0)+ht_phase)-2*np.sqrt(Variance1)
predLTh=ltdtSlope*dayX+ltdtIntercept+lt_amp*np.sin((2.0*np.pi*doyX/366.0)+ht_phase)+2*np.sqrt(Variance2)
predLTm=ltdtSlope*dayX+ltdtIntercept+lt_amp*np.sin((2.0*np.pi*doyX/366.0)+ht_phase)
predLTl=ltdtSlope*dayX+ltdtIntercept+lt_amp*np.sin((2.0*np.pi*doyX/366.0)+ht_phase)-2*np.sqrt(Variance2)
monthLoc = mpdates.MonthLocator()
monthFormat = mpdates.DateFormatter('%B')
fig=mpp.figure(figsize=(8,12),dpi=600)
sp1=fig.add_subplot(111)
sp1.xaxis.set_major_locator(monthLoc)
sp1.xaxis.set_major_formatter(monthFormat)
sp1.set_title('Temperature predictions for 2014\n(90% certainty)')
sp1.set_xlabel('Month')
sp1.set_ylabel('Temperature [C]')
sp1.plot([dt.date.fromordinal(d) for d in dayX],predHTh,color="red")
sp1.plot([dt.date.fromordinal(d) for d in dayX],predHTm,':',color="red")
sp1.plot([dt.date.fromordinal(d) for d in dayX],predHTl,color="red")
sp1.plot([dt.date.fromordinal(d) for d in dayX],predLTh,color="blue")
sp1.plot([dt.date.fromordinal(d) for d in dayX],predLTm,':',color="blue")
sp1.plot([dt.date.fromordinal(d) for d in dayX],predLTl,color="blue")
sp1.grid()
fig.autofmt_xdate()
fig.savefig("temp-prediction-2014.png")
mpp.close(fig)



# More generally, for each year, how many data points fall out of the 95% confidence interval for both the high and the low.
extraHT=[]
extraLT=[]
excessHT=[]
excessLT=[]
for yearIndex in range(1876,2013):
    htTuples = zip([ (x,y) for (x,y) in zip(DATA[0],DATA[1]) if x.year == yearIndex])
    ltTuples = zip([ (x,y) for (x,y) in zip(DATA[2],DATA[3]) if x.year == yearIndex])    
    countExtraHT=0
    avgExcessHT=0.0
    countExtraLT=0
    avgExcessLT=0.0
    for tuple, in htTuples:
        dataIndex,data, = tuple
        dayX=dataIndex.toordinal()
        doyX=dataIndex.timetuple().tm_yday
        predHTh=htdtSlope*dayX+htdtIntercept+ht_amp*np.sin((2.0*np.pi*doyX/366.0)+ht_phase)+2*np.sqrt(Variance1)
        predHTl=htdtSlope*dayX+htdtIntercept+ht_amp*np.sin((2.0*np.pi*doyX/366.0)+ht_phase)-2*np.sqrt(Variance1)
        if ( data > predHTh ) or (data < predHTl):
                avgExcessHT+=np.abs(min(data-predHTh,data-predHTl))
                countExtraHT+=1
    for tuple, in ltTuples:
        dataIndex,data, = tuple
        dayX=dataIndex.toordinal()
        doyX=dataIndex.timetuple().tm_yday
        predLTh=ltdtSlope*dayX+ltdtIntercept+lt_amp*np.sin((2.0*np.pi*doyX/366.0)+ht_phase)+2*np.sqrt(Variance2)
        predLTl=ltdtSlope*dayX+ltdtIntercept+lt_amp*np.sin((2.0*np.pi*doyX/366.0)+ht_phase)-2*np.sqrt(Variance2)
        if ( data > predLTh ) or (data < predLTl):
                avgExcessLT+=np.abs(min(data-predLTh,data-predLTl))
                countExtraLT+=1
    extraHT.append(countExtraHT)
    excessHT.append(avgExcessHT/(1.0*countExtraHT))
    extraLT.append(-countExtraLT)
    excessLT.append(-avgExcessLT/(1.0*countExtraLT))

fig=mpp.figure(figsize=(8,12),dpi=600)
sp1=fig.add_subplot(111)
sp1.bar(range(1876,2013),extraHT,color="red")
sp1.bar(range(1876,2013),extraLT,color="blue")
sp1.grid()
fig.savefig("temp-exceptions.png")
mpp.close(fig)

fig=mpp.figure(figsize=(8,12),dpi=600)
sp1=fig.add_subplot(111)
sp1.bar(range(1876,2013),excessHT,color="red")
sp1.bar(range(1876,2013),excessLT,color="blue")
sp1.grid()
fig.savefig("temp-exceptions-diff.png")
mpp.close(fig)

# From here, make "predictions"  for 2015
nDay=min(len(DATA2[1]),len(DATA2[3]))

offDay=dt.date(2015,1,1).toordinal()
dayX=np.array(range(offDay,offDay+nDay))
doyX=np.array(range(0,nDay))
predHTh=htdtSlope*dayX+htdtIntercept+ht_amp*np.sin((2.0*np.pi*doyX/366.0)+ht_phase)+2*np.sqrt(Variance1)
predHTm=htdtSlope*dayX+htdtIntercept+ht_amp*np.sin((2.0*np.pi*doyX/366.0)+ht_phase)
predHTl=htdtSlope*dayX+htdtIntercept+ht_amp*np.sin((2.0*np.pi*doyX/366.0)+ht_phase)-2*np.sqrt(Variance1)
predLTh=ltdtSlope*dayX+ltdtIntercept+lt_amp*np.sin((2.0*np.pi*doyX/366.0)+ht_phase)+2*np.sqrt(Variance2)
predLTm=ltdtSlope*dayX+ltdtIntercept+lt_amp*np.sin((2.0*np.pi*doyX/366.0)+ht_phase)
predLTl=ltdtSlope*dayX+ltdtIntercept+lt_amp*np.sin((2.0*np.pi*doyX/366.0)+ht_phase)-2*np.sqrt(Variance2)
monthLoc = mpdates.MonthLocator()
monthFormat = mpdates.DateFormatter('%B')
fig=mpp.figure(figsize=(8,12),dpi=600)
sp1=fig.add_subplot(111)
sp1.xaxis.set_major_locator(monthLoc)
sp1.xaxis.set_major_formatter(monthFormat)
sp1.set_title('Temperature predictions for 2015\n(90% certainty)')
sp1.set_xlabel('Month')
sp1.set_ylabel('Temperature [C]')
sp1.plot([dt.date.fromordinal(d) for d in dayX],predHTh,color="red")
sp1.plot([dt.date.fromordinal(d) for d in dayX],predHTm,':',color="red")
sp1.plot([dt.date.fromordinal(d) for d in dayX],predHTl,color="red")
sp1.plot([dt.date.fromordinal(d) for d in dayX],predLTh,color="blue")
sp1.plot([dt.date.fromordinal(d) for d in dayX],predLTm,':',color="blue")
sp1.plot([dt.date.fromordinal(d) for d in dayX],predLTl,color="blue")
# add the data for 2015

sp1.scatter([dt.date.fromordinal(d+offDay) for d in range(0,len(DATA2[1]))],DATA2[1],color='red')
sp1.scatter([dt.date.fromordinal(d+offDay) for d in range(0,len(DATA2[3]))],DATA2[3],color='blue')

sp1.grid()
fig.autofmt_xdate()
fig.savefig("temp-prediction-2015.png")
mpp.close(fig)