This is the program file for Frank Schmid's article: "Stock Return and Interest Rate Risk at Fannie Mae and Freddie Mac" in the January/February 2005 issue of the Federal Reserve Bank of St. Louis's Review. The data are available from the Center for Research in Security Prices (CRSP), The Board of Governers of the Federal Reserve System, and Bloomberg LP. The first GAUSS program runs the Models used in Figures 2 and 3. The second GAUSS program runs M-Plots for smoothing selection. The third GAUSS program runs the Backfitting of Appendix B. The fourth GAUSS program runs the ANOVA of Table 1. @ THIS PROGRAM RUNS THE MODELS USED IN SCHMID (2005) @ @ @ @ @ @ @ @ @ @ May 20, 2004 @ @ no parametric component--original Cleveland/Devlin (1988) approach @ @ three variables in nonparametric component @ @ third variable is time index; plots only the first two variables @ @ @ @ @ @ @ @ @ @ @ @ takes vector of dependent variable from bak4d01.prg @ new; #lineson; tstart=date; library pgraph; save path=c:\data\gse2\gauss\loess; @ loess-smoother; Cleveland and Devlin (1988) @ @ generates conditioning plots @ @ the intercept must be included in the nonparametric component @ @ general principle: always sort back to original order @ @ Eucledian distances are based on variables in nonparametric component @ @ (nonconstant) regressors in nonparametric component must be scaled @ review=1; @ for the Review article (different ytics) @ ff=1; @ 1: Fannie Mae; 2: Freddie Mac @ boot=0; @ 1: bootstrap @ trim_=0; @ do not trim extreme observations @ comp=1; @ 1: do the calculations; 0: no calc.; graph partial impact @ ig=1; @ i: estimate partial impact for ith variable of nonparametric component @ ftest=0; @ 1: do F-tests (only for ig.==1) @ var3=3; @ for time index; 1: pick 25th percentile; 2: pick mean; 3: pick 75th percentile @ lin=0; @ lin=1: locally linear fitting; otherwise: locally quadratic fitting @ fs=0.8; @ smoothing parameter; fs=q/n; q: number of neighbors; n: number of obs.; the higher fs, the smoother the graph @ krow=250; @ do 250 rows at a time in do-loop--for display only @ ki=0; @ 1: vertial lines for confidence intervals; otherwise: bands @ dtset1="c:\\data\\gse2\\gauss\\wklyapril"; @ GSE data @ if comp.==0; ftest=0; endif; clear i1,i2,i3,i4,i5,i6; vnames="xx"; @ unrestricted model @ dtset2 ="c:\\data\\gse2\\gauss\\loess\\n3t01fa"; @ degrees of freedom @ dtset3 ="c:\\data\\gse2\\gauss\\loess\\n3t01ya"; @ yhat @ dtset4 ="c:\\data\\gse2\\gauss\\loess\\n3t01ta"; @ overall residuals @ dtset5 ="c:\\data\\gse2\\gauss\\loess\\n3t01ra"; @ matrix RA @ dtset6 ="c:\\data\\gse2\\gauss\\loess\\n3t01sa"; @ smoother matrix S @ dtset7 ="c:\\data\\gse2\\gauss\\loess\\n3t01da"; @ diag(RA) @ dtset8 ="c:\\data\\gse2\\gauss\\loess\\n3t01ga"; @ diag(RA^2) @ @ restricted model @ dtset22="c:\\data\\gse2\\gauss\\loess\\n3t01fn"; @ degrees of freedom @ dtset33="c:\\data\\gse2\\gauss\\loess\\n3t01yn"; @ yhat @ dtset44="c:\\data\\gse2\\gauss\\loess\\n3t01tn"; @ overall residuals @ dtset55="c:\\data\\gse2\\gauss\\loess\\n3t01rn"; @ matrix RN @ dtset66="c:\\data\\gse2\\gauss\\loess\\n3t01sn"; @ smoother matrix S @ dtset77="c:\\data\\gse2\\gauss\\loess\\n3t01dn"; @ diag(RA-RN) @ dtset88="c:\\data\\gse2\\gauss\\loess\\n3t01gn"; @ diag((RA-RN)^2) @ if comp.==1; @ 1: do the calculations @ if ig.==1; dos del c:\data\gse2\gauss\loess\n3t01*.*; dos del c:\data\gse2\gauss\loess\n3m01*.*; dos del c:\data\gse2\gauss\loess\n3o01*.*; endif; if ftest.==1; output file=c:\prg\gse2\\nop3d01.out reset; endif; "GAUSS time ";; timestr(0);; " date (US)";; datestr(0); ?;"input file: nop3d01.prg";?; if ig.==1; @ read the data @ open f1=^dtset1 for read; rw1=rowsf(f1); dat=readr(f1,rw1); f1=close(f1); clear f1; dat=dat~seqa(1,1,rows(dat)); @ time index @ @-----------------------------------------------------------@ format /rd 10,0; "number of observations in dataset:" rows(dat); @ define variables @ @ left-hand side variable @ if ismiss(dat[.,3]).==1; dat[.,3]=missrv(dat[.,3],minc(dat[.,3])-1); dat=delif(dat,dat[.,3].==minc(dat[.,3])); endif; if ismiss(dat[.,6]).==1; dat[.,6]=missrv(dat[.,6],minc(dat[.,6])-1); dat=delif(dat,dat[.,6].==minc(dat[.,6])); endif; if ismiss(dat[.,14]).==1; dat[.,14]=missrv(dat[.,14],minc(dat[.,14])-1); dat=delif(dat,dat[.,14].==minc(dat[.,14])); endif; if ismiss(dat[.,15]).==1; dat[.,15]=missrv(dat[.,15],minc(dat[.,15])-1); dat=delif(dat,dat[.,15].==minc(dat[.,15])); endif; if ff.==1; @ 1: Fannie Mae @ if ismiss(dat[.,5]).==1; dat[.,5]=missrv(dat[.,5],minc(dat[.,5])-1); dat=delif(dat,dat[.,5].==minc(dat[.,5])); endif; y=dat[.,5]-ln(1+(dat[.,3]./36000).*7); @ Log excess return of Fannie Mae @ elseif ff.==2; @ 2: Freddie Mac @ if ismiss(dat[.,12]).==1; dat[.,12]=missrv(dat[.,12],minc(dat[.,12])-1); dat=delif(dat,dat[.,12].==minc(dat[.,12])); endif; y=dat[.,12]-ln(1+(dat[.,3]./36000).*7); @ Log excess return of Fannie Mae @ endif; x= (dat[.,14]-dat[.,15])./1e2 @ change in yield spread @ ~(dat[.,15])./1e2 @ change in 3-month yield @ ~dat[.,cols(dat)]-minc(dat[.,cols(dat)])+1; @ time index, starting on 1 @ format /rd 10,0; "number of obsevations analzyed:" rows(dat); quart(x[.,3]); @ percentiles of time index @ dat[quart(x[.,3]),1]; wait; clear dat; @ descriptive statistics @ format /rd 6,4; @ minc(exp(y))~median(exp(y))~meanc(exp(y))~maxc(exp(y))~(stdc(exp(y)).*sqrt(rows(y)-1)./sqrt(rows(y))); @ minc((y))~median((y))~meanc((y))~maxc((y))~(stdc((y)).*sqrt(rows(y)-1)./sqrt(rows(y))); /* ************************** core program ************************* */ @ nonparametric component; take median of asset category; breakpoints as in call reports @ /* @ market excess return--second and third variables @ n3t01m=quart(x[.,2])~quart(x[.,3]); save n3t01m; @ change of short rate--first variable and third variables @ n3m01m=quart(x[.,1])~quart(x[.,3]); save n3m01m; @ change of term spread--first variable and second variables @ n3o01m=quart(x[.,1])~quart(x[.,2]); save n3o01m; */ @ change of term spread--second variable and third variables @ n3t01m= (median(selif(x[.,2],x[.,2].<0))|0|median(selif(x[.,2],x[.,2].>0))) ~quart(x[.,3]); save n3t01m; @ change of short rate--first variable and third variables @ n3m01m= (median(selif(x[.,1],x[.,1].<0))|0|median(selif(x[.,1],x[.,1].>0))) ~quart(x[.,3]); save n3m01m; @ time index--first and second variables @ n3o01m= (median(selif(x[.,1],x[.,1].<0))|0|median(selif(x[.,1],x[.,1].>0))) ~(median(selif(x[.,2],x[.,2].<0))|0|median(selif(x[.,2],x[.,2].>0))); save n3o01m; omin=rows(y); @ info for smoother-matrix @ @--------------------------------------------------------------------@ @ substitute partial impact for y @ open f1=c:\data\gse2\gauss\loess\b4t01f2.fmt; rw=rowsf(f1); y=readr(f1,rw); @ not scaled, original order @ f1=close(f1); n3t01x=x; @ in original order; not scaled @ n3t01y=y; @ in original order @ save n3t01x, n3t01y; q=int(fs.*rows(x)); @ smoothing parameter; choose nearest integer @ n3t01q=q; save n3t01q; /* ********************** total impact ********************** */ y__=y; x___=x./(stdc(x)'); @ scale; see Cleveland & Devlin @ svec__=seqa(1,1,rows(y__)); @ vector which gives the original order of the obs. @ @ locally linear fitting oder locally quadratic fitting @ if lin./=1; @ locally quadratic fitting @ x__=x___~zeros(rows(x___),cols(x___)+cols(x___)*(cols(x___)-1)./2); @ last couple of cols: quadratic terms & cross-products @ i=0; k=0; do until i.==cols(x___); i=i+1; j=0; do until j.==i; j=j+1; k=k+1; x__[.,cols(x___)+k]=x___[.,i].*x___[.,j]; endo; endo; else; x__=x___; @ locally linear fitting @ endif; x__=ones(rows(x__),1)~x__; @ intercept @ clear x,y; "created x__"; /* *************** smoothing: total impact ****************** */ @ variables have not beeen sorted yet @ "smoothing"; i=0; k=0; do until i==rows(x__); @ number of obs. to be smoothed over @ i=i+1; @ Eucledian distances @ dists=sqrt(sumc((x___'-x___[i,.]').^2)); @ Eucledian distance @ distx=sortc(dists~x__~y__~svec__,1); @ sorted by Eucledian distance @ disti=distx[.,1]; @ distance of i-th vector of obs. to all other vectors of observations @ xs=distx[.,cols(dists)+1:(cols(x__)+1)]; @ sorted regressor matrix @ ys=distx[.,cols(distx)-1]; @ sorted vector y @ svec=distx[.,cols(distx)]; @ vector used for sorting back to orig. order; has to be sorted too @ dx=disti[q,.]; @ distance to (q-1)-th neighbor @ @ dx; wait; @ @ determine the q weights wi @ uvec=disti./dx; @ vector of weights; if program goes down here, then there is not enough variation in the explanatory variables @ @ format 10,6; uvec[1:20,.]; wait; @ wuvec=(1-uvec.^3).^3; @ tricube weight function @ exvec=(uvec .ge 0).*(uvec .lt 1); if sumc(exvec) ne rows(exvec); loc=indexcat(exvec,0); wuvec[loc]=zeros(rows(loc),1); endif; clear loc,exvec; @ format /re 10,6; wuvec'; wait; @ @ generate matrix wx (see own notes) @ wx=(wuvec.*xs)'; @ weights, multiplied with regressors @ @ number of rows equals number of regressors @ wbh=inv(wx*xs)*wx*ys; @ S, be it noted: vector yd has a different order for each row of this matrix; thus, sort columns(!) back to original order; rows are appended in original order anyway @ sortvec=svec; @ svec: has same order as columns of this matrix @ s__ =sortc((xs[1,.]*inv(wx*xs)*wx)'~sortvec,2); if i==1; create f6=^dtset6 with ^vnames,cols(s__[.,1]'),8; call writer(f6,s__[.,1]'); f6=close(f6); elseif i==2; open f6=^dtset6 for append; @ keep file open @ call writer(f6,s__[.,1]'); else; call writer(f6,s__[.,1]'); endif; clear s__; if k.==krow; format /rd 2,0; i.*100./rows(x__);; "%"; k=0; endif; endo; @ end of smoothing over all observations @ f6=close(f6); clear f6,disti,dists,distx,p,sortvec,svec,uvec,wuvec,wx; clear i,k,x,xs,x_,y_,svec__,svecs,x__,x___,y,ys; clear y__,n3t01p,n3t01x,n3t01y; /* ************** estimated total impact ******************* */ @ read data, sorted in original order @ open f1=c:\data\gse2\gauss\loess\n3t01y.fmt; rw=rowsf(f1); y=readr(f1,rw); @ not scaled, original order @ f1=close(f1); open f6=^dtset6 for read; rw6=rowsf(f6); s=readr(f6,rw6); @ smoother matrix S @ f6=close(f6); clear f6; @ GCV--generalized cross-validation @ cv_=cv(s,y); format /re 10,6; "gcv:" cv_[1]; @ CV--(delete-one) cross-validation @ format /re 10,6; "cv:" cv_[2]; yhat=s*y; @ n x 1 Vektor of level of impact @ create f3=^dtset3 with ^vnames,1,8; call writer(f3,yhat); f3=close(f3); res=y-yhat; @ overall residuals @ create f4=^dtset4 with ^vnames,1,8; call writer(f4,res); f4=close(f4); @ Hastie and Tibshirani (1990, p. 66) @ i1=0; do while i1> up into do-loop @ sigy=reshape(sige,rows(yhat),1); i8=0; do while i81; nextwind; endif; _paxht=.13; @ size characters of axes labels @ _ptitlht=.15; @ size of characters in title @ _pnotify=0; @ no screen activity while writing tkf file @ _plwidth=3; @ _plwidth={1 3 1}; line width @ _pcsel=15; @ white @ _pnum=2; @ controls axes numbering @ _pltype={6}; @ dots etc. @ _plctrl={0 -1}; @ _plctrl={-1 0 -1}; frequency of symbols ; -1: symbols only @ _pstype=7; @ symbol type @ _psymsiz=2.5; @ size of the symbols on the main curve @ fonts("simplex"); ylabel("left-hand side variable"); s_=sortc(x~lb~yhat~ub~y,ig4); @ sort by x[.,ig4] for graph @ xg=s_[.,1:cols(x)]; @ variable on horizontal axis of graph @ @ xg; wait; @ yg=s_[.,(cols(xg)+1):cols(s_)]; @ variable on vertical axis of graph @ scale(xg[.,ig4],yg); xlabel("right-hand side variable"); xy(xg[.,ig4],yg[.,2 4]); @ confidence intervals @ if ki.==1; _plctrl={0 0}; @ for confidence bounds @ _plwidth=1.5; @ 0: thinnes possible line @ ig1=0; do until ig1==rows(xg); ig1=ig1+1; xg1=xg[ig1,.]|xg[ig1,.]; yg1=(yg[ig1,1 3]|yg[ig1,1 3])'; xy(xg1,yg1); endo; endif; endo; endwind; */ /***************************************************************/ "creating ra=(imat-s)*(imat-s)' and diag(ra)"; open f6=^dtset6 for read; rw6=rowsf(f6); l=readr(f6,rw6); @ smoother matrix S @ f6=close(f6); clear f6; diagra=reshape(0,rows(l),1); ih=0; do while ih=n3t01m[1,1]).*(n3t01x[.,2].<=n3t01m[3,1])),1); elseif q1.==3; igm1=sortc(selif(n3t01x[.,1],n3t01x[.,2].>=n3t01m[1,1]),1); endif; if var3.==1; igm2=sortc(selif(n3t01x[.,1],n3t01x[.,3].<=n3t01m[2,2]),1); elseif var3.==2; igm2=sortc(selif(n3t01x[.,1],((n3t01x[.,3]).>=n3t01m[1,2]).*(n3t01x[.,3].<=n3t01m[3,2])),1); elseif var3.==3; igm2=sortc(selif(n3t01x[.,1],n3t01x[.,3].>=n3t01m[1,2]),1); endif; elseif ig.==2; if q1.==1; igm1=sortc(selif(n3t01x[.,2],n3t01x[.,1].<=n3m01m[2,1]),1); elseif q1.==2; igm1=sortc(selif(n3t01x[.,2],((n3t01x[.,1]).>=n3m01m[1,1]).*(n3t01x[.,1].<=n3m01m[3,1])),1); elseif q1.==3; igm1=sortc(selif(n3t01x[.,2],n3t01x[.,1].>=n3m01m[1,1]),1); endif; if var3.==1; igm2=sortc(selif(n3t01x[.,2],n3t01x[.,3].<=n3m01m[2,2]),1); elseif var3.==2; igm2=sortc(selif(n3t01x[.,2],((n3t01x[.,3]).>=n3m01m[1,2]).*(n3t01x[.,3].<=n3m01m[3,2])),1); elseif var3.==3; igm2=sortc(selif(n3t01x[.,2],n3t01x[.,3].>=n3m01m[1,2]),1); endif; endif; igm=sortc(intrsect(igm1,igm2,1),1); @ take intersection of vectors @ if trim_.==1; igmin=igm[4]; @ ignore the ten smallest observations @ igmax=igm[rows(igm)-3]; @ ignore the ten largest observations @ else; igmin=igm[1]; @ do not ignore the ten smallest observations @ igmax=igm[rows(igm)]; @ do not ignore the ten largest observations @ endif; clear igm; @ general principle: ad1: One observation after another is chosen from the set of observations, its vector of explantory variables is altered--taking into account the values that are held constant-- and this observations is then added--as observation n+1--to the set of observations; no value of the dependent variable is necessary because this observation receives a zero value anyway when estimated. ad2: The model is re-estimated completely. The program saves to disk the estimated value and the pertinent confidence bounds. ad3: Although there is one more observation, the program uses the same value of q for smoothing @ z=zeros(rows(n3t01x),4)~n3t01x; @ y_hat, confidence bounds, x; first column: dummy @ i3=0; i4=0; i5=0; @ i4,i5 are counters @ do while i3.>>>>>>>>>>>> Hier konstant zu haltende Werte festlegen <<<<<<<<<<<<<< @ @ pay attention to the columns! @ if ig.==1; x[1,2 3]=med; elseif ig.==2; x[1,1 3]=med; endif; @ >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< @ @ routine that eliminates arithmetically impossible or economically meaningless combinations of the variables of the nonparametric component @ i6=0; @ 0: do not use obs; default @ if ig.==1; if (x[1,1].>=igmin).*(x[1,1].<=igmax); i6=1; @ 1: use obs @ endif; elseif ig.==2; if (x[1,2].>=igmin).*(x[1,2].<=igmax); i6=1; @ 1: use obs @ endif; endif; @ ig @ if i6.==0; goto flag1; @ do not use obs @ else; z[i3,1]=1; i5=i5+1; endif; @ use obs @ @ preparation of matrices for smoothing @ x___ =x./(stdc(x)'); @ includes new obsrvation @ svec__=seqa(1,1,rows(y__)); @ for sorting @ @ locally linear fitting oder locally quadratic fitting @ if lin /= 1; @ locally quadratic fitting @ x__=x___~zeros(rows(x___),cols(x___)+cols(x___)*(cols(x___)-1)./2); @ for squares and cross-products @ i=0; k=0; do until i==cols(x___); i=i+1; j=0; do until j==i; j=j+1; k=k+1; x__[.,cols(x___)+k]=x___[.,i].*x___[.,j]; endo; endo; else; x__=x___; @ locally linear fitting @ endif; x__=ones(rows(x__),1)~x__; @ intercept @ /* *************** smoothing: partial impact ****************** */ @ at this point, nothing must be sorted! @ i=0; do while i.<1; @ rows(x__); number of observations to be smoothed @ i=i+1; @ Eucledian distances @ dists=sqrt(sumc((x___'-x___[i,.]').^2)); @ Eucledian distance @ distx=sortc(dists~x__~y__~svec__,1); @ sorted by Eucledian distance @ disti=distx[.,1]; @ distance of i-th vector of obs. to all other vectors of observations @ xs=distx[.,cols(dists)+1:(cols(x__)+1)]; @ sorted regressor matrix @ ys=distx[.,cols(distx)-1]; @ sorted vector y @ svec=distx[.,cols(distx)]; @ vector used for sorting back to orig. order; has to be sorted too @ dx=disti[q,.]; @ distance to (q-1)-th neighbor @ @ dx; wait; @ @ determine the q weights wi @ uvec=disti./dx; @ vector of weights; if program goes down here, then there is not enough variation in the explanatory variables @ @ format 10,6; uvec[1:20,.]; wait; @ wuvec=(1-uvec.^3).^3; @ tricube weight function @ exvec=(uvec .ge 0).*(uvec .lt 1); if sumc(exvec) ne rows(exvec); loc=indexcat(exvec,0); wuvec[loc]=zeros(rows(loc),1); endif; wuvec[1]=0; @ observation itself has zero weight @ clear loc,exvec; @ format /re 10,6; wuvec'; wait; @ @ generate matrix wx (see own notes) @ wx=(wuvec.*xs)'; @ weights, multiplied with regressors; number of rows equals number of regressors @ @ S, be it noted: vector y has a different order for each row of this matrix; thus, sort columns(!) back to original order; rows are appended in original order anyway @ sortvec=svec; @ sved: has same order as cols of this matrix @ s__=sortc(( x__[1,.] *inv(wx*xs)*wx)'~sortvec,2); snew=s__[.,1]'; @ row for smoother matrix x; rows is not being added @ endo; @ do-loop over obs to be smoothed; run to 1 only @ yhat=snew*y__; @ 1 x 1 vector of level of impact @ @ y__ is not sorted; bphat: original bphat @ @ sum of li squared (Cleveland & Devlin, page 599) @ @ sigp=sige*(1+sumc((snew.^2)')); p: "prediction" @ @ break up into do-loop @ sigp=reshape(sige,rows(snew),1); @ f is symmetric @ i9=0; do while i90; n3t01z=selif(z[.,2:cols(z)],exvec); else; "error: no valid observations in conditioning plot - wait"; wait; endif; clear z,exvec; if ig.==1; if q1.==1; n3t01z1=n3t01z; save n3t01z1; elseif q1.==2; n3t01z2=n3t01z; save n3t01z2; elseif q1.==3; n3t01z3=n3t01z; save n3t01z3; endif; elseif ig.==2; if q1.==1; n3m01z1=n3t01z; save n3m01z1; elseif q1.==2; n3m01z2=n3t01z; save n3m01z2; elseif q1.==3; n3m01z3=n3t01z; save n3m01z3; endif; endif; clear n3t01z1,n3t01z2,n3t01z3; clear n3m01z1,n3m01z2,n3m01z3; endo; @ q1 @ endif; @ comp.==1 @ tend=date; hs=ethsec(tstart,tend); "time elapsed before drawing charts: ";; etstr(hs); /* ******************* graph: partial impact ******************** */ if ftest./=1; graphset; begwind; window(3,3,1); xsize=9./3; ysize=6.855./3; makewind(xsize,ysize,0,0,1); setwind(1); if ig.==1; igraph=3; elseif ig.==2; igraph=6; endif; q1=0; do while q11; nextwind; endif; if ig.==1; if q1.==1; open f1=c:\data\gse2\gauss\loess\n3t01z1.fmt; elseif q1.==2; open f1=c:\data\gse2\gauss\loess\n3t01z2.fmt; elseif q1.==3; open f1=c:\data\gse2\gauss\loess\n3t01z3.fmt; endif; elseif ig.==2; if q1.==1; open f1=c:\data\gse2\gauss\loess\n3t01z1.fmt; elseif q1.==2; open f1=c:\data\gse2\gauss\loess\n3t01z2.fmt; elseif q1.==3; open f1=c:\data\gse2\gauss\loess\n3t01z3.fmt; elseif q1.==4; open f1=c:\data\gse2\gauss\loess\n3m01z1.fmt; elseif q1.==5; open f1=c:\data\gse2\gauss\loess\n3m01z2.fmt; elseif q1.==6; open f1=c:\data\gse2\gauss\loess\n3m01z3.fmt; endif; endif; @ ig @ rw=rowsf(f1); yxg=readr(f1,rw); @ in original order @ f1=close(f1); yg=yxg[.,1:3]; @ y_hat and confidence bounds @ xg=yxg[.,4:cols(yxg)]; clear yxg; @ not scaled; original order @ @ sort for chart @ if ig.==1; s__=sortc(xg~yg,ig); xg1=s__[.,ig]; elseif ig.==2; if q1.<=3; s__=sortc(xg~yg,ig-1 ); xg1=s__[.,ig-1]; elseif q1.>3; s__=sortc(xg~yg,ig); xg1=s__[.,ig]; endif; endif; yg1=s__[.,(cols(xg)+1):cols(s__)]; clear s__; _pltype={3 6 3}; @ line type @ _pcolor={8 8 8}; @ line color @ _pframe={1,1}; _pmcolor=8; _paxht=.2; @ size characters of axes labels @ _ptitlht=.2; @ size of characters in title @ _pnotify=0; @ no screen activity while writing tkf file @ _plwidth={5 5 5}; @ _plwidth={1 3 1}; line width @ _pcsel=15; @ white @ _pnum=2; @ controls axes numbering @ _pnumht=0.2; @ size of axis numbering @ _plctrl={0 0 0}; @ frequency of symbols ; -1: symbols only @ _pstype=1; @ 1: circle; symbol type @ _psymsiz=2; @ size of the symbols on the main curve @ _ptitle=0; @ 0: no title @ _pxlabel=0; @ no y label @ _pylabel=0; @ no x label @ fonts("simplex"); if ig.==1; if q1.==1; title("Change of Term Spread at Negative Median"); elseif q1.==2; title("Change of Term Spread at Zero"); elseif q1.==3; title("Change of Term Spread at Positive Median"); endif; elseif ig.==2; if q1.==1; title("\201 Change of Term Spread at Negative Median"); elseif q1.==2; title("\201 Change of Term Spread at Zero"); elseif q1.==3; title("\201 Change of Term Spread at Positive Median"); elseif q1.==4; title("\201 Change of Short Rate at Negative Median"); elseif q1.==5; title("\201 Change of Short Rate at Zero"); elseif q1.==6; title("\201 Change of Short Rate at Positive Median"); endif; endif; ylabel("Fannie Mae's Log Excess Return"); if ig.==1; xlabel("\201 Change of Short Rate"); elseif ig.==2; if q1.<=3; xlabel("\201 Change of Short Rate"); elseif q1.>3; xlabel("\201 Change of Term Spread"); endif; endif; scale(xg1,yg1); if ff.==1; if ig.==1; @ xtics(-0.006,0.006,0.003,1); ytics(-0.12,0.04,0.02,2); @ os6=-0.055; os4=-0.108; xh=xg1; elseif ig.==2; if q1.<=3; xtics(-0.006,0.006,0.003,3); ytics(-0.08,0.04,0.02,2); os6=-0.0061; os4=-0.075; if review.==1; xtics(-0.009,0.006,0.003,3); ytics(-0.14,0.04,0.02,2); os6=-0.081; os4=-0.13; endif; xh=xg1; else; xtics(-0.006,0.006,0.003,3); ytics(-0.08,0.04,0.02,2); os6=-0.0061; os4=-0.075; if review.==1; xtics(-0.009,0.006,0.003,3); ytics(-0.14,0.04,0.02,2); os6=-0.081; os4=-0.13; endif; xh=xg1; endif; endif; elseif ff.==2; if ig.==1; @ xtics(-0.006,0.006,0.003,1); ytics(-0.12,0.04,0.02,2); @ os6=-0.055; os4=-0.108; xh=xg1; elseif ig.==2; if q1.<=3; @ xtics(-0.006,0.006,0.003,1); ytics(-0.12,0.04,0.04,2); @ os6=-0.005; os4=-0.108; xh=xg1; else; @ xtics(-0.008,0.006,0.004,1); ytics(-0.16,0.04,0.04,2); @ os6=-0.135; os4=-0.1475; xh=xg1; endif; endif; endif; os=ones(rows(yg1),1); e1=unique(xg1,1); os6=zeros(rows(xg1),1); i2=0; do while i2=1; flag2_: bbindex=ceil(rndu(n,1).*n); @Set up re-sampling index@ if minc(bbindex).==0; goto flag2_; endif; x_bb=x_b[bbindex,.]; @Defines re-sample b vector of Xi*'s@ var_bb=sumc(x_bb)./n; varbb_bt[bb,1]=var_bb; @Setting up vector of bootstrapped var*'s@ bb=bb-1; endo; @end of double bootstrap loop@ sd_v_b=stdc(varbb_bt); @Calculating the SD of var for a re-sample@ var_bt[i,1]=var_b; @Setting up vector of bootstrapped var*'s@ sd_v_bt[i,1]=sd_v_b; @Setting up vector of SD's of var*'s@ @Setting up for BCa zit@ if var_b <= bias_var; prop=prop+1/b; endif; i=i+1; endo; @end of resampling loop@ @Constructing parametric confidence interval around the _unbiased_ variance (for comparison), @ @ assuming normality for x, using chi-square value p=.025(two-tailed) df=n-1 @ chi_low=invcfchi(0.025,b-1); chi_up=invcfchi(0.975,b-1); unpmlcl=((n-1).*un_var_x)./chi_low; unpmucl=((n-1).*un_var_x)./chi_up; @Constructing parametric confidence interval, assuming normality for x, using chi-square value p=.025(two-tailed) df=n-1 @ pmlcl=((n-1).*bias_var)./chi_low; pmucl=((n-1).*bias_var)./chi_up; @Constructing normal approximation confidence interval @ tcal=cdfni(0.975); nalcl=bias_var-(tcal.*stdc(var_bt)); naucl=bias_var+(tcal.*stdc(var_bt)); @Constructing percentile confidence interval @ var_bt=var_bt~sd_v_bt; @Attach var*'s and their sd's for sorting together (needed for percentile-t)@ var_bt=sorthc(var_bt,1); @Sort x_bar*'s vector @ @Defining the CI percentile points for alpha = 0.05; see Efron and Tibshirani (1993), p. 160 @ ll=floor((b+1).*0.025); ul=b+1-ll; if ll.==0 or ul.>b; "increase the re-sampling number--wait"; wait; endif; @Defining lower and upper percentile CI endpoints@ perlcl=var_bt[ll,1]; perucl=var_bt[ul,1]; @Constructing percentile-t confidence interval@ @Standardize the var*'s using the SD from each re-sample and the full sample mean@ t_boot=(var_bt[.,1]-bias_var)./var_bt[.,2]; t_boot=sorthc(t_boot,1); @Sort the vector of t_boot@ @Define percentile-t endpoints. NOTE: Subtract t_boot conversion of opposite end from both limits@ prtlcl=bias_var-(t_boot[ul].*stdc(var_bt[.,1]) ); prtucl=bias_var-(t_boot[ll].*stdc(var_bt[.,1]) ); @Constructing BCa confidence interval@ @Step #1: Calculate the acceleration factor, a @ @Jackknifing for BCa; FAS: correct @ do while j<=n; xjack=delif(x,seqa(1,1,rows(x)).==j); jck_var=sumc(xjack)./(n-1); @Calculate the bias_var of xjack)@ var_jack[j,1]=jck_var; @Placing the var(i) into the jackknifed x-bar vector@ j=j+1; @Increasing jackknife index@ endo; @end jackknifing loop@ @Calculating acceleration factor, a, from the jackknifed vector of var's; FAS: correct @ a_num=sumc((var_jack-meanc(var_jack)).^3); @Numerator for acceleration factor@ @Calculate the denominator for a, setting format for small numbers@ a_den=6.*((sumc((var_jack-meanc(var_jack)).^2)).^(2./3)); format /rd 2,9; if a_den == 0; @To assure division by 0 doesn`t occur@ a=0; else; a=a_num/a_den; @Calculate a @ endif; @Step #2: Calculate z0; FAS: correct @ @Making sure prop =/ 0 or 1 (so zit =/ infinity)@ if prop.==0; prop=0.0001; elseif prop.==1; prop=0.9999; endif; zit=cdfni(prop); @ inverse of normal cdf @ @Step #3: Construct the BCa confidence interval; FAS: correct @ @Define the lower and upper BCa .05 percentile points@ z=cdfni(0.05); @ normal distribution; 0.5 percent per tail @ bc_u_per=cdfn(zit+((zit-z)./(1-(a*(zit-z))))); @ Hastie & Tibshirani, eq. (14.10) @ bc_l_per=cdfn(zit+((zit+z)./(1-(a*(zit+z))))); @ Hastie & Tibshirani, eq. (14.10) @ @Calculate the bootstrap vector case number for the lower and upper BCa bounds; FAS: correct @ l_bca = round(b.*bc_l_per); @Lower endpoint case number@ if l_bca.==0; @To avoid case #0 being choosen@ l_bca.==1; endif; u_bca = round(b.*bc_u_per); @Upper endpoint case number@ if u_bca > b; @To avoid case #(b+1) being choosen@ u_bca=b; endif; bcalcl = var_bt[l_bca,1]; @Lower BCa CI endpoint@ bcaucl = var_bt[u_bca,1]; @Upper BCa CI endpoint@ @Conducting test of normality on x @ {b1,b2,wb,rw}=jarqbera(x,n); @Conducting test of normality on bootstrapped var* vector@ {sk,ku,w,p_w}=jarqbera(var_bt[.,1],b); print; print "Full sample bias_var value = " bias_var; format /re 2,5; print; @Leaves space between output lines@ print "Bootstrap estimate of bias_var =" meanc(var_bt[.,1]); print "Bootstrapped full sample SD(bias_var) = " stdc(var_bt[.,1]); print; print "Parametric CI (biased var)= " pmlcl pmucl; print "Bootstrap CI's using the biased var:"; print "Normal approx CI = " nalcl naucl; print "Percentile CI =" perlcl perucl; print "Percentile-t CI =" prtlcl prtucl; print "BCa CI=" bcalcl bcaucl; print; print "Normality tests on x "; print "Skew of x = " b1; print "Kurtosis of x = " b2; print "Jarque-Bera omnibus test statistic = " wb; print "Probability of normality = " rw; print; print "Normality tests on vector of var*'s:"; print "Skew of var* =" sk; print "Kurtosis of var* =" ku; print "Jarque-Bera omnibus test statistic =" w; print "Probablity of normality =" p_w; print; print "Minutes elapsed = " (hsec-t_)/6000; /* ************************** end of bootstrapping ************************* */ endif; @ boot @ clear x,resn,resa,g1,g2,p1,f,df1,df2,d1,d2,v1,v2,diagra,diagrn,y,cv_,l; endo; @ il @ /* %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% */ "F-Tests on significance of variables in nonparametric part"; i7=0; do while i7<2; @ across the variables in the nonparametric part @ i7=i7+1; /* *************** prepare matrices for smoothing ****************** */ open f1=c:\data\gse2\gauss\loess\n3t01q.fmt; rw=rowsf(f1); q=readr(f1,rw); @ original order; not scaled @ f1=close(f1); open f1=c:\data\gse2\gauss\loess\n3t01x.fmt; rw=rowsf(f1); x=readr(f1,rw); @ original order; not scaled @ f1=close(f1); open f1=c:\data\gse2\gauss\loess\n3t01y.fmt; rw=rowsf(f1); y=readr(f1,rw); @ original order; not scaled @ f1=close(f1); if i7.==1; "F-test on significance of first variable"; x=x[.,2 3]; elseif i7.==2; "F-test on significance of second variable"; x=x[.,1 3]; endif; y__=y; x___=x./(stdc(x)'); @ scale; see Cleveland & Devlin @ svec__=seqa(1,1,rows(y__)); @ vector which gives the original order of the obs. @ @ locally linear fitting oder locally quadratic fitting @ if lin./=1; @ locally quadratic fitting @ x__=x___~zeros(rows(x___),cols(x___)+cols(x___)*(cols(x___)-1)./2); @ last couple of cols: quadratic terms & cross-products @ i=0; k=0; do until i.==cols(x___); i=i+1; j=0; do until j.==i; j=j+1; k=k+1; x__[.,cols(x___)+k]=x___[.,i].*x___[.,j]; endo; endo; else; x__=x___; @ locally linear fitting @ endif; x__=ones(rows(x__),1)~x__; @ intercept @ clear x,y; "created x__"; /* *************** smoothing: total impact ****************** */ @ variables have not beeen sorted yet @ "smoothing"; i=0; k=0; do until i==rows(x__); @ number of obs. to be smoothed over @ i=i+1; @ Eucledian distances @ dists=sqrt(sumc((x___'-x___[i,.]').^2)); @ Eucledian distance @ distx=sortc(dists~x__~y__~svec__,1); @ sorted by Eucledian distance @ disti=distx[.,1]; @ distance of i-th vector of obs. to all other vectors of observations @ xs=distx[.,cols(dists)+1:(cols(x__)+1)]; @ sorted regressor matrix @ ys=distx[.,cols(distx)-1]; @ sorted vector y @ svec=distx[.,cols(distx)]; @ vector used for sorting back to orig. order; has to be sorted too @ dx=disti[q,.]; @ distance to (q-1)-th neighbor @ @ dx; wait; @ @ determine the q weights wi @ uvec=disti./dx; @ vector of weights; if program goes down here, then there is not enough variation in the explanatory variables @ @ format 10,6; uvec[1:20,.]; wait; @ wuvec=(1-uvec.^3).^3; @ tricube weight function @ exvec=(uvec .ge 0).*(uvec .lt 1); if sumc(exvec) ne rows(exvec); loc=indexcat(exvec,0); wuvec[loc]=zeros(rows(loc),1); endif; clear loc,exvec; @ format /re 10,6; wuvec'; wait; @ @ generate matrix wx (see own notes) @ wx=(wuvec.*xs)'; @ weights, multiplied with regressors @ @ number of rows equals number of regressors @ wbh=inv(wx*xs)*wx*ys; @ S, be it noted: vector yd has a different order for each row of this matrix; thus, sort columns(!) back to original order; rows are appended in original order anyway @ sortvec=svec; @ svec: has same order as columns of this matrix @ s__ =sortc((xs[1,.]*inv(wx*xs)*wx)'~sortvec,2); if i==1; create f6=^dtset66 with ^vnames,cols(s__[.,1]'),8; call writer(f6,s__[.,1]'); f6=close(f6); elseif i==2; open f6=^dtset66 for append; @ keep file open @ call writer(f6,s__[.,1]'); else; call writer(f6,s__[.,1]'); endif; clear s__; if k.==krow; format /rd 2,0; i.*100./rows(x__);; "%"; k=0; endif; endo; @ end of smoothing over all observations @ f6=close(f6); clear disti,dists,distx,p,sortvec,svec,uvec,wuvec,wx; clear i,k,x,xs,x_,y_,svec__,svecs,x__,x___,y,ys; clear y__,n3t01p,n3t01x,n3t01y; /* ************** estimated total impact ******************* */ @ read data, sorted in original order @ open f1=c:\data\gse2\gauss\loess\n3t01y.fmt; rw=rowsf(f1); y=readr(f1,rw); @ not scaled, original order @ f1=close(f1); open f6=^dtset66 for read; rw6=rowsf(f6); s=readr(f6,rw6); @ smoother matrix S @ f6=close(f6); clear f6; yhat=s*y; @ n x 1 Vektor of level of impact @ create f3=^dtset33 with ^vnames,1,8; call writer(f3,yhat); f3=close(f3); res=y-yhat; @ overall residuals @ create f4=^dtset44 with ^vnames,1,8; call writer(f4,res); f4=close(f4); i1=0; do while i1=1; flag2__: bbindex=ceil(rndu(n,1).*n); @Set up re-sampling index@ if minc(bbindex).==0; goto flag2__; endif; x_bb=x_b[bbindex,.]; @Defines re-sample b vector of Xi*'s@ var_bb=sumc(x_bb)./n; varbb_bt[bb,1]=var_bb; @Setting up vector of bootstrapped var*'s@ bb=bb-1; endo; @end of double bootstrap loop@ sd_v_b=stdc(varbb_bt); @Calculating the SD of var for a re-sample@ var_bt[i,1]=var_b; @Setting up vector of bootstrapped var*'s@ sd_v_bt[i,1]=sd_v_b; @Setting up vector of SD's of var*'s@ @Setting up for BCa zit@ if var_b <= bias_var; prop=prop+1/b; endif; i=i+1; endo; @end of resampling loop@ @Constructing parametric confidence interval around the _unbiased_ variance (for comparison), @ @ assuming normality for x, using chi-square value p=.025(two-tailed) df=n-1 @ chi_low=invcfchi(0.025,b-1); chi_up=invcfchi(0.975,b-1); unpmlcl=((n-1).*un_var_x)./chi_low; unpmucl=((n-1).*un_var_x)./chi_up; @Constructing parametric confidence interval, assuming normality for x, using chi-square value p=.025(two-tailed) df=n-1 @ pmlcl=((n-1).*bias_var)./chi_low; pmucl=((n-1).*bias_var)./chi_up; @Constructing normal approximation confidence interval @ tcal=cdfni(0.975); nalcl=bias_var-(tcal.*stdc(var_bt)); naucl=bias_var+(tcal.*stdc(var_bt)); @Constructing percentile confidence interval @ var_bt=var_bt~sd_v_bt; @Attach var*'s and their sd's for sorting together (needed for percentile-t)@ var_bt=sorthc(var_bt,1); @Sort x_bar*'s vector @ @Defining the CI percentile points for alpha = 0.05; see Efron and Tibshirani (1993), p. 160 @ ll=floor((b+1).*0.025); ul=b+1-ll; if ll.==0 or ul.>b; "increase the re-sampling number--wait"; wait; endif; @Defining lower and upper percentile CI endpoints@ perlcl=var_bt[ll,1]; perucl=var_bt[ul,1]; @Constructing percentile-t confidence interval@ @Standardize the var*'s using the SD from each re-sample and the full sample mean@ t_boot=(var_bt[.,1]-bias_var)./var_bt[.,2]; t_boot=sorthc(t_boot,1); @Sort the vector of t_boot@ @Define percentile-t endpoints. NOTE: Subtract t_boot conversion of opposite end from both limits@ prtlcl=bias_var-(t_boot[ul].*stdc(var_bt[.,1]) ); prtucl=bias_var-(t_boot[ll].*stdc(var_bt[.,1]) ); @Constructing BCa confidence interval@ @Step #1: Calculate the acceleration factor, a @ @Jackknifing for BCa; FAS: correct @ do while j<=n; xjack=delif(x,seqa(1,1,rows(x)).==j); jck_var=sumc(xjack)./(n-1); @Calculate the bias_var of xjack)@ var_jack[j,1]=jck_var; @Placing the var(i) into the jackknifed x-bar vector@ j=j+1; @Increasing jackknife index@ endo; @end jackknifing loop@ @Calculating acceleration factor, a, from the jackknifed vector of var's; FAS: correct @ a_num=sumc((var_jack-meanc(var_jack)).^3); @Numerator for acceleration factor@ @Calculate the denominator for a, setting format for small numbers@ a_den=6.*((sumc((var_jack-meanc(var_jack)).^2)).^(2./3)); format /rd 2,9; if a_den == 0; @To assure division by 0 doesn`t occur@ a=0; else; a=a_num/a_den; @Calculate a @ endif; @Step #2: Calculate z0; FAS: correct @ @Making sure prop =/ 0 or 1 (so zit =/ infinity)@ if prop.==0; prop=0.0001; elseif prop.==1; prop=0.9999; endif; zit=cdfni(prop); @ inverse of normal cdf @ @Step #3: Construct the BCa confidence interval; FAS: correct @ @Define the lower and upper BCa .05 percentile points@ z=cdfni(0.05); @ normal distribution; 0.5 percent per tail @ bc_u_per=cdfn(zit+((zit-z)./(1-(a*(zit-z))))); @ Hastie & Tibshirani, eq. (14.10) @ bc_l_per=cdfn(zit+((zit+z)./(1-(a*(zit+z))))); @ Hastie & Tibshirani, eq. (14.10) @ @Calculate the bootstrap vector case number for the lower and upper BCa bounds; FAS: correct @ l_bca = round(b.*bc_l_per); @Lower endpoint case number@ if l_bca.==0; @To avoid case #0 being choosen@ l_bca.==1; endif; u_bca = round(b.*bc_u_per); @Upper endpoint case number@ if u_bca > b; @To avoid case #(b+1) being choosen@ u_bca=b; endif; bcalcl = var_bt[l_bca,1]; @Lower BCa CI endpoint@ bcaucl = var_bt[u_bca,1]; @Upper BCa CI endpoint@ @Conducting test of normality on x @ {b1,b2,wb,rw}=jarqbera(x,n); @Conducting test of normality on bootstrapped var* vector@ {sk,ku,w,p_w}=jarqbera(var_bt[.,1],b); print; print "Full sample bias_var value = " bias_var; format /re 2,5; print; @Leaves space between output lines@ print "Bootstrap estimate of bias_var =" meanc(var_bt[.,1]); print "Bootstrapped full sample SD(bias_var) = " stdc(var_bt[.,1]); print; print "Parametric CI (biased var)= " pmlcl pmucl; print "Bootstrap CI's using the biased var:"; print "Normal approx CI = " nalcl naucl; print "Percentile CI =" perlcl perucl; print "Percentile-t CI =" prtlcl prtucl; print "BCa CI=" bcalcl bcaucl; print; print "Normality tests on x "; print "Skew of x = " b1; print "Kurtosis of x = " b2; print "Jarque-Bera omnibus test statistic = " wb; print "Probability of normality = " rw; print; print "Normality tests on vector of var*'s:"; print "Skew of var* =" sk; print "Kurtosis of var* =" ku; print "Jarque-Bera omnibus test statistic =" w; print "Probablity of normality =" p_w; print; print "Minutes elapsed = " (hsec-t_)/6000; /* ************************** end of bootstrapping ************************* */ endif; @ boot @ clear x,resn,resa,g1,g2,p1,f,df1,df2,d1,d2,v1,v2,diagra,diagrn,y,cv_,l; endo; @ i7 @ endif; @ ftest.==1 @ closeall; "n3t01.prg finished gracefully"; closeall; @ -------------------- procedures --------------------- @ @ calculated first, second, and third quartiles @ proc quart(x); local r,y,i,med,quart1,quart3; @ ---- first quartile ----- @ i=0; do until i==cols(x); i=i+1; y=sortc(x,i); @ lowest number on top @ r=0.25.*(rows(y)+2); if i.==1; quart1= meanc(y[floor(r),i]|y[ceil(r),i]); else; quart1=quart1|meanc(y[floor(r),i]|y[ceil(r),i]); endif; endo; @ ---- median ---- @ r=rows(x); i=0; do until i==cols(x); i=i+1; y=sortc(x,i); @ lowest numer on top @ r=0.5.*(rows(y)+1); if i.==1; med= meanc(y[floor(r),i]|y[ceil(r),i]); else; med=med|meanc(y[floor(r),i]|y[ceil(r),i]); endif; endo; @ ---- third quartile ---- @ i=0; do until i==cols(x); i=i+1; y=sortc(x,i); @ lowest number on top @ r=0.75.*(rows(y)+2); if i.==1; quart3= meanc(y[floor(r),i]|y[ceil(r),i]); else; quart3=quart3|meanc(y[floor(r),i]|y[ceil(r),i]); endif; endo; retp(quart1|med|quart3); endp; @ --------------------------------------------------------------------- @ proc cv(l,y); local res,gcv_,cv_; @ GCV--generalized cross-validation @ res=y-l*y; gcv_=(res'res)./(rows(res).*(1-sumc(diag(l))./rows(res)).^2); @ CV--(delete-one) cross-validation @ res=y-diagrv(l,zeros(rows(l),1))*y; cv_=(res'res)./(rows(res)-1); retp(gcv_|cv_); endp; @Defining the proc for Jarque-Bera normality testing@ proc(4) = jarqbera(vartest,n1) ; local m5, m2, m3, xbar,sskew, skew, y, b2skew, w, var,m1, m4, kurt, prob_w; skew = 1; kurt = 1; m1 = (vartest)-meanc(vartest); m3 = sumc(m1^3)/n1; m4 = sumc(m1^4)/n1; var=(stdc(m1))^2; sskew = (m3/(var^1.5)) ; skew=sskew^2; kurt = m4/((var)^2) ; @Calculating the Bera-Jarque W, omnibus test for normality@ w = n1*(((skew)/6)+(((kurt-3)^2)/24)); prob_w = cdfchic(w,2); retp (sskew,kurt,w,prob_w) ; endp ; /* INVCFCHI - Inverse of the Chi squared Cumulative Distribution function ** with n1 degrees of freedom ** ** Usage: x = invcfchi(p,n) ** ** Input: P - matrix of percentage points ( 0 < p < 1 ) ** N - matrix of degrees of freedom (N > 0) (conformable with X) ** ** Output: X - matrix of critical values st Pr(x < X) = P and x ~ Chi(n) */ proc invcfchi(p,n) ; local converge,k,tol,t,d,z,x0,x1,f0,df0 ; if not(p > 0 and p < 1) ; errorlog "ERROR: INVCFCHI - P not in (0,1) " ; retp(error(99)) ; endif ; if not(n > 0) ; errorlog "ERROR: INVCFCHI - N is not positive" ; retp(error(99)) ; endif ; tol = 1e-6.*sqrt(n) ; clear converge,k ; /*Use Wilson-Hilferty approximation as initial value*/ t = sqrt(-2*ln(abs((p.>0.5) - p))); z = 2.515517 + t.*(0.802853 + t.*0.010328) ; d = 1 + t.*(1.432788 + t.*(0.189269 + t.*0.001308)) ; z = t - (z./d) ; z = (p.>0.5).*z - (p.<=0.5).*z ; d = 2./(9.*n) ; x0 = n.*(z.*sqrt(d) + 1 - d)^3 ; x0 = x0 + (0.1 - x0).*(x0 .<= 0) ; p = 1 - p ; do until(converge or k > 50) ; f0 = cdfchic(x0,n) ; df0 = missrv(miss(pdfchi(x0,n),0),tol) ; x1 = (x0 - (p-f0)./df0) ; x1 = x1 + (tol - x1).*(x1.<=0) ; converge = abs(x0 - x1) < tol ; x0 = x1 ; k = k + 1 ; endo ; if not converge ; errorlog "Warning: INVCDFF has not converged " ; endif ; ndpclex ; retp(x0) ; endp ; /* PDFCHI - Chi squared Probability density function with n1 degrees ** of freedom ** ** Usage: D = pdfchi(x,n) ** ** Input: X - matrix of chi-squared values (X > 0) ** N - matrix of degrees of freedom (N > 0) (conformable with X) ** ** Output: D - matrix of density of Chi square(n) at X */ proc pdfchi(x,n) ; if not(n > 0) ; errorlog "ERROR: PDFCHI - N is not positive" ; retp(error(99)) ; endif ; if not(x > 0) ; errorlog "ERROR: PDFCHI - X is not positive" ; retp(error(99)) ; endif ; if n < 100 ; retp(exp(-n.*ln(2)./2-ln(gamma(n./2))+(n./2-1).*ln(x)-x./2)) ; else ; retp(exp(-n.*ln(2)./2-lnfact(n./2-1)+(n./2-1).*ln(x)-x./2)) ; endif ; endp ; @ PRODUCES THE M-plots USED IN SCHMID (2005) @ @ @ @ @ @ @ @ @ @ May 19, 2004 @ @ M-plots; nonparametric (Cleveland) and semi-parametric (Speckman) approaches @ @ @ @ @ @ @ @ @ new; #lineson; tstart=date; library pgraph; save path=c:\data\gse2\gauss\loess; ff=1; @ 1: Fannie Mae; 2: Freddie Mac @ comp=1; @ 1: do the calculations; 0: no calc.; graph partial impact @ semi=0; @ do semi-parametric @ olsi=1; @ do OLS @ lin=0; @ lin=1: locally linear fitting; otherwise: locally quadratic fitting @ krow=250; @ do 250 rows at a time in do-loop--for display only @ dtset1="c:\\data\\gse2\\gauss\\wklyapril"; @ GSE data @ "GAUSS time ";; timestr(0);; " date (US)";; datestr(0); ?;"input file: mplot01.prg";?; if comp.==1; output file=c:\prg\gse2\mplot01.out reset; dos del c:\data\gse2\gauss\loess\mplot01.*; dos del c:\data\gse2\gauss\loess\cv01.*; fs=0.25; @ smoothing parameter @ fsi=0; @ counter @ do while fs<1; fs=fs+0.05; @ smoothing parameter @ fsi=fsi+1; @ counter @ ?; format /rd 10,2; "smoothing parameter:" fs; @ read the data @ open f1=^dtset1 for read; rw1=rowsf(f1); dat=readr(f1,rw1); f1=close(f1); clear f1; dat=dat~seqa(1,1,rows(dat)); @ time index @ @-----------------------------------------------------------@ format /rd 10,0; "number of observations in dataset:" rows(dat); @ define variables @ @ left-hand side variable @ if ismiss(dat[.,3]).==1; dat[.,3]=missrv(dat[.,3],minc(dat[.,3])-1); dat=delif(dat,dat[.,3].==minc(dat[.,3])); endif; if ismiss(dat[.,6]).==1; dat[.,6]=missrv(dat[.,6],minc(dat[.,6])-1); dat=delif(dat,dat[.,6].==minc(dat[.,6])); endif; if ismiss(dat[.,14]).==1; dat[.,14]=missrv(dat[.,14],minc(dat[.,14])-1); dat=delif(dat,dat[.,14].==minc(dat[.,14])); endif; if ismiss(dat[.,15]).==1; dat[.,15]=missrv(dat[.,15],minc(dat[.,15])-1); dat=delif(dat,dat[.,15].==minc(dat[.,15])); endif; if ff.==1; @ 1: Fannie Mae @ if ismiss(dat[.,5]).==1; dat[.,5]=missrv(dat[.,5],minc(dat[.,5])-1); dat=delif(dat,dat[.,5].==minc(dat[.,5])); endif; y=dat[.,5]-ln(1+(dat[.,3]./36000).*7); @ Log excess return of Fannie Mae @ elseif ff.==2; @ 2: Freddie Mac @ if ismiss(dat[.,12]).==1; dat[.,12]=missrv(dat[.,12],minc(dat[.,12])-1); dat=delif(dat,dat[.,12].==minc(dat[.,12])); endif; y=dat[.,12]-ln(1+(dat[.,3]./36000).*7); @ Log excess return of Fannie Mae @ endif; x= dat[.,6]-ln(1+(dat[.,3]./36000).*7) @ Log excess market return @ ~(dat[.,14]-dat[.,15])./1e2 @ change in yield spread @ ~(dat[.,15])./1e2 @ change in 3-month yield @ ~dat[.,cols(dat)]-minc(dat[.,cols(dat)])+1; @ time index, starting on 1 @ format /rd 10,0; "number of obsevations analzyed:" rows(dat); clear dat; @ descriptive statistics @ format /rd 6,4; @ minc(exp(y))~median(exp(y))~meanc(exp(y))~maxc(exp(y))~(stdc(exp(y)).*sqrt(rows(y)-1)./sqrt(rows(y))); @ minc((y))~median((y))~meanc((y))~maxc((y))~(stdc((y)).*sqrt(rows(y)-1)./sqrt(rows(y))); /* ************************** core program ************************* */ mplot01x=x; save mplot01x; @ in original order; not normalized @ /* mplot01p=p; save mplot01p; @ in original order; not normalized @ do not use it here */ mplot01y=y; save mplot01y; @ in original order; not normalized @ /* ************** nonparametric estimation: total imapct ****************** */ @ this is nonparametric, not semi-parametric @ y__=y; x___=x./(stdc(x)'); @ normieren gem„á Cleveland & Devlin @ svec__=seqa(1,1,rows(y__)); @ dieser Vektor l„uft mit und gibt jeweils die Reihenfolge der Beobachtungen an @ q=int(fs*rows(x)); @ smoothing parameter; nearest integer @ format /rd 10,2; "smoothing parameter ~ number of observations chosen:" fs~q; @ Euklidische Distanzen @ @ Die Matrix der euklidischen Distanzen ist symmetrisch zur Hauptdiagonalen und quadratisch @ dist=zeros(rows(x___),rows(x___)); @ rows(x___): Zahl der Beobachtungen @ i=0; do until i.==rows(x___); i=i+1; j=0; do until j.==rows(x___); j=j+1; if i/=j; dist[i,j]=sqrt(sumc((x___[i,.]'-x___[j,.]').^2)); @ euklidische Distanz @ endif; endo; endo; @ locally linear fitting oder locally quadratic fitting @ if lin /= 1; @ locally quadratic fitting @ x__=x___~zeros(rows(x___),cols(x___)+cols(x___)*(cols(x___)-1)./2); @ letzteres fr Quadrate & cross-products @ i=0; k=0; do until i.==cols(x___); i=i+1; j=0; do until j.==i; j=j+1; k=k+1; x__[.,cols(x___)+k]=x___[.,i].*x___[.,j]; endo; endo; else; x__=x___; @ locally linear fitting @ endif; x__=ones(rows(x__),1)~x__; @ intercept @ @ zu fllende Matrix @ mplot01s=zeros(rows(x__),rows(x__)); @ Smoother-Matrix S @ @ smoothing: total impact @ @ bisher darf nicht sortiert worden sein! @ i=0; do until i.==rows(x__); @ Zahl der zu smoothenden Beobachtungen @ i=i+1; dists=dist[.,i]; svecs=svec__; xs=x__; ys=y__; @ unsortiert @ distx=sortc(dists~xs~ys~svecs,1); @ nach eukl. Distanz sortieren @ disti=distx[.,1]; @ Distanzen des i-ten Beobachtungsvektors zu allen anderen Beobachtungsvektoren @ x =distx[.,2:(cols(xs)+1)]; @ sortierte Regressormatrix @ y =distx[.,cols(distx)-1]; @ sortierter Vektor y @ svec=distx[.,cols(distx)]; @ Sortiervektor; l„uft mit @ dx=disti[q,.]; @ Distanz zum "q-1"-ten Nachbarn @ @ dx; wait; @ @ Bestimmung der q Gewichte wi @ uvec=disti./dx; @ Vektor der Gewichte; falls Programm hier abstrzt: nicht genug Variation in den erkl„renden Variablen @ @ format 10,6; uvec[1:20,.]; wait; @ wuvec=(1-uvec.^3).^3; @ tricube weight function @ i1=0; do until i1.==rows(uvec); i1=i1+1; if uvec[i1,.] < 0 or uvec[i1,.] ge 1; wuvec[i1,.]=0; endif; endo; @ Herstellen der Matrix wx (vgl. Notizen) @ wx=(wuvec.*x)'; @ Gewichte, multipliziert mit den Regressoren @ @ Zahl der Zeilen gleich Zahl der Regressoren @ wbh=inv(wx*x)*wx*y; @ Problem bei S: Vektor y hat fr jede Zeile dieser Matrix eine andere Reihenfolge, deshalb: Spalten (!) wieder in ursprgliche Reihenfolge bringen; Zeilen werden sowieso in ursprngl. Reihen- folge aneinander gefgt @ sortvec=svec; @ sved: hat jeweils gleiche Reihenfolge wie Spalten dieser Matrix @ s__ =sortc((x[1,.]*inv(wx*x)*wx)'~sortvec,2); mplot01s[i,.]=s__[.,1]'; @ Zeile fr Smoother-Matrix S @ endo; @ Ende des smoothens ber alle Beobachtungen @ save mplot01s; @ estimated impact: total @ @ starting from here, the program is self-sustained @ open f1=c:\data\gse2\gauss\loess\mplot01s.fmt; rw=rowsf(f1); s=readr(f1,rw); @ Smoother-Matrix S, vollst. in ursprngl. Reienfolge @ f1=close(f1); open f1=c:\data\gse2\gauss\loess\mplot01x.fmt; rw=rowsf(f1); x=readr(f1,rw); @ ohne Normierung, in ursprngl. Reihenfolge @ f1=close(f1); open f1=c:\data\gse2\gauss\loess\mplot01y.fmt; rw=rowsf(f1); y=readr(f1,rw); @ y, in ursprnglicher Reihenfolge @ f1=close(f1); @ GCV--generalized cross-validation @ cv_=cv(s,y); format /re 10,6; "gcv:" cv_[1]; @ CV--(delete-one) cross-validation @ format /re 10,6; "cv:" cv_[2]; if fsi.==1; cv01ncv= fs~cv_'; else; cv01ncv=(fs~cv_')|cv01ncv; endif; save cv01ncv; @--------------------@ imat=eye(rows(y)); @ identity matrix @ yhat=s*y; @ n x 1 Vektor of level of impact @ res=y-yhat; @ overall residuals @ rn=(imat-s)*(imat-s)'; sige=(res'res)./sumc(diag(rn)); @ estimator for variance of residuals @ if fsi.==1; sigs=sige; mplot01c=sigs; @ estimator for variance for smallest smoothing parameter @ ra=rn; mplot01ra=ra; @ Matrix RA in Cleveland and Devlin (1988); for smallest smooth. param. @ save mplot01c, mplot01ra; else; open f1=c:\data\gse2\gauss\loess\mplot01c.fmt; rw=rowsf(f1); sigs=readr(f1,rw); @ estimated variance of residuals for smallest smoothing paramter @ f1=close(f1); open f1=c:\data\gse2\gauss\loess\mplot01ra.fmt; rw=rowsf(f1); ra=readr(f1,rw); @ Matrix RA; for smallest smoothing parameter @ f1=close(f1); endif; if fsi ne 1; diagra=diag(ra); diagrn=diag(rn); d1=sumc(diagra); d2=sumc(diag(ra*ra)); v1=sumc(diagrn-diagra); v2=sumc(diag((rn-ra)*(rn-ra))); df1=((v1.^2)./v2); df2=((d1.^2)./d2); fb=invcdff(0.05|0.95,df1|df1,df2|df2); flb=fb[1]; fub=fb[2]; @ format /rd 12,8; "F-check:" df1~df2~cdffc(flb,df1,df2)~cdffc(fub,df1,df2);?; @ mlb=flb*v1+d1-rows(y)+2*sumc(diag(s)); @ lower bound for mf @ mub=fub*v1+d1-rows(y)+2*sumc(diag(s)); @ upper bound for mf @ endif; if fsi.==1; mplot01bf= ((res'res)./sigs)-sumc(diag(rn)); mplot01vf= sumc(diag(s's)); mplot01df= 0~0; @ gibt es noch nicht @ mplot01lu= 0~0; @ gibt es noch nicht @ else; mplot01bf=(((res'res)./sigs)-sumc(diag(rn)))|mplot01bf; mplot01vf=(sumc(diag(s's)))|mplot01vf; mplot01df=(df1~df2)|mplot01df; @ degrees of freedom @ mplot01lu=(mlb~mub)|mplot01lu; if mplot01vf[1]+mplot01bf[1]>mub; "outside the interval"; else; "inside the interval"; endif; endif; endo; /* *********** OLS quadratic fitting *********** */ if olsi.==1; /* open f1=c:\data\gse2\gauss\loess\mplot01x.fmt; rw=rowsf(f1); x=readr(f1,rw); f1=close(f1); open f1=c:\data\gse2\gauss\loess\mplot01y.fmt; rw=rowsf(f1); y=readr(f1,rw); f1=close(f1); open f1=c:\data\gse2\gauss\loess\mplot01p.fmt; rw=rowsf(f1); p=readr(f1,rw); f1=close(f1); @ locally linear fitting oder locally quadratic fitting @ x___=x; x__=x___~zeros(rows(x___),cols(x___)+cols(x___)*(cols(x___)-1)./2); @ letzteres fr Quadrate & cross-products @ i=0; k=0; do until i==cols(x___); i=i+1; j=0; do until j==i; j=j+1; k=k+1; x__[.,cols(x___)+k]=x___[.,i].*x___[.,j]; endo; endo; x__=ones(rows(x__),1)~x__; @ intercept @ x=x__~p; l=x*inv(x'x)*x'; @ Matrix L @ yhat=l*y; res=y-yhat; open f1=c:\data\gse2\gauss\loess\mplot01c.fmt; rw=rowsf(f1); sigs=readr(f1,rw); @ estimator of variance of residuals for smallest smoothing parameter @ f1=close(f1); open f1=c:\data\gse2\gauss\loess\mplot01ra.fmt; rw=rowsf(f1); ra=readr(f1,rw); @ matrix RA (smallest smoothing parameter) @ f1=close(f1); rn=(imat-l)*(imat-l)'; diagra=diag(ra); diagrn=diag(rn); d1=sumc(diagra); d2=sumc(diag(ra*ra)); v1=sumc(diagrn-diagra); v2=sumc(diag((rn-ra)*(rn-ra))); df1=((v1.^2)./v2); df2=((d1.^2)./d2); fb=invcdff(0.05|0.95,df1|df1,df2|df2); flb=fb[1]; fub=fb[2]; @ format /rd 12,8; "F-check:" df1~df2~cdffc(flb,df1,df2)~cdffc(fub,df1,df2);?; @ mlb=flb*v1+d1-rows(y)+2*sumc(diag(l)); @ lower bound for mf @ mub=fub*v1+d1-rows(y)+2*sumc(diag(l)); @ upper bound for mf @ @ update matrices @ mplot01bf=(((res'res)./sigs)-sumc(diagrn))|mplot01bf; mplot01vf=(sumc(diag(l'l)))|mplot01vf; mplot01df=(df1~df2)|mplot01df; @ degrees of freedom @ mplot01lu=(mlb~mub)|mplot01lu; if mplot01vf[1]+mplot01bf[1]>mub; "outside the interval"; else; "inside the interval"; endif; */ /**************** OLS linear fitting ******************/ open f1=c:\data\gse2\gauss\loess\mplot01x.fmt; rw=rowsf(f1); x=readr(f1,rw); f1=close(f1); open f1=c:\data\gse2\gauss\loess\mplot01y.fmt; rw=rowsf(f1); y=readr(f1,rw); f1=close(f1); /* open f1=c:\data\gse2\gauss\loess\mplot01p.fmt; @ do not use this here @ rw=rowsf(f1); p=readr(f1,rw); f1=close(f1); */ x=ones(rows(x),1)~x@~p@; l=x*inv(x'x)*x'; @ Matrix L @ yhat=l*y; res=y-yhat; @ GCV--generalized cross-validation @ ?; "OLS linear fitting"; cv_=cv(l,y); format /re 10,6; "gcv:" cv_[1]; @ CV--(delete-one) cross-validation @ format /re 10,6; "cv:" cv_[2]; cv01ocv=cv_'; save cv01ocv; @--------------------@ open f1=c:\data\gse2\gauss\loess\mplot01c.fmt; rw=rowsf(f1); sigs=readr(f1,rw); @ estimator of variance of residuals for smallest smoothing parameter @ f1=close(f1); open f1=c:\data\gse2\gauss\loess\mplot01ra.fmt; rw=rowsf(f1); ra=readr(f1,rw); @ matrix RA (smallest smoothing parameter) @ f1=close(f1); rn=(imat-l)*(imat-l)'; diagra=diag(ra); diagrn=diag(rn); d1=sumc(diagra); d2=sumc(diag(ra*ra)); v1=sumc(diagrn-diagra); v2=sumc(diag((rn-ra)*(rn-ra))); df1=((v1.^2)./v2); df2=((d1.^2)./d2); fb=invcdff(0.05|0.95,df1|df1,df2|df2); flb=fb[1]; fub=fb[2]; @ format /rd 12,8; "F-check:" df1~df2~cdffc(flb,df1,df2)~cdffc(fub,df1,df2);?; @ mlb=flb*v1+d1-rows(y)+2*sumc(diag(l)); @ lower bound for mf @ mub=fub*v1+d1-rows(y)+2*sumc(diag(l)); @ upper bound for mf @ @ update matrices @ mplot01bf=(((res'res)./sigs)-sumc(diagrn))|mplot01bf; mplot01vf=(sumc(diag(l'l)))|mplot01vf; mplot01df=(df1~df2)|mplot01df; @ degrees of freedom @ mplot01lu=(mlb~mub)|mplot01lu; if mplot01vf[1]+mplot01bf[1]>mub; "outside the interval"; else; "inside the interval"; endif; endif; @ olsi.==1; @ @ save matrices--the only time when they are saved to disk @ save mplot01bf,mplot01vf,mplot01df,mplot01lu; @ cross validation @ endif; @ comp.==1; @ /* ****************** Graphik ******************* */ open f1=c:\data\gse2\gauss\loess\mplot01vf.fmt; rw=rowsf(f1); vf=readr(f1,rw); f1=close(f1); open f1=c:\data\gse2\gauss\loess\mplot01lu.fmt; rw=rowsf(f1); lun=readr(f1,rw); @ lower and upper bound for m-plot @ f1=close(f1); open f1=c:\data\gse2\gauss\loess\mplot01bf.fmt; rw=rowsf(f1); bf=readr(f1,rw); f1=close(f1); mf=vf+bf; graphset; begwind; makewind(9,6.855,0,0,0); @ Graph @ setwind(1); /* if iss.==1; title("...- The M Plot"); endif; */ ylabel("M-Statistic"); _pcolor=8; _pmcolor=8; _pframe={0,0}; _paxht=.15; @ size characters of axes labels @ _pnumht=.15; @ Gr”áe der Achsennummerierung @ _ptitlht=.15; @ size of characters in title @ _pnotify=0; @ no screen activity while writing tkf file @ _plwidth=2; @ _plwidth={1 3 1}; line width @ _pcsel=15; @ white @ _pnum=2; @ controls axes numbering @ _pltype={6}; @ dots etc. @ _plctrl={-1}; @ _plctrl={-1 0 -1}; frequency of symbols ; -1: symbols only @ _pstype=7; @ symbol type @ _psymsiz=3; @ size of the symbols on the main curve @ fonts("simplex"); xg=vf; yg=mf; scale(xg,yg~lun~vf); xlabel("Equivalent Number of Parameters"); xtics(0,100,10,2); ytics(-40,120,20,2); if semi.==1 and olsi.==1; xy(xg[3:rows(xg),.],yg[3:rows(yg),.]); elseif semi.==1 and olsi./=1; xy(xg[2:rows(xg),.],yg[2:rows(yg),.]); elseif semi./=1 and olsi.==1; xy(xg[2:rows(xg),.],yg[2:rows(yg),.]); else; xy(xg[1:rows(xg),.],yg[1:rows(yg),.]); endif; _plctrl=0; xy(vf,vf); @ diagonal line @ @ OLS and semiparametric, if so chosen @ _plctrl=-1; if semi.==1 and olsi.==1; _pstype=2; xy(xg[1,.]|xg[1,.],yg[1,.]|yg[1,.]); _pstype=1; xy(xg[2,.]|xg[2,.],yg[2,.]|yg[2,.]); elseif semi.==1 and olsi./=1; _pstype=6; @ 1: empty circle; 6: empty triangle @ xy(xg[1,.]|xg[1,.],yg[1,.]|yg[1,.]); elseif semi./=1 and olsi.==1; _pstype=2; @ 2: empty square @ xy(xg[1,.]|xg[1,.],yg[1,.]|yg[1,.]); endif; @ confidence intervals @ _plctrl={0 0}; _plwidth=1.5; @ 0: thinnest line @ yg=lun; ig1=0; do until ig1.==rows(xg); ig1=ig1+1; xg1=xg[ig1,.]|xg[ig1,.]; yg1=(yg[ig1,.]|yg[ig1,.])'; xy(xg1,yg1); endo; grphstr="-w=20 -c=1 -cf=c:\\prg\\gse2\\mplot01.eps"; graphprt(grphstr); endwind; @---------------------------------------------------@ @ cross validation results @ open f1=c:\data\gse2\gauss\loess\cv01ncv.fmt; @ nonparametric @ rw=rowsf(f1); cv01ncv=readr(f1,rw); f1=close(f1); if semi.==1; open f1=c:\data\gse2\gauss\loess\cv01pcv.fmt; @ semi-parametric @ rw=rowsf(f1); cv01pcv=readr(f1,rw); f1=close(f1); endif; open f1=c:\data\gse2\gauss\loess\cv01ocv.fmt; @ OLS @ rw=rowsf(f1); cv01ocv=readr(f1,rw); f1=close(f1); mask=ones(1,2); let fmt[2,3] = "*.*lf" 8 3 "*.*le" 16 6; "min cv smoothing parameter for nonparametric estimation:"; spmax=sortc(cv01ncv,3); omat=spmax[1,1 3]; call printfm(omat,mask,fmt);?; "cv for OLS:"; spmax=sortc(cv01ocv,2); omat=0~spmax[1,2]; call printfm(omat,mask,fmt);?; "mplot01.prg finished gracefully"; closeall; @---------------------------------------------------@ @ procedures follow @ /* INVCDFF - Inverse of the F Cumulative Distribution function ** with n1,n2 degrees of freedom ** ** Usage: x = invcdff(p,n1,n2) ** ** Input: P - matrix of percentage points ( 0 < p < 1 ) ** N1 - matrix of numerator df (conformable with p) ** N2 - matrix of denominator df (conformable with p) ** ** Output: X - matrix of critical values st Pr(x < X) = P and x ~ F(n1,n2) */ proc invcdff(p,n1,n2) ; local converge,negative,tol,tol2,t,z,a,b,c,d,x0,x1,f0,df0,k ; if not(p > 0 and p < 1) ; errorlog "ERROR: INVCDFF - P not in range (0,1)" ; endif ; if not(n1 > 0) ; errorlog "ERROR: INVCDFT - N1 is not positive" ; retp(error(99)) ; endif ; if not(n2 > 0) ; errorlog "ERROR: INVCDFT - N2 is not positive" ; retp(error(99)) ; endif ; tol = 1e-7 ; tol2 = tol^2 ; /* Use Paulson normal approx to start */ t = sqrt(-2*ln(abs((p.>0.5) - p))); z = 2.515517 + t.*(0.802853 + t.*0.010328) ; d = 1 + t.*(1.432788 + t.*(0.189269 + t.*0.001308)) ; z = t - (z./d) ; z = (p.>0.5).*z - (p.<=0.5).*z ; c = 2./(9.*n2) ; d = 2./(9.*n1) ; a = 1 - c ; b = 1 - d ; d = (a^2).*d + (b^2).*c - c.*d.*(z^2) ; c = a^2 - c.*(z^2) ; x0 = abs((a.*b + z.*sqrt(d + (tol-d).*(d.<0)))./(c + (1-c).*(c.<0.3)))^3 ; x0 = x0 + (d.<=0 .or c.< 0.3).*(0.5.*(p.<0.5) + 2.0.*(p.>=0.5) - x0) ; p = 1 - p ; clear converge,k ; do until( converge or k > 50 ) ; f0 = cdffc(x0,n1,n2) ; df0 = missrv(miss(pdff(x0,n1,n2),0),tol) ; x1 = x0 - (p-f0)./df0 ; negative = not(x1 > tol2) ; if negative ; x1 = x1 + (x1.<=tol2).*(x0.*(0.5 + 1.5.*(p.< f0)) - x1) ; endif ; converge = abs(x0 - x1) < tol and not negative ; x0 = x1 ; k = k + 1 ; endo ; if not converge ; errorlog "Warning: INVCDFF has not converged " ; endif ; ndpclex ; retp(x0) ; endp ; /* PDFF - Probability Density function of F distribution ** ** Format: D = pdff(f,n1,n2) ** ** Input: X - R x C matrix of f ratio's (F > 0) ** N1 - matrix of numerator degress of freedom (N1 > 0) ** N2 - matrix of denominator degress of freedom (N2 > 0) ** ( N1 & N2 must be E x E conformable with X) ** ** Output: D - R x C matrix of densities of F(n1,n2) */ proc pdff(x,n1,n2) ; if not(x > 0) ; errorlog "ERROR: PDFF - X is not positive" ; retp(error(99)) ; endif ; if not(n1 > 0) ; errorlog "ERROR: PDFF - Numerator Degrees of Freedom are not positive" ; retp(error(99)) ; endif ; if not(n2 > 0) ; errorlog"ERROR: PDFF - Denominator Degrees of Freedom are not positive"; retp(error(99)) ; endif ; if (n1+n2) < 100 ; retp(exp(ln(gamma((n1+n2)./2))-ln(gamma(n1./2))-ln(gamma(n2./2)) + n1.*ln(n1)./2+n2.*ln(n2)./2+(n1./2-1).*ln(x) - (n1+n2).*ln(n2+n1.*x)./2)) ; else ; retp(exp(lnfact((n1+n2)./2-1)-lnfact(abs(n1./2-1))-lnfact(abs(n2./2-1)) + n1.*ln(n1)./2+n2.*ln(n2)./2+(n1./2-1).*ln(x) - (n1+n2).*ln(n2+n1.*x)./2)) ; endif ; endp ; @ procedure for cross-validation @ proc cv(l,y); local res,gcv_,cv_; @ GCV--generalized cross-validation @ res=y-l*y; gcv_=(res'res)./(rows(res).*(1-sumc(diag(l))./rows(res)).^2); @ CV--(delete-one) cross-validation @ res=y-diagrv(l,zeros(rows(l),1))*y; cv_=(res'res)./(rows(res)-1); retp(gcv_|cv_); endp; @ BACKFITTING PROGRAM USED IN SCHMID (2005) @ @ @ @ @ @ @ @ May 20, 2004 @ @ backfitting--for F-test only @ @ no parametric component--original Cleveland/Devlin (1988) approach @ @ four variables in nonparametric component @ @ time index and intercept enter both components @ @ @ @ @ @ @ @ @ new; #lineson; tstart=date; library pgraph; save path=c:\data\gse2\gauss\loess; @ loess-smoother; Cleveland and Devlin (1988) @ @ the intercept must be included in the nonparametric component @ @ general principle: always sort back to original order @ @ Eucledian distances are based on variables in nonparametric component @ @ (nonconstant) regressors in nonparametric component must be scaled @ ff=1; @ 1: Fannie Mae; 2: Freddie Mac @ fs=0.45; @ smoothing parameter @ maxit=1e2; @ number of iterations @ iter=1; @ 1: iterate to obtain partial impacts @ inter=1; @ 1: use intercept in x1 @ chart=0; comp=1; lin=0; @ lin=1: locally linear fitting; otherwise: locally quadratic fitting @ krow=250; @ do 250 rows at a time in do-loop--for display only @ vnames="xx"; dtset1="c:\\data\\gse2\\gauss\\wklyapril"; @ GSE data @ @ unrestricted model @ dtset2 ="c:\\data\\gse2\\gauss\\loess\\b4t01fa"; @ degrees of freedom @ dtset3 ="c:\\data\\gse2\\gauss\\loess\\b4t01ya"; @ yhat @ dtset4 ="c:\\data\\gse2\\gauss\\loess\\b4t01ta"; @ overall residuals @ dtset5 ="c:\\data\\gse2\\gauss\\loess\\b4t01ra"; @ matrix RA @ dtset6 ="c:\\data\\gse2\\gauss\\loess\\b4t01sa"; @ smoother matrix S @ dtset7 ="c:\\data\\gse2\\gauss\\loess\\b4t01da"; @ diagRA @ dtset8 ="c:\\data\\gse2\\gauss\\loess\\b4t01aa"; @ smoother matrix S1 @ dtset9 ="c:\\data\\gse2\\gauss\\loess\\b4t01ba"; @ smoother matrix S2 @ @ unrestricted model--where needed @ /* dtset22="c:\\data\\gse2\\gauss\\loess\\b4t01fn"; @ degrees of freedom @ dtset33="c:\\data\\gse2\\gauss\\loess\\b4t01yn"; @ yhat @ dtset44="c:\\data\\gse2\\gauss\\loess\\b4t01tn"; @ overall residuals @ dtset55="c:\\data\\gse2\\gauss\\loess\\b4t01rn"; @ matrix RA @ dtset66="c:\\data\\gse2\\gauss\\loess\\b4t01sn"; @ smoother matrix S @ dtset77="c:\\data\\gse2\\gauss\\loess\\b4t01dn"; @ diagRA @ dtset88="c:\\data\\gse2\\gauss\\loess\\b4t01an"; @ smoother matrix S1 @ dtset99="c:\\data\\gse2\\gauss\\loess\\b4t01bn"; @ smoother matrix S2 @ */ if comp.==1; dos del c:\data\gse2\gauss\loess\b4t01*.*; dos del c:\data\gse2\gauss\loess\b4m01*.*; dos del c:\data\gse2\gauss\loess\b4o01*.*; endif; if comp.==1; output file=c:\prg\gse2\bak4d01.out reset; endif; "GAUSS time ";; timestr(0);; " date (US)";; datestr(0); ?;"input file: bak4d01.prg";?; fs=0.75; @ smoothing parameter @ fsi=0; @ counter @ do while fs<0.8; fs=fs+0.05; @ smoothing parameter @ fsi=fsi+1; @ counter @ ?; format /rd 10,2; "smoothing parameter:" fs; if comp.==1; @ read the data @ open f1=^dtset1 for read; rw1=rowsf(f1); dat=readr(f1,rw1); f1=close(f1); clear f1; dat=dat~seqa(1,1,rows(dat)); @ time index @ @ define variables @ @ left-hand side variable @ if ismiss(dat[.,3]).==1; dat[.,3]=missrv(dat[.,3],minc(dat[.,3])-1); dat=delif(dat,dat[.,3].==minc(dat[.,3])); endif; if ismiss(dat[.,6]).==1; dat[.,6]=missrv(dat[.,6],minc(dat[.,6])-1); dat=delif(dat,dat[.,6].==minc(dat[.,6])); endif; if ismiss(dat[.,14]).==1; dat[.,14]=missrv(dat[.,14],minc(dat[.,14])-1); dat=delif(dat,dat[.,14].==minc(dat[.,14])); endif; if ismiss(dat[.,15]).==1; dat[.,15]=missrv(dat[.,15],minc(dat[.,15])-1); dat=delif(dat,dat[.,15].==minc(dat[.,15])); endif; if ff.==1; @ 1: Fannie Mae @ if ismiss(dat[.,5]).==1; dat[.,5]=missrv(dat[.,5],minc(dat[.,5])-1); dat=delif(dat,dat[.,5].==minc(dat[.,5])); endif; y=dat[.,5]-ln(1+(dat[.,3]./36000).*7); @ Log excess return of Fannie Mae @ elseif ff.==2; @ 2: Freddie Mac @ if ismiss(dat[.,12]).==1; dat[.,12]=missrv(dat[.,12],minc(dat[.,12])-1); dat=delif(dat,dat[.,12].==minc(dat[.,12])); endif; y=dat[.,12]-ln(1+(dat[.,3]./36000).*7); @ Log excess return of Fannie Mae @ endif; x1= dat[.,6]-ln(1+(dat[.,3]./36000).*7) @ Log excess market return @ ~dat[.,cols(dat)]-minc(dat[.,cols(dat)])+1; @ time index, starting on 1 @ x2= (dat[.,14]-dat[.,15])./1e2 @ change in yield spread @ ~(dat[.,15])./1e2 @ change in 3-month yield @ ~dat[.,cols(dat)]-minc(dat[.,cols(dat)])+1; @ time index, starting on 1 @ format /rd 10,0; "number of obsevations analzyed:" rows(dat); clear dat; @ descriptive statistics @ format /rd 6,4; @ minc(exp(y))~median(exp(y))~meanc(exp(y))~maxc(exp(y))~(stdc(exp(y)).*sqrt(rows(y)-1)./sqrt(rows(y))); @ minc((y))~median((y))~meanc((y))~maxc((y))~(stdc((y)).*sqrt(rows(y)-1)./sqrt(rows(y))); /* ************************** core program ************************* */ omin=rows(y); @ info for smoother-matrix @ @-----------------------------------------------------------@ b4t01x1=x1; @ in original order; not scaled @ b4t01x2=x2; @ in original order; not scaled @ b4t01y=y; @ in original order @ save b4t01x1,b4t01x2,b4t01y; q=int(fs.*rows(x1)); @ smoothing parameter; choose nearest integer @ b4t01q=q; save b4t01q; @ note that the smoother matrices are independent of the dependent variable; @ @ hence, no iteration is necessary @ /* ********************** smooth x1 ********************** */ y__=y; x___=x1./(stdc(x1)'); @ scale; see Cleveland & Devlin @ svec__=seqa(1,1,rows(y__)); @ vector which gives the original order of the obs. @ @ locally linear fitting oder locally quadratic fitting @ if lin./=1; @ locally quadratic fitting @ x__=x___~zeros(rows(x___),cols(x___)+cols(x___)*(cols(x___)-1)./2); @ last couple of cols: quadratic terms & cross-products @ i=0; k=0; do until i.==cols(x___); i=i+1; j=0; do until j.==i; j=j+1; k=k+1; x__[.,cols(x___)+k]=x___[.,i].*x___[.,j]; endo; endo; else; x__=x___; @ locally linear fitting @ endif; if inter.==1; x__=ones(rows(x__),1)~x__; @ intercept; typically, there is an intercept included @ endif; @-------------------------------------------@ @ variables have not beeen sorted yet @ "smoothing"; i=0; k=0; do until i==rows(x__); @ number of obs. to be smoothed over @ i=i+1; @ Eucledian distances @ dists=sqrt(sumc((x___'-x___[i,.]').^2)); @ Eucledian distance @ distx=sortc(dists~x__~y__~svec__,1); @ sorted by Eucledian distance @ disti=distx[.,1]; @ distance of i-th vector of obs. to all other vectors of observations @ xs=distx[.,cols(dists)+1:(cols(x__)+1)]; @ sorted regressor matrix @ ys=distx[.,cols(distx)-1]; @ sorted vector y @ svec=distx[.,cols(distx)]; @ vector used for sorting back to orig. order; has to be sorted too @ dx=disti[q,.]; @ distance to (q-1)-th neighbor @ @ dx; wait; @ @ determine the q weights wi @ uvec=disti./dx; @ vector of weights; if program goes down here, then there is not enough variation in the explanatory variables @ @ format 10,6; uvec[1:20,.]; wait; @ wuvec=(1-uvec.^3).^3; @ tricube weight function @ exvec=(uvec .ge 0).*(uvec .lt 1); if sumc(exvec) ne rows(exvec); loc=indexcat(exvec,0); wuvec[loc]=zeros(rows(loc),1); endif; clear loc,exvec; @ format /re 10,6; wuvec'; wait; @ @ generate matrix wx (see own notes) @ wx=(wuvec.*xs)'; @ weights, multiplied with regressors @ @ number of rows equals number of regressors @ wbh=inv(wx*xs)*wx*ys; @ S, be it noted: vector yd has a different order for each row of this matrix; thus, sort columns(!) back to original order; rows are appended in original order anyway @ sortvec=svec; @ svec: has same order as columns of this matrix @ s__ =sortc((xs[1,.]*inv(wx*xs)*wx)'~sortvec,2); if i==1; create f8=^dtset8 with ^vnames,cols(s__[.,1]'),8; call writer(f8,s__[.,1]'); f8=close(f8); elseif i==2; open f8=^dtset8 for append; @ keep file open @ call writer(f8,s__[.,1]'); else; call writer(f8,s__[.,1]'); endif; clear s__; if k.==krow; format /rd 2,0; i.*100./rows(x__);; "%"; k=0; endif; endo; @ end of smoothing over all observations @ f8=close(f8); /* ********************** smooth x2 ********************** */ y__=y; x___=x2./(stdc(x2)'); @ scale; see Cleveland & Devlin @ svec__=seqa(1,1,rows(y__)); @ vector which gives the original order of the obs. @ @ locally linear fitting oder locally quadratic fitting @ if lin./=1; @ locally quadratic fitting @ x__=x___~zeros(rows(x___),cols(x___)+cols(x___)*(cols(x___)-1)./2); @ last couple of cols: quadratic terms & cross-products @ i=0; k=0; do until i.==cols(x___); i=i+1; j=0; do until j.==i; j=j+1; k=k+1; x__[.,cols(x___)+k]=x___[.,i].*x___[.,j]; endo; endo; else; x__=x___; @ locally linear fitting @ endif; x__=ones(rows(x__),1)~x__; @ intercept @ @-------------------------------------------@ @ variables have not beeen sorted yet @ "smoothing"; i=0; k=0; do until i==rows(x__); @ number of obs. to be smoothed over @ i=i+1; @ Eucledian distances @ dists=sqrt(sumc((x___'-x___[i,.]').^2)); @ Eucledian distance @ distx=sortc(dists~x__~y__~svec__,1); @ sorted by Eucledian distance @ disti=distx[.,1]; @ distance of i-th vector of obs. to all other vectors of observations @ xs=distx[.,cols(dists)+1:(cols(x__)+1)]; @ sorted regressor matrix @ ys=distx[.,cols(distx)-1]; @ sorted vector y @ svec=distx[.,cols(distx)]; @ vector used for sorting back to orig. order; has to be sorted too @ dx=disti[q,.]; @ distance to (q-1)-th neighbor @ @ dx; wait; @ @ determine the q weights wi @ uvec=disti./dx; @ vector of weights; if program goes down here, then there is not enough variation in the explanatory variables @ @ format 10,6; uvec[1:20,.]; wait; @ wuvec=(1-uvec.^3).^3; @ tricube weight function @ exvec=(uvec .ge 0).*(uvec .lt 1); if sumc(exvec) ne rows(exvec); loc=indexcat(exvec,0); wuvec[loc]=zeros(rows(loc),1); endif; clear loc,exvec; @ format /re 10,6; wuvec'; wait; @ @ generate matrix wx (see own notes) @ wx=(wuvec.*xs)'; @ weights, multiplied with regressors @ @ number of rows equals number of regressors @ wbh=inv(wx*xs)*wx*ys; @ S, be it noted: vector yd has a different order for each row of this matrix; thus, sort columns(!) back to original order; rows are appended in original order anyway @ sortvec=svec; @ svec: has same order as columns of this matrix @ s__ =sortc((xs[1,.]*inv(wx*xs)*wx)'~sortvec,2); if i==1; create f9=^dtset9 with ^vnames,cols(s__[.,1]'),8; call writer(f9,s__[.,1]'); f9=close(f9); elseif i==2; open f9=^dtset9 for append; @ keep file open @ call writer(f9,s__[.,1]'); else; call writer(f9,s__[.,1]'); endif; clear s__; if k.==krow; format /rd 2,0; i.*100./rows(x__);; "%"; k=0; endif; endo; @ end of smoothing over all observations @ f9=close(f9); /* ************** aggregate smoother matrices ******************* */ @ read data, sorted in original order @ open f1=c:\data\gse2\gauss\loess\b4t01y.fmt; rw=rowsf(f1); y=readr(f1,rw); @ not scaled, original order @ f1=close(f1); open f8=^dtset8 for read; rw8=rowsf(f8); s1=readr(f8,rw8); @ partial smoother matrix S1 @ f8=close(f8); clear f8; open f9=^dtset9 for read; rw9=rowsf(f9); s2=readr(f9,rw9); @ partial smoother matrix S2 @ f9=close(f9); clear f9; s=eye(rows(s1))-(eye(rows(s1))-s2)*inv(eye(rows(s1))-s1*s2)*(eye(rows(s1))-s1); create f6=^dtset6 with ^vnames,cols(s),8; call writer(f6,s); f6=close(f6); open f6=^dtset6 for read; rw6=rowsf(f6); s=readr(f6,rw6); @ smoother matrix S @ f6=close(f6); clear f6; @ GCV--generalized cross-validation @ cv_=cv(s,y); format /re 10,6; "gcv:" cv_[1]; @ CV--(delete-one) cross-validation @ format /re 10,6; "cv:" cv_[2]; if fsi.==1; b4t01cv= fs~cv_'; else; b4t01cv=(fs~cv_')|b4t01cv; endif; save b4t01cv; yhat=s*y; @ n x 1 Vektor of level of impact @ create f3=^dtset3 with ^vnames,1,8; call writer(f3,yhat); f3=close(f3); res=y-yhat; @ overall residuals @ create f4=^dtset4 with ^vnames,1,8; call writer(f4,res); f4=close(f4); @ Hastie and Tibshirani (1990, p. 66) @ i1=0; do while i1> up into do-loop @ sigy=reshape(sige,rows(yhat),1); i8=0; do while i8=0)); @ nearest neighbor that's positive @ yminus=maxc(selif(yg1[.,1],xg1[.,1].<=0)); @ nearest neighbor that's negative @ xplus =minc(selif(xg1[.,1],xg1[.,1].>=0)); @ nearest neighbor that's positive @ xminus=maxc(selif(xg1[.,1],xg1[.,1].<=0)); @ nearest neighbor that's negative @ @ interpolate linearly @ yb= yplus .*(abs(xminus)./(abs(xminus)+xplus)) +yminus.*( xplus ./(abs(xminus)+xplus)); /* xy(0|0,0|yb); */ graphprt(grphstr); endwind; endif; @ chart.==1 @ @---------------------------------------------------@ endo; @ fs; smoothing parameter @ @ cross validation results @ open f1=c:\data\gse2\gauss\loess\b4t01cv.fmt; @ nonparametric @ rw=rowsf(f1); b4t01cv=readr(f1,rw); f1=close(f1); mask=ones(1,2); let fmt[2,3] = "*.*lf" 8 3 "*.*le" 16 6; "min cv smoothing parameter for backfitting model:"; spmax=sortc(b4t01cv,3); omat=spmax[1,1 3]; call printfm(omat,mask,fmt);?; "bak4d01.prg finished gracefully"; closeall; @-------------------------------------------@ proc cv(l,y); local res,gcv_,cv_; @ GCV--generalized cross-validation @ res=y-l*y; gcv_=(res'res)./(rows(res).*(1-sumc(diag(l))./rows(res)).^2); @ CV--(delete-one) cross-validation @ res=y-diagrv(l,zeros(rows(l),1))*y; cv_=(res'res)./(rows(res)-1); retp(gcv_|cv_); endp; @ THIS PROGRAM RUNS THE ANOVA (Table 1) USED IN SCHMID (2005) @ @ @ @ @ @ @ @ @ @ May 20, 2004 @ @ analysis of variance--traditional and bootstrapping @ @ draws on matrices created by nop4d01.prg, bak4d01.prg, and cle3d01.prg @ @ @ @ @ @ @ @ @ new; #lineson; tstart=date; save path=c:\data\gse2\gauss\loess; boot=0; @ 1: with bootstrapping @ me="s"; @ s: compare with semiparametric model; b: compare with backfitting @ krow=250; @ do 250 rows at a time in do-loop--for display only @ vnames="xx"; @ unrestricted model @ dtset2 ="c:\\data\\gse2\\gauss\\loess\\n4t01fa"; @ degrees of freedom @ dtset3 ="c:\\data\\gse2\\gauss\\loess\\n4t01ya"; @ yhat @ dtset4 ="c:\\data\\gse2\\gauss\\loess\\n4t01ta"; @ overall residuals @ dtset5 ="c:\\data\\gse2\\gauss\\loess\\n4t01ra"; @ matrix RA @ dtset6 ="c:\\data\\gse2\\gauss\\loess\\n4t01sa"; @ smoother matrix S @ dtset7 ="c:\\data\\gse2\\gauss\\loess\\n4t01da"; @ diag(RA) @ dtset8 ="c:\\data\\gse2\\gauss\\loess\\n4t01ga"; @ diag(RA^2) @ @ restricted model @ if me $== "b"; dtset22 ="c:\\data\\gse2\\gauss\\loess\\b4t01fa"; @ degrees of freedom @ dtset33 ="c:\\data\\gse2\\gauss\\loess\\b4t01ya"; @ yhat @ dtset44 ="c:\\data\\gse2\\gauss\\loess\\b4t01ta"; @ overall residuals @ dtset55 ="c:\\data\\gse2\\gauss\\loess\\b4t01ra"; @ matrix RN @ dtset66 ="c:\\data\\gse2\\gauss\\loess\\b4t01sa"; @ smoother matrix S @ dtset77 ="c:\\data\\gse2\\gauss\\loess\\b4t01da"; @ diagRN @ dtset88 ="c:\\data\\gse2\\gauss\\loess\\n4t01gn"; @ diag((RA-RN)^2); to be constructed @ elseif me $== "s"; dtset22 ="c:\\data\\gse2\\gauss\\loess\\c2t01fa"; @ degrees of freedom @ dtset33 ="c:\\data\\gse2\\gauss\\loess\\c2t01ya"; @ yhat @ dtset44 ="c:\\data\\gse2\\gauss\\loess\\c2t01ta"; @ overall residuals @ dtset55 ="c:\\data\\gse2\\gauss\\loess\\c2t01ra"; @ matrix RN @ dtset66 ="c:\\data\\gse2\\gauss\\loess\\c2t01sa"; @ smoother matrix S @ dtset77 ="c:\\data\\gse2\\gauss\\loess\\c2t01da"; @ diagRN @ dtset88 ="c:\\data\\gse2\\gauss\\loess\\c2t01gn"; @ diag((RA-RN)^2); to be constructed @ endif; @ ************************* F-test ****************************** @ open f1=c:\data\gse2\gauss\loess\n4t01y.fmt; rw=rowsf(f1); y=readr(f1,rw); @ not scaled, original order @ f1=close(f1); @---@ open f4=^dtset4 for read; rw4=rowsf(f4); resa=readr(f4,rw4); @ residuals of unrestricted model @ f4=close(f4); clear f4; open f7=^dtset7 for read; rw7=rowsf(f7); diagra=readr(f7,rw7); @ diag(RA) @ f7=close(f7); clear f7; open f8=^dtset8 for read; rw8=rowsf(f8); diagra2=readr(f8,rw8); @ diag((RA)^2) @ f8=close(f8); clear f8; @---@ open f4=^dtset44 for read; rw4=rowsf(f4); resn=readr(f4,rw4); @ residuals of restricted model @ f4=close(f4); clear f4; open f7=^dtset77 for read; rw7=rowsf(f7); diagrn=readr(f7,rw7)-diagra; @ diag(RN) @ f7=close(f7); clear f7; @---@ "creating diag((rn-ra)^2)"; open f5=^dtset5 for read; rw5=rowsf(f5); ra=readr(f5,rw5); @ matrix RA--unrestricted model @ f5=close(f5); clear f5; open f5=^dtset55 for read; rw5=rowsf(f5); rn=readr(f5,rw5); @ matrix RN--restricted model @ f5=close(f5); clear f5; ih=0; k=0; do while ih=1; flag2_: bbindex=ceil(rndu(n,1).*n); @Set up re-sampling index@ if minc(bbindex).==0; goto flag2_; endif; x_bb=x_b[bbindex,.]; @Defines re-sample b vector of Xi*'s@ var_bb=sumc(x_bb)./n; varbb_bt[bb,1]=var_bb; @Setting up vector of bootstrapped var*'s@ bb=bb-1; endo; @end of double bootstrap loop@ sd_v_b=stdc(varbb_bt); @Calculating the SD of var for a re-sample@ var_bt[i,1]=var_b; @Setting up vector of bootstrapped var*'s@ sd_v_bt[i,1]=sd_v_b; @Setting up vector of SD's of var*'s@ @Setting up for BCa zit@ if var_b <= bias_var; prop=prop+1/b; endif; i=i+1; endo; @end of resampling loop@ @Constructing parametric confidence interval around the _unbiased_ variance (for comparison), @ @ assuming normality for x, using chi-square value p=.025(two-tailed) df=n-1 @ chi_low=invcfchi(0.025,b-1); chi_up=invcfchi(0.975,b-1); unpmlcl=((n-1).*un_var_x)./chi_low; unpmucl=((n-1).*un_var_x)./chi_up; @Constructing parametric confidence interval, assuming normality for x, using chi-square value p=.025(two-tailed) df=n-1 @ pmlcl=((n-1).*bias_var)./chi_low; pmucl=((n-1).*bias_var)./chi_up; @Constructing normal approximation confidence interval @ tcal=cdfni(0.975); nalcl=bias_var-(tcal.*stdc(var_bt)); naucl=bias_var+(tcal.*stdc(var_bt)); @Constructing percentile confidence interval @ var_bt=var_bt~sd_v_bt; @Attach var*'s and their sd's for sorting together (needed for percentile-t)@ var_bt=sorthc(var_bt,1); @Sort x_bar*'s vector @ @Defining the CI percentile points for alpha = 0.05; see Efron and Tibshirani (1993), p. 160 @ ll=floor((b+1).*0.025); ul=b+1-ll; if ll.==0 or ul.>b; "increase the re-sampling number--wait"; wait; endif; @Defining lower and upper percentile CI endpoints@ perlcl=var_bt[ll,1]; perucl=var_bt[ul,1]; @Constructing percentile-t confidence interval@ @Standardize the var*'s using the SD from each re-sample and the full sample mean@ t_boot=(var_bt[.,1]-bias_var)./var_bt[.,2]; t_boot=sorthc(t_boot,1); @Sort the vector of t_boot@ @Define percentile-t endpoints. NOTE: Subtract t_boot conversion of opposite end from both limits@ prtlcl=bias_var-(t_boot[ul].*stdc(var_bt[.,1]) ); prtucl=bias_var-(t_boot[ll].*stdc(var_bt[.,1]) ); @Constructing BCa confidence interval@ @Step #1: Calculate the acceleration factor, a @ @Jackknifing for BCa; FAS: correct @ do while j<=n; xjack=delif(x,seqa(1,1,rows(x)).==j); jck_var=sumc(xjack)./(n-1); @Calculate the bias_var of xjack)@ var_jack[j,1]=jck_var; @Placing the var(i) into the jackknifed x-bar vector@ j=j+1; @Increasing jackknife index@ endo; @end jackknifing loop@ @Calculating acceleration factor, a, from the jackknifed vector of var's; FAS: correct @ a_num=sumc((var_jack-meanc(var_jack)).^3); @Numerator for acceleration factor@ @Calculate the denominator for a, setting format for small numbers@ a_den=6.*((sumc((var_jack-meanc(var_jack)).^2)).^(2./3)); format /rd 2,9; if a_den == 0; @To assure division by 0 doesn`t occur@ a=0; else; a=a_num/a_den; @Calculate a @ endif; @Step #2: Calculate z0; FAS: correct @ @Making sure prop =/ 0 or 1 (so zit =/ infinity)@ if prop.==0; prop=0.0001; elseif prop.==1; prop=0.9999; endif; zit=cdfni(prop); @ inverse of normal cdf @ @Step #3: Construct the BCa confidence interval; FAS: correct @ @Define the lower and upper BCa .05 percentile points@ z=cdfni(0.05); @ normal distribution; 0.5 percent per tail @ bc_u_per=cdfn(zit+((zit-z)./(1-(a*(zit-z))))); @ Hastie & Tibshirani, eq. (14.10) @ bc_l_per=cdfn(zit+((zit+z)./(1-(a*(zit+z))))); @ Hastie & Tibshirani, eq. (14.10) @ @Calculate the bootstrap vector case number for the lower and upper BCa bounds; FAS: correct @ l_bca = round(b.*bc_l_per); @Lower endpoint case number@ if l_bca.==0; @To avoid case #0 being choosen@ l_bca.==1; endif; u_bca = round(b.*bc_u_per); @Upper endpoint case number@ if u_bca > b; @To avoid case #(b+1) being choosen@ u_bca=b; endif; bcalcl = var_bt[l_bca,1]; @Lower BCa CI endpoint@ bcaucl = var_bt[u_bca,1]; @Upper BCa CI endpoint@ @Conducting test of normality on x @ {b1,b2,wb,rw}=jarqbera(x,n); @Conducting test of normality on bootstrapped var* vector@ {sk,ku,w,p_w}=jarqbera(var_bt[.,1],b); print; print "Full sample bias_var value = " bias_var; format /re 2,5; print; @Leaves space between output lines@ print "Bootstrap estimate of bias_var =" meanc(var_bt[.,1]); print "Bootstrapped full sample SD(bias_var) = " stdc(var_bt[.,1]); print; print "Parametric CI (biased var)= " pmlcl pmucl; print "Bootstrap CI's using the biased var:"; print "Normal approx CI = " nalcl naucl; print "Percentile CI =" perlcl perucl; print "Percentile-t CI =" prtlcl prtucl; print "BCa CI=" bcalcl bcaucl; print; print "Normality tests on x "; print "Skew of x = " b1; print "Kurtosis of x = " b2; print "Jarque-Bera omnibus test statistic = " wb; print "Probability of normality = " rw; print; print "Normality tests on vector of var*'s:"; print "Skew of var* =" sk; print "Kurtosis of var* =" ku; print "Jarque-Bera omnibus test statistic =" w; print "Probablity of normality =" p_w; print; print "Minutes elapsed = " (hsec-t_)/6000; /* ************************** end of bootstrapping ************************* */ endif; @ boot.==1 @ @Defining the proc for Jarque-Bera normality testing@ proc(4) = jarqbera(vartest,n1) ; local m5, m2, m3, xbar,sskew, skew, y, b2skew, w, var,m1, m4, kurt, prob_w; skew = 1; kurt = 1; m1 = (vartest)-meanc(vartest); m3 = sumc(m1^3)/n1; m4 = sumc(m1^4)/n1; var=(stdc(m1))^2; sskew = (m3/(var^1.5)) ; skew=sskew^2; kurt = m4/((var)^2) ; @Calculating the Bera-Jarque W, omnibus test for normality@ w = n1*(((skew)/6)+(((kurt-3)^2)/24)); prob_w = cdfchic(w,2); retp (sskew,kurt,w,prob_w) ; endp ; /* INVCFCHI - Inverse of the Chi squared Cumulative Distribution function ** with n1 degrees of freedom ** ** Usage: x = invcfchi(p,n) ** ** Input: P - matrix of percentage points ( 0 < p < 1 ) ** N - matrix of degrees of freedom (N > 0) (conformable with X) ** ** Output: X - matrix of critical values st Pr(x < X) = P and x ~ Chi(n) */ proc invcfchi(p,n) ; local converge,k,tol,t,d,z,x0,x1,f0,df0 ; if not(p > 0 and p < 1) ; errorlog "ERROR: INVCFCHI - P not in (0,1) " ; retp(error(99)) ; endif ; if not(n > 0) ; errorlog "ERROR: INVCFCHI - N is not positive" ; retp(error(99)) ; endif ; tol = 1e-6.*sqrt(n) ; clear converge,k ; /*Use Wilson-Hilferty approximation as initial value*/ t = sqrt(-2*ln(abs((p.>0.5) - p))); z = 2.515517 + t.*(0.802853 + t.*0.010328) ; d = 1 + t.*(1.432788 + t.*(0.189269 + t.*0.001308)) ; z = t - (z./d) ; z = (p.>0.5).*z - (p.<=0.5).*z ; d = 2./(9.*n) ; x0 = n.*(z.*sqrt(d) + 1 - d)^3 ; x0 = x0 + (0.1 - x0).*(x0 .<= 0) ; p = 1 - p ; do until(converge or k > 50) ; f0 = cdfchic(x0,n) ; df0 = missrv(miss(pdfchi(x0,n),0),tol) ; x1 = (x0 - (p-f0)./df0) ; x1 = x1 + (tol - x1).*(x1.<=0) ; converge = abs(x0 - x1) < tol ; x0 = x1 ; k = k + 1 ; endo ; if not converge ; errorlog "Warning: INVCDFF has not converged " ; endif ; ndpclex ; retp(x0) ; endp ; /* PDFCHI - Chi squared Probability density function with n1 degrees ** of freedom ** ** Usage: D = pdfchi(x,n) ** ** Input: X - matrix of chi-squared values (X > 0) ** N - matrix of degrees of freedom (N > 0) (conformable with X) ** ** Output: D - matrix of density of Chi square(n) at X */ proc pdfchi(x,n) ; if not(n > 0) ; errorlog "ERROR: PDFCHI - N is not positive" ; retp(error(99)) ; endif ; if not(x > 0) ; errorlog "ERROR: PDFCHI - X is not positive" ; retp(error(99)) ; endif ; if n < 100 ; retp(exp(-n.*ln(2)./2-ln(gamma(n./2))+(n./2-1).*ln(x)-x./2)) ; else ; retp(exp(-n.*ln(2)./2-lnfact(n./2-1)+(n./2-1).*ln(x)-x./2)) ; endif ; endp ;