c Writes a plotxy file for drawing a QQ plot against a Gaussian c error law and test with K-S statistic for that distribution. c Change this maximum length in subroutine ctestqq too. parameter (nmax=20000) dimension x(nmax) c c Read data from standard input without fuss. ce print '(a)',' Program reads data from standard input' c read (*,*, end=1500) (x(i), i=1, nmax) print *, ' Length of data series must not exceed',nmax i=nmax + 1 1500 n=i-1 c call testqq(n, x) stop end c_______________________________________________________________________ subroutine testqq(n, x) c$$$$ calls qqplot, kstest c Performs test of Normality on residual series x. c Draws a dirty QQ plot and writes file for proper plotting. c Uses Kolmogorov-Smirnov test for normality. parameter (nmax=20000) external gauss dimension x(n),y(nmax),q(nmax),key(nmax) common /gparam/ xbar,sigma ce sum=0 do 1500 i=1, n sum=sum + x(i) 1500 continue var=0 xbar=sum/n do 1510 i=1, n var=var + (x(i) - xbar)**2 1510 continue sigma=sqrt(var/(n-1)) print '(/a,i7, 3x, a, g12.4, 3x, a, g11.4)', $' Number of terms ',n, $' Mean of series', xbar, 'Standard dev.',sigma c call qqplot(n, x, q, key) c c Standardize x values and put in y do 1600 i=1, n y(i)=(x(i) - xbar)/sigma 1600 continue c print the tails * print '(/t20,a/t7,a,t14,a,t31,a)', * $'TAIL VALUES ','term','value','standardized value' * do 2100 i=1, n * if (abs(y(i)) .ge. 1.7) print '(i9,g14.6,9x,f10.4)', * $key(i),x(i),y(i) *2100 continue * c Perform Kolmogorov-Smirnov test for Gaussian pdf. print '(/2a)',' Test hypothesis that distribution is Gaussian ', $'by Kolmogorov-Smirnov test' call kstest(n, x, gauss, dn, confi) c c Create the file qq.p for plotxy open (unit=8, file='qq.p') write(8,'(a)') $'title \\ita\\','xlim 6','ylim 7','frame','file *', $'xlab Ordered standarized data','ylab Gaussian quantiles' c Write N and dN in notes to help identify the plot write(8,'(a/a,i6/a,f7.4)') 'note', $'note (0.5 5.5 i) N=',n, $'note (0.5 5.1 i) d\\sub{N}=',dn write(8,'(a/4f9.3/a)') $'read 2',y(1),y(1),y(n),y(n),'symbol 4 .04' c If less than 2000 points, just plot them all if (n .lt. 2000) then write(8, '(a,i7/(2f9.3 ))') 'read ',n,(y(i),q(i), i=1, n) c More than 2000, decimate the middle section else write(8,'(a/(2f9.4))') 'read 100',(y(i),q(i),i=1,100) write(8,'(a/a/( 2f9.4 ))') 'symbol 4 0.03','read 100', $ (y(i),q(i),i=n-99,n) write(8,'(a/a,i7/(2f9.4))')'symbol 15 .04', $ 'read',(n-200)/10, (y(i),q(i),i=101,n-99, 10) endif write(8,'(a)') 'plot','stop' c c print '(/a)', ' Now type plotxy < qq.p' return end c_______________________________________________________________________ subroutine qqplot(n, x, qinv, key) c$$$$$ calls sort, qsnorm dimension x(n), qinv(n), key(n) ce call sort(x, key, n) dx=1.0/(n + 1) do 1100 i=1, n 1100 qinv(i)=qsnorm(i*dx) return end c_______________________________________________________________________ function qsnorm(p) c$$$$$ calls no other routines c Finds inverse cumulative normal probabilty function. That is c if q(x)=p, where q is the cumulative Gaussian function, c finds x for the given p. c Notice 0 .lt. p .lt. 1. if not, qsnorm is returned as 10**6, c which is so large it cannot be achieved with any computer- c representable data. ce qsnorm=1e6 if (p .le. 0.0 .or. p .ge. 1.0) return c x=0.0 d=p if (d .gt. 0.5) d=1.0 - p s=-2.0*alog(d) t=sqrt(s) x=t - (2.515517 +t*(0.802853 +t*0.010328))/ $ (1.0 + t*(1.432788 + t*(0.189269+ t*0.001308))) if (p .lt. 0.5) x=-x qsnorm=x return end c_______________________________________________________________________ subroutine sort(x, key, no) c$$$$ calls no other routines c In-place rapid sorter dimension x(no), key(no) ce do 1 i=1,no 1 key(i)=i mo=no 2 if (mo-15) 21,21,23 21 if (mo.le.1) return mo=2*(mo/4) + 1 go to 24 23 mo=2*(mo/8) + 1 24 ko=no - mo jo=1 25 i=jo 26 if (x(i) - x(i+mo)) 28,28,27 27 temp=x(i) x(i)=x(i+mo) x(i+mo)=temp kemp=key(i) key(i)=key(i+mo) key(i+mo)=kemp i=i - mo if (i-1) 28,26,26 28 jo=1 + jo if (jo-ko) 25,25,2 end c______________________________________________________________ * subroutine sketch(n, x, y) *c$$$$ calls no other routines *c Given arrays x, y of length n, printer plots them *c as pluses. Used to provide a quick and dirty QQ plot * character*61 pix * dimension x(n),y(n),ii(62) * data nx/61/,nz/21/, ii(1)/0/ *c * sz=(nz-1)/(y(n) - y(1)) * sx=(nx-1)/(x(n) - x(1)) *c *c Scan down the page a line at a time * print '(1x//2x,80a1)',('-',i=1,nx) * do 1500 l=0, nz-1 * pix=' ' * k=1 * do 1200 i=1, n * if (ifix(0.1 + sz*(y(n)-y(i))) .eq. l) then * k=k + 1 * ii(k)=sx*(x(i)-x(1)) + 1.1 * if (ii(k) .eq. ii(k-1)) k=k - 1 * endif * 1200 continue *c Place symbols in the output line * do 1300 kk=2, k * jj=ii(kk) * pix(jj:jj)='+' * 1300 continue * print '(1x,3a)','|',pix,'|' * 1500 continue * print '(2x,80a1)',('-',i=1,nx) * return * end *c______________________________________________________________________ subroutine kstest(n, x, ftheo, dn, conf) c$$$$ calls ftheo, kolsmi, pks c Carries out the Kolmogorov-Smirnov test based on a sample array x c and a theoretical cumulative distribution. c For an elementary exposition see F. J. Massey, c 'The Kolmogorov-Smirnov test for goodness of fit' J. Am. Stat. Assoc. c 1951, pp68-78. c c n the number of samples in the array. c x array of samples of random variable. c ftheo a function (declared external in calling routine) that gives c theoretical distribution. Specifically, ftheo(x)=probability c that a member of the random population is less than x. c dn the Kolmogorov-Smirnov statistic for the sample, given ftheo. c conf one of 6 values: 0, 80, 85,90,95, 95 indicating percent c confidence for rejecting the theoretical distribution. The c value given in a lower bound, so 0 merely indicates the c confidence is not 80 or higher. c With iprint=2 prints a table of ordered data, and the empirical and c theoretical distribution functions. To print less set iprint=0, c (no printing) or 1 (a summary). c external ftheo dimension x(n), dtest(5), crit(5) data crit/80.0, 85.0, 90.0, 95.0, 99.0/ c c Data are assumed to be in ascending order. c c Find maximum distance between theoretical distribution and discrete c distribution based on sample. ce delta=1.0/n dn=0 do 1500 k=1, n f0=ftheo(x(k)) f1=(k-1)*delta f2=k*delta dt=max(abs(f1 - f0), abs(f2 - f0)) if (dt .gt. dn) then mark=k dn=dt endif 1500 continue c c Test the maximum distance against the critical values. call kolsmi(n, dtest) conf=0 do 1600 l=1, 5 if (dn .ge. dtest(l)) conf=crit(l) 1600 continue c Print probability of null hypo call pks(sqrt(n*dn*dn)) c c Print some stuff out. print '(/ 3(a, 5f10.3/))', $ ' Critical d values at P=', .80,.85,.90,.95,.99, $ ' are ', dtest, $ ' Actual d statistic is ', dn if (conf .eq. 0) print '(a/)',' Theoretical distribution accepted' if (conf .gt. 0) print '(a, f4.0/)', $ ' Theoretical distribution rejected with % confidence >=',conf c return end c_______________________________________________________________________ subroutine pks(x) c$$$$$ calls no other routines c If x=sqrt(N)*dN, pks gives the approximate probability that the c Kolmogorov-Smirnov statistic exceeds observed dN by chance in c a population of N members. Prints the result. common /chance/ prob ce prob=1.0 if (x .le. 0.22) return d=sqrt(2.0)*x z=d s=0.0 do 1100 j=1, 100 if (z .gt. 7.0) goto 1200 s=exp(-z**2) - s z=d + z 1100 continue 1200 prob=2.0*abs(s) if (prob .gt. 0.0) then print '(a,1p,g12.2)', $' Probability of hypothesis approximately',prob else al=0.301 - 0.868*x**2 n=int(al-1.0) print '(a,f4.2,a,i4)', $' Probability of hypothesis approximately ', $ 10.0**(al-n),' x10**',n prob=1.0e-20 endif return end c_______________________________________________________________________ subroutine kolsmi(n, dalpha) c$$$$$ calls no other routines c Reproduces the table of confidence values given by F. J. Massey in c 'The Kolmogorov-Smirnov test for goodness of fit' J. Am. Stat. Assoc. c 1951, pp68-78. Accuracy believed better than 1 percent everywhere. c n size of sample c dalpha a 5-vector of critical values of d the K-S statistic for c significance levels 0.2, 0.15, 0.1, 0.05, 0.01 that the c proposed distribution function is acceptable. c dimension dalpha(5),dsmall(5,2) data dsmall/.900,.925,.950,.975,.995, $ .684,.726,.776,.842,.929/ ce if (n .eq. 1 .or. n .eq. 2) then do 1000 i=1, 5 dalpha(i)=dsmall(i,n) 1000 continue return c c General case for n .gt. 2 else s=sqrt(1.0/n) dalpha(1)=s*(1.07 - s*0.16174) dalpha(2)=s*(1.14 - s*0.18092) dalpha(3)=s*(1.22 - s*(0.14952 + s*0.06221)) dalpha(4)=s*(1.36 - s*(0.17437 + s*0.09194)) dalpha(5)=s*(1.63 - s*(0.15008 + s*0.33641)) return endif end c_______________________________________________________________________ function gauss(y) c$$$$$ calls no other routines c Cumulative normal distribution of the variable y, with mean ybar, c standard deviation sigma, which must be supplied in common /gparam/ c Uses expression 7.1.26, p299 Abramowitz & Stegun. c Accuracy better than 1.5e-7 absolute. c For more accurate results, continued fraction is probably best. c common /gparam/ ybar,sigma ce x=(y - ybar)/(1.41421356*sigma) t=1.0/(1.0 + .3275911*abs(x)) erf=1.0 - exp(-x*x)*t*(.254829592-t*(.284496736-t*(1.421413741- $ t*(1.453152027-t*1.061405429)))) gauss=0.5*(1.0 + sign(erf, x)) return end c_______________________________________________________________________