program xxx
      real abnd(112,2001) 
     *    ,bbnd(224,2001)
     *    ,cbnd(224,2001)
     *    ,rhs1(2001)   
     *    ,rhs2(2001)  
      common /placem/ abnd,bbnd,cbnd

      integer mh(16) 
      data mh / 1,2,3,5,8, 10,12,15,20,31 ,32,33,48,72,96 ,110/
      n=100
         do 200 im=1,13
         if ( im.eq.10 ) n=2750
         if ( im.eq.13 ) n=3600
         m1=mh(im)
         m2=2*m1
         write(6,1) m2,n
1        format(' problem size  m,n',2i6)

      do 2 j=1,112*2001
2     abnd(j,1)=0.
      do 3 j=1,224*2001
3     bbnd(j,1)=0.

         do 5 i=1,n  
5        abnd(m1+1,i)=2.5+ranf()
      s=.75
         do 10 l=m1,1,-1
            do 8 i=1,n
8           abnd(l,i)=s*(ranf()-.4)
10       s=.8*s
         do 15 i=1,m1
         do 15 j=1,m1+1-i
15       abnd(j,i)=0.
      ta=second()
      write(6,*)' debug: about to call symmsq'
      call symmsq( abnd,112 ,n,m1 ,bbnd,224,mc )
      write(6,16) second()-ta
16    format(' formation time=',f8.3)

         do 30 j=1,224*2001
30       cbnd(j,1)=bbnd(j,1)

         do 35 j=1,n
         rhs1(j)=ranf()-.5
35       rhs2(j)=rhs1(j)

      write(6,*)' debug: about to call bandc'
      t1=second()
      call bandc ( bbnd,224,n,m2  ,infox )
      write(6,*)' debug: about to call spbfa'
      t2=second()
      call spbfa(  cbnd,224,n,m2  ,infol )
      t3=second()
      tt=t2-t1
      t3=t3-t2
      speed=t3/tt
      write(6,38) infox,tt  ,infol,t3  ,speed
38    format(' bndchl   info,time',i6,f12.6,/,
     1       ' spbfa    info,time',i6,f12.6 ,5x,' ratio',f8.3)

      flops= n*(  m2*(m2+1) +m2 +16 )
      rate=1.e-6*flops/tt
      write(6,44) rate
44    format(' rate=',g16.6)

      call bndchls( bbnd,224,n,m2  ,rhs1,rhs1 )
      call spbsl  ( cbnd,224,n,m2  ,rhs2 )

      siz=0.
      errs=0.
         do 45 j=1,n
         errs=errs+(rhs1(j)-rhs2(j))**2
45       siz=siz+rhs1(j)**2
      write(6,46) errs,siz
46    format(' errs,size',2g16.7,/,1x,50(2h-+))
***   call perfset(6)
200   continue

      stop
      end

      subroutine getrow( a,lda ,n,m ,i ,row ,l1,l2)
      real a(lda,n) ,row(n)

c     --- gets the ith row of a symmetric banded matrix ---

c      if ( numarg().eq.8 ) then
         k1=l1
         k2=l2
c      else
c         k1=1
c         k2=n
c      endif

         do 7 j=k1,k2
7        row(j)=0.

         do 2 j=0,min0(m,i-1)  
2        row(i-j)=a(m+1-j,i)


         do 3 j=1,min0(m,n-i)  
3        row(i+j)=a(m+1-j,i+j)

      return
      end

      subroutine putval( a,lda ,n,m ,i,j ,val )
      real a(lda,n)

c     --- stores an element into a symmetric band store matrix ---

**?   if ( iabs(i-j).gt.m ) return
**?   if ( i.lt.1 .or. j.lt.1 .or. i.gt.n .or. j.gt.n ) return
      ii=max0(i,j)
c     jj=iabs(i-j)
      a(m+1-iabs(i-j),ii)=val
      return
      end

      subroutine symmsq( a,lda,n,m ,as,ldas,ms )
      real a(lda,n) ,as(ldas,n)
     *  ,r(10000),c(10000)
      common /qqq/ aa(200,200),bb(200,200)

c   --- squares the symmetric matrix a ---

      ms=2*m
         write(6,*)'debug: symmsq: do 1'
         do 1 j=1,ms+1
         do 1 i=1,n
1        as(j,i)=0.

         write(6,*)'debug: symmsq: do 10'
         do 10 i=1,n
         call getrow( a,lda,n,m ,i,r )
      k1=max(1,i-m-1)
      k2=min(n,i+m+1)

         do 10 j=max0(1,i-ms),i
         call getrow( a,lda,n,m ,j,c ,k1,k2)

         s=0.
         write(6,*)'debug: symmsq: do 5'
            do 5 k=k1,min(k2,j+m+1)
cccc        do 5 k=k1,k2
5           s=s+r(k)*c(k)

         call putval( as,ldas,n,ms ,i,j ,s )
10       continue

      return
      end
c   file bandc forfor

      subroutine bandc ( a,lda ,n,m ,info )
      real a(lda,n)
*dir$ vfunction sxmups ]fortran only version.
      real rowi(1000)

c     --- matrix decomposition for banded positive definite matrices
c     --- ( this routine, and it's partner bndchls can be used to
c     --- accomplish the same thing as linpack's spbfa,spbsl -note
c     --- however that bandc&bndchls store d**(-1/2) on the diagnol
c     --- whereas linpack stores d**(1/2)
c     --- note: data is most frequently accessed stride (lda-1)
c     --- avoid lda = l*2**q+1 : q >2

         do 50 i=1,m

c        --- get rowi ---

         j1=max0(m+2-i,1)
            do 10 j=j1,m
10          rowi(j)=-a(j,i)

         nrows=min0(m+1,n+1-i)
            do 30 jj=j1,m
cdir$ ivdep
            do 30 ii=1,min0(jj,nrows)
30          a(m+2-ii,i+ii-1)=a(m+2-ii,i+ii-1)
     *      +rowi(jj)*a(jj+1-ii,i-1+ii)

      info=i
      if(a(m+1,i).le.0. ) go to 60
         a(m+1,i)=1./sqrt(a(m+1,i))
            do 40 ii=1,nrows-1
40          a(m+1-ii,i+ii)=a(m+1-ii,i+ii)*a(m+1,i)

50       continue

      j1=1
      nrows=     m+1

      if ( m.le.64 ) then
         do 100 i=m+1,n-m
        a(m+1,i)=sxmupf( a(1,i),a(1,i),a(m+1,i),lda-1,m )
c         a(m+1,i)=sxmups( loc(a(1,i)),loc(a(1,i)),loc(a(m+1,i))
c     *                    ,lda-1,m )
         info=i
         if ( a(m+1,i).le.0. ) goto 60
         a(m+1,i)=1./a(m+1,i)
cdir$ ivdep
            do 100 ii=1,m
100         a(m+1-ii,i+ii)=a(m+1-ii,i+ii)*a(m+1,i)
      else
         do 150 i=m+1,n-m
c        --- get rowi ---

            do 110 j=j1,m
110         rowi(j)=-a(j,i)

c            do 130 jj=j1,m
c            do 130 ii=1,nrows
c130         if ( jj+1-ii .gt. 0 )
c     *      a(m+2-ii,i+ii-1)=a(m+2-ii,i+ii-1)
c     *      +rowi(jj)*a(jj+1-ii,i-1+ii)
         call mxmup( a(1,i),lda-1 ,rowi,1 ,a(m+1,i),lda-1 ,m )

      info=i
      if(a(m+1,i).le.0. ) go to 60
         a(m+1,i)=1./sqrt(a(m+1,i))
            do 140 ii=1,nrows-1
140         a(m+1-ii,i+ii)=a(m+1-ii,i+ii)*a(m+1,i)

150      continue
      endif

         do 250 i=n+1-m,n

c        --- get rowi ---

         j1=max0(m+2-i,1)
            do 210 j=j1,m
210         rowi(j)=-a(j,i)

         nrows=min0(m+1,n+1-i)
            do 230 jj=j1,m
cdir$ ivdep
            do 230 ii=1,min0(jj,nrows)   
230         if ( jj+1-ii .gt. 0 )
     *      a(m+2-ii,i+ii-1)=a(m+2-ii,i+ii-1)
     *      +rowi(jj)*a(jj+1-ii,i-1+ii)

      info=i
      if(a(m+1,i).le.0. ) go to 60
         a(m+1,i)=1./sqrt(a(m+1,i))
            do 240 ii=1,nrows-1
240         a(m+1-ii,i+ii)=a(m+1-ii,i+ii)*a(m+1,i)

250      continue
      info=0
60    return
      end
      subroutine bndchls( a,lda ,n,m ,x,b )
      real a(lda,n) ,x(n),b(n)

      if ( loc(x).ne.loc(b) ) then
         do 1 j=1,n
1        x(j)=b(j)
      endif

         do 20 k=1,n 
         x(k)=x(k)*a(m+1,k)
cdir$ ivdep
            do 10 i=k+1,min0(k+m,n)
10          x(i)=x(i)-x(k)*a(m+k+1-i,i)
20       continue

         do 40 k=n,1,-1
         x(k)=x(k)*a(m+1,k)
cdir$ ivdep
            do 30 i=1,min0(k-1,m)
30          x(k-i)=x(k-i)-x(k)*a(m+1-i,k)
40       continue

      return
      end

c /// fortran versions of assembly language kernals
c      function sxmups ( pq,pr,py,ldy ,m )
c      integer pq,pr,py
c      real p(*),r(*),q(*)
c      pointer ( pq,q ),( pr,r ),( py,y )
c      sxmups = sxmupf ( q,r,y,ldy ,m )
c      return
c      end
      function sxmupf( q,r,y,ldy ,m )
      real q(ldy,m),r(m),y(ldy,m)
c /// specialized m in range (1,64) only (incorrect results otherwise
         do 10 j=m,1,-1
         do 10 i=1,j
10       y(1,i)=y(1,i) -r(j)*q(j,i)
      sxmupf=y(1,1)
      if ( sxmupf.gt.0 ) sxmupf=sqrt(sxmupf)
      return
      end
      subroutine mxmup ( a,lda ,r,ldr ,y,ldy ,m )
      real a(lda,m),r(ldr,m),y(ldy,m)

         do 10 i=1,m
         do 10 j=1,m
10       if ( j.ge.i ) y(1,i)=y(1,i)+ r(1,j)*a(j,i)

      return
      end
      function ranf()
      real ranf
c
      data init /1325/
            init = mod(3125*init,65536)
            ranf = (init - 32768.0)/16384.0
      return
      end

.