# To unbundle, sh this file echo README 1>&2 cat >README <<'End of README' To build the library needed to run SCHED type: make sched; make graph make sched will produce the SCHED library in file sched.a . This is the standard libarary for SCHED. make graph will produce the trace version of SCHED in file graph.a . This version produces output which can be viewed using the Schedule Trace Facility. To run some tests type: make xeig; xeig The will make and run a test program which computers the eigenvalues of a symmetric tridiagonal eigenvalue problem using a divide and conquer technique. make geig; geig This does the same as about only uses the graph lib, which produces a trace file. A complete make and can be carried out by doing: sh testrun > testout diff oldtest testout End of README echo chekin.f 1>&2 cat >chekin.f <<'End of chekin.f' subroutine chekin(jobtag) CVD$R NOCONCUR integer jobtag c*********************************************************************** c c this subroutine reports unit of computation labeled by c jobtag has completed to all dependent nodes. these nodes are c recorded in parmq(i,jobtag) where 6 .le. i .le. nchks+5 c checkin consists of decrementing the value in each of these c locations by 1. each of these is done in a critical section c protected by qlock(ichek) c c if the value in parmq(2,ichek) is 0 where ichek is a process c dependent upon this one then ichek is placed on the readyq c by entering the critical section protected by rtlock. the c pointer rtail to the tail of the readyq is incremented c unless task done is to be recorded. task done is placed on c the ready q and the pointer rtail left in place if nchks .eq. 0 c is found. c c see the common block description in libopn for more detail. c c*********************************************************************** parameter (mxprcs = 1000,iprcs = 120,mxces = 4,nslots = 30) parameter (nbuffr = 500) integer parmq,readyq,qlock,hrlock,trlock,phead,intspn,rhead,rtail, & done,qtail,qtlock common /qdata/ parmq(nslots,mxprcs),phead,intspn, & readyq(iprcs,mxces),rhead(mxces),rtail(mxces), & qtail common /qsync/ qlock(mxprcs),hrlock(mxces),trlock(mxces), & done,qtlock cgraph integer endgrf cgraph real igraph cgraph common /gphout/ endgrf,igraph(nslots,nbuffr) c c c common block description: c c a complete common block description is given in the routine libopn c c**************************************************************************** c c check to see if this process has completed. if not do not checkin c c first ask if any kids spawned by jobtag c if (parmq(4,jobtag) .ne. 0 .or. parmq(5,jobtag) .ne. 0 ) then c c either kids have been spawned or ientry has been referenced c indicating reentry is required c c c find out how many are waiting to complete c if (parmq(4,jobtag) .ne. 0) then call lockon(qlock(jobtag)) parmq(2,jobtag) = parmq(2,jobtag) - parmq(4,jobtag) call lockoff(qlock(jobtag)) endif c c reset number of kids c parmq(4,jobtag) = 0 c c update the number of times this procedure has been c entered c parmq(1,jobtag) = parmq(1,jobtag) + 1 c c return without checkin if all the kids have not c checked in to jobtag yet or if a reentry is required. c process jobtag is not done in either case. c if (parmq(2,jobtag) .ne. 0 ) return c c if ientry has been called but jobtag is not waiting c on any kids then jobtag is placed back on the readyq c if ( parmq(5,jobtag) .ne. 0) then iwrkr = mod(jobtag,mxces) + 1 call lockon(trlock(iwrkr)) readyq(rtail(iwrkr),iwrkr) = jobtag rtail(iwrkr) = rtail(iwrkr) + 1 call lockoff(trlock(iwrkr)) return endif endif c c the process has completed so chekin proceeds c cgraph call lockon(qlock(mxprcs)) cgraph if (endgrf .gt. nbuffr) call dump(endgrf,igraph) cgraph insrt = endgrf cgraph endgrf = endgrf + 1 cgraph call lockoff(qlock(mxprcs)) cgraph if (jobtag .ge. intspn) then cgraph igraph(1,insrt) = 5 cgraph igraph(2,insrt) = parmq(6,jobtag) cgraph igraph(3,insrt) = jobtag cgraph igraph(4,insrt) = second(foo) cgraph else cgraph igraph(1,insrt) = 2 cgraph igraph(2,insrt) = jobtag cgraph igraph(3,insrt) = second(foo) cgraph endif c nchks = parmq(3,jobtag) c c if this is the final process (indicated by nchks .eq. 0) then c record task done. do not advance the tail so task done sequence c is set. all subsequent gtprb queries will leave rhead .eq. rtail c with readyq(rhead) .eq. done. c if (nchks .eq. 0) then do 20 iwrkr = 1,mxces call lockon(trlock(iwrkr)) readyq(rtail(iwrkr),iwrkr) = done call lockoff(trlock(iwrkr)) return 20 continue endif do 50 j = 6,nchks+5 mychek = parmq(j,jobtag) c c get unique access to the checkin node mychek c and checkin by decrementing the appropriate counter c mchkgo = 1 call lockon(qlock(mychek)) parmq(2,mychek) = parmq(2,mychek) - 1 mchkgo = parmq(2,mychek) call lockoff(qlock(mychek)) c c place mychek on readyq if parmq(2,mychek) is 0 c if (mchkgo .eq. 0 ) then iwrkr = mod(mychek,mxces) + 1 call lockon(trlock(iwrkr)) readyq(rtail(iwrkr),iwrkr) = mychek rtail(iwrkr) = rtail(iwrkr) + 1 call lockoff(trlock(iwrkr)) endif 50 continue return c c last card of chekin c end End of chekin.f echo d_and_c.f 1>&2 cat >d_and_c.f <<'End of d_and_c.f' integer a(256),klevl(8),myid(256) external top write(6,*) ' input nprocs nlevls ' read (5,*) nprocs,nlevls do 50 j = 1,8 klevl(j) = j 50 continue call sched(nprocs,top,a,nlevls,klevl,myid) do 100 j = 1,2**nlevls-1 write(6,*) a(j) 100 continue stop end subroutine top(a,nlevls,klevl,myid) integer a(*),klevl(*),myid(*) external split c write(6,*) ' from top ' , a(1) jobtag = 1 icango =0 nchks = 0 myid(1) = 1 a(1) = 1 c call dep(jobtag,icango,nchks,mychkn) call putq(jobtag,split,myid,a,nlevls,klevl) c return end subroutine split(myid,a,nlevls,klevl) integer a(*),klevl(*),myid(*),rnode external clone c write(6,*) ' from split ',a(1) c if (klevl(1) .ge. nlevls) return c lnode = 2*a(1) rnode = lnode + 1 indx = lnode - a(1) + 1 a(indx) = lnode a(indx+1) = rnode mytag = myid(a(1)) c call nxtag(mytag,jobtag) myid(lnode) = jobtag call spawn(mytag,jobtag,clone,myid,a(indx),nlevls,klevl(2)) c call nxtag(mytag,jobtag) myid(rnode) = jobtag call spawn(mytag,jobtag,clone,myid,a(indx+1),nlevls,klevl(2)) c return end End of d_and_c.f echo data 1>&2 cat >data <<'End of data' 4 1 End of data echo dep.f 1>&2 cat >dep.f <<'End of dep.f' subroutine dep(jobtag,icango,nchks,mychkn) CVD$R NOCONCUR integer jobtag,icango,nchks,mychkn(*) c************************************************************************* c c warning - this routine may only be used by driver in a static definition c of the data dependencies in the task. c c c usage c subroutine xxx() c external yyy c . c . c . c call dep(jobtag,icango,nchks,mychkn) c call putq(jobtag,yyy,) c . c . c . c c this subroutine puts data dependencies for problem on the queue. c no synchronization is necessary because each index of a column of c parmq is associated with a jobtag specified by the user and c associated with a unique unit of computation. the arguments of c dep specify a the data dependencies associated with the unit of c computation labeled by jobtag and are placed in a column of parmq c to the menue specified below. c c jobtag is an integer specifying a unique schedulable process c c c icango is a positive integer specifying how many processes must check in c to this process before it can be placed on the readyq. c c nchks is the number of processes that depend upon the completion of c this process. c c mychkn is an integer array specifying the jobtags of the processes c which depend upon completion of this process. c c************************************************************************* c parameter (mxprcs = 1000,iprcs = 120,mxces = 4,nslots = 30) parameter (nbuffr = 500) integer parmq,readyq,qlock,hrlock,trlock,phead,intspn,rhead,rtail, & done,qtail,qtlock common /qdata/ parmq(nslots,mxprcs),phead,intspn, & readyq(iprcs,mxces),rhead(mxces),rtail(mxces), & qtail common /qsync/ qlock(mxprcs),hrlock(mxces),trlock(mxces), & done,qtlock cgraph integer endgrf cgraph real igraph cgraph common /gphout/ endgrf,igraph(nslots,nbuffr) c c c common block description: c c for a complete common block description see the subroutine libopn c c c place process jobtag on the problem queue c no synchronization required to update qtail since c only one program work executes this code. c if ((jobtag .le. 0 .or. jobtag .gt. mxprcs) .or. & icango .lt. 0 .or. nchks .lt. 0) then write(6,*) '*************SCHED USER ERROR********************' write(6,*) ' incorrect specification of dependencies ' write(6,*) ' all DEP parameters must be non-negative' write(6,*) ' input was ' write(6,*) ' jobtag ',jobtag ,'.... must be postitive ' write(6,*) ' but less than ',mxprcs write(6,*) ' icango ',icango write(6,*) ' nchks ',nchks write(6,*) ' ' write(6,*) ' EXECUTION TERMINATED BY SCHED ' cgraph call dump(endgrf,igraph) stop c endif qtail = qtail + 1 next = jobtag parmq(1,next) = 1 parmq(2,next) = icango parmq(3,next) = nchks parmq(4,next) = 0 c c check to see that exactly one node has ncheks set to 0 c if (nchks .eq. 0 .and. done .eq. 0) then done = -2 else if (nchks .eq. 0) done = 0 endif c c specify identifiers of processes which depend on this one c if there are too many abort c if (nchks .gt. nslots - 5) then write(6,*) '*************SCHED USER ERROR********************' write(6,*) ' attempt to place too many dependencies ' write(6,*) ' on chekin list during call to dep ' write(6,*) ' with jobtag ',jobtag write(6,*) ' ' write(6,*) ' user tried to place ',nchks ,' dependencies ' write(6,*) ' the maximum number is ',nslots - 5 write(6,*) ' ' write(6,*) ' EXECUTION TERMINATED BY SCHED ' cgraph call dump(endgrf,igraph) stop c endif do 50 j = 1,nchks parmq(j+5,next) = mychkn(j) c if (mychkn(j) .le. 0) then write(6,*) '*************SCHED USER ERROR********************' write(6,*) ' incorrect specification of dependencies ' write(6,*) ' all mychkn entries must be positive' write(6,*) ' input was ' write(6,*) ' mychkn(',j,') = ',mychkn(j) write(6,*) ' ' write(6,*) ' EXECUTION TERMINATED BY SCHED ' cgraph call dump(endgrf,igraph) stop endif c 50 continue cgraph call lockon(qlock(mxprcs)) cgraph if (endgrf .gt. nbuffr) call dump(endgrf,igraph) cgraph insrt = endgrf cgraph endgrf = endgrf + 1 cgraph call lockoff(qlock(mxprcs)) cgraph igraph(1,insrt) = 0 cgraph igraph(2,insrt) = jobtag cgraph igraph(3,insrt) = icango cgraph igraph(4,insrt) = nchks cgraph do 9001 jnsrt = 6,nchks + 5 cgraph igraph(jnsrt-1,insrt) = parmq(jnsrt,next) cgraph 9001 continue c return c c last card of dep c end End of dep.f echo dump.f 1>&2 cat >dump.f <<'End of dump.f' subroutine dump(endgrf,igraph) parameter (nslots = 30,nbuffr = 500) integer endgrf real igraph(nslots,nbuffr) integer ievent(nslots) c c this routine writes graphics output to a file c and resets endgrf to 1 c c do 100 j = 1,endgrf c write(6,15) (igraph(i,j),i = 1,nslots) c 100 continue c 15 format('dump: igraph',30f5.1) do 300 j = 1,endgrf-1 do 302 i = 1,nslots ievent(i) = igraph(i,j) 302 continue if( ievent(1) .eq. 0 ) $ write(3,301) (ievent(i),i=1,ievent(4)+4) if( ievent(1) .eq. 1 ) $ write(3,303) (ievent(i),i=1,2),igraph(3,j) if( ievent(1) .eq. 2 ) $ write(3,303) (ievent(i),i=1,2),igraph(3,j) if( ievent(1) .eq. 3 ) $ write(3,301) (ievent(i),i=1,3) if( ievent(1) .eq. 4 ) $ write(3,304) (ievent(i),i=1,3),igraph(4,j) if( ievent(1) .eq. 5 ) $ write(3,304) (ievent(i),i=1,3),igraph(4,j) 301 format(14i8) 303 format(2i8,1pe16.8) 304 format(3i8,1pe16.8) 300 continue c endgrf = 1 c return end End of dump.f echo graph.chekin.f 1>&2 cat >graph.chekin.f <<'End of graph.chekin.f' subroutine chekin(jobtag) CVD$R NOCONCUR integer jobtag c*********************************************************************** c c this subroutine reports unit of computation labeled by c jobtag has completed to all dependent nodes. these nodes are c recorded in parmq(i,jobtag) where 6 .le. i .le. nchks+5 c checkin consists of decrementing the value in each of these c locations by 1. each of these is done in a critical section c protected by qlock(ichek) c c if the value in parmq(2,ichek) is 0 where ichek is a process c dependent upon this one then ichek is placed on the readyq c by entering the critical section protected by rtlock. the c pointer rtail to the tail of the readyq is incremented c unless task done is to be recorded. task done is placed on c the ready q and the pointer rtail left in place if nchks .eq. 0 c is found. c c see the common block description in libopn for more detail. c c*********************************************************************** parameter (mxprcs = 1000,iprcs = 120,mxces = 4,nslots = 30) parameter (nbuffr = 500) integer parmq,readyq,qlock,hrlock,trlock,phead,intspn,rhead,rtail, & done,qtail,qtlock common /qdata/ parmq(nslots,mxprcs),phead,intspn, & readyq(iprcs,mxces),rhead(mxces),rtail(mxces), & qtail common /qsync/ qlock(mxprcs),hrlock(mxces),trlock(mxces), & done,qtlock integer endgrf real igraph common /gphout/ endgrf,igraph(nslots,nbuffr) c c c common block description: c c a complete common block description is given in the routine libopn c c**************************************************************************** c c check to see if this process has completed. if not do not checkin c c first ask if any kids spawned by jobtag c if (parmq(4,jobtag) .ne. 0 .or. parmq(5,jobtag) .ne. 0 ) then c c either kids have been spawned or ientry has been referenced c indicating reentry is required c c c find out how many are waiting to complete c if (parmq(4,jobtag) .ne. 0) then call lockon(qlock(jobtag)) parmq(2,jobtag) = parmq(2,jobtag) - parmq(4,jobtag) call lockoff(qlock(jobtag)) endif c c reset number of kids c parmq(4,jobtag) = 0 c c update the number of times this procedure has been c entered c parmq(1,jobtag) = parmq(1,jobtag) + 1 c c return without checkin if all the kids have not c checked in to jobtag yet or if a reentry is required. c process jobtag is not done in either case. c if (parmq(2,jobtag) .ne. 0 ) return c c if ientry has been called but jobtag is not waiting c on any kids then jobtag is placed back on the readyq c if ( parmq(5,jobtag) .ne. 0) then iwrkr = mod(jobtag,mxces) + 1 call lockon(trlock(iwrkr)) readyq(rtail(iwrkr),iwrkr) = jobtag rtail(iwrkr) = rtail(iwrkr) + 1 call lockoff(trlock(iwrkr)) return endif endif c c the process has completed so chekin proceeds c call lockon(qlock(mxprcs)) if (endgrf .gt. nbuffr) call dump(endgrf,igraph) insrt = endgrf endgrf = endgrf + 1 call lockoff(qlock(mxprcs)) if (jobtag .ge. intspn) then igraph(1,insrt) = 5 igraph(2,insrt) = parmq(6,jobtag) igraph(3,insrt) = jobtag igraph(4,insrt) = second(foo) else igraph(1,insrt) = 2 igraph(2,insrt) = jobtag igraph(3,insrt) = second(foo) endif c nchks = parmq(3,jobtag) c c if this is the final process (indicated by nchks .eq. 0) then c record task done. do not advance the tail so task done sequence c is set. all subsequent gtprb queries will leave rhead .eq. rtail c with readyq(rhead) .eq. done. c if (nchks .eq. 0) then do 20 iwrkr = 1,mxces call lockon(trlock(iwrkr)) readyq(rtail(iwrkr),iwrkr) = done call lockoff(trlock(iwrkr)) return 20 continue endif do 50 j = 6,nchks+5 mychek = parmq(j,jobtag) c c get unique access to the checkin node mychek c and checkin by decrementing the appropriate counter c mchkgo = 1 call lockon(qlock(mychek)) parmq(2,mychek) = parmq(2,mychek) - 1 mchkgo = parmq(2,mychek) call lockoff(qlock(mychek)) c c place mychek on readyq if parmq(2,mychek) is 0 c if (mchkgo .eq. 0 ) then iwrkr = mod(mychek,mxces) + 1 call lockon(trlock(iwrkr)) readyq(rtail(iwrkr),iwrkr) = mychek rtail(iwrkr) = rtail(iwrkr) + 1 call lockoff(trlock(iwrkr)) endif 50 continue return c c last card of chekin c end End of graph.chekin.f echo graph.dep.f 1>&2 cat >graph.dep.f <<'End of graph.dep.f' subroutine dep(jobtag,icango,nchks,mychkn) CVD$R NOCONCUR integer jobtag,icango,nchks,mychkn(*) c************************************************************************* c c warning - this routine may only be used by driver in a static definition c of the data dependencies in the task. c c c usage c subroutine xxx() c external yyy c . c . c . c call dep(jobtag,icango,nchks,mychkn) c call putq(jobtag,yyy,) c . c . c . c c this subroutine puts data dependencies for problem on the queue. c no synchronization is necessary because each index of a column of c parmq is associated with a jobtag specified by the user and c associated with a unique unit of computation. the arguments of c dep specify a the data dependencies associated with the unit of c computation labeled by jobtag and are placed in a column of parmq c to the menue specified below. c c jobtag is an integer specifying a unique schedulable process c c c icango is a positive integer specifying how many processes must check in c to this process before it can be placed on the readyq. c c nchks is the number of processes that depend upon the completion of c this process. c c mychkn is an integer array specifying the jobtags of the processes c which depend upon completion of this process. c c************************************************************************* c parameter (mxprcs = 1000,iprcs = 120,mxces = 4,nslots = 30) parameter (nbuffr = 500) integer parmq,readyq,qlock,hrlock,trlock,phead,intspn,rhead,rtail, & done,qtail,qtlock common /qdata/ parmq(nslots,mxprcs),phead,intspn, & readyq(iprcs,mxces),rhead(mxces),rtail(mxces), & qtail common /qsync/ qlock(mxprcs),hrlock(mxces),trlock(mxces), & done,qtlock integer endgrf real igraph common /gphout/ endgrf,igraph(nslots,nbuffr) c c c common block description: c c for a complete common block description see the subroutine libopn c c c place process jobtag on the problem queue c no synchronization required to update qtail since c only one program work executes this code. c if ((jobtag .le. 0 .or. jobtag .gt. mxprcs) .or. & icango .lt. 0 .or. nchks .lt. 0) then write(6,*) '*************SCHED USER ERROR********************' write(6,*) ' incorrect specification of dependencies ' write(6,*) ' all DEP parameters must be non-negative' write(6,*) ' input was ' write(6,*) ' jobtag ',jobtag ,'.... must be postitive ' write(6,*) ' but less than ',mxprcs write(6,*) ' icango ',icango write(6,*) ' nchks ',nchks write(6,*) ' ' write(6,*) ' EXECUTION TERMINATED BY SCHED ' call dump(endgrf,igraph) stop c endif qtail = qtail + 1 next = jobtag parmq(1,next) = 1 parmq(2,next) = icango parmq(3,next) = nchks parmq(4,next) = 0 c c check to see that exactly one node has ncheks set to 0 c if (nchks .eq. 0 .and. done .eq. 0) then done = -2 else if (nchks .eq. 0) done = 0 endif c c specify identifiers of processes which depend on this one c if there are too many abort c if (nchks .gt. nslots - 5) then write(6,*) '*************SCHED USER ERROR********************' write(6,*) ' attempt to place too many dependencies ' write(6,*) ' on chekin list during call to dep ' write(6,*) ' with jobtag ',jobtag write(6,*) ' ' write(6,*) ' user tried to place ',nchks ,' dependencies ' write(6,*) ' the maximum number is ',nslots - 5 write(6,*) ' ' write(6,*) ' EXECUTION TERMINATED BY SCHED ' call dump(endgrf,igraph) stop c endif do 50 j = 1,nchks parmq(j+5,next) = mychkn(j) c if (mychkn(j) .le. 0) then write(6,*) '*************SCHED USER ERROR********************' write(6,*) ' incorrect specification of dependencies ' write(6,*) ' all mychkn entries must be positive' write(6,*) ' input was ' write(6,*) ' mychkn(',j,') = ',mychkn(j) write(6,*) ' ' write(6,*) ' EXECUTION TERMINATED BY SCHED ' call dump(endgrf,igraph) stop endif c 50 continue call lockon(qlock(mxprcs)) if (endgrf .gt. nbuffr) call dump(endgrf,igraph) insrt = endgrf endgrf = endgrf + 1 call lockoff(qlock(mxprcs)) igraph(1,insrt) = 0 igraph(2,insrt) = jobtag igraph(3,insrt) = icango igraph(4,insrt) = nchks do 9001 jnsrt = 6,nchks + 5 igraph(jnsrt-1,insrt) = parmq(jnsrt,next) 9001 continue c return c c last card of dep c end End of graph.dep.f echo graph.dump.f 1>&2 cat >graph.dump.f <<'End of graph.dump.f' subroutine dump(endgrf,igraph) parameter (nslots = 30,nbuffr = 500) integer endgrf real igraph(nslots,nbuffr) integer ievent(nslots) c c this routine writes graphics output to a file c and resets endgrf to 1 c c do 100 j = 1,endgrf c write(6,15) (igraph(i,j),i = 1,nslots) c 100 continue c 15 format('dump: igraph',30f5.1) do 300 j = 1,endgrf-1 do 302 i = 1,nslots ievent(i) = igraph(i,j) 302 continue if( ievent(1) .eq. 0 ) $ write(3,301) (ievent(i),i=1,ievent(4)+4) if( ievent(1) .eq. 1 ) $ write(3,303) (ievent(i),i=1,2),igraph(3,j) if( ievent(1) .eq. 2 ) $ write(3,303) (ievent(i),i=1,2),igraph(3,j) if( ievent(1) .eq. 3 ) $ write(3,301) (ievent(i),i=1,3) if( ievent(1) .eq. 4 ) $ write(3,304) (ievent(i),i=1,3),igraph(4,j) if( ievent(1) .eq. 5 ) $ write(3,304) (ievent(i),i=1,3),igraph(4,j) 301 format(14i8) 303 format(2i8,1pe16.8) 304 format(3i8,1pe16.8) 300 continue c endgrf = 1 c return end End of graph.dump.f echo graph.gtprb.f 1>&2 cat >graph.gtprb.f <<'End of graph.gtprb.f' integer function gtprb(id,jobtag) CVD$R NOCONCUR c************************************************************************** c c this function gets unique access to the head of the readyq c pointed to by id and then claims the pointer to the next c schedulable process if there is one and returns with a nonzero c value when there is a process to schedule. if there are no entries c in the readyq indexed by id then the remaning ready ques are c polled in a round robin manner until schedulable process is found c or task done is recorded. if task done has been recorded the value c zero is returned in gtprb. if a nonzero value is returned in gtprb, c the integer jobtag will contain the identifier of the unit of c computation that is to be executed. c c input parameter c c id an integer specifying which readyq to access first c for work to do. c c output parameters c c jobtag an integer containing the next process to be executed c c gtprb the return value of this integer function is: c c 0 if task done has been posted c c nonzero if a schedulable process has been claimed. c c c*************************************************************************** parameter (mxprcs = 1000,iprcs = 120,mxces = 4,nslots = 30) parameter (nbuffr = 500) integer parmq,readyq,qlock,hrlock,trlock,phead,intspn,rhead,rtail, & done,qtail,qtlock common /qdata/ parmq(nslots,mxprcs),phead,intspn, & readyq(iprcs,mxces),rhead(mxces),rtail(mxces), & qtail common /qsync/ qlock(mxprcs),hrlock(mxces),trlock(mxces), & done,qtlock integer endgrf real igraph common /gphout/ endgrf,igraph(nslots,nbuffr) c c common block description: c c for a complete common block description see the routine libopn c c nspins = 0 fsave = second(foo) iwrkr = id 10 continue mhead = -1 call lockon(hrlock(iwrkr)) c c gain access to head of readyq. if task done has not been recorded c then increment the head of the readyq. otherwise the head pointer c is left fixed so the next active process will receive task done. c if (rhead(iwrkr) .lt. rtail(iwrkr)) then mhead = rhead(iwrkr) rhead(iwrkr) = rhead(iwrkr) + 1 endif call lockoff(hrlock(iwrkr)) if (mhead .gt. 0) then c c there was a work unit on the readyq c jobtag = readyq(mhead,iwrkr) call lockon(qlock(mxprcs)) if (endgrf .gt. nbuffr) call dump(endgrf,igraph) insrt = endgrf endgrf = endgrf + 1 call lockoff(qlock(mxprcs)) if (jobtag .ge. intspn) then igraph(1,insrt) = 4 igraph(2,insrt) = parmq(6,jobtag) igraph(3,insrt) = jobtag igraph(4,insrt) = second(foo) else igraph(1,insrt) = 1 igraph(2,insrt) = jobtag igraph(3,insrt) = second(foo) endif c if (jobtag .ne. done) then c c record the subroutine call identifier in gtprb and return c the process identifier in jobtag. c gtprb = parmq(1,jobtag) if (gtprb .gt. 1 .and. parmq(5,jobtag) .eq. 0) gtprb = -1 c else c c task done has been indicated. request a return from subroutine work c by returning the value 0 in gtprb. c gtprb = 0 c endif else c jobtag = readyq(rhead(iwrkr),iwrkr) if (jobtag .eq. done) then c c task done has been posted c gtprb = 0 c else c c there was not any work on the readyq c iwrkr = mod((iwrkr+1),mxces) if (iwrkr .eq. 0) iwrkr = mxces nspins = nspins + 1 C if (mod(nspins,mxces) .eq. 0) call nops go to 10 c endif endif return c c last card of gtprb c end End of graph.gtprb.f echo graph.ientry.f 1>&2 cat >graph.ientry.f <<'End of graph.ientry.f' integer function ientry(jobtag,nentrs) c integer jobtag c***************************************************************************** c c this routine will allow process jobtag to continue after c spawned processes have all checked in. it should only be called if c processes have been spawned by jobtag through the use of c the subroutine spawn. c c go to (1000,2000,...,N000), ientry(jobtag,N) c 1000 continue c . c . c . c do 10 j = 1,nproc c . c . (set parameters to define spawned process) c . c call nxtag(myid,jobtag) c call spawn(myid,jobtag,subname,) c 10 continue c return c 2000 continue c . c . c . c return c N000 continue c c return c end c c this subroutine returns the number of times process jobtag c has been entered. if that number is equal to the total c number nentrs of expected reentries then parmq(5,jobtag) c is set to 0 indicating no more reentries required. c c***************************************************************************** parameter (mxprcs = 1000,iprcs = 120,mxces = 4,nslots = 30) parameter (nbuffr = 500) integer parmq,readyq,qlock,hrlock,trlock,phead,intspn,rhead,rtail, & done,qtail,qtlock common /qdata/ parmq(nslots,mxprcs),phead,intspn, & readyq(iprcs,mxces),rhead(mxces),rtail(mxces), & qtail common /qsync/ qlock(mxprcs),hrlock(mxces),trlock(mxces), & done,qtlock integer endgrf real igraph common /gphout/ endgrf,igraph(nslots,nbuffr) c c report the entry point where process jobtag should resume c computation c if (nentrs .lt. 2) then write(6,*) '*************SCHED USER ERROR********************' write(6,*) ' user call to IENTRY with number of ' write(6,*) ' labels in nentrs set less than 2 ' write(6,*) ' from process ',jobtag write(6,*) ' ' write(6,*) ' EXECUTION TERMINATED BY SCHED ' call dump(endgrf,igraph) stop endif ientry = parmq(1,jobtag) if (ientry .lt. nentrs) then parmq(5,jobtag) = nentrs else parmq(5,jobtag) = 0 endif c return c c last card of ientry c end End of graph.ientry.f echo graph.libopn.f 1>&2 cat >graph.libopn.f <<'End of graph.libopn.f' subroutine libopn(nproc) integer nproc c************************************************************************ c c this routine sets locks and initializes variables c and then spawns nproc generic work routines. c c nproc is a positive integer. care should be taken to c match nproc to the number of physical processors c available. c c************************************************************************ parameter (mxprcs = 1000,iprcs = 120,mxces = 4,nslots = 30) parameter (nbuffr = 500) integer parmq,readyq,qlock,hrlock,trlock,phead,intspn,rhead,rtail, & done,qtail,qtlock common /qdata/ parmq(nslots,mxprcs),phead,intspn, & readyq(iprcs,mxces),rhead(mxces),rtail(mxces), & qtail common /qsync/ qlock(mxprcs),hrlock(mxces),trlock(mxces), & done,qtlock integer endgrf real igraph common /gphout/ endgrf,igraph(nslots,nbuffr) c external work integer ispace(mxces) integer tskary(4,10),jsave(10) c c common block description: c c common/qdata/ c c parmq is a two dimensional integer array. each column of c this array represents a schedulable process. a process is c identified by its jobtag which corresponds to a unique c column of parmq. a column of parmq has the following c entries c c parmq(1,jobtag) = nentries c a nonzero integer. if process jobtag c is on the readyq then this integer c is equal to the one plus number of times c process jobtag has been entered. c thus when work executes this process c the integer is equal to the number c of times the process has been entered. c c parmq(2,jobtag) = icango c an integer specifying the number c of processes that must check in c before this process may scheduled c (ie. be placed on the ready queue) c c parmq(3,jobtag) = nchks c an integer specifying the number c of processes that this process c must checkin to. identifiers of c these processes are recorded below. c if nchks .eq. 0 then completion of c this process signifies completion of c task. c c parmq(4,jobtag) = the number of kids spawned by this c process. if this value is zero c then this process has not spawned c any subprocesses. c c parmq(5,jobtag) = entry_flag c has the value 1 if ientry was called c has the value 0 otherwise c c parmq(6:6+nchks,jobtag) is reserved for identifiers of the c nchks c processes that must wait for completion c of this process before they can execute. c c c phead pointer to head of parmq c c intspn pointer to first spawned process. all jobtags c with values greater than or equal to intspn will c be spawned processes. c c readyq a two dimensional integer array that holds the jobtags of those c processes that are ready to execute. the k-th column of c this array serves as a readyq for the k-th work routine. c on executing gtprb, the k-th work routine will look for work c in the k-th readyq first and then the others (round robin). c if readyq(j,id) .eq. done has been set then a return from c subroutine work(*,*,id) is indicated. c c rhead an integer array. the i-th entry of rhead is a pointer to the c head of the i-th column of readyq c c rtail an integer array. the i-th entry of rtail is a pointer to the c tail of the i-th column of readyq c c c common/sync/ c c qlock is an integer array of locks. there is one lock for each c column of parmq. the purpose of this lock is to ensure c unique access to a column of parmq during the checkin operation. c c hrlock is an integer lock. the purpose of this lock is to ensure c unique access to the pointer rhead to the head of the readyq. c c trlock is an integer lock. the purpose of this lock is to ensure c unique access to the pointer rtail to the tail of the readyq. c c done is a unique non positive integer set in libopn to indicate c task done. c c common /gphout/ c c endgrf is an integer pointing to the next available c slot in igraph c c igraph is a two dimensional integer array c used as a buffer for graphics output c each column of igraph records an event. c c if (nproc .gt. mxces) then write(6,*) '*************SCHED USER ERROR********************' write(6,*) ' user asking for more physical processors' write(6,*) ' than are available on this system ' write(6,*) ' the maximum allowed is nproc = ',mxces write(6,*) ' ' write(6,*) ' EXECUTION TERMINATED BY SCHED ' call dump(endgrf,igraph) stop endif c jobtag = next done = -1 qtlock = 0 call lockasgn(qtlock) c c set qlocks off c initialize readyq(*) = -1 to set done sequence c initialize reentry indicator in parmq(5,*) c do 50 j = 1,mxces hrlock(j) = 0 call lockasgn(hrlock(j)) trlock(j) = 0 call lockasgn(trlock(j)) rhead(j) = 1 rtail(j) = 1 do 20 i = 1,iprcs readyq(i,j) = -1 20 continue 50 continue c do 100 j = 1,mxprcs qlock(j) = 0 call lockasgn(qlock(j)) parmq(5,j) = 0 100 continue c c set readyq locks off c tqlock = 0 c c initialize queue pointers c phead = 1 intspn = 1 qtail = 2 next = 1 endgrf = 1 open( file='trace.graph',unit=3) rewind 3 c c set lock on pointer to head of readyq so c no process may start until all process and data dependencies c have been specified by the user supplied routine driver. c do 150 j = 1,mxces call lockon(hrlock(j)) 150 continue c c now spawn virtual processors. these generic work routines will c assume the identity of any schedulable process specified by driver. c CVD$L CNCALL do 200 j = 1,nproc-1 tskary(1,j) = 3 tskary(3,j) = 'task'h jsave(j) = j call tskstart(tskary(1,j),work,jsave(j),ispace(j)) 200 continue c call work(nproc,ispace(nproc)) call dump(endgrf,igraph) return c c last card of libopn c end End of graph.libopn.f echo graph.nxtag.f 1>&2 cat >graph.nxtag.f <<'End of graph.nxtag.f' subroutine nxtag(mypar,jobtag) CVD$R NOCONCUR integer mypar,jobtag c************************************************************************* c c c this subroutine puts data dependencies for problem on the queue. c no synchronization c is necessary because each index of a column of parmq is associated c with a jobtag specified by the user and associated with a unique c schedulable process. the arguments of putq specify a process and are c placed in a column of jobq according to the menue specified in the c common block description given below. c c jobtag is an integer specifying a unique schedulable process c c c icango is a positive integer specifying how many processes must check in c to this process before it can be placed on the readyq. c c nchks is the number of processes that depend upon the completion of c this process. c c mchkin is an integer array specifying the jobtags of the processes c which depend upon completion of this process. c c************************************************************************* c parameter (mxprcs = 1000,iprcs = 120,mxces = 4,nslots = 30) parameter (nbuffr = 500) integer parmq,readyq,qlock,hrlock,trlock,phead,intspn,rhead,rtail, & done,qtail,qtlock common /qdata/ parmq(nslots,mxprcs),phead,intspn, & readyq(iprcs,mxces),rhead(mxces),rtail(mxces), & qtail common /qsync/ qlock(mxprcs),hrlock(mxces),trlock(mxces), & done,qtlock integer endgrf real igraph common /gphout/ endgrf,igraph(nslots,nbuffr) c c c common block description: c c for a complete common block description see the subroutine libopn c c c c place this process on the next slot in the problem queue c call lockon(qtlock) next = qtail qtail = qtail + 1 call lockoff(qtlock) c call lockon(qlock(mxprcs)) if (endgrf .gt. nbuffr) call dump(endgrf,igraph) insrt = endgrf endgrf = endgrf + 1 call lockoff(qlock(mxprcs)) igraph(1,insrt) = 3 igraph(2,insrt) = mypar igraph(3,insrt) = next c if ( next .gt. mxprcs) then write(6,*) '*************SCHED USER ERROR********************' write(6,*) ' user attempt to create too many processes' write(6,*) ' through dynamic spawning ' write(6,*) ' the maximum allowed is ',mxprcs write(6,*) ' ' write(6,*) ' EXECUTION TERMINATED BY SCHED ' call dump(endgrf,igraph) stop endif c jobtag = next parmq(1,next) = 1 parmq(2,next) = 0 parmq(3,next) = 1 parmq(6,next) = mypar c c update the icango counter of the parent process c by adding 2 to parmq(2,mypar)... prevents race condition. c add 1 to the number of kids spawned by parent mypar c call lockon(qlock(mypar)) parmq(2,mypar) = parmq(2,mypar) + 2 parmq(4,mypar) = parmq(4,mypar) + 1 call lockoff(qlock(mypar)) c c set number of kids spawned by next to zero c parmq(4,next) = 0 c c c return c c last card of nxtag c end End of graph.nxtag.f echo graph.place.f 1>&2 cat >graph.place.f <<'End of graph.place.f' subroutine place(jobtag) CVD$R NOCONCUR integer jobtag c************************************************************************* c c c this subroutine places a problem on the readyq c c jobtag is an integer specifying a unique schedulable process c c c icango is a positive integer specifying how many processes must check in c to this process before it can be placed on the readyq. c c nchks is the number of processes that depend upon the completion of c this process. c c mchkin is an integer array specifying the jobtags of the processes c which depend upon completion of this process. c c************************************************************************* c parameter (mxprcs = 1000,iprcs = 120,mxces = 4,nslots = 30) parameter (nbuffr = 500) integer parmq,readyq,qlock,hrlock,trlock,phead,intspn,rhead,rtail, & done,qtail,qtlock common /qdata/ parmq(nslots,mxprcs),phead,intspn, & readyq(iprcs,mxces),rhead(mxces),rtail(mxces), & qtail common /qsync/ qlock(mxprcs),hrlock(mxces),trlock(mxces), & done,qtlock integer endgrf real igraph common /gphout/ endgrf,igraph(nslots,nbuffr) c c c common block description: c c for a complete common block description see the subroutine libopn c c c c place this process on readyq if icango is 0 c when icango .eq. 0 this process does not depend on any c others. c icango = parmq(2,jobtag) iwrkr = mod(jobtag,mxces) + 1 if (icango .eq. 0 ) then call lockon(trlock(iwrkr)) readyq(rtail(iwrkr),iwrkr) = jobtag rtail(iwrkr) = rtail(iwrkr) + 1 call lockoff(trlock(iwrkr)) endif c c last card of place c return end End of graph.place.f echo graph.start2.f 1>&2 cat >graph.start2.f <<'End of graph.start2.f' subroutine start2 c c this routine allows parallel processing to start after user supplied c driver has completed by unlocking the head of the readyq c parameter (mxprcs = 1000,iprcs = 120,mxces = 4,nslots = 30) parameter (nbuffr = 500) integer parmq,readyq,qlock,hrlock,trlock,phead,intspn,rhead,rtail, & done,qtail,qtlock common /qdata/ parmq(nslots,mxprcs),phead,intspn, & readyq(iprcs,mxces),rhead(mxces),rtail(mxces), & qtail common /qsync/ qlock(mxprcs),hrlock(mxces),trlock(mxces), & done,qtlock logical nostrt integer endgrf real igraph common /gphout/ endgrf,igraph(nslots,nbuffr) c c c for common block description see subroutine libopn. c if (done .ne. 0) then write(6,*) '*************SCHED USER ERROR********************' if (done .eq. -1 ) then write(6,*) ' no process has set nchks equal to 0 ' else write(6,*) ' more than one process has set nchks to 0 ' endif write(6,*) ' SCHEDULE will not be able to terminate job' write(6,*) ' correctly ' write(6,*) ' ' write(6,*) ' check subroutine passed to initial call to' write(6,*) ' to see that at exactly one call to DEP has ' write(6,*) ' set nchks = 0 ' write(6,*) ' ' write(6,*) ' EXECUTION TERMINATED BY SCHED ' call dump(endgrf,igraph) stop endif c nostrt = .true. do 100 iwrkr = 1,mxces if (rhead(iwrkr) .ne. rtail(iwrkr)) nostrt = .false. 100 continue if (nostrt) then write(6,*) '*************SCHED USER ERROR********************' write(6,*) ' no process had an intitial icango of 0 ' write(6,*) ' SCHEDULE could not begin ' write(6,*) ' ' write(6,*) ' check subroutine passed to initial call to' write(6,*) ' to see that at least one call to DEP has ' write(6,*) ' set icango = 0 ' write(6,*) ' ' write(6,*) ' EXECUTION TERMINATED BY SCHED ' call dump(endgrf,igraph) stop endif intspn = qtail c do 200 iwrkr = 1,mxces call lockoff(hrlock(iwrkr)) 200 continue c return c c last card of start2 c end End of graph.start2.f echo graph.wait.f 1>&2 cat >graph.wait.f <<'End of graph.wait.f' logical function wait(jobtag,ienter) c integer jobtag,ienter c***************************************************************************** c c this routine will allow process jobtag to continue after c spawned processes have all checked in. it should only be called if c processes have been spawned by jobtag through the use of c the subroutine spawn. this routine must be used in conjunction with c subroutine prtspn. the required syntax is c c go to (1000,...,L000,...,N000), ientry(mytag,N) c 1000 continue c . c . c . c do 100 j = 1,nproc c . c . (set parameters to define spawned process) c . c call nxtag(mytag,jobtag) c call spawn(mytag,jobtag,subname,) c 100 continue c label = L c if (wait(jobtag,label)) return c L000 continue c . c . c . c c if this subroutine returns a value of .true. then the calling process c jobtag should issue a return. if a value of .false. is returned then c the calling process jobtag should resume execution at the c statement immediately following the reference to wait (ie. at L000 in c the example above. a return value .true. indicates that some spawned c process has not yet completed and checked in. a return value .false. c indicates all spawned processes have checked in. c c***************************************************************************** parameter (mxprcs = 1000,iprcs = 120,mxces = 4,nslots = 30) parameter (nbuffr = 500) integer parmq,readyq,qlock,hrlock,trlock,phead,intspn,rhead,rtail, & done,qtail,qtlock common /qdata/ parmq(nslots,mxprcs),phead,intspn, & readyq(iprcs,mxces),rhead(mxces),rtail(mxces), & qtail common /qsync/ qlock(mxprcs),hrlock(mxces),trlock(mxces), & done,qtlock c c c check the icango counter to see if all spawned processes (kids) c have checked in. c icango = 1 call lockon(qlock(jobtag)) icango = parmq(2,jobtag) - parmq(4,jobtag) call lockoff(qlock(jobtag)) c if (icango .eq. 0) then c c all kids are done ... dont wait (ie return false) c wait = .false. c c record re_entry label where computation is to c resume after wait is complete c parmq(1,jobtag) = ienter c if (ienter .gt. parmq(5,jobtag)) then write(6,*) '*************SCHED USER ERROR*****************' write(6,*) ' executing SCHEDULE function WAIT ' write(6,*) ' return label larger than the maximum ' write(6,*) ' specified by user in call to ientry ' write(6,*) ' from process jobtag ',jobtag write(6,*) ' ' write(6,*) ' the maximum reentry number is ' write(6,*) ' ', parmq(5,jobtag) write(6,*) ' ' write(6,*) ' EXECUTION TERMINATED BY SCHED ' call dump(endgrf,igraph) stop endif c c set last re_entry indication (parmq(5,jobtag) = 0) c if this reentry point corresponds to last one c (recorded in parmq(5,jobtag) during call to ientry) c if (ienter .eq. parmq(5,jobtag)) parmq(5,jobtag) = 0 c else c c kids are not done c wait = .true. c c a checkin will be made so set the number of c entries to return label ienter - 1 to get c correct entry point after checkin c parmq(1,jobtag) = ienter - 1 c endif c return c c last card of wait c end End of graph.wait.f echo gtprb.f 1>&2 cat >gtprb.f <<'End of gtprb.f' integer function gtprb(id,jobtag) CVD$R NOCONCUR c************************************************************************** c c this function gets unique access to the head of the readyq c pointed to by id and then claims the pointer to the next c schedulable process if there is one and returns with a nonzero c value when there is a process to schedule. if there are no entries c in the readyq indexed by id then the remaning ready ques are c polled in a round robin manner until schedulable process is found c or task done is recorded. if task done has been recorded the value c zero is returned in gtprb. if a nonzero value is returned in gtprb, c the integer jobtag will contain the identifier of the unit of c computation that is to be executed. c c input parameter c c id an integer specifying which readyq to access first c for work to do. c c output parameters c c jobtag an integer containing the next process to be executed c c gtprb the return value of this integer function is: c c 0 if task done has been posted c c nonzero if a schedulable process has been claimed. c c c*************************************************************************** parameter (mxprcs = 1000,iprcs = 120,mxces = 4,nslots = 30) parameter (nbuffr = 500) integer parmq,readyq,qlock,hrlock,trlock,phead,intspn,rhead,rtail, & done,qtail,qtlock common /qdata/ parmq(nslots,mxprcs),phead,intspn, & readyq(iprcs,mxces),rhead(mxces),rtail(mxces), & qtail common /qsync/ qlock(mxprcs),hrlock(mxces),trlock(mxces), & done,qtlock cgraph integer endgrf cgraph real igraph cgraph common /gphout/ endgrf,igraph(nslots,nbuffr) c c common block description: c c for a complete common block description see the routine libopn c c nspins = 0 fsave = second(foo) iwrkr = id 10 continue mhead = -1 call lockon(hrlock(iwrkr)) c c gain access to head of readyq. if task done has not been recorded c then increment the head of the readyq. otherwise the head pointer c is left fixed so the next active process will receive task done. c if (rhead(iwrkr) .lt. rtail(iwrkr)) then mhead = rhead(iwrkr) rhead(iwrkr) = rhead(iwrkr) + 1 endif call lockoff(hrlock(iwrkr)) if (mhead .gt. 0) then c c there was a work unit on the readyq c jobtag = readyq(mhead,iwrkr) cgraph call lockon(qlock(mxprcs)) cgraph if (endgrf .gt. nbuffr) call dump(endgrf,igraph) cgraph insrt = endgrf cgraph endgrf = endgrf + 1 cgraph call lockoff(qlock(mxprcs)) cgraph if (jobtag .ge. intspn) then cgraph igraph(1,insrt) = 4 cgraph igraph(2,insrt) = parmq(6,jobtag) cgraph igraph(3,insrt) = jobtag cgraph igraph(4,insrt) = second(foo) cgraph else cgraph igraph(1,insrt) = 1 cgraph igraph(2,insrt) = jobtag cgraph igraph(3,insrt) = second(foo) cgraph endif c if (jobtag .ne. done) then c c record the subroutine call identifier in gtprb and return c the process identifier in jobtag. c gtprb = parmq(1,jobtag) if (gtprb .gt. 1 .and. parmq(5,jobtag) .eq. 0) gtprb = -1 c else c c task done has been indicated. request a return from subroutine work c by returning the value 0 in gtprb. c gtprb = 0 c endif else c jobtag = readyq(rhead(iwrkr),iwrkr) if (jobtag .eq. done) then c c task done has been posted c gtprb = 0 c else c c there was not any work on the readyq c iwrkr = mod((iwrkr+1),mxces) if (iwrkr .eq. 0) iwrkr = mxces nspins = nspins + 1 C if (mod(nspins,mxces) .eq. 0) call nops go to 10 c endif endif return c c last card of gtprb c end End of gtprb.f echo ientry.f 1>&2 cat >ientry.f <<'End of ientry.f' integer function ientry(jobtag,nentrs) c integer jobtag c***************************************************************************** c c this routine will allow process jobtag to continue after c spawned processes have all checked in. it should only be called if c processes have been spawned by jobtag through the use of c the subroutine spawn. c c go to (1000,2000,...,N000), ientry(jobtag,N) c 1000 continue c . c . c . c do 10 j = 1,nproc c . c . (set parameters to define spawned process) c . c call nxtag(myid,jobtag) c call spawn(myid,jobtag,subname,) c 10 continue c return c 2000 continue c . c . c . c return c N000 continue c c return c end c c this subroutine returns the number of times process jobtag c has been entered. if that number is equal to the total c number nentrs of expected reentries then parmq(5,jobtag) c is set to 0 indicating no more reentries required. c c***************************************************************************** parameter (mxprcs = 1000,iprcs = 120,mxces = 4,nslots = 30) parameter (nbuffr = 500) integer parmq,readyq,qlock,hrlock,trlock,phead,intspn,rhead,rtail, & done,qtail,qtlock common /qdata/ parmq(nslots,mxprcs),phead,intspn, & readyq(iprcs,mxces),rhead(mxces),rtail(mxces), & qtail common /qsync/ qlock(mxprcs),hrlock(mxces),trlock(mxces), & done,qtlock cgraph integer endgrf cgraph real igraph cgraph common /gphout/ endgrf,igraph(nslots,nbuffr) c c report the entry point where process jobtag should resume c computation c if (nentrs .lt. 2) then write(6,*) '*************SCHED USER ERROR********************' write(6,*) ' user call to IENTRY with number of ' write(6,*) ' labels in nentrs set less than 2 ' write(6,*) ' from process ',jobtag write(6,*) ' ' write(6,*) ' EXECUTION TERMINATED BY SCHED ' cgraph call dump(endgrf,igraph) stop endif ientry = parmq(1,jobtag) if (ientry .lt. nentrs) then parmq(5,jobtag) = nentrs else parmq(5,jobtag) = 0 endif c return c c last card of ientry c end End of ientry.f echo libopn.f 1>&2 cat >libopn.f <<'End of libopn.f' subroutine libopn(nproc) integer nproc c************************************************************************ c c this routine sets locks and initializes variables c and then spawns nproc generic work routines. c c nproc is a positive integer. care should be taken to c match nproc to the number of physical processors c available. c c************************************************************************ parameter (mxprcs = 1000,iprcs = 120,mxces = 4,nslots = 30) parameter (nbuffr = 500) integer parmq,readyq,qlock,hrlock,trlock,phead,intspn,rhead,rtail, & done,qtail,qtlock common /qdata/ parmq(nslots,mxprcs),phead,intspn, & readyq(iprcs,mxces),rhead(mxces),rtail(mxces), & qtail common /qsync/ qlock(mxprcs),hrlock(mxces),trlock(mxces), & done,qtlock cgraph integer endgrf cgraph real igraph cgraph common /gphout/ endgrf,igraph(nslots,nbuffr) c external work integer ispace(mxces) integer tskary(4,10),jsave(10) c c common block description: c c common/qdata/ c c parmq is a two dimensional integer array. each column of c this array represents a schedulable process. a process is c identified by its jobtag which corresponds to a unique c column of parmq. a column of parmq has the following c entries c c parmq(1,jobtag) = nentries c a nonzero integer. if process jobtag c is on the readyq then this integer c is equal to the one plus number of times c process jobtag has been entered. c thus when work executes this process c the integer is equal to the number c of times the process has been entered. c c parmq(2,jobtag) = icango c an integer specifying the number c of processes that must check in c before this process may scheduled c (ie. be placed on the ready queue) c c parmq(3,jobtag) = nchks c an integer specifying the number c of processes that this process c must checkin to. identifiers of c these processes are recorded below. c if nchks .eq. 0 then completion of c this process signifies completion of c task. c c parmq(4,jobtag) = the number of kids spawned by this c process. if this value is zero c then this process has not spawned c any subprocesses. c c parmq(5,jobtag) = entry_flag c has the value 1 if ientry was called c has the value 0 otherwise c c parmq(6:6+nchks,jobtag) is reserved for identifiers of the c nchks c processes that must wait for completion c of this process before they can execute. c c c phead pointer to head of parmq c c intspn pointer to first spawned process. all jobtags c with values greater than or equal to intspn will c be spawned processes. c c readyq a two dimensional integer array that holds the jobtags of those c processes that are ready to execute. the k-th column of c this array serves as a readyq for the k-th work routine. c on executing gtprb, the k-th work routine will look for work c in the k-th readyq first and then the others (round robin). c if readyq(j,id) .eq. done has been set then a return from c subroutine work(*,*,id) is indicated. c c rhead an integer array. the i-th entry of rhead is a pointer to the c head of the i-th column of readyq c c rtail an integer array. the i-th entry of rtail is a pointer to the c tail of the i-th column of readyq c c c common/sync/ c c qlock is an integer array of locks. there is one lock for each c column of parmq. the purpose of this lock is to ensure c unique access to a column of parmq during the checkin operation. c c hrlock is an integer lock. the purpose of this lock is to ensure c unique access to the pointer rhead to the head of the readyq. c c trlock is an integer lock. the purpose of this lock is to ensure c unique access to the pointer rtail to the tail of the readyq. c c done is a unique non positive integer set in libopn to indicate c task done. c c common /gphout/ c c endgrf is an integer pointing to the next available c slot in igraph c c igraph is a two dimensional integer array c used as a buffer for graphics output c each column of igraph records an event. c c if (nproc .gt. mxces) then write(6,*) '*************SCHED USER ERROR********************' write(6,*) ' user asking for more physical processors' write(6,*) ' than are available on this system ' write(6,*) ' the maximum allowed is nproc = ',mxces write(6,*) ' ' write(6,*) ' EXECUTION TERMINATED BY SCHED ' cgraph call dump(endgrf,igraph) stop endif c jobtag = next done = -1 qtlock = 0 call lockasgn(qtlock) c c set qlocks off c initialize readyq(*) = -1 to set done sequence c initialize reentry indicator in parmq(5,*) c do 50 j = 1,mxces hrlock(j) = 0 call lockasgn(hrlock(j)) trlock(j) = 0 call lockasgn(trlock(j)) rhead(j) = 1 rtail(j) = 1 do 20 i = 1,iprcs readyq(i,j) = -1 20 continue 50 continue c do 100 j = 1,mxprcs qlock(j) = 0 call lockasgn(qlock(j)) parmq(5,j) = 0 100 continue c c set readyq locks off c tqlock = 0 c c initialize queue pointers c phead = 1 intspn = 1 qtail = 2 next = 1 endgrf = 1 cgraph open( file='trace.graph',unit=3) cgraph rewind 3 c c set lock on pointer to head of readyq so c no process may start until all process and data dependencies c have been specified by the user supplied routine driver. c do 150 j = 1,mxces call lockon(hrlock(j)) 150 continue c c now spawn virtual processors. these generic work routines will c assume the identity of any schedulable process specified by driver. c CVD$L CNCALL do 200 j = 1,nproc-1 tskary(1,j) = 3 tskary(3,j) = 'task'h jsave(j) = j call tskstart(tskary(1,j),work,jsave(j),ispace(j)) 200 continue c call work(nproc,ispace(nproc)) cgraph call dump(endgrf,igraph) return c c last card of libopn c end End of libopn.f echo maindp.f 1>&2 cat >maindp.f <<'End of maindp.f' real dd(500),ee(500),qq(500,500) real esave(500),dsave(500) real s,s2,t,t2,tnorm,tnorm2,res,res2 integer icase c real d(500),e(500),q(500,500) integer n,ldq common /prbdef/n,ldq,q,d,e common/prfpms/ksect,kgran real enorm,dummy external treeql c ldq = 500 c write(6,*) ' input nproc ... the number of processors ' read(5,*) nproc do 9999 n = 100,100 ksect = n/10 kgran = n/20 write(6,*) 'nprocs = ',nproc write(6,*) 'ksect = ',ksect, ' kgran = ',kgran write(6,*)'==============================' write(6,*)' n = ',n do 9998 icase = 1,1 write(6,*)'++++++++++++++++++++++++++++++' if( icase .eq. 1 ) write(6,*)' twos on diagonal' if( icase .eq. 2 ) write(6,*)' random numbers on diagonal' if( icase .eq. 3 ) write(6,*)' glued wilks eps = 1.d-8 ' if( icase .eq. 4 ) write(6,*)' glued wilks eps = 1.d-14 ' nsect = 2**ksect nd2 = n/2 go to (112,113,114,115), icase 112 do 13 i = 1,n d(i) = 2.0 e(i) = 1 13 continue go to 445 113 do 14 i = 1,n d(i) = rand(foo) e(i) = rand(foo) 14 continue go to 445 114 continue eps = 1.e-8 do 151 inum = 1,num do 15 i = 1,21 d((inum-1)*21+i) = iabs(11-i) e((inum-1)*21+i) = 1 15 continue 151 continue go to 444 115 continue eps = 1.e-14 do 161 inum = 1,num do 16 i = 1,n d((inum-1)*21+i) = iabs(11-i) e((inum-1)*21+i) = 1 16 continue 161 continue 444 continue do 443 inum = 1,num e(inum*21+1) = eps 443 continue 445 continue do 10 j = 1,n do 5 i = 1,n q(i,j) = 0.0 qq(i,j) = 0.0 5 continue q(j,j) = 1.0 qq(j,j) = 1.0 dd(j) = d(j) ee(j) = e(j) dsave(j) = d(j) esave(j) = e(j) 10 continue c if (n .eq. 50) go to 9999 t1 = second(gtime) call tql2(ldq,n,dd,ee,qq,ierr) t2t = second(gtime) - t1 write(6,*) ' time for tql ',t2t c c the test problem has been defined now comes the numerical c refinements that will avoid cancellation c t1 = second(gtime) c call sched(nproc,treeql,n,ldq,q,d,e,ifail) c t2 = second(gtime) - t1 write(6,*) ' time for sesupd ',t2 if( ifail .gt. 1 ) write(6,*)' deflate from sesupd',ifail write(6,*)' ratio of tql2/new',t2t/t2 tnorm = 0.0d0 tnorm2 = 0.0d0 res = 0.0d0 res2 = 0.0d0 do 530 j = 1,n e(1) = dsave(1)*q(1,j) + $ esave(2)*q(2,j) - d(j)*q(1,j) ee(1) = dsave(1)*qq(1,j) + $ esave(2)*qq(2,j) - dd(j)*qq(1,j) do 400 i = 2,n-1 e(i) = esave(i)*q(i-1,j) + dsave(i)*q(i,j) + $ esave(i+1)*q(i+1,j) - d(j)*q(i,j) ee(i) = esave(i)*qq(i-1,j) + dsave(i)*qq(i,j) + $ esave(i+1)*qq(i+1,j) - dd(j)*qq(i,j) 400 continue e(n) = esave(n)*q(n-1,j) + dsave(n)*q(n,j) $ - d(j)*q(n,j) ee(i) = esave(n)*qq(n-1,j) + dsave(n)*qq(n,j) $ - dd(j)*qq(n,j) t = enorm(n,e) t2 = enorm(n,ee) res = max(res,t) res2 = max(res2,t2) if (t .gt. 1.0d-10) then write(6,*)' j ',j, ' d ',d(j),' er ',t,' dd ',dd(j),' eer ',t2 write(6,*) ' ev ',q(1,j),' eev ',qq(1,j) endif do 520 i = 1,n t = 0.0 s = 0.0 t2 = 0.0 s2 = 0.0 do 510 k = 1,n s2 = s2 + qq(k,i)*qq(k,j) s = s + q(k,i)*q(k,j) 510 continue if (i .eq. j) s2 = s2 - 1.0 if (i .eq. j) s = s - 1.0 t2 = abs(s2) t = abs(s) tnorm2 = max(tnorm2,t2) tnorm = max(tnorm,t) c if (t .gt. 1.0d-13) write(6,*) ' ij ',i,j,' tnorm ',tnorm 520 continue 530 continue write(6,*)' the residual for the tql values and vectors',res2 write(6,*)' the residual for the updated values and vectors',res write(6,*)' the tql norm of q*q sup t is',tnorm2 write(6,*)' the upd norm of q*q sup t is',tnorm write (6,*) ' spectrum [ ',d(1) ,',',d(n),']' 9998 continue 9999 continue stop end End of maindp.f echo make1 1>&2 cat >make1 <<'End of make1' SHELL = /bin/sh FILES = putq.o chekin.o dep.o dump.o gtprb.o ientry.o \ libopn.o nxtag.o place.o start2.o wait.o FILES2 = putq.o graph.chekin.o graph.dep.o graph.dump.o \ graph.gtprb.o graph.ientry.o \ graph.libopn.o graph.nxtag.o graph.place.o graph.start2.o graph.wait.o lib : $(FILES) ar r sched.a $(FILES) graph : $(FILES2) ar r graph.a $(FILES2) .f.o : ; cft77 -a stack $*.f .c.o : ; cc -c $*.c End of make1 echo makefile 1>&2 cat >makefile <<'End of makefile' SHELL = /bin/sh FILES = stuffspawn.o xtest : $(FILES) ldr -o xtest $(FILES) -L /har/cssjdg/TEST/sched.a /usr/lib/libnet.a -lmt gtest : $(FILES) ldr -o gtest $(FILES) -L /har/cssjdg/TEST/graph.a /usr/lib/libnet.a -lmt FILES2 = d_and_c.o xdandc : $(FILES2) ldr -o xdandc $(FILES2) -L /har/cssjdg/TEST/sched.a /usr/lib/libnet.a -lmt gdandc : $(FILES2) ldr -o gdandc $(FILES2) -L /har/cssjdg/TEST/graph.a /usr/lib/libnet.a -lmt FILES3 = maindp.o newevdp0.o stateig.o statses.o xeig : $(FILES3) ldr -o xeig $(FILES3) -L /har/cssjdg/TEST/sched.a /usr/lib/libnet.a -lmt geig : $(FILES3) ldr -o geig $(FILES3) -L /har/cssjdg/TEST/graph.a /usr/lib/libnet.a -lmt FILES4 = maindp.o newevdp0.o stateig.o stseswait.o xwait : $(FILES4) ldr -o xwait $(FILES4) -L /har/cssjdg/TEST/sched.a /usr/lib/libnet.a -lmt gwait : $(FILES4) ldr -o gwait $(FILES4) -L /har/cssjdg/TEST/graph.a /usr/lib/libnet.a -lmt FILES5 = to.harwell.o xtest5 : $(FILES5) ldr -o xtest5 $(FILES5) -L /har/cssjdg/TEST/sched.a /usr/lib/libnet.a -lmt gtest5 : $(FILES5) ldr -o gtest5 $(FILES5) -L /har/cssjdg/TEST/graph.a /usr/lib/libnet.a -lmt sched: make -f make1 lib graph: make -f make1 graph .f.o : ; cft77 -a stack $*.f .c.o : ; cc -c $*.c End of makefile echo newevdp0.f 1>&2 cat >newevdp0.f <<'End of newevdp0.f' subroutine evupd(n,i,d,z,delta,rho,dlam,ifail) implicit real (a-h,o-z) CVD$G NOCONCUR c*********************************************************************** c this subroutine computes the updated eigenvalues of a c rank one modification to a symmetric matrix. it is assumed c that the eigenvalues are in the array d, and that c c d(i) .ne. d(j) for i .ne. j c c it is also assumed that the eigenvalues are in increasing c order and that the value of rho is positive. this is c arranged by the calling subroutine sesupd, and is no loss c in generality. c it is also assumed that the values in the array z are c the squares of the components of the updatingvector. c c c the method consists of approximating the rational functions c c rho = sum(z(j)/((d(j)-d(i))/rho - lamda): j = i+1,n) c c phi =sum(z(j)/(d(j)-d(i))/rho - lamda): j = 1,i) c c by simple interpolating rational functions. this avoids c the need for safeguarding by bisection since c the convergence is monotone, and quadratic from any starting c point that is greater than zero but less than the solution c c c input variables... c n the length of all arrays c c i the i - th eigenvalue is computed c c d the original eigenvalues. it is assumed that they are c in order, d(i) .lt. d(j) for i .lt. j. c c z this array of length n contains the squares of c of the components of the updating vector c c delta this array of length n contains (d(j) - lamda(i))/rho c in its j - th component. these values will be used c to update the eigenvectors in sesupd. c c rho this is the scalar in the the symmetric updating c formula. c c dlam this scalar will contain the value of the i - th c updated eigenvalue on return c c ifail this integer variable indicates failure of the c updating process with value 1, and success c with value 0. c c c c************************************************************************** integer i,n,im1,ip1,ip2,niter dimension d(n),z(n), delta(n) real d,z,rho,delta,zero,one,two,tsave, 1 del,phi,dphi,psi,dpsi,lambda,oldlam, 2 t,temp,a,b,d1,w,eps,eps1,eta,dlam,dmax real epslon,enorm zero = 0.0e0 one = 1.0e0 two = 2.0e0 c c eps is machine precision c eta is the relative accuracy requirement on the roots. c c these values should be adjusted for the particular machine. c eps = epslon(1.0) eta = eps*8. c im1 = i - 1 ip1 = i + 1 ip2 = i + 2 niter = 1 lambda = zero oldlam = zero del = d(i) do 100 j = 1,n delta(j) = (d(j) - del)/rho 100 continue dmax = max(abs(d(1)), abs(d(n))) dmax = abs(rho) + dmax c c calculate initial guess c if (i .lt. n) * then del = d(ip1) else del = d(n) + rho endif a = zero do 200 j = 1,im1 a = a + rho*z(j)/(d(j) - del) 200 continue b = zero do 220 j = ip2,n b = b + rho*z(j)/(d(j) - del) 220 continue a = a + (b + one) if (ip1 .gt. n) * then t = z(i)/abs(a) else t = a*delta(ip1) b = t + z(i) + z(ip1) if (b .ge. zero) * then t = two*z(i)*delta(ip1)/ * (b + sqrt(abs(b*b - 4.0e0*t*z(i)))) else t = (b - sqrt(b*b - 4.0e0*t*z(i)))/(two*a) endif t = t/two endif c c test to see that the initial guess is not too close to endpoint c if (ip1 .le. n .and. t .ge. .9*delta(ip1)) * t = .9*delta(ip1) c c update the values of the array delta c 250 continue tsave = abs(t) do 300 j = 1,n delta(j) = delta(j) - t 300 continue lambda = lambda + t dlam = d(i) + rho*lambda c c evaluate psi and the derivative dpsi c dpsi = zero psi = zero do 400 j = 1,i t = z(j)/delta(j) psi = psi + t dpsi = dpsi + t/delta(j) 400 continue c c evaluate phi and the derivative dphi c dphi = zero phi = zero if (i .eq. n) go to 600 do 500 j = ip1,n t = z(j)/delta(j) phi = phi + t dphi = dphi + t/delta(j) 500 continue c c test for convergence c return if the test is satisfied c 600 continue w = one + phi + psi eps1 = eps*dmax*sqrt(dphi + dpsi) if ((abs(w) .le. eps1) .and. (abs(lambda - oldlam) .le. 1 eta*oldlam) ) then return endif c c return with ifail = -1 if convergence has not ocurred c within 45 iterations. c if (niter .lt. 45) go to 650 ifail = -1 return 650 continue niter = niter + 1 oldlam = lambda c c calculate the new step c if (i .ne. n) go to 700 c c otherwise c calculate the step for the special case i = n c t = (w*psi)/dpsi go to 250 700 continue del = delta(ip1) temp = psi/dpsi d1 = one + phi - del*dphi a = (del*(one + phi) + psi*temp)/d1 + temp b = (two*temp*del*w)/d1 t = sqrt(abs(a*a - two*b)) t = b/(a + t) if (t .lt. -eps) t = -tsave/two go to 250 c c last card of evupd c end subroutine deflat(j,jlam,jdel,indx,n,ldq,q,d,z,tol1) implicit real (a-h,o-z) CVD$G NOCONCUR c c c rotate z components to 0 c assumes norm(z) .eq. 1 c integer indx(*),jdel,j,jlam,n,ldq real q(ldq,*),d(*),z(*),gamma,sigma,tau,t1,t2, * tol,tol1 c tol = tol1 gamma = z(jlam) sigma = z(j) tau = pythag(gamma,sigma) jdel = -1 if (tau .gt. 0.0e0) * then delta = d(j) - d(jlam) gamma = gamma/tau sigma = sigma/tau if (abs(delta*gamma*sigma) .le. tol) * then if (abs(gamma) .le. abs(sigma)) * then z(j) = tau z(jlam) = 0.0e0 jjlam = indx(jlam) jj = indx(j) do 100 i = 1,n t1 = q(i,jjlam) t2 = q(i,jj) q(i,jjlam) = -t1*sigma + t2*gamma q(i,jj) = t1*gamma + t2*sigma 100 continue temp = d(jlam)*sigma**2 + d(j)*gamma**2 d(j) = d(jlam)*gamma**2 + d(j)*sigma**2 d(jlam) = temp jdel = jlam jlam = j else z(jlam) = tau z(j) = 0.0e0 jjlam = indx(jlam) jj = indx(j) do 200 i = 1,n t1 = q(i,jjlam) t2 = q(i,jj) q(i,jjlam) = t1*gamma + t2*sigma q(i,jj) = -t1*sigma + t2*gamma 200 continue temp = d(jlam)*gamma**2 + d(j)*sigma**2 d(j) = d(jlam)*sigma**2 + d(j)*gamma**2 d(jlam) = temp jdel = j endif endif endif c return c c last card of deflat end function rand(foo) CVD$G NOCONCUR data init/1365/ init = mod(3125*init,65536) rand = (init - 32768.)/(2.**15) return end real function enorm(n,x) CVD$G NOCONCUR integer n real x(n),temp,tmax tmax = 0.0e0 do 100 j = 1,n temp = max(tmax,abs(x(j))) if (temp .gt. tmax) tmax = temp 100 continue if (tmax .eq. 0.0e0) then enorm = tmax else temp = 0.0e0 do 200 j = 1,n temp = temp + (x(j)/tmax)**2 200 continue enorm = tmax*sqrt(temp) endif return end subroutine tql2(nm,n,d,e,z,ierr) implicit real (a-h,o-z) CVD$G NOCONCUR c integer i,j,k,l,m,n,ii,l1,l2,nm,mml,ierr real d(n),e(n),z(nm,n) real c,c2,c3,dl1,el1,f,g,h,p,r,s,s2,tst1,tst2,pythag c c this subroutine is a translation of the algol procedure tql2, c num. math. 11, 293-306(1968) by bowdler, martin, reinsch, and c wilkinson. c handbook for auto. comp., vol.ii-linear algebra, 227-240(1971). c c this subroutine finds the eigenvalues and eigenvectors c of a symmetric tridiagonal matrix by the ql method. c the eigenvectors of a full symmetric matrix can also c be found if tred2 has been used to reduce this c full matrix to tridiagonal form. c c on input c c nm must be set to the row dimension of two-dimensional c array parameters as declared in the calling program c dimension statement. c c n is the order of the matrix. c c d contains the diagonal elements of the input matrix. c c e contains the subdiagonal elements of the input matrix c in its last n-1 positions. e(1) is arbitrary. c c z contains the transformation matrix produced in the c reduction by tred2, if performed. if the eigenvectors c of the tridiagonal matrix are desired, z must contain c the identity matrix. c c on output c c d contains the eigenvalues in ascending order. if an c error exit is made, the eigenvalues are correct but c unordered for indices 1,2,...,ierr-1. c c e has been destroyed. c c z contains orthonormal eigenvectors of the symmetric c tridiagonal (or full) matrix. if an error exit is made, c z contains the eigenvectors associated with the stored c eigenvalues. c c ierr is set to c zero for normal return, c j if the j-th eigenvalue has not been c determined after 30 iterations. c c calls pythag for sqrt(a*a + b*b) . c c questions and comments should be directed to burton s. garbow, c mathematics and computer science div, argonne national laboratory c c this version dated august 1983. c c ------------------------------------------------------------------ c ierr = 0 if (n .eq. 1) go to 1001 c do 100 i = 2, n 100 e(i-1) = e(i) c f = 0.0e0 tst1 = 0.0e0 e(n) = 0.0e0 c do 240 l = 1, n j = 0 h = abs(d(l)) + abs(e(l)) if (tst1 .lt. h) tst1 = h c .......... look for small sub-diagonal element .......... do 110 m = l, n tst2 = tst1 + abs(e(m)) if (tst2 .eq. tst1) go to 120 c .......... e(n) is always zero, so there is no exit c through the bottom of the loop .......... 110 continue c 120 if (m .eq. l) go to 220 130 if (j .eq. 30) go to 1000 j = j + 1 c .......... form shift .......... l1 = l + 1 l2 = l1 + 1 g = d(l) p = (d(l1) - g) / (2.0e0 * e(l)) r = pythag(p,1.0e0) d(l) = e(l) / (p + sign(r,p)) d(l1) = e(l) * (p + sign(r,p)) dl1 = d(l1) h = g - d(l) if (l2 .gt. n) go to 145 c do 140 i = l2, n 140 d(i) = d(i) - h c 145 f = f + h c .......... ql transformation .......... p = d(m) c = 1.0e0 c2 = c el1 = e(l1) s = 0.0e0 mml = m - l c .......... for i=m-1 step -1 until l do -- .......... do 200 ii = 1, mml c3 = c2 c2 = c s2 = s i = m - ii g = c * e(i) h = c * p r = pythag(p,e(i)) e(i+1) = s * r s = e(i) / r c = p / r p = c * d(i) - s * g d(i+1) = h + s * (c * g + s * d(i)) c .......... form vector .......... do 180 k = 1, n h = z(k,i+1) z(k,i+1) = s * z(k,i) + c * h z(k,i) = c * z(k,i) - s * h 180 continue c 200 continue c p = -s * s2 * c3 * el1 * e(l) / dl1 e(l) = s * p d(l) = c * p tst2 = tst1 + abs(e(l)) if (tst2 .gt. tst1) go to 130 220 d(l) = d(l) + f 240 continue c .......... order eigenvalues and eigenvectors .......... do 300 ii = 2, n i = ii - 1 k = i p = d(i) c do 260 j = ii, n if (d(j) .ge. p) go to 260 k = j p = d(j) 260 continue c if (k .eq. i) go to 300 d(k) = d(i) d(i) = p c do 280 j = 1, n p = z(j,i) z(j,i) = z(j,k) z(j,k) = p 280 continue c 300 continue c go to 1001 c .......... set error -- no convergence to an c eigenvalue after 30 iterations .......... 1000 ierr = l 1001 return end real function pythag(a,b) real a,b c c finds sqrt(a**2+b**2) without overflow or destructive underflow c real p,r,s,t,u p = max(abs(a),abs(b)) if (p .eq. 0.0e0) go to 20 r = (min(abs(a),abs(b))/p)**2 10 continue t = 4.0e0 + r if (t .eq. 4.0e0) go to 20 s = r/t u = 1.0e0 + 2.0e0*s p = u*p r = (s/u)**2 * r go to 10 20 pythag = p return end real function epslon (x) real x c c estimate unit roundoff in quantities of size x. c real a,b,c,eps c c this program should function properly on all systems c satisfying the following two assumptions, c 1. the base used in representing floating point c numbers is not a power of three. c 2. the quantity a in statement 10 is represented to c the accuracy used in floating point variables c that are stored in memory. c the statement number 10 and the go to 10 are intended to c force optimizing compilers to generate code satisfying c assumption 2. c under these assumptions, it should be true that, c a is not exactly equal to four-thirds, c b has a zero for its last bit or digit, c c is not exactly equal to one, c eps measures the separation of 1.0 from c the next larger floating point number. c the developers of eispack would appreciate being informed c about any systems where these assumptions do not hold. c c this version dated 4/6/83. c a = 4.0e0/3.0e0 10 b = a - 1.0e0 c = b + b + b eps = abs(c-1.0e0) if (eps .eq. 0.0e0) go to 10 epslon = eps*abs(x) epslon = 1.0e-16 return end subroutine evdrv(k,kstart,kstop,ldq,n,q,d,rho,z,dlamda,q2, * w,indxp,indx,ksnrho,ifail) implicit real (a-h,o-z) CVD$G NOCONCUR c c real * q(ldq,n),d(n),z(n),q2(ldq,n),dlamda(n), * w(n),rho integer k,kstart,kstop,ldq,n,ifail integer indx(n),indxp(n),ksnrho logical rhoge0 c c local variables c real x(300),delta(300) real t,s,dlam real enorm rhoge0 = .true. if (ksnrho .lt. 0) rhoge0 = .false. c do 2700 j = kstart,kstop i = j if (.not. rhoge0) i = k - i + 1 call evupd(k,i,dlamda,w,delta,rho,dlam,ifail) c c if the zero finder failed the computation is terminated c if (ifail .eq. 1) return c if (rhoge0) * then jp = indxp(j) else dlam = -dlam jp = indxp(k-j+1) endif c d(jp) = dlam c c compute the updated eigenvectors c do 2200 i1 = 1,n x(i1) = 0.0e0 2200 continue do 2500 jj = 1,k j3 = jj if (.not. rhoge0) j3 = k - jj + 1 jp = indxp(j3) jjpp = indx(jp) s = z(jp)/delta(j3) c qq(jj,j) = s do 2300 i1 = 1,n x(i1) = x(i1) + q(i1,jjpp)*s 2300 continue 2500 continue t = enorm(n,x) j3 = j if (.not. rhoge0) j3 = k - j + 1 jp = indxp(j3) do 2600 i1 = 1,n q2(i1,jp) = x(i1)/t 2600 continue 2700 continue c return c c last card of evdrv c end End of newevdp0.f echo nxtag.f 1>&2 cat >nxtag.f <<'End of nxtag.f' subroutine nxtag(mypar,jobtag) CVD$R NOCONCUR integer mypar,jobtag c************************************************************************* c c c this subroutine puts data dependencies for problem on the queue. c no synchronization c is necessary because each index of a column of parmq is associated c with a jobtag specified by the user and associated with a unique c schedulable process. the arguments of putq specify a process and are c placed in a column of jobq according to the menue specified in the c common block description given below. c c jobtag is an integer specifying a unique schedulable process c c c icango is a positive integer specifying how many processes must check in c to this process before it can be placed on the readyq. c c nchks is the number of processes that depend upon the completion of c this process. c c mchkin is an integer array specifying the jobtags of the processes c which depend upon completion of this process. c c************************************************************************* c parameter (mxprcs = 1000,iprcs = 120,mxces = 4,nslots = 30) parameter (nbuffr = 500) integer parmq,readyq,qlock,hrlock,trlock,phead,intspn,rhead,rtail, & done,qtail,qtlock common /qdata/ parmq(nslots,mxprcs),phead,intspn, & readyq(iprcs,mxces),rhead(mxces),rtail(mxces), & qtail common /qsync/ qlock(mxprcs),hrlock(mxces),trlock(mxces), & done,qtlock cgraph integer endgrf cgraph real igraph cgraph common /gphout/ endgrf,igraph(nslots,nbuffr) c c c common block description: c c for a complete common block description see the subroutine libopn c c c c place this process on the next slot in the problem queue c call lockon(qtlock) next = qtail qtail = qtail + 1 call lockoff(qtlock) c cgraph call lockon(qlock(mxprcs)) cgraph if (endgrf .gt. nbuffr) call dump(endgrf,igraph) cgraph insrt = endgrf cgraph endgrf = endgrf + 1 cgraph call lockoff(qlock(mxprcs)) cgraph igraph(1,insrt) = 3 cgraph igraph(2,insrt) = mypar cgraph igraph(3,insrt) = next c if ( next .gt. mxprcs) then write(6,*) '*************SCHED USER ERROR********************' write(6,*) ' user attempt to create too many processes' write(6,*) ' through dynamic spawning ' write(6,*) ' the maximum allowed is ',mxprcs write(6,*) ' ' write(6,*) ' EXECUTION TERMINATED BY SCHED ' cgraph call dump(endgrf,igraph) stop endif c jobtag = next parmq(1,next) = 1 parmq(2,next) = 0 parmq(3,next) = 1 parmq(6,next) = mypar c c update the icango counter of the parent process c by adding 2 to parmq(2,mypar)... prevents race condition. c add 1 to the number of kids spawned by parent mypar c call lockon(qlock(mypar)) parmq(2,mypar) = parmq(2,mypar) + 2 parmq(4,mypar) = parmq(4,mypar) + 1 call lockoff(qlock(mypar)) c c set number of kids spawned by next to zero c parmq(4,next) = 0 c c c return c c last card of nxtag c end End of nxtag.f echo place.f 1>&2 cat >place.f <<'End of place.f' subroutine place(jobtag) CVD$R NOCONCUR integer jobtag c************************************************************************* c c c this subroutine places a problem on the readyq c c jobtag is an integer specifying a unique schedulable process c c c icango is a positive integer specifying how many processes must check in c to this process before it can be placed on the readyq. c c nchks is the number of processes that depend upon the completion of c this process. c c mchkin is an integer array specifying the jobtags of the processes c which depend upon completion of this process. c c************************************************************************* c parameter (mxprcs = 1000,iprcs = 120,mxces = 4,nslots = 30) parameter (nbuffr = 500) integer parmq,readyq,qlock,hrlock,trlock,phead,intspn,rhead,rtail, & done,qtail,qtlock common /qdata/ parmq(nslots,mxprcs),phead,intspn, & readyq(iprcs,mxces),rhead(mxces),rtail(mxces), & qtail common /qsync/ qlock(mxprcs),hrlock(mxces),trlock(mxces), & done,qtlock cgraph integer endgrf cgraph real igraph cgraph common /gphout/ endgrf,igraph(nslots,nbuffr) c c c common block description: c c for a complete common block description see the subroutine libopn c c c c place this process on readyq if icango is 0 c when icango .eq. 0 this process does not depend on any c others. c icango = parmq(2,jobtag) iwrkr = mod(jobtag,mxces) + 1 if (icango .eq. 0 ) then call lockon(trlock(iwrkr)) readyq(rtail(iwrkr),iwrkr) = jobtag rtail(iwrkr) = rtail(iwrkr) + 1 call lockoff(trlock(iwrkr)) endif c c last card of place c return end End of place.f echo putq.c 1>&2 cat >putq.c <<'End of putq.c' #include #define MAXPARMS 26 struct parms { int (*subname)(); long *parms[MAXPARMS]; }; struct parms indx[1000]; SCHED(nprocs,parms) int *nprocs; struct parms parms; /* this procedure obtains nprocs physical processors devoted to the the execution of the parallel program indicated through parms which is a structure whose first entry is a subroutine name and whose remaining entries are parameters appearing in the calling sequence of that subroutine. */ { int LIBOPN(); bcopy(&parms, &indx[0], sizeof(struct parms)); /* the subroutine name and prameter list have been copied and placed in a special slot on the parmq then libopn is invoked to initialize pointers, grab physical processors and begin the computation */ LIBOPN(nprocs); return(0); } PUTQ(jobtag,parms) int *jobtag; struct parms parms; /* this procedure puts the descriptor of a schedulable process onto the problem queue. this process will be scheduled for execution when its data dependencies have been satisfied (indicated by icango==0). the argument parms is a structure whose first entry is a subroutine name and whose remaining entries are parameters appearing in the calling sequence of that subroutine. */ { register int j; int PLACE(); j = *jobtag; bcopy(&parms, &indx[j], sizeof(struct parms)); /* first the parms block is copied into the slot pointed to by by jobtag and then this descriptor is placed on the problem queue */ PLACE(jobtag); return(0); } SPAWN(parent,jobtag,parms) int *parent,*jobtag; struct parms parms; /* this procedure puts the descriptor of a schedulable process onto the problem queue. this process will be scheduled for execution when its data dependencies have been satisfied (indicated by icango==0). the argument parms is a structure whose first entry is a subroutine name and whose remaining entries are parameters appearing in the calling sequence of that subroutine. the action of this procedure differs from putq in that the user does not assign jobtags or data dependencies. a parent may spawn any number of children but these child processes only report to the parent. */ { register int j,i; int PLACE(),CLONE(); j = *jobtag; i = *parent; bcopy(&parms, &indx[j], sizeof(struct parms)); /* first the parms block is copied into the slot pointed to by by jobtag and then this descriptor is placed on the problem queue */ if (indx[j].subname == CLONE) indx[j].subname = indx[i].subname; /* here we ask if this is a recursive spawning. if so the name clone has been called instead of subname so we replace the name clone by subname. */ PLACE(jobtag); return(0); } CLONE() { /* this is a dummy routine to satisfy unresolved external */ return(0); } WORK(id,jobtag) int *id,*jobtag; { int START2(),GTPRB(); register int j,myjob; j = *id; if (j == 1) /* the worker whose id is 1 will execute the subroutine passed to sched. this subroutine executes the static data dependency graph. this graph must have at least one node. */ { indx[0].subname(indx[0].parms[0], indx[0].parms[1], indx[0].parms[2], indx[0].parms[3], indx[0].parms[4], indx[0].parms[5], indx[0].parms[6], indx[0].parms[7], indx[0].parms[8], indx[0].parms[9], indx[0].parms[10], indx[0].parms[11], indx[0].parms[12], indx[0].parms[13], indx[0].parms[14], indx[0].parms[15], indx[0].parms[16], indx[0].parms[17], indx[0].parms[18], indx[0].parms[19], indx[0].parms[20], indx[0].parms[21], indx[0].parms[22], indx[0].parms[23], indx[0].parms[24], indx[0].parms[25]); START2(); } myjob = GTPRB(id,jobtag); while (myjob != 0) { j = *jobtag; if (myjob <= -1 ) { /* reenter... simple spawning was done all kids completed and no reentry is required. this indicates jobtag is all done and checkin can proceed. */ CHEKIN(jobtag); myjob = GTPRB(id,jobtag); } else { /* call subname().......... */ /* printf("debug: work: about to call id %d jobtag %d \n",*id,*jobtag); */ indx[j].subname(indx[j].parms[0], indx[j].parms[1], indx[j].parms[2], indx[j].parms[3], indx[j].parms[4], indx[j].parms[5], indx[j].parms[6], indx[j].parms[7], indx[j].parms[8], indx[j].parms[9], indx[j].parms[10], indx[j].parms[11], indx[j].parms[12], indx[j].parms[13], indx[j].parms[14], indx[j].parms[15], indx[j].parms[16], indx[j].parms[17], indx[j].parms[18], indx[j].parms[19], indx[j].parms[20], indx[j].parms[21], indx[j].parms[22], indx[j].parms[23], indx[j].parms[24], indx[j].parms[25]); CHEKIN(jobtag); myjob = GTPRB(id,jobtag); /* printf("debug: work: after call id %d jobtag %d \n",*id,*jobtag); */ } } return(0); } End of putq.c echo start2.f 1>&2 cat >start2.f <<'End of start2.f' subroutine start2 c c this routine allows parallel processing to start after user supplied c driver has completed by unlocking the head of the readyq c parameter (mxprcs = 1000,iprcs = 120,mxces = 4,nslots = 30) parameter (nbuffr = 500) integer parmq,readyq,qlock,hrlock,trlock,phead,intspn,rhead,rtail, & done,qtail,qtlock common /qdata/ parmq(nslots,mxprcs),phead,intspn, & readyq(iprcs,mxces),rhead(mxces),rtail(mxces), & qtail common /qsync/ qlock(mxprcs),hrlock(mxces),trlock(mxces), & done,qtlock logical nostrt cgraph integer endgrf cgraph real igraph cgraph common /gphout/ endgrf,igraph(nslots,nbuffr) c c c for common block description see subroutine libopn. c if (done .ne. 0) then write(6,*) '*************SCHED USER ERROR********************' if (done .eq. -1 ) then write(6,*) ' no process has set nchks equal to 0 ' else write(6,*) ' more than one process has set nchks to 0 ' endif write(6,*) ' SCHEDULE will not be able to terminate job' write(6,*) ' correctly ' write(6,*) ' ' write(6,*) ' check subroutine passed to initial call to' write(6,*) ' to see that at exactly one call to DEP has ' write(6,*) ' set nchks = 0 ' write(6,*) ' ' write(6,*) ' EXECUTION TERMINATED BY SCHED ' cgraph call dump(endgrf,igraph) stop endif c nostrt = .true. do 100 iwrkr = 1,mxces if (rhead(iwrkr) .ne. rtail(iwrkr)) nostrt = .false. 100 continue if (nostrt) then write(6,*) '*************SCHED USER ERROR********************' write(6,*) ' no process had an intitial icango of 0 ' write(6,*) ' SCHEDULE could not begin ' write(6,*) ' ' write(6,*) ' check subroutine passed to initial call to' write(6,*) ' to see that at least one call to DEP has ' write(6,*) ' set icango = 0 ' write(6,*) ' ' write(6,*) ' EXECUTION TERMINATED BY SCHED ' cgraph call dump(endgrf,igraph) stop endif intspn = qtail c do 200 iwrkr = 1,mxces call lockoff(hrlock(iwrkr)) 200 continue c return c c last card of start2 c end End of start2.f echo stateig.f 1>&2 cat >stateig.f <<'End of stateig.f' subroutine treeql(n,ldq,q,d,e,ifail) implicit real (a-h,o-z) CVD$G NOCONCUR c * integer n,ldq real d(*),e(*),q(ldq,*) c real z(500),dlamda(500),x(500),q2(500,500),w(500), * delta(500),rho(300) integer indx(500),indxp(500),nn(3,300),kwork(50,300),ifail common /wspace/z,dlamda,x,q2,w,delta,rho,indx,indxp,nn,kwork common/prfpms/nsmall,kgran external sesupd,tql2 c c this subroutine splits a problem in two parts c nn(1,1) = n nn(2,1) = 1 nn(3,1) = 1 ifail = 0 nsize = n nlevl = -1 1111 continue nlevl = nlevl + 1 if (nsize .gt. 2*nsmall) then c c the problem is large enough to split one more level c define the splits here in the 100 loop and then c place a call to sesupd on the queue to glue the results c together c nsize = nsize/2 do 100 id = 2**nlevl,2**(nlevl+1) - 1 c idl = 2*id idr = idl + 1 n1 = nn(1,id)/2 c nn(1,idl) = n1 nn(2,idl) = idl nn(3,idl) = nn(3,id) c nn(1,idr) = nn(1,id) - n1 nn(2,idr) = idr nn(3,idr) = nn(3,id) + n1 c isplt1 = nn(3,idr) isplt = isplt1 - 1 n1p1 = n1 + 1 rho(id) = e(isplt1) alpha = d(isplt) alphap = d(isplt1) sigma = 1.0e0 c if (sign(1.0e0,alpha)*sign(1.0e0,alphap) .ge. 0.0e0) * then if (alpha .lt. 0.0e0 .or. alphap .lt. 0.0e0) * then if(rho(id) .ne. 0.0e0) * then rho(id) = - rho(id) sigma = -sigma endif endif endif kwork(50,id) = sigma d(isplt) = d(isplt) - rho(id) d(isplt1) = d(isplt1) - rho(id) c c the problem is large enough to split into two parts c isplt = nn(3,id) c jobtag = id icango = 2 nchks = 1 if (jobtag .eq. 1) nchks = 0 list = id/2 call dep(jobtag,icango,nchks,list) call putq(jobtag,sesupd,nn(2,id),ldq,nn(1,id), * q(isplt,isplt),d(isplt),rho(id),z(isplt), * x(isplt),dlamda(isplt),q2(isplt,isplt), * delta(isplt),w(isplt),indxp(isplt), * indx(isplt),kwork(1,id),ifail) c 100 continue go to 1111 else c c we have reached the lowest level c granularity now warrants call to tql2 c do 200 id = 2**nlevl,2**(nlevl+1) - 1 c isplt = nn(3,id) c jobtag = id icango = 0 ncheks = 1 list = id/2 c call dep(jobtag,icango,ncheks,list) call putq(jobtag,tql2,ldq,nn(1,id),d(isplt),e(isplt), * q(isplt,isplt),ifail) c 200 continue endif c return c c last card of split c end End of stateig.f echo statses.f 1>&2 cat >statses.f <<'End of statses.f' subroutine sesupd(myid,ldq,n,q,d,rho,z,x,dlamda,q2,delta,w,indxp, * indx,kwork,ifail) implicit real (a-h,o-z) CVD$G NOCONCUR c*********************************************************************** c c c this subroutine will compute the updated eigensystem of a c of a symmetric matrix after modification by a rank one c symmetric matrix. c c a = qdq' + rho*z*z' c c it is assumed that the eigenvectors of the original matrix c are stored in q, and the eigenvalues are in d. c the algorithm consists of three stages... c c c the first stage constists of deflating the size of c the problem when there multiple eigenvalues or if there c zero of the vector q'z. for each such ocurrence the dimension c is reduced by one. c c the second stage consists of calculating the updated c eigenvalues of the reduced problem. this requires a call c to the zero finding routine evupd. c c the final stage consists of computing the updated eigenvectors c directly using the updated eigenvalue. c c c the algorithm requires o(n**2) operations to update the c eigenvectors, but n**3 + o(n**2) to update the eigenvectors. c c c input variables... c c n the dimension of the problem. q is n x n c c q an n x n matrix that contains the eigenvectors of c the original matrix on input and the updated c eigenvectors on output. c c d a vector of length n. the original eigenvalues are c contained in d on input. the updated eigenvectors c are contained on output. c c rho a scalar c c z a vector of length n. on input this vector c containes the updating vector. the contents of z c are destroyed during the updating process. c c x a working array of length n. c c dlamda a working array of length n c c q2 a working array of dimension n x n c c delta a working array of length n c c w a working array of length n. c c indx an integer array of length n. c c ifail this integer variable indicates failure of the c updating process with value 1, and success c with value 0. c c called subroutines... c c evupd a subroutine for calculating the updated eigenvalues. c c c*********************************************************************** integer ldq,n,ifail integer indx(n),indxp(n),kwork(*) integer ksize(20),kstart(20),kstop(20),kbins,msize,krem,prcsys integer kgran,ksect real * q(ldq,n),d(n),z(n),x(n),dlamda(n),q2(ldq,n),delta(n) * ,w(n),rho common/prfpms/ksect,kgran logical rhoge0 real eps,zero,one,two,s,t,evsprd,dmax,dlam real epslon,enorm external evdrv c c c eps is machine precision c eps = epslon(1.0) zero = 0.0e0 ifail = 0 one = 1.0e0 two = 2.0e0 rhoge0 = (rho .ge. zero) go to (1111,2222),ientry(myid,2) 1111 continue c c compute q(transpose)*z c n1 = n/2 n1p1 = n1 + 1 do 100 j = 1,n1 z(j) = q(n1,j) 100 continue sigma = kwork(50) if (sigma .gt. 0.0) then do 200 j = n1p1,n z(j) = q(n1p1,j) 200 continue else do 201 j = n1p1,n z(j) = -q(n1p1,j) 201 continue endif c c normalize z so that norm(z) = 1 c t = enorm(n,z) do 300 j = 1,n z(j) = z(j)/t indx(j) = j 300 continue rho = rho*t*t c c calculate the allowable deflation tolerence c tol = (1.0e2)*eps*max(abs(d(1)),abs(d(n))) if (abs(rho) .le. tol) return c c c order the eigenvalues c nm1 = n - 1 do 600 j = 1,nm1 t = d(1) s = z(1) inx = indx(1) k = n - j + 1 do 500 i = 2,k im1 = i - 1 if (d(i) .lt. t) go to 400 t = d(i) s = z(i) inx = indx(i) go to 500 400 continue d(im1) = d(i) d(i) = t z(im1) = z(i) z(i) = s indx(im1) = indx(i) indx(i) = inx 500 continue 600 continue c c if there multiple eigenvalues then the problem deflates. c here the number of equal eigenvalues are found. c then an elementary reflector is computed to rotate the c corresponding eigensubspace so that certain components of c z are zero in this new basis. c if (rhoge0) *then k = 0 k2 = n + 1 do 670 j = 1,n if(rho*abs(z(j)) .le. tol) * then c c deflate due to small z component c k2 = k2 - 1 indxp(k2) = j if (j .eq. n) go to 1111 else jlam = j go to 700 endif 670 continue 700 continue j = j + 1 if (j .gt. n) go to 800 if(rho*abs(z(j)) .le. tol) c if(rho*abs(z(j)) .le. 1.0e-10) * then c c deflate due to small z component c k2 = k2 - 1 indxp(k2) = j else call deflat(j,jlam,jdef,indx,n,ldq,q,d,z,tol) if (jdef .le. 0) * then k = k + 1 w(k) = z(jlam)**2 dlamda(k) = d(jlam) indxp(k) = jlam jlam = j else k2 = k2 - 1 indxp(k2) = jdef endif endif go to 700 800 continue c c record the last eigenvalue c k = k + 1 w(k) = z(jlam)**2 dlamda(k) = d(jlam) indxp(k) = jlam else jlam = n k = n + 1 k2 = 0 do 671 j = n,1,-1 if(abs(rho*z(j)) .le. tol) c if(abs(rho*z(j)) .le. 1.0e-10) * then c c deflate due to small z component c k2 = k2 + 1 indxp(n-k2+1) = j if (j .eq. 1) go to 1111 else jlam = j go to 701 endif 671 continue 701 continue j = j - 1 if (j .lt. 1) go to 801 if(abs(rho*z(j)) .le. tol) * then c c deflate due to small z component c k2 = k2 + 1 indxp(n-k2+1) = j else call deflat(j,jlam,jdef,indx,n,ldq,q,d,z,tol) if (jdef .le. 0) * then k = k - 1 w(n-k+1) = z(jlam)**2 dlamda(n-k+1) = -d(jlam) indxp(n-k+1) = jlam jlam = j else k2 = k2 + 1 indxp(n-k2+1) = jdef endif endif go to 701 801 continue c c record the last eigenvalue c k = k - 1 w(n-k+1) = z(jlam)**2 dlamda(n-k+1) = -d(jlam) indxp(n-k+1) = jlam k = n - k + 1 rho = -rho endif c c compute the updated eigenvalues of the deflated problem c c kbins = k/kgran + 1 kbins = min(kbins,20) krem = mod(k,kbins) msize = (k - krem)/kbins do 900 j = 1,kbins ksize(j) = msize 900 continue do 910 j = 1,krem ksize(j) = ksize(j) + 1 910 continue kstart(1) = 1 kstop(1) = ksize(1) kwork(49) = k kwork(48) = -1 if (rhoge0) kwork(48) = 1 if ( kbins .gt. 1 ) then c do 920 j = 2,kbins kstart(j) = kstop(j-1) + 1 kstop(j) = kstop(j-1) + ksize(j) kwork(j) = kstart(j) kwork(kbins+j) = kstop(j) call nxtag(myid,jtemp) call spawn(myid,jtemp,evdrv,kwork(49), * kwork(j),kwork(kbins+j), * ldq,n,q,d,rho,z,dlamda,q2, * w,indxp,indx,kwork(48),ifail) 920 continue call evdrv(k,kstart(1),kstop(1),ldq,n,q,d,rho,z, * dlamda,q2,w,indxp,indx,kwork(48),ifail) c c kbins-1 processes were spawned c place a join point here and resume when all spawned c processes have checked in c c else c c there was not sufficient granularity to warrant spawning c of additional processes to compute roots c kstop(1) = k call evdrv(k,kstart(1),kstop(1),ldq,n,q,d,rho,z, * dlamda,q2,w,indxp,indx,kwork(48),ifail) endif return c 2222 continue c c store the updated eigenvectors back into q c k = kwork(49) do 1100 j = k+1,n jp = indxp(j) jjpp = indx(jp) do 1000 i = 1,n q2(i,jp) = q(i,jjpp) 1000 continue 1100 continue do 1300 j = 1,n do 1200 i = 1,n q(i,j) = q2(i,j) 1200 continue 1300 continue c return c c last card of sesupd c end End of statses.f echo stseswait.f 1>&2 cat >stseswait.f <<'End of stseswait.f' subroutine sesupd(myid,ldq,n,q,d,rho,z,x,dlamda,q2,delta,w,indxp, * indx,kwork,ifail) implicit real (a-h,o-z) CVD$G NOCONCUR c*********************************************************************** c c c this subroutine will compute the updated eigensystem of a c of a symmetric matrix after modification by a rank one c symmetric matrix. c c a = qdq' + rho*z*z' c c it is assumed that the eigenvectors of the original matrix c are stored in q, and the eigenvalues are in d. c the algorithm consists of three stages... c c c the first stage constists of deflating the size of c the problem when there multiple eigenvalues or if there c zero of the vector q'z. for each such ocurrence the dimension c is reduced by one. c c the second stage consists of calculating the updated c eigenvalues of the reduced problem. this requires a call c to the zero finding routine evupd. c c the final stage consists of computing the updated eigenvectors c directly using the updated eigenvalue. c c c the algorithm requires o(n**2) operations to update the c eigenvectors, but n**3 + o(n**2) to update the eigenvectors. c c c input variables... c c n the dimension of the problem. q is n x n c c q an n x n matrix that contains the eigenvectors of c the original matrix on input and the updated c eigenvectors on output. c c d a vector of length n. the original eigenvalues are c contained in d on input. the updated eigenvectors c are contained on output. c c rho a scalar c c z a vector of length n. on input this vector c containes the updating vector. the contents of z c are destroyed during the updating process. c c x a working array of length n. c c dlamda a working array of length n c c q2 a working array of dimension n x n c c delta a working array of length n c c w a working array of length n. c c indx an integer array of length n. c c ifail this integer variable indicates failure of the c updating process with value 1, and success c with value 0. c c called subroutines... c c evupd a subroutine for calculating the updated eigenvalues. c c c*********************************************************************** real * q(ldq,n),d(n),z(n),x(n),dlamda(n),q2(ldq,n),delta(n) * ,w(n),rho integer ldq,n,ifail integer indx(n),indxp(n),kwork(*) integer ksize(20),kstart(20),kstop(20),kbins,msize,krem,prcsys integer kgran,ksect common/prfpms/ksect,kgran common/wsync/iwrite logical rhoge0,wait real eps,zero,one,two,s,t,evsprd,dmax,dlam real epslon,enorm external evdrv c c c eps is machine precision c c write(6,*) ' in ses ldq n rho ifail ',ldq,n,rho,ifail c write(6,*) 'fses myid rho q d z x ',myid,rho,q(1,1),d(1),z(1),x(1) eps = epslon(1.0) zero = 0.0e0 ifail = 0 one = 1.0e0 two = 2.0e0 rhoge0 = (rho .ge. zero) go to (1111,2222),ientry(myid,2) 1111 continue c c compute q(transpose)*z c n1 = n/2 n1p1 = n1 + 1 do 100 j = 1,n1 z(j) = q(n1,j) 100 continue sigma = kwork(50) c write(6,*) ' sigma at 2 ',sigma if (sigma .gt. 0.0) then do 200 j = n1p1,n z(j) = q(n1p1,j) 200 continue else do 201 j = n1p1,n z(j) = -q(n1p1,j) 201 continue endif c c normalize z so that norm(z) = 1 c t = enorm(n,z) c call lockon(iwrite) c write(6,*) ' n t z1 zn ',n,t,z(1),z(n) c call lockoff(iwrite) do 300 j = 1,n z(j) = z(j)/t indx(j) = j 300 continue rho = rho*t*t c c calculate the allowable deflation tolerence c tol = (1.0e2)*eps*max(abs(d(1)),abs(d(n))) if (abs(rho) .le. tol) return c c c order the eigenvalues c c write(6,*) ' before order evs ',d(1),d(n) nm1 = n - 1 do 600 j = 1,nm1 t = d(1) s = z(1) inx = indx(1) k = n - j + 1 do 500 i = 2,k im1 = i - 1 if (d(i) .lt. t) go to 400 t = d(i) s = z(i) inx = indx(i) go to 500 400 continue d(im1) = d(i) d(i) = t z(im1) = z(i) z(i) = s indx(im1) = indx(i) indx(i) = inx 500 continue 600 continue c write(6,*) ' after order evs ',d(1),d(n) c c if there multiple eigenvalues then the problem deflates. c here the number of equal eigenvalues are found. c then an elementary reflector is computed to rotate the c corresponding eigensubspace so that certain components of c z are zero in this new basis. c if (rhoge0) *then k = 0 k2 = n + 1 do 670 j = 1,n if(rho*abs(z(j)) .le. tol) * then c c deflate due to small z component c k2 = k2 - 1 indxp(k2) = j if (j .eq. n) go to 1111 else jlam = j go to 700 endif 670 continue 700 continue j = j + 1 if (j .gt. n) go to 800 if(rho*abs(z(j)) .le. tol) c if(rho*abs(z(j)) .le. 1.0e-10) * then c c deflate due to small z component c k2 = k2 - 1 indxp(k2) = j else call deflat(j,jlam,jdef,indx,n,ldq,q,d,z,tol) if (jdef .le. 0) * then k = k + 1 w(k) = z(jlam)**2 dlamda(k) = d(jlam) indxp(k) = jlam jlam = j else k2 = k2 - 1 indxp(k2) = jdef endif endif go to 700 800 continue c c record the last eigenvalue c k = k + 1 w(k) = z(jlam)**2 dlamda(k) = d(jlam) indxp(k) = jlam else jlam = n k = n + 1 k2 = 0 do 671 j = n,1,-1 if(abs(rho*z(j)) .le. tol) c if(abs(rho*z(j)) .le. 1.0e-10) * then c c deflate due to small z component c k2 = k2 + 1 indxp(n-k2+1) = j if (j .eq. 1) go to 1111 else jlam = j go to 701 endif 671 continue 701 continue j = j - 1 if (j .lt. 1) go to 801 if(abs(rho*z(j)) .le. tol) * then c c deflate due to small z component c k2 = k2 + 1 indxp(n-k2+1) = j else call deflat(j,jlam,jdef,indx,n,ldq,q,d,z,tol) if (jdef .le. 0) * then k = k - 1 w(n-k+1) = z(jlam)**2 dlamda(n-k+1) = -d(jlam) indxp(n-k+1) = jlam jlam = j else k2 = k2 + 1 indxp(n-k2+1) = jdef endif endif go to 701 801 continue c c record the last eigenvalue c k = k - 1 w(n-k+1) = z(jlam)**2 dlamda(n-k+1) = -d(jlam) indxp(n-k+1) = jlam k = n - k + 1 rho = -rho endif c write(6,*) ' begin a problem ------------------------------- ' c write(6,*) ' ' c write(6,*) k,rho c do 6666 jj = 1,k c write(6,*) z(indxp(jj)),w(jj),dlamda(jj) c6666 continue c write(6,*) ' begin a problem ------------------------------- ' c c c compute the updated eigenvalues of the deflated problem c c kbins = k/kgran + 1 kbins = min(kbins,20) krem = mod(k,kbins) msize = (k - krem)/kbins do 900 j = 1,kbins ksize(j) = msize 900 continue do 910 j = 1,krem ksize(j) = ksize(j) + 1 910 continue c call lockon(iwrite) c write(6,*) ' from ses w ',w(1),w(2),w(k) c call lockoff(iwrite) kstart(1) = 1 kstop(1) = ksize(1) kwork(49) = k kwork(48) = -1 if (rhoge0) kwork(48) = 1 c write(6,*) ' rhoge0 kwork ',rhoge0,kwork(48) c if ( kbins .gt. 1 ) then c write(6,*) ' about to spawn',kbins,' evdrvs from ses ',myid c do 920 j = 2,kbins kstart(j) = kstop(j-1) + 1 kstop(j) = kstop(j-1) + ksize(j) kwork(j) = kstart(j) kwork(kbins+j) = kstop(j) c write(6,*) ' bef spawn evdrv ',kwork(49), c * kwork(j),kwork(kbins+j),kwork(48) c call lockon(iwrite) c write(6,*) ' about to spawn evdrv ',k,kstart(j),kstop(j) c call lockoff(iwrite) call nxtag(myid,jtemp) call spawn(myid,jtemp,evdrv,kwork(49), * kwork(j),kwork(kbins+j), * ldq,n,q,d,rho,z,dlamda,q2, * w,indxp,indx,kwork(48),ifail) c write(6,*) ' tag assigned ',jtemp 920 continue c call lockon(iwrite) c write(6,*) ' about to call evdrv ',k,kstart(1),kstop(1) c call lockoff(iwrite) call evdrv(k,kstart(1),kstop(1),ldq,n,q,d,rho,z, * dlamda,q2,w,indxp,indx,kwork(48),ifail) c c kbins-1 processes were spawned c place a join point here and resume when all spawned c processes have checked in c c else c c there was not sufficient granularity to warrant spawning c of additional processes to compute roots c kstop(1) = k c write(6,*) 'bef rho q d z x ',rho,q(1,1),d(1),z(1),x(1) c call lockon(iwrite) c write(6,*) ' about to call evdrv ',k,kstart(1),kstop(1) c call lockoff(iwrite) call evdrv(k,kstart(1),kstop(1),ldq,n,q,d,rho,z, * dlamda,q2,w,indxp,indx,kwork(48),ifail) endif if( wait(myid,2) ) return c 2222 continue c c store the updated eigenvectors back into q c k = kwork(49) do 1100 j = k+1,n jp = indxp(j) jjpp = indx(jp) do 1000 i = 1,n q2(i,jp) = q(i,jjpp) 1000 continue 1100 continue do 1300 j = 1,n do 1200 i = 1,n q(i,j) = q2(i,j) 1200 continue 1300 continue c call lockon(iwrite) c write(6,*) ' after ret in ses ',k,n c write(6,*) ' k n z1 zn ',k,n,q(1,1),q(n,1) c call lockoff(iwrite) c ifail = n-k c return c c last card of sesupd c end End of stseswait.f echo stuffspawn.f 1>&2 cat >stuffspawn.f <<'End of stuffspawn.f' double precision a,b common /prbdef/ a(1000),b(100),itmp(10),jtmp(10),myid(10) EXTERNAL PARALG nblks = 8 do 10 j = 1,10 itmp(j) = j jtmp(j) = j 10 continue write(6,*) ' input nprocs ' read (5,*) nprocs t1 = second(foo) c CALL SCHED(nprocs,paralg,nblks,a,b,itmp,jtmp,myid) c t2 = second(foo) write(6,*) ' time = ',t2-t1,' nprocs = ',nprocs nn = 36 do 100 j = 1,nn write(6,*) a(j) 100 continue stop end c subroutine paralg(n,a,b,itmp,jtmp,myid) integer itmp(*),jtmp(*),myid(*) double precision a(*),b(*) integer mychkn(1) EXTERNAL STUFF1 c c this is the driver for filling a packed triangular matrix with c i on the i-th diagonal and -(i+j) in the (i,j) off diagonal position c icount = 1 do 200 j = 1,n-1 c c c the j-th diagonal waits for the diagonal above to complete c c the j-th diagonal completion will allow c the (j+1)-st diagonal to start c c jobtag = j icango = 1 if (jobtag .eq. 1) icango = 0 nchks = 1 mychkn(1) = j+1 c c we just set up data dependencies and are ready to put c this process on the queue c CALL DEP(jobtag,icango,nchks,mychkn) myid(jobtag) = jobtag CALL PUTQ(jobtag,stuff1,myid(jobtag),n, & a(icount),jtmp(j),itmp(1)) c c when the data dependencies for process jobtag are satisfied c the following call will be made c c call stuff1(myid....,itmp(1)) c icount = icount + (n-j+1) 200 continue c icango = 1 nchks = 0 mychkn(1) = n+1 c CALL DEP(n,icango,nchks,mychkn) myid(n) = n CALL PUTQ(n,stuff1,myid(n),n,a(icount),jtmp(n),itmp(1)) c return end c subroutine stuff1(myid,n,a,j,itmp) double precision a(*) integer myid,n,j,itmp(*) logical wait EXTERNAL STUFF2,STUFF3 c c write(6,*) ' enter stuff1 ',myid,j c c write(6,*) ' enter stuff1 ',ientry(myid),myid go to (1111,2222,3333),ientry(myid,3) 1111 continue ii = 2 do 100 i = 2,n c CALL NXTAG(myid,jdummy) c write(6,*) ' about to spawn id jd ',myid,jdummy CALL SPAWN(myid,jdummy,stuff2,a(ii + 1),itmp(i),itmp(j)) c c this spawns a process that will execute a call to stuff2 c and report completion to process MYID c ii = ii + 1 100 continue call stuff2(a(2),itmp(1),itmp(j)) if (wait(myid,2)) return c c return to help out and then return here (at label 2222) c on the next reentry c 2222 continue ii = 2 do 200 i = 2,n c CALL NXTAG(myid,jdummy) c write(6,*) ' about to spawn 3 id jd ',myid,jdummy CALL SPAWN(myid,jdummy,stuff3,a(ii + 1),itmp(i),itmp(j)) c c this spawns a process that will execute a call to stuff2 c and report completion to process MYID c ii = ii + 1 200 continue call stuff2(a(2),itmp(1),itmp(j)) if (wait(myid,3)) return 3333 continue a(1) = j c return end subroutine stuff2(a,i,j) double precision a(*),one one = 1.0d0 a(1) = 0.0d0 c write(6,*) ' enter stuff2 ',i,j do 100 kk = 1,1000000 a(1) = a(1) + one 100 continue a(1) = -(i+j) return end subroutine stuff3(a,i,j) double precision a(*),one,save one = 1.0d0 save = a(1) c write(6,*) ' enter stuff3 ',i,j do 100 kk = 1,1000000 a(1) = a(1) + one 100 continue a(1) = -(i+j) + save return end c End of stuffspawn.f echo testrun 1>&2 cat >testrun <<'End of testrun' make sched; make graph make xeig; xeig < data make xtest; xtest < data make geig; geig < data make gtest; gtest < data make xdandc; xdandc < data make gdandc; gdandc < data make xwait; xwait < data make gwait; gwait < data End of testrun echo to.harwell.f 1>&2 cat >to.harwell.f <<'End of to.harwell.f' real a1(500,500),b1(500) real a2(500,500),b2(500) real a3(500,500),b3(500) real a4(500,500),b4(500) real a5(500,500),b5(500) real a6(500,500),b6(500) integer ipvt1(500),ipvt2(500) integer ipvt3(500),ipvt4(500) integer ipvt5(500),ipvt6(500) integer mychkn(10) common /foo/ n,lda,mychkn c EXTERNAL start c lda = 500 write(6,*) ' input nprocs ' read (5,*) nprocs write(6,*) ' input n ' read (5,*) n c call matgen(n,lda,a1,b1) c t3 = second(x) call dgefa(a1,lda,n,ipvt1,info1) call dgesl(a1,lda,n,ipvt1,b1,job) t4 = second(x) - t3 write(6,*)'the time to call dgefa and dgesl once explicitely =' $ ,t4 c call matgen(n,lda,a1,b1) call matgen(n,lda,a2,b2) call matgen(n,lda,a3,b3) call matgen(n,lda,a4,b4) call matgen(n,lda,a5,b5) call matgen(n,lda,a6,b6) c t1 = second(x) CALL SCHED(nprocs,start,a1,b1,a2,b2,a3,b3,a4,b4,a5,b5, $ a6,b6,ipvt1,ipvt2,ipvt3,ipvt4,ipvt5,ipvt6) t2 = second(x) - t1 c c write(6,*) 'from b1' c write(6,*)b1(1),b1(n) c write(6,*) 'from b2' c write(6,*)b2(1),b2(n) c write(6,*) 'from b3' c write(6,*)b3(1),b3(n) c write(6,*) 'from b4' c write(6,*)b4(1),b4(n) c write(6,*) 'from b5' c write(6,*)b5(1),b5(n) c c write(6,*)' using ',nprocs,' processors' write(6,*)'the time to call sched (6 calls to dgefa and dgesl) =' $ ,t2 c stop end subroutine matgen(n,lda,a,b) c real a(lda,*),b(*),norma c init = 1325 norma = 0.0 do 30 j = 1,n do 20 i = 1,n init = mod(3125*init,65536) a(i,j) = (init - 32768.0)/16384.0 norma = max1(a(i,j), norma) 20 continue 30 continue do 35 i = 1,n b(i) = 0.0 35 continue do 50 j = 1,n do 40 i = 1,n b(i) = b(i) + a(i,j) 40 continue 50 continue c return end subroutine start(a1,b1,a2,b2,a3,b3,a4,b4,a5,b5, $ a6,b6,ipvt1,ipvt2,ipvt3,ipvt4,ipvt5,ipvt6) c real a1(lda,*),b1(*) real a2(lda,*),b2(*) real a3(lda,*),b3(*) real a4(lda,*),b4(*) real a5(lda,*),b5(*) real a6(lda,*),b6(*) integer ipvt1(*),ipvt2(*),ipvt3(*),ipvt4(*),ipvt5(*),mychkn(10) integer ipvt6(*) common /foo/ n,lda,mychkn c external dgefa, dgesl, term, relese c job = 0 c jobtag = 3 icango = 1 nchks = 1 mychkn(1) = 2 CALL DEP(jobtag,icango,nchks,mychkn) CALL PUTQ(jobtag,dgefa,a1,lda,n,ipvt1,info1) c jobtag = 2 icango = 1 nchks = 1 mychkn(1) = 1 CALL DEP(jobtag,icango,nchks,mychkn) CALL PUTQ(jobtag,dgesl,a1,lda,n,ipvt1,b1,job) c jobtag = 5 icango = 1 nchks = 1 mychkn(1) = 4 CALL DEP(jobtag,icango,nchks,mychkn) CALL PUTQ(jobtag,dgefa,a2,lda,n,ipvt2,info2) c jobtag = 4 icango = 1 nchks = 1 mychkn(1) = 1 CALL DEP(jobtag,icango,nchks,mychkn) CALL PUTQ(jobtag,dgesl,a2,lda,n,ipvt2,b2,job) c jobtag = 7 icango = 1 nchks = 1 mychkn(1) = 6 CALL DEP(jobtag,icango,nchks,mychkn) CALL PUTQ(jobtag,dgefa,a3,lda,n,ipvt3,info3) c jobtag = 6 icango = 1 nchks = 1 mychkn(1) = 1 CALL DEP(jobtag,icango,nchks,mychkn) CALL PUTQ(jobtag,dgesl,a3,lda,n,ipvt3,b3,job) c jobtag = 9 icango = 1 nchks = 1 mychkn(1) = 8 CALL DEP(jobtag,icango,nchks,mychkn) CALL PUTQ(jobtag,dgefa,a4,lda,n,ipvt4,info4) c jobtag = 8 icango = 1 nchks = 1 mychkn(1) = 1 CALL DEP(jobtag,icango,nchks,mychkn) CALL PUTQ(jobtag,dgesl,a4,lda,n,ipvt4,b4,job) c jobtag = 11 icango = 1 nchks = 1 mychkn(1) = 10 CALL DEP(jobtag,icango,nchks,mychkn) CALL PUTQ(jobtag,dgefa,a5,lda,n,ipvt5,info5) c jobtag = 10 icango = 1 nchks = 1 mychkn(1) = 1 CALL DEP(jobtag,icango,nchks,mychkn) CALL PUTQ(jobtag,dgesl,a5,lda,n,ipvt5,b5,job) c jobtag = 13 icango = 1 nchks = 1 mychkn(1) = 12 CALL DEP(jobtag,icango,nchks,mychkn) CALL PUTQ(jobtag,dgefa,a6,lda,n,ipvt6,info6) c jobtag = 12 icango = 1 nchks = 1 mychkn(1) = 1 CALL DEP(jobtag,icango,nchks,mychkn) CALL PUTQ(jobtag,dgesl,a6,lda,n,ipvt6,b6,job) c jobtag = 1 icango = 6 nchks = 0 mychkn(1) = 0 CALL DEP(jobtag,icango,nchks,mychkn) CALL PUTQ(jobtag,term,foo) c jobtag = 14 icango = 0 nchks = 6 mychkn(1) = 3 mychkn(2) = 5 mychkn(3) = 7 mychkn(4) = 9 mychkn(5) = 11 mychkn(6) = 13 CALL DEP(jobtag,icango,nchks,mychkn) CALL PUTQ(jobtag,relese,foo2) c c write(6,*)'start: about to return ' return end c subroutine term(foo) c write(6,*)'term:' c write(6,*)'term: about to return to and then write' return end subroutine relese(foo) c write(6,*)'relese:' return end Caveat receptor. (Jack) dongarra@anl-mcs, (Eric Grosse) research!ehg Compliments of netlib Sun May 3 11:24:50 CDT 1987 subroutine dgefa(a,lda,n,ipvt,info) integer lda,n,ipvt(1),info real a(lda,1) c c dgefa factors a real matrix by gaussian elimination. c c dgefa is usually called by dgeco, but it can be called c directly with a saving in time if rcond is not needed. c (time for dgeco) = (1 + 9/n)*(time for dgefa) . c c on entry c c a real(lda, n) c the matrix to be factored. c c lda integer c the leading dimension of the array a . c c n integer c the order of the matrix a . c c on return c c a an upper triangular matrix and the multipliers c which were used to obtain it. c the factorization can be written a = l*u where c l is a product of permutation and unit lower c triangular matrices and u is upper triangular. c c ipvt integer(n) c an integer vector of pivot indices. c c info integer c = 0 normal value. c = k if u(k,k) .eq. 0.0 . this is not an error c condition for this subroutine, but it does c indicate that dgesl or dgedi will divide by zero c if called. use rcond in dgeco for a reliable c indication of singularity. c c linpack. this version dated 08/14/78 . c cleve moler, university of new mexico, argonne national lab. c c subroutines and functions c c blas daxpy,dscal,idamax c c internal variables c real t integer idamax,j,k,kp1,l,nm1 c c c gaussian elimination with partial pivoting c info = 0 nm1 = n - 1 if (nm1 .lt. 1) go to 70 do 60 k = 1, nm1 kp1 = k + 1 c c find l = pivot index c l = idamax(n-k+1,a(k,k),1) + k - 1 ipvt(k) = l c c zero pivot implies this column already triangularized c if (a(l,k) .eq. 0.0e0) go to 40 c c interchange if necessary c if (l .eq. k) go to 10 t = a(l,k) a(l,k) = a(k,k) a(k,k) = t 10 continue c c compute multipliers c t = -1.0e0/a(k,k) call dscal(n-k,t,a(k+1,k),1) c c row elimination with column indexing c do 30 j = kp1, n t = a(l,j) if (l .eq. k) go to 20 a(l,j) = a(k,j) a(k,j) = t 20 continue call daxpy(n-k,t,a(k+1,k),1,a(k+1,j),1) 30 continue go to 50 40 continue info = k 50 continue 60 continue 70 continue ipvt(n) = n if (a(n,n) .eq. 0.0e0) info = n return end subroutine dgesl(a,lda,n,ipvt,b,job) integer lda,n,ipvt(1),job real a(lda,1),b(1) c c dgesl solves the real system c a * x = b or trans(a) * x = b c using the factors computed by dgeco or dgefa. c c on entry c c a real(lda, n) c the output from dgeco or dgefa. c c lda integer c the leading dimension of the array a . c c n integer c the order of the matrix a . c c ipvt integer(n) c the pivot vector from dgeco or dgefa. c c b real(n) c the right hand side vector. c c job integer c = 0 to solve a*x = b , c = nonzero to solve trans(a)*x = b where c trans(a) is the transpose. c c on return c c b the solution vector x . c c error condition c c a division by zero will occur if the input factor contains a c zero on the diagonal. technically this indicates singularity c but it is often caused by improper arguments or improper c setting of lda . it will not occur if the subroutines are c called correctly and if dgeco has set rcond .gt. 0.0 c or dgefa has set info .eq. 0 . c c to compute inverse(a) * c where c is a matrix c with p columns c call dgeco(a,lda,n,ipvt,rcond,z) c if (rcond is too small) go to ... c do 10 j = 1, p c call dgesl(a,lda,n,ipvt,c(1,j),0) c 10 continue c c linpack. this version dated 08/14/78 . c cleve moler, university of new mexico, argonne national lab. c c subroutines and functions c c blas daxpy,ddot c c internal variables c real ddot,t integer k,kb,l,nm1 c nm1 = n - 1 if (job .ne. 0) go to 50 c c job = 0 , solve a * x = b c first solve l*y = b c if (nm1 .lt. 1) go to 30 do 20 k = 1, nm1 l = ipvt(k) t = b(l) if (l .eq. k) go to 10 b(l) = b(k) b(k) = t 10 continue call daxpy(n-k,t,a(k+1,k),1,b(k+1),1) 20 continue 30 continue c c now solve u*x = y c do 40 kb = 1, n k = n + 1 - kb b(k) = b(k)/a(k,k) t = -b(k) call daxpy(k-1,t,a(1,k),1,b(1),1) 40 continue go to 100 50 continue c c job = nonzero, solve trans(a) * x = b c first solve trans(u)*y = b c do 60 k = 1, n t = ddot(k-1,a(1,k),1,b(1),1) b(k) = (b(k) - t)/a(k,k) 60 continue c c now solve trans(l)*x = y c if (nm1 .lt. 1) go to 90 do 80 kb = 1, nm1 k = n - kb b(k) = b(k) + ddot(n-k,a(k+1,k),1,b(k+1),1) l = ipvt(k) if (l .eq. k) go to 70 t = b(l) b(l) = b(k) b(k) = t 70 continue 80 continue 90 continue 100 continue return end subroutine dscal(n,da,dx,incx) c c scales a vector by a constant. c uses unrolled loops for increment equal to one. c jack dongarra, linpack, 3/11/78. c real da,dx(1) integer i,incx,m,mp1,n,nincx c if(n.le.0)return if(incx.eq.1)go to 20 c c code for increment not equal to 1 c nincx = n*incx do 10 i = 1,nincx,incx dx(i) = da*dx(i) 10 continue return c c code for increment equal to 1 c c c clean-up loop c 20 m = mod(n,5) if( m .eq. 0 ) go to 40 do 30 i = 1,m dx(i) = da*dx(i) 30 continue if( n .lt. 5 ) return 40 mp1 = m + 1 do 50 i = mp1,n,5 dx(i) = da*dx(i) dx(i + 1) = da*dx(i + 1) dx(i + 2) = da*dx(i + 2) dx(i + 3) = da*dx(i + 3) dx(i + 4) = da*dx(i + 4) 50 continue return end integer function idamax(n,dx,incx) c c finds the index of element having max. absolute value. c jack dongarra, linpack, 3/11/78. c real dx(1),dmax integer i,incx,ix,n c idamax = 0 if( n .lt. 1 ) return idamax = 1 if(n.eq.1)return if(incx.eq.1)go to 20 c c code for increment not equal to 1 c ix = 1 dmax = abs(dx(1)) ix = ix + incx do 10 i = 2,n if(abs(dx(ix)).le.dmax) go to 5 idamax = i dmax = abs(dx(ix)) 5 ix = ix + incx 10 continue return c c code for increment equal to 1 c 20 dmax = abs(dx(1)) do 30 i = 2,n if(abs(dx(i)).le.dmax) go to 30 idamax = i dmax = abs(dx(i)) 30 continue return end subroutine daxpy(n,da,dx,incx,dy,incy) c c constant times a vector plus a vector. c uses unrolled loops for increments equal to one. c jack dongarra, linpack, 3/11/78. c real dx(1),dy(1),da integer i,incx,incy,ixiy,m,mp1,n c if(n.le.0)return if (da .eq. 0.0e0) return if(incx.eq.1.and.incy.eq.1)go to 20 c c code for unequal increments or equal increments c not equal to 1 c ix = 1 iy = 1 if(incx.lt.0)ix = (-n+1)*incx + 1 if(incy.lt.0)iy = (-n+1)*incy + 1 do 10 i = 1,n dy(iy) = dy(iy) + da*dx(ix) ix = ix + incx iy = iy + incy 10 continue return c c code for both increments equal to 1 c c c clean-up loop c 20 m = mod(n,4) if( m .eq. 0 ) go to 40 do 30 i = 1,m dy(i) = dy(i) + da*dx(i) 30 continue if( n .lt. 4 ) return 40 mp1 = m + 1 do 50 i = mp1,n,4 dy(i) = dy(i) + da*dx(i) dy(i + 1) = dy(i + 1) + da*dx(i + 1) dy(i + 2) = dy(i + 2) + da*dx(i + 2) dy(i + 3) = dy(i + 3) + da*dx(i + 3) 50 continue return end real function ddot(n,dx,incx,dy,incy) c c forms the dot product of two vectors. c uses unrolled loops for increments equal to one. c jack dongarra, linpack, 3/11/78. c real dx(1),dy(1),dtemp integer i,incx,incy,ix,iy,m,mp1,n c ddot = 0.0e0 dtemp = 0.0e0 if(n.le.0)return if(incx.eq.1.and.incy.eq.1)go to 20 c c code for unequal increments or equal increments c not equal to 1 c ix = 1 iy = 1 if(incx.lt.0)ix = (-n+1)*incx + 1 if(incy.lt.0)iy = (-n+1)*incy + 1 do 10 i = 1,n dtemp = dtemp + dx(ix)*dy(iy) ix = ix + incx iy = iy + incy 10 continue ddot = dtemp return c c code for both increments equal to 1 c c c clean-up loop c 20 m = mod(n,5) if( m .eq. 0 ) go to 40 do 30 i = 1,m dtemp = dtemp + dx(i)*dy(i) 30 continue if( n .lt. 5 ) go to 60 40 mp1 = m + 1 do 50 i = mp1,n,5 dtemp = dtemp + dx(i)*dy(i) + dx(i + 1)*dy(i + 1) + * dx(i + 2)*dy(i + 2) + dx(i + 3)*dy(i + 3) + dx(i + 4)*dy(i + 4) 50 continue 60 ddot = dtemp return end End of to.harwell.f echo trace.graph 1>&2 cat >trace.graph <<'End of trace.graph' 0 1 2 0 0 2 2 1 1 0 3 2 1 1 0 4 2 1 2 0 5 2 1 2 0 6 2 1 3 0 7 2 1 3 0 8 0 1 4 0 9 0 1 4 0 10 0 1 5 0 11 0 1 5 0 12 0 1 6 0 13 0 1 6 0 14 0 1 7 0 15 0 1 7 1 8 3.54055090E-03 1 10 2.34626600E-04 1 9 3.10370000E-04 2 10 4.32834130E-03 1 14 4.34584010E-03 2 8 8.07075570E-03 1 12 8.08854970E-03 2 9 5.13159280E-03 1 13 5.15316700E-03 2 14 8.43198210E-03 1 11 8.45379410E-03 2 12 1.21833509E-02 1 4 1.22010711E-02 3 4 17 3 4 18 3 4 19 3 4 20 2 13 9.97397160E-03 4 4 17 9.99612800E-03 3 4 21 4 4 20 1.54236137E-02 5 4 17 1.26458104E-02 4 4 21 1.26648180E-02 2 11 1.33046025E-02 4 4 18 1.33257093E-02 5 4 20 1.74398297E-02 1 5 1.74623961E-02 5 4 18 1.45865249E-02 1 6 1.46063689E-02 5 4 21 1.45604284E-02 3 5 22 1 15 1.45889931E-02 3 5 23 3 6 24 3 5 25 3 6 26 3 5 27 3 6 28 3 6 29 4 6 24 2.20108992E-02 4 5 22 1.91266189E-02 2 15 1.94451028E-02 4 5 25 1.94663244E-02 5 6 24 2.42343005E-02 4 6 28 2.42527341E-02 5 5 25 2.09031612E-02 4 6 29 2.09227428E-02 5 5 22 2.13614961E-02 4 6 26 2.13799625E-02 5 6 28 2.56893577E-02 4 4 19 2.57200216E-02 5 6 29 2.20149910E-02 4 5 23 2.20417722E-02 5 4 19 2.75858373E-02 1 4 2.76115525E-02 2 4 2.76715273E-02 4 5 27 2.77059509E-02 5 6 26 2.44126833E-02 1 6 2.44338393E-02 2 6 2.45003741E-02 1 7 2.45231701E-02 3 7 30 5 5 27 2.87799049E-02 5 5 23 2.50720986E-02 1 5 2.50934022E-02 4 7 30 2.89329989E-02 2 5 2.51580018E-02 3 7 31 1 2 2.52440690E-02 3 7 32 3 7 33 4 7 31 2.77039009E-02 5 7 30 3.10498453E-02 4 7 32 3.10678689E-02 4 7 33 3.95636995E-01 5 7 31 2.98406897E-02 5 7 32 3.31829441E-02 5 7 33 3.96668735E-01 1 7 3.00982927E-02 2 7 3.01620969E-02 1 3 3.01957989E-02 3 3 34 3 3 35 4 3 34 3.99449060E-01 3 3 36 4 3 35 3.64427557E-02 3 3 37 3 3 38 3 3 39 3 3 40 3 3 41 3 3 42 3 3 43 5 3 35 3.84525921E-02 4 3 36 3.84716161E-02 5 3 34 4.02293410E-01 4 3 39 4.02311926E-01 5 3 39 4.04720627E-01 5 3 36 4.14463465E-02 4 3 43 4.04745653E-01 4 3 40 4.14644685E-02 4 3 38 4.00524777E-02 5 3 43 4.07099193E-01 5 3 40 4.38554573E-02 4 3 37 4.07130533E-01 4 3 41 4.38808937E-02 5 3 38 4.21705049E-02 4 3 42 4.21923743E-02 5 3 37 4.09137680E-01 5 3 41 4.62933993E-02 5 3 42 4.38431081E-02 1 3 4.40543975E-02 2 3 4.42039081E-02 3 2 44 3 2 45 4 2 44 9.66589739E-01 3 2 46 3 2 47 3 2 48 3 2 49 3 2 50 3 2 51 3 2 52 3 2 53 5 2 44 9.68603479E-01 4 2 48 9.68621158E-01 5 2 48 9.71461786E-01 4 2 52 9.71480154E-01 4 2 45 3.73054510E-01 5 2 52 9.73167205E-01 4 2 49 9.73192609E-01 5 2 45 3.75883133E-01 4 2 53 3.75902867E-01 5 2 49 9.75668911E-01 4 2 46 9.75696053E-01 4 2 47 1.33961694E+00 5 2 53 3.77521510E-01 4 2 50 3.77545011E-01 5 2 47 1.34156441E+00 4 2 51 1.34158499E+00 5 2 46 9.79320305E-01 5 2 50 3.80739288E-01 5 2 51 1.34332874E+00 1 2 3.81025034E-01 2 2 3.81122376E-01 1 1 3.81262784E-01 3 1 54 3 1 55 4 1 54 9.89392562E-01 4 1 55 1.35278242E+00 3 1 56 3 1 57 3 1 58 3 1 59 3 1 60 3 1 61 3 1 62 3 1 63 5 1 55 1.35528158E+00 4 1 59 1.35530328E+00 5 1 54 9.94396054E-01 4 1 56 9.94416521E-01 4 1 57 3.97270521E-01 5 1 56 9.97293213E-01 4 1 60 9.97311695E-01 5 1 57 3.99894226E-01 4 1 61 3.99912462E-01 5 1 60 9.99496913E-01 4 1 58 9.99524412E-01 5 1 61 4.02243362E-01 4 1 62 4.02266240E-01 5 1 58 1.00261175E+00 4 1 63 1.00264229E+00 5 1 62 4.04637106E-01 5 1 63 1.00536234E+00 5 1 59 1.35790442E+00 1 1 5.08357426E-01 2 1 7.37563596E-01 1.48648271E+00 3 5 66 3 5 67 4 5 66 1.66214227E+00 3 5 68 4 5 67 1.63390799E+00 3 5 69 3 5 70 3 5 71 3 5 72 5 5 66 1.73158824E+00 4 5 69 1.73160637E+00 4 5 68 1.55793046E+00 5 5 69 1.80106356E+00 4 5 70 1.80108839E+00 5 5 68 1.62761204E+00 4 5 72 1.62762977E+00 5 5 67 1.70377368E+00 4 5 71 1.70379654E+00 5 5 70 1.87049769E+00 5 5 72 1.69720402E+00 5 5 71 1.77336351E+00 1 5 1.88548776E+00 3 5 73 3 5 74 4 5 73 1.70829946E+00 4 5 74 1.77391699E+00 3 5 75 3 5 76 3 5 77 3 5 78 3 5 79 5 5 73 1.77742797E+00 4 5 76 1.77744738E+00 5 5 74 1.84313131E+00 4 5 78 1.84315176E+00 4 5 77 1.95654494E+00 5 5 76 1.84658490E+00 4 5 75 1.84661834E+00 5 5 78 1.91228669E+00 4 5 79 1.91231127E+00 5 5 77 2.02567919E+00 5 5 79 1.98144059E+00 5 5 75 1.91602612E+00 1 5 1.91606662E+00 2 5 1.91610025E+00 1 6 1.98211726E+00 3 6 80 3 6 81 4 6 80 1.40706601E+00 3 6 82 4 6 81 1.91681647E+00 3 6 83 3 6 84 3 6 85 3 6 86 5 6 81 1.98622726E+00 4 6 84 1.98624560E+00 5 6 80 1.47673709E+00 4 6 83 1.47675499E+00 4 6 82 2.05321993E+00 4 6 85 2.05956687E+00 5 6 83 1.54634061E+00 4 6 86 1.54637358E+00 5 6 84 2.05567383E+00 5 6 85 2.12942438E+00 5 6 86 1.61578905E+00 5 6 82 2.33497610E+00 1 6 2.33499920E+00 3 6 87 3 6 88 4 6 87 2.45675773E+00 3 6 89 3 6 90 3 6 91 3 6 92 3 6 93 4 6 90 2.40626592E+00 5 6 90 2.47542624E+00 4 6 91 2.47544829E+00 5 6 91 2.54473879E+00 4 6 88 2.54476560E+00 5 6 87 2.52620597E+00 4 6 89 2.52622394E+00 5 6 88 2.61390641E+00 4 6 92 2.61393450E+00 5 6 89 2.59536300E+00 4 6 93 2.59538081E+00 5 6 92 2.68306715E+00 5 6 93 2.66478254E+00 1 6 2.40346782E+00 2 6 2.40354884E+00 1 7 2.66504306E+00 3 7 94 4 7 94 2.40386745E+00 3 7 95 4 7 95 1.92008626E+00 3 7 96 3 7 97 3 7 98 3 7 99 3 7 100 4 7 97 2.69661572E+00 5 7 95 1.98953522E+00 4 7 99 1.98955297E+00 4 7 98 2.73627094E+00 5 7 94 2.47355797E+00 4 7 96 2.47357576E+00 5 7 99 2.05915717E+00 4 7 100 2.05918202E+00 5 7 98 2.80600710E+00 5 7 100 2.12860386E+00 5 7 96 2.54306050E+00 5 7 97 2.76653818E+00 1 7 2.92187671E+00 3 7 101 4 7 101 2.22466695E+00 3 7 102 4 7 102 2.63598897E+00 3 7 103 3 7 104 4 7 103 2.76768509E+00 3 7 105 3 7 106 3 7 107 5 7 101 2.29377787E+00 4 7 107 2.29379561E+00 5 7 102 2.70518610E+00 4 7 104 2.70520385E+00 4 7 105 2.99330739E+00 5 7 107 2.36290933E+00 4 7 106 2.36294059E+00 5 7 105 3.06254707E+00 5 7 104 2.77476026E+00 5 7 103 2.83737489E+00 5 7 106 2.43217227E+00 1 7 2.43219624E+00 2 7 2.43222503E+00 1 8 2.43225117E+00 3 8 108 3 8 109 3 8 110 3 8 111 3 8 112 3 8 113 3 8 114 4 8 109 2.77698678E+00 4 8 113 2.83743370E+00 4 8 111 2.50339902E+00 5 8 109 2.84672015E+00 4 8 108 2.84673916E+00 5 8 111 2.57295673E+00 4 8 112 2.57297889E+00 5 8 108 2.91620933E+00 4 8 110 2.91623570E+00 5 8 112 2.64255199E+00 4 8 114 2.64258243E+00 5 8 110 2.98568596E+00 5 8 114 2.71212636E+00 5 8 113 2.90706973E+00 1 8 3.13497063E+00 3 8 115 3 8 116 4 8 115 2.90751642E+00 4 8 116 3.14062811E+00 3 8 117 3 8 118 3 8 119 3 8 120 3 8 121 5 8 116 3.20975437E+00 4 8 117 3.20977248E+00 5 8 115 2.97677038E+00 4 8 118 2.97678910E+00 4 8 120 3.20612495E+00 5 8 117 3.27890335E+00 4 8 121 3.27892140E+00 5 8 118 3.04599564E+00 4 8 119 3.04601774E+00 5 8 120 3.27523734E+00 5 8 121 3.34807389E+00 5 8 119 3.11523569E+00 1 8 3.11531652E+00 2 8 3.11534960E+00 End of trace.graph echo wait.f 1>&2 cat >wait.f <<'End of wait.f' logical function wait(jobtag,ienter) c integer jobtag,ienter c***************************************************************************** c c this routine will allow process jobtag to continue after c spawned processes have all checked in. it should only be called if c processes have been spawned by jobtag through the use of c the subroutine spawn. this routine must be used in conjunction with c subroutine prtspn. the required syntax is c c go to (1000,...,L000,...,N000), ientry(mytag,N) c 1000 continue c . c . c . c do 100 j = 1,nproc c . c . (set parameters to define spawned process) c . c call nxtag(mytag,jobtag) c call spawn(mytag,jobtag,subname,) c 100 continue c label = L c if (wait(jobtag,label)) return c L000 continue c . c . c . c c if this subroutine returns a value of .true. then the calling process c jobtag should issue a return. if a value of .false. is returned then c the calling process jobtag should resume execution at the c statement immediately following the reference to wait (ie. at L000 in c the example above. a return value .true. indicates that some spawned c process has not yet completed and checked in. a return value .false. c indicates all spawned processes have checked in. c c***************************************************************************** parameter (mxprcs = 1000,iprcs = 120,mxces = 4,nslots = 30) parameter (nbuffr = 500) integer parmq,readyq,qlock,hrlock,trlock,phead,intspn,rhead,rtail, & done,qtail,qtlock common /qdata/ parmq(nslots,mxprcs),phead,intspn, & readyq(iprcs,mxces),rhead(mxces),rtail(mxces), & qtail common /qsync/ qlock(mxprcs),hrlock(mxces),trlock(mxces), & done,qtlock c c c check the icango counter to see if all spawned processes (kids) c have checked in. c icango = 1 call lockon(qlock(jobtag)) icango = parmq(2,jobtag) - parmq(4,jobtag) call lockoff(qlock(jobtag)) c if (icango .eq. 0) then c c all kids are done ... dont wait (ie return false) c wait = .false. c c record re_entry label where computation is to c resume after wait is complete c parmq(1,jobtag) = ienter c if (ienter .gt. parmq(5,jobtag)) then write(6,*) '*************SCHED USER ERROR*****************' write(6,*) ' executing SCHEDULE function WAIT ' write(6,*) ' return label larger than the maximum ' write(6,*) ' specified by user in call to ientry ' write(6,*) ' from process jobtag ',jobtag write(6,*) ' ' write(6,*) ' the maximum reentry number is ' write(6,*) ' ', parmq(5,jobtag) write(6,*) ' ' write(6,*) ' EXECUTION TERMINATED BY SCHED ' cgraph call dump(endgrf,igraph) stop endif c c set last re_entry indication (parmq(5,jobtag) = 0) c if this reentry point corresponds to last one c (recorded in parmq(5,jobtag) during call to ientry) c if (ienter .eq. parmq(5,jobtag)) parmq(5,jobtag) = 0 c else c c kids are not done c wait = .true. c c a checkin will be made so set the number of c entries to return label ienter - 1 to get c correct entry point after checkin c parmq(1,jobtag) = ienter - 1 c endif c return c c last card of wait c end End of wait.f .