%!
%%BoundingBox: 0.0 0.0 612.0 792.0
%%DocumentFonts: (atend)
%%Creator: dongarra@thud with Eroff/eps release 3.0.0A
%%CreationDate: Mon Dec 9 13:51:16 1991
%%Pages: (atend)
%%EndComments

% ----------------------------------------------------------------------
% Prolog.
%
% Layout rule for this file: every DSC comment and every definition
% starts on its own line.  A "%" comment extends to end-of-line, so code
% placed after a comment on the same physical line is never executed.
% ----------------------------------------------------------------------

% --- Job-level state --------------------------------------------------

% VM snapshot taken before anything else is defined.
/DocState23724 save def
/pid 23724 def

% One user unit becomes 0.24 pt, i.e. 300 units per inch (300 * 0.24 = 72).
0.2400 dup scale

% inches  inch  units      -- convert inches to the 300-per-inch units.
/inch { 300.0000 mul } bind def

% Y coordinate treated as the top of the page by `home` and `Y`.
/pgtop 10.875 inch def

% usertime / page counter sampled at job start.
% NOTE(review): `pagecount` is looked up in statusdict; it is a
% device-dependent entry -- confirm it exists on the target interpreter.
/stm usertime def
/pgc statusdict begin pagecount end def

% string  ps  -            -- write a string to standard output and flush.
/ps { print flush } bind def

1.0 setlinewidth

% save  page  save'        -- emit the page, restore the save object that
%                             is on top of the stack, push a new one, and
%                             return to the home position.
/page { showpage restore save home } bind def

% -  home  -               -- start a fresh path at (0, pgtop).
/home { newpath 0 pgtop moveto } bind def

% Switch the statusdict /manualfeed entry on / off.
/mf { statusdict /manualfeed true put } bind def
/af { statusdict /manualfeed false put } bind def
af

% --- Positioning ------------------------------------------------------

% x y  xform  x' y'        -- round a point to whole device coordinates
%                             and map it back to user space.
/xform { transform round exch round exch itransform } bind def

% Establish a current point; X, Y and S all call currentpoint.
0 0 xform moveto
0 setlinecap

% dy  y  -                 -- relative move by (0, -dy).
/y { neg 0 exch xform rmoveto } bind def

% x  X  -                  -- move to absolute x, keeping the current y.
/X { currentpoint exch pop xform moveto } bind def

% d  Y  -                  -- move to y = pgtop - d, keeping the current x.
/Y { pgtop exch sub currentpoint pop exch xform moveto } bind def

% string  s  -             -- show at the current point.
/s { show } bind def

% x string  S  -           -- move to absolute x on the current y, then show.
/S { exch currentpoint exch pop xform moveto show } bind def

% --- Lines, boxes, polygons -------------------------------------------

% dx dy  l  -              -- stroke a line to (dx, -dy) relative to the
%                             current point and leave the point at its end.
/l { neg rlineto currentpoint stroke moveto } bind def

% dx dy  bnf  -            -- box, no fill: stroke a dx-by-dy rectangle
%                             with one corner at the current point, then
%                             advance the current point by dx.
/bnf {
  /dy exch def /dx exch def
  currentpoint /cury exch def /curx exch def
  newpath curx cury moveto
  0 dy rlineto dx 0 rlineto 0 dy neg rlineto closepath
  stroke
  curx dx add cury moveto
} bind def

% dx dy  b  -              -- same rectangle, filled with the current
%                             colour and outlined in black.
/b {
  /dy exch def /dx exch def
  currentpoint /cury exch def /curx exch def
  newpath curx cury moveto
  0 dy rlineto dx 0 rlineto 0 dy neg rlineto closepath
  gsave fill grestore
  gsave 0 setgray stroke grestore
  newpath curx dx add cury moveto
} bind def

% dx dy  B  -              -- same rectangle, filled only.
% The original pushed a literal 0 after `fill` that nothing consumed, so
% each call leaked one operand; bnf and b leave the stack clean, and B
% now does too.
/B {
  /dy exch def /dx exch def
  currentpoint /cury exch def /curx exch def
  newpath curx cury moveto
  0 dy rlineto dx 0 rlineto 0 dy neg rlineto closepath
  fill
  curx dx add cury moveto
} bind def

% Polygon helpers: start a path at the current point, append relative
% segments, close, fill with a given gray and/or stroke.  polyfill and
% polydraw keep the current point across the stroke.
/polystart { currentpoint newpath moveto } bind def
/polynext { rlineto } bind def
/polyclose { closepath } bind def
% gray  polyfill  -
/polyfill { gsave closepath setgray fill grestore currentpoint stroke moveto } bind def
/polydraw { currentpoint stroke moveto } bind def

% --- Circles, arcs, ellipses, splines ---------------------------------

% diam  c  -               -- stroke a circle centred diam/2 to the right
%                             of the current point; advance x by diam.
/c {
  2 div /rad exch def
  currentpoint /y0 exch def /x0 exch def
  newpath x0 rad add y0 xform rad 0 360 arc closepath
  stroke
  x0 rad add rad add y0 xform moveto
} bind def

% gray diam  cf  -         -- as c, but fill with the given gray first.
/cf {
  2 div /rad exch def
  currentpoint /y0 exch def /x0 exch def
  newpath x0 rad add y0 xform rad 0 360 arc closepath
  gsave setgray fill grestore
  stroke
  x0 rad add rad add y0 xform moveto
} bind def

% x1 y1 x2 y2  a  -        -- arc whose centre is (x1, -y1) from the
%                             current point and whose end is (x2, -y2)
%                             from the centre; the radius is the length
%                             of the end offset.  Leaves the current
%                             point at the arc's end.
/a {
  /y2 exch neg def /x2 exch def
  /y1 exch neg def /x1 exch def
  x1 y1 xform rmoveto
  currentpoint currentpoint            % centre, twice: one for moveto, one for arc
  x2 x2 mul y2 y2 mul add sqrt         % radius
  y1 neg x1 neg atan                   % start angle (centre -> start point)
  y2 x2 atan                           % end angle   (centre -> end point)
  newpath arc stroke
  moveto x2 y2 rmoveto
} bind def

% xdiam ydiam  e  -        -- stroke an ellipse by scaling a unit circle;
%                             the line width is divided by xrad so the
%                             scale does not thicken it, then restored.
/e {
  currentlinewidth /elw exch def
  gsave
  2 div /yrad exch def 2 div /xrad exch def
  currentpoint /y0 exch def /x0 exch def
  x0 xrad add y0 translate
  xrad yrad scale
  newpath 0 0 xform 1 0 360 arc closepath
  elw xrad div setlinewidth
  stroke
  grestore
  elw setlinewidth
  x0 xrad add xrad add y0 xform moveto
} bind def

% gray xdiam ydiam  ef  -  -- as e, but fill with the given gray first.
/ef {
  currentlinewidth /elw exch def
  gsave
  2 div /yrad exch def 2 div /xrad exch def
  currentpoint /y0 exch def /x0 exch def
  x0 xrad add y0 translate
  xrad yrad scale
  newpath 0 0 xform 1 0 360 arc closepath
  elw xrad div setlinewidth
  gsave setgray fill grestore
  stroke
  grestore
  elw setlinewidth
  x0 xrad add xrad add y0 xform moveto
} bind def

% dx1 dy1 dx2 dy2 dx3 dy3  spln  -   -- stroke one relative Bezier
%                                       segment, keeping its end point.
/spln { rcurveto currentpoint stroke moveto } bind def

% --- Fonts and images -------------------------------------------------

% slant ysize xsize font  ft  -
% Select `font` scaled to xsize by ysize points and sheared by
% tan(slant) (slant in degrees).  `pt` is resolved when ft runs, so
% defining it further down is fine.
/ft {
  /fonttype exch def /xsiz exch def /ysiz exch def /sl exch def
  fonttype
  [ xsiz pt 0 sl sin sl cos div ysiz pt mul ysiz pt 0 0 ] makefont
  setfont
} bind def

% Paint an image whose samples follow in the file as hex text.
% NOTE(review): rasterString is not defined in the visible part of this
% file -- the caller presumably defines it before invoking doImage.
/doImage {{currentfile rasterString readhexstring pop} image} bind def

% points  pt  units        -- 4.166667 = 300 / 72.
/pt { 4.166667 mul } bind def

% Shift the origin 0.375 inch right and 0.25 inch up, and remember the CTM.
0.375 0.000000 add inch 0.25 inch translate
/savematrix matrix currentmatrix def

% --- Re-encoded text fonts --------------------------------------------

/roman-8-dict 20 dict def % Local storage

% Flat array of  code /glyphname  pairs; AddRoman-8 writes each pair
% into the Encoding of the font copy it builds.
/roman-8-mappings [
  8#260 /Adieresis    8#265 /Aring        8#276 /Aacute       8#300 /Agrave
  8#311 /Acircumflex  8#314 /Atilde       8#321 /Ccedilla     8#322 /Edieresis
  8#323 /Eacute       8#324 /Egrave       8#325 /Ecircumflex  8#326 /Idieresis
  8#327 /Iacute       8#330 /Igrave       8#331 /Icircumflex  8#332 /Ntilde
  8#333 /Odieresis    8#334 /Oacute       8#335 /Ograve       8#336 /Ocircumflex
  8#337 /Otilde       8#340 /Scaron       8#342 /Udieresis    8#344 /Uacute
  8#345 /Ugrave       8#346 /Ydieresis    8#347 /adieresis    8#354 /aring
  8#355 /aacute       8#356 /agrave       8#357 /acircumflex  8#360 /atilde
  8#362 /ccedilla     8#363 /edieresis    8#364 /eacute       8#366 /egrave
  8#367 /ecircumflex  8#374 /idieresis    8#375 /iacute       8#376 /igrave
  8#220 /icircumflex  8#221 /ntilde       8#222 /odieresis    8#223 /oacute
  8#224 /ograve       8#225 /ocircumflex  8#226 /otilde       8#227 /scaron
  8#230 /udieresis    8#231 /uacute       8#232 /ugrave       8#233 /ucircumflex
  8#234 /ydieresis    8#235 /Ucircumflex
] def

% oldName newName mappings  AddRoman-8  -
% Copy font `oldName` (all entries except FID, with a private copy of
% its Encoding array), apply the code/name pairs in `mappings` to that
% Encoding, and register the result with definefont as `newName`.
% Working names live in roman-8-dict.
/AddRoman-8 {
  roman-8-dict begin
    /roman-8-mappings exch def
    /newName exch def
    /oldName exch def
    /oldDict oldName findfont def
    /newDict oldDict maxlength dict def
    oldDict {
      exch dup /FID ne
        { dup /Encoding eq
            { exch dup length array copy newDict 3 1 roll put }   % own copy of Encoding
            { exch newDict 3 1 roll put }
          ifelse }
        { pop pop }                                                % FID is not copied
      ifelse
    } forall
    newDict /FontName newName put
    % Walk the pair array two elements at a time: Encoding[code] = name.
    0 2 roman-8-mappings length 1 sub {
      dup roman-8-mappings exch get
      exch 1 add roman-8-mappings exch get
      newDict /Encoding get 3 1 roll put
    } for
    newName newDict definefont pop
  end
} bind def

% --- BracketFont: Type 3 font of brackets, bars and rules -------------

/BracketFontDict 9 dict def
/$workingdict 10 dict def

BracketFontDict begin
  /FontType 3 def
  /FontName (Bracket) cvn def
  /FontMatrix [ 0.001 0 0 0.001 0 0] def
  /FontBBox [ -50 -250 1000 1000 ] def

  % Everything is .notdef except codes 65-82 (ASCII A-R).
  /Encoding 256 array def
  0 1 255 { Encoding exch /.notdef put } for
  Encoding
    dup 65 /Cbv put  dup 66 /Clt put  dup 67 /Clk put  dup 68 /Clb put
    dup 69 /Crt put  dup 70 /Crk put  dup 71 /Crb put  dup 72 /Clc put
    dup 73 /Clf put  dup 74 /Crc put  dup 75 /Crf put  dup 76 /Cbr put
    dup 77 /Crn put  dup 78 /Cci put  dup 79 /C|| put  dup 80 /C^^ put
    dup 81 /Cr1 put  dup 82 /Cr2 put
  pop

  /CharProcs 24 dict dup begin

    % --- shared helpers ---
    % wx  setC  -          -- setcachedevice with wy 0 and bbox -50 -250 500 1000.
    /setC { 0 -50 -250 500 1000 setcachedevice} bind def
    % Filled vertical bar, x 220..280, y -250..750.
    /C.bv {220 -250 moveto 0 1000 rlineto 60 0 rlineto 0 -1000 rlineto fill } bind def
    % x  C.cbar  -         -- filled 180 x 60 bar hanging down from y = 750.
    /C.cbar { 750 moveto 180 0 rlineto 0 -60 rlineto -180 0 rlineto fill } bind def
    % x  C.fbar  -         -- filled 180 x 60 bar rising from y = -250.
    /C.fbar { -250 moveto 180 0 rlineto 0 60 rlineto -180 0 rlineto fill } bind def
    % Consumes (top of stack first): moveto point, rlineto, rcurveto for
    % one edge; the path is reversed, stepped 60 units in x, and a second
    % rlineto/rcurveto pair forms the other edge before filling.
    /C.brk.end { 1 setlinewidth moveto rlineto rcurveto reversepath 60 0 rlineto rlineto rcurveto fill } bind def
    % w  C.setl  -         -- set line width w, with its device-space x
    %                         extent rounded to a whole number.
    /C.setl {dup dtransform exch round exch idtransform pop setlinewidth } bind def

    % --- glyphs (each starts by declaring its width and bbox) ---
    /Cbv { 208 setC C.bv } bind def
    /Clt { 208 setC 0 150 50 210 140 250 0 730 0 150 50 250 200 250 0 750 220 -250 C.brk.end } bind def
    /Clk { 208 setC 1 setlinewidth 220 -250 moveto 0 400 rlineto 0 50 -50 100 -100 100 rcurveto 50 0 100 50 100 100 rcurveto 0 400 rlineto 60 0 rlineto 0 -400 rlineto 0 -50 -50 -100 -100 -100 rcurveto 50 0 100 -50 100 -100 rcurveto 0 -400 rlineto closepath fill } bind def
    /Clb { 208 setC 0 -150 50 -210 140 -250 0 -730 0 -150 50 -250 200 -250 0 -750 220 750 C.brk.end } bind def
    /Crt { 208 setC 0 150 -50 250 -200 250 0 750 0 150 -50 210 -140 250 0 730 220 -250 C.brk.end } bind def
    /Crk { 208 setC 1 setlinewidth 220 -250 moveto 0 400 rlineto 0 50 50 100 100 100 rcurveto -50 0 -100 50 -100 100 rcurveto 0 400 rlineto 60 0 rlineto 0 -400 rlineto 0 -50 50 -100 100 -100 rcurveto -50 0 -100 -50 -100 -100 rcurveto 0 -400 rlineto fill } bind def
    /Crb { 208 setC 0 -150 -50 -250 -200 -250 0 -750 0 -150 -50 -210 -140 -250 0 -730 220 750 C.brk.end } bind def
    % Vertical bar plus a top (cbar) or bottom (fbar) bar at x = 280 or 40.
    /Clc { 208 setC C.bv 280 C.cbar } bind def
    /Clf { 208 setC C.bv 280 C.fbar } bind def
    /Crc { 208 setC C.bv 40 C.cbar } bind def
    /Crf { 208 setC C.bv 40 C.fbar } bind def
    % Zero-width stroked vertical rule, horizontal rule at y = 770, circle.
    /Cbr { 0 0 -50 -250 0 1000 setcachedevice 40 C.setl 0 -250 moveto 0 1000 rlineto stroke } bind def
    /Crn { 208 setC 40 C.setl 0 770 moveto 500 0 rlineto stroke } bind def
    /Cci { 937 0 -50 -250 937 1000 setcachedevice 40 C.setl 500 250 400 0 360 arc stroke } bind def
    % Width-only glyphs: they set an advance (70 / 33) and draw nothing.
    /C|| { 70 0 -50 -250 70 1000 setcachedevice } bind def
    /C^^ { 33 0 -50 -250 33 1000 setcachedevice } bind def
    /Cr1 { 333 0 -50 -250 333 1000 setcachedevice 40 C.setl 0 setlinejoin 700 180 moveto -650 currentlinewidth add 0 rlineto 200 -200 rlineto 50 360 moveto 650 currentlinewidth sub 0 rlineto -200 200 rlineto stroke } bind def
    /Cr2 { 333 0 -50 -250 333 1000 setcachedevice 40 C.setl 2 setlinejoin 217 18 moveto -150 150 rlineto 150 150 rlineto -150 -150 rlineto 633 0 rlineto 50 360 moveto 633 0 rlineto -150 150 rlineto 150 -150 rlineto -150 -150 rlineto stroke } bind def

  end def    % closes the CharProcs scope, then stores the dict as /CharProcs

  % fontdict charcode  BuildChar  -
  % Look the code up in Encoding, load the matching procedure from
  % CharProcs and run it inside gsave/grestore with butt caps, black
  % ink and an empty path.
  /BuildChar {
    $workingdict begin
      /charcode exch def
      /fontdict exch def
      fontdict /CharProcs get begin
        fontdict /Encoding get charcode get load
        gsave 0 setlinecap 0 setgray newpath exec grestore
      end
    end
  } bind def
end

/BracketFont BracketFontDict definefont pop
%%EndProlog
%%Page: label 1
%%PageFonts: (atend)
/PageState23724 save def
home
0 Y 300 X 375 Y 550 X
%%IncludeFont: Times-Bold
/Times-Bold /Times-Bold-8 roman-8-mappings AddRoman-8
/f.B /Times-Bold-8 findfont def
0.0 14 14 f.B ft(Evaluating)s 837(Block)S 995(Algorithm)S 1271(V)S 1308(a)S 1337(r)S 1364(i)S 1381(a)S 1410(n)S 1442(t)S 1462(s)S 1498(in)S 1561(L)S 1600(A)S 1642(P)S 1675(A)S 1717(C)S 1759(K)S 1818(*)S
300 X 425 Y 614 X
%%IncludeFont: Times-Roman
/Times-Roman /Times-Roman-8 roman-8-mappings AddRoman-8
/f.R /Times-Roman-8 findfont def
0.0 11 11 f.R ft(*)s 648(This)S 741(work)S 846(was)S 929(supported)S 1122(by)S 1179(the)S 1247(National)S 1418(Science)S 1574(F)S 1598(o)S 1621(u)S 1644(n)S 1667(d)S 1690(a)S 1711(t)S 1724(i)S 1737(o)S 1760(n)S
300 X 475 Y 905 X(under)s 1021(Grant)S 1137(No.)S 1215(A)S 1248(S)S 1273(C)S 1304(-)S 1319(8)S 1342(7)S 1365(1)S 1388(5)S 1411(7)S 1434(2)S 1457(8)S 1480(.)S
300 X 525 Y 763 X(This)s 856(paper)S 970(was)S 1053(submitted)S 1246(to)S 1293(the)S 1361(proceedings)S 1596(of)S
300 X 575 Y 589 X(the)s 657(F)S 681(o)S 704(u)S 727(r)S 742(t)S 755(h)S 789(SIAM)S 914(Conference)S 1139(on)S 1196(P)S 1220(a)S 1241(r)S 1256(a)S 1277(l)S 1290(l)S 1303(e)S 1324(l)S 1348(P)S 1372(r)S 1387(o)S 1410(c)S 1431(e)S 1452(s)S 1470(s)S 1488(i)S 1501(n)S 1524(g)S 1558(for)S 1622(Scienti)S 1751 X(\256)s 1776(c)S
300 X 625 Y 698 X(Computing,)s 927(held)S 1018(in)S 1065(Chicago,)S 1242(Illinois,)S 1395(December)S 1596(1989.)S
300 X 733 Y 822 X 0.0 12 12 f.R ft(Edward)s 990(Anderson)S 1197(and)S 1282(Jack)S 1384(D)S 1420(o)S 1445(n)S 1470(g)S 1495(a)S 1518(r)S 1535(r)S 1552(a)S
300 X 791 Y 860 X(Department)s 1110(of)S 1164(Computer)S
1377(S)S 1405(c)S 1428(i)S 1442(e)S 1465(n)S 1490(c)S 1513(e)S 300 X 849 Y 956 X(University)s 1180(of)S 1234(T)S 1261(e)S 1284(n)S 1309(n)S 1334(e)S 1357(s)S 1376(s)S 1395(e)S 1418(e)S 300 X 907 Y 1045 X(107)s 1132(Ayres)S 1264(Hall)S 300 X 965 Y 978 X(Knoxville,)s 1203(TN)S 1293(37996)S 300 X 1081 Y 1052 X(April)s 1170(23,)S 1244(1990)S 300 X 1381 Y 0.0 11 11 f.B ft(Abstract.)s 0.0 11 11 f.R ft 506(The)S 591(L)S 619(A)S 652(P)S 676(A)S 709(C)S 740(K)S 786(software)S 958(project)S 1100(currently)S 1280(under)S 1399(development)S 1652(is)S 1697(intended)S 1871(to)S 1921(provide)S 2076(a)S 300 X 1431 Y(portable)s 467(linear)S 588(algebra)S 740(library)S 878(for)S 945(high)S 1041(performance)S 1288(computers.)S 1516(L)S 1544(A)S 1577(P)S 1601(A)S 1634(C)S 1665(K)S 1712(will)S 1798(make)S 1912(use)S 1988(of)S 2040(the)S 300 X 1481 Y(Level)s 419(1,)S 466(2,)S 513(and)S 593(3)S 629(BLAS)S 759(to)S 808(carry)S 916(out)S 988(basic)S 1097(operations.)S 1325(A)S 1371(principal)S 1549(focus)S 1663(of)S 1715(this)S 1796(project)S 1939(is)S 1984(to)S 2034(im)S 2082 X(-)s 300 X 1531 Y(plement)s 462(blocked)S 622(versions)S 789(of)S 840(a)S 874(number)S 1027(of)S 1078(algorithms)S 1288(to)S 1337(take)S 1428(advantage)S 1630(of)S 1681(the)S 1750(greater)S 1891(p)S 1914(a)S 1935(r)S 1950(a)S 1971(l)S 1984(l)S 1997(e)S 2018(l)S 2031(i)S 2044(s)S 2062(m)S 300 X 1581 Y(and)s 380(improved)S 569(data)S 660(locality)S 813(of)S 864(the)S 934(Level)S 1054(3)S 1091(BLAS.)S 1233(In)S 1285(this)S 1366(paper,)S 1494(we)S 1562(describe)S 1731(our)S 1806(work)S 1914(with)S 2010(vari)S 2082 X(-)s 300 X 1631 Y(ants)s 386(of)S 435(some)S 543(of)S 592(these)S 699(algorithms)S 907(and)S 985(the)S 1053(performance)S 1297(data)S 1386(we)S 1451(have)S 1550(c)S 1571(o)S 1594(l)S 1607(l)S 1620(e)S 1641(c)S 1662(t)S 1675(e)S 1696(d)S 1719(.)S 300 X 1781 Y(L)s 328(A)S 361(P)S 385(A)S 418(C)S 449(K)S 494(is)S 538(planned)S 698(to)S 747(be)S 804(a)S 838(collection)S 1035(of)S 1086(F)S 1110(o)S 1133(r)S 
1148(t)S 1161(r)S 1176(a)S 1197(n)S 1233(77)S 1292(subroutines)S 1518(for)S 1584(the)S 1654(analysis)S 1817(and)S 1897(solution)S 2059(of)S 300 X 1831 Y(systems)s 465(of)S 522(simultaneous)S 785(linear)S 910(algebraic)S 1100(equations,)S 1308(linear)S 1433(l)S 1446(e)S 1467(a)S 1488(s)S 1506(t)S 1519(-)S 1534(s)S 1552(q)S 1575(u)S 1598(a)S 1619(r)S 1634(e)S 1655(s)S 1692(problems,)S 1892(and)S 1977(m)S 2012(a)S 2033(t)S 2046(r)S 2061(i)S 2074(x)S 300 X 1881 Y(eigenvalue)s 530(problems)S 729([1].)S 832(This)S 942(project)S 1099(will)S 1199(combine)S 1386(the)S 1471(f)S 1486(u)S 1509(n)S 1532(c)S 1553(t)S 1566(i)S 1579(o)S 1602(n)S 1625(a)S 1646(l)S 1659(i)S 1672(t)S 1685(y)S 1737(of)S 1804(L)S 1832(I)S 1847(N)S 1880(P)S 1904(A)S 1937(C)S 1968(K)S 2030(and)S 300 X 1931 Y(E)s 328(I)S 343(S)S 368(P)S 392(A)S 425(C)S 456(K)S 502(in)S 551(a)S 585(single)S 709(package,)S 886(incorporate)S 1110(recent)S 1237(algorithmic)S 1463(i)S 1476(m)S 1511(p)S 1534(r)S 1549(o)S 1572(v)S 1595(e)S 1616(m)S 1651(e)S 1672(n)S 1695(t)S 1708(s)S 1726(,)S 1750(and)S 1830(restructure)S 2040(the)S 300 X 1981 Y(algorithms)s 509(to)S 557(use)S 631(the)S 700(Level)S 818(2)S 853(and)S 932(3)S 967(BLAS)S 1096(\(Basic)S 1227(Linear)S 1360(Algebra)S 1521(Subprograms\))S 1792(for)S 1857(e)S 1878 X /CFi{ (f) show xsiz pt 20 div neg 0 rmoveto (\256) s }bind def CFi 1917(ciency)S 2051(on)S 300 X 2031 Y(t)s 313(o)S 336(d)S 359(a)S 380(y)S 401(')S 415(s)S 447(h)S 470(i)S 483(g)S 506(h)S 529(-)S 544(p)S 567(e)S 588(r)S 603(f)S 618(o)S 641(r)S 656(m)S 691(a)S 712(n)S 735(c)S 756(e)S 791(computers.)S 1019(W)S 1060(e)S 1095(are)S 1166(i)S 1179(n)S 1202(v)S 1225(e)S 1246(s)S 1264(t)S 1277(i)S 1290(g)S 1313(a)S 1334(t)S 1347(i)S 1360(n)S 1383(g)S 1420(variant)S 1563(versions)S 1731(of)S 1783(many)S 1899(of)S 1951(the)S 2021(rou)S 2082 X(-)s 300 X 2081 Y(tines)s 401(in)S 451(L)S 479(A)S 512(P)S 536(A)S 569(C)S 600(K)S 633(.)S 658(The)S 744(building)S 912(blocks)S 1047(of)S 1099(the)S 1170(L)S 1198(A)S 1231(P)S 
1255(A)S 1288(C)S 1319(K)S 1366(library)S 1503(are)S 1574(the)S 1645(BLAS,)S 1787(a)S 1822(set)S 1888(of)S 1940(s)S 1958(t)S 1971(a)S 1992(n)S 2015(d)S 2038(a)S 2059(r)S 2074(d)S 300 X 2131 Y(subroutines)s 526(for)S 592(the)S 662(most)S 764(common)S 937(operations)S 1143(in)S 1192(linear)S 1311(algebra)S 1460([2,3,4].)S 1615(The)S 1699(original)S 1855(set)S 1919(of)S 1969(BLAS,)S 300 X 2181 Y(consisting)s 507(of)S 564(v)S 587(e)S 608(c)S 629(t)S 642(o)S 665(r)S 680(-)S 695(v)S 718(e)S 739(c)S 760(t)S 773(o)S 796(r)S 830(operations,)S 1053(was)S 1144(used)S 1248(in)S 1303(L)S 1331(I)S 1346(N)S 1379(P)S 1403(A)S 1436(C)S 1467(K)S 1500(.)S 1541(Recently,)S 1737(speci)S 1833 X(\256)s 1858(cations)S 2009(have)S 300 X 2231 Y(been)s 400(drawn)S 527(up)S 585(for)S 650(m)S 685(a)S 706(t)S 719(r)S 734(i)S 747(x)S 770(-)S 785(v)S 808(e)S 829(c)S 850(t)S 863(o)S 886(r)S 913(operations)S 1118(\(Level)S 1251(2)S 1285(BLAS\))S 1428(and)S 1506(m)S 1541(a)S 1562(t)S 1575(r)S 1590(i)S 1603(x)S 1626(-)S 1641(m)S 1676(a)S 1697(t)S 1710(r)S 1725(i)S 1738(x)S 1772(operations)S 1976(\()S 1991(L)S 2019(e)S 2040(v)S 2063(e)S 2084(l)S 300 X 2281 Y(3)s 341(BLAS\))S 491(to)S 545(meet)S 653(the)S 728(demands)S 910(of)S 967(m)S 1002(u)S 1025(l)S 1038(t)S 1051(i)S 1064(p)S 1087(r)S 1102(o)S 1125(c)S 1146(e)S 1167(s)S 1185(s)S 1203(i)S 1216(n)S 1239(g)S 1262(,)S 1292(v)S 1315(e)S 1336(c)S 1357(t)S 1370(o)S 1393(r)S 1408(i)S 1421(z)S 1442(a)S 1463(t)S 1476(i)S 1489(o)S 1512(n)S 1535(,)S 1565(and)S 1651(hierarchical)S 1890(memory)S 2061(in)S 300 X 2331 Y(t)s 313(o)S 336(d)S 359(a)S 380(y)S 401(')S 415(s)S 447(h)S 470(i)S 483(g)S 506(h)S 529(-)S 544(p)S 567(e)S 588(r)S 603(f)S 618(o)S 641(r)S 656(m)S 691(a)S 712(n)S 735(c)S 756(e)S 791(computers.)S 1008(In)S 1059(particular,)S 1261(the)S 1331(Level)S 1450(3)S 1486(BLAS)S 1616(perform)S %%IncludeFont: Times-Italic /Times-Italic /Times-Italic-8 roman-8-mappings AddRoman-8 /f.I /Times-Italic-8 findfont def 0.0 11 11 f.I ft 1776(O)S 0.0 11 11 f.R ft 
1816(\()S 0.0 11 11 f.I ft 1831(n)S 2304 Y 0.0 8 8 f.R ft 1859(3)S 2331 Y 0.0 11 11 f.R ft 1876(\))S 1904(o)S 1927(p)S 1950(e)S 1971(r)S 1986(a)S 2007(t)S 2020(i)S 2033(o)S 2056(n)S 2079(s)S 300 X 2381 Y(on)s 0.0 11 11 f.I ft 359(O)S 0.0 11 11 f.R ft 399(\()S 0.0 11 11 f.I ft 414(n)S 2354 Y 0.0 8 8 f.R ft 442(2)S 2381 Y 0.0 11 11 f.R ft 459(\))S 487(data)S 578(elements,)S 767(which)S 894(helps)S 1006(to)S 1056(improve)S 1223(the)S 1294(ratio)S 1393(of)S 1445(computation)S 1690(to)S 1740(memory)S 1906(r)S 1921(e)S 1942(f)S 1957(e)S 1978(r)S 1993(e)S 2014(n)S 2037(c)S 2058(e)S 2079(s)S 300 X 2431 Y(on)s 358(machines)S 545(that)S 627(have)S 727(a)S 760(memory)S 924(hierarchy.)S 1132(This)S 1225(paper)S 1339(describes)S 1523(some)S 1631(of)S 1680(the)S 1748(block)S 1862(f)S 1877(a)S 1898(c)S 1919(t)S 1932(o)S 1955(r)S 1970(i)S 1983(z)S 2004(a)S 2025(t)S 2038(i)S 2051(o)S 2074(n)S 300 X 2481 Y(routines)s 466(in)S 519(L)S 547(A)S 580(P)S 604(A)S 637(C)S 668(K)S 701(.)S 740(The)S 829(blocked)S 993(version)S 1146(calls)S 1249(the)S 1323(Level)S 1446(3)S 1487(BLAS)S 1622(and,)S 1718(if)S 1764(necessary,)S 1974(an)S 2036(un)S 2082 X(-)s 300 X 2531 Y(blocked)s 464(version)S 617(of)S 672(the)S 746(algorithm)S 942(to)S 995(do)S 1058(the)S 1132(processing)S 1347(within)S 1482(a)S 1520(block.)S 1662(The)S 1751(unblocked)S 1961(v)S 1984(e)S 2005(r)S 2020(s)S 2038(i)S 2051(o)S 2074(n)S 300 X 2581 Y(calls)s 397(only)S 490(Level)S 607(1)S 641(and)S 719(2)S 753(BLAS)S 881(routines)S 1041(and)S 1119(is)S 1162(called)S 1286(directly)S 1440(from)S 1540(the)S 1609(blocked)S 1768(routine)S 1911(if)S 1951(the)S 2020(user)S 300 X 2631 Y(has)s 377(set)S 444(the)S 516(blocksize)S 707(to)S 758(1.)S 818(The)S 0.0 11 11 f.I ft 905(LU)S 0.0 11 11 f.R ft 985(d)S 1008(e)S 1029(c)S 1050(o)S 1073(m)S 1108(p)S 1131(o)S 1154(s)S 1172(i)S 1185(t)S 1198(i)S 1211(o)S 1234(n)S 1272(is)S 1318(derived)S 1472(by)S 1533(equating)S 1708(the)S 1780(product)S 1936(of)S 1989(a)S 2025(unit)S 300 X 2681 Y(lower)s 
416(triangular)S 607(matrix)S 0.0 11 11 f.I ft 738(L)S 0.0 11 11 f.R ft 781(and)S 860(an)S 916(upper)S 1033(triangular)S 1225(matrix)S 0.0 11 11 f.I ft 1357(U)S 0.0 11 11 f.R ft 1409(to)S 1457(the)S 1526(original)S 1682(matrix)S 0.0 11 11 f.I ft 1814(A)S 0.0 11 11 f.R ft 1849(.)S 1883(As)S 1946(an)S 2002(illus)S 2082 X(-)s 300 X 2794 Y 696 X %%IncludeFont: BracketFont /f.S2 /BracketFont findfont def 0.0 11 11 f.S2 ft(H)s 2840 Y 696 X(A)s 2886 Y 696 X(A)s 2932 Y 696 X(A)s 2978 Y 696 X(I)s 2965 Y 0.0 11 11 f.I ft 731(A)S 2980 Y 0.0 8 8 f.R ft 764(31)S 2884 Y 731 X 0.0 11 11 f.I ft(A)s 2899 Y 0.0 8 8 f.R ft 764(21)S 2803 Y 731 X 0.0 11 11 f.I ft(A)s 2818 Y 0.0 8 8 f.R ft 764(11)S 2965 Y 0.0 11 11 f.I ft 819(A)S 2980 Y 0.0 8 8 f.R ft 852(32)S 2884 Y 819 X 0.0 11 11 f.I ft(A)s 2899 Y 0.0 8 8 f.R ft 852(22)S 2803 Y 819 X 0.0 11 11 f.I ft(A)s 2818 Y 0.0 8 8 f.R ft 852(12)S 2884 Y 0.0 11 11 f.I ft 886( )S 2965 Y 904(A)S 2980 Y 0.0 8 8 f.R ft 937(33)S 2884 Y 904 X 0.0 11 11 f.I ft(A)s 2899 Y 0.0 8 8 f.R ft 937(23)S 2803 Y 904 X 0.0 11 11 f.I ft(A)s 2818 Y 0.0 8 8 f.R ft 937(13)S 2794 Y 985 X 0.0 11 11 f.S2 ft(J)s 2840 Y 985 X(A)s 2886 Y 985 X(A)s 2932 Y 985 X(A)s 2978 Y 985 X(K)s 2884 Y 0.0 11 11 f.I ft 1008( )S 1019 X(=)s 1050( )S 2794 Y 1068 X 0.0 11 11 f.S2 ft(H)s 2840 Y 1068 X(A)s 2886 Y 1068 X(A)s 2932 Y 1068 X(A)s 2978 Y 1068 X(I)s 2965 Y 0.0 11 11 f.I ft 1103(L)S 2980 Y 0.0 8 8 f.R ft 1133(31)S 2884 Y 1103 X 0.0 11 11 f.I ft(L)s 2899 Y 0.0 8 8 f.R ft 1133(21)S 2803 Y 1103 X 0.0 11 11 f.I ft(L)s 2818 Y 0.0 8 8 f.R ft 1133(11)S 2965 Y 0.0 11 11 f.I ft 1188(L)S 2980 Y 0.0 8 8 f.R ft 1218(32)S 2884 Y 1188 X 0.0 11 11 f.I ft(L)s 2899 Y 0.0 8 8 f.R ft 1218(22)S 2965 Y 0.0 11 11 f.I ft 1273(L)S 2980 Y 0.0 8 8 f.R ft 1303(33)S 2794 Y 1351 X 0.0 11 11 f.S2 ft(J)s 2840 Y 1351 X(A)s 2886 Y 1351 X(A)s 2932 Y 1351 X(A)s 2978 Y 1351 X(K)s 2794 Y 1381 X(H)s 2840 Y 1381 X(A)s 2886 Y 1381 X(A)s 2932 Y 1381 X(A)s 2978 Y 1381 X(I)s 2950 Y 0.0 11 11 f.I ft 1446( )S 2884 Y 1446( )S 2803 
Y 1416 X(U)s 2818 Y 0.0 8 8 f.R ft 1454(11)S 2965 Y 0.0 11 11 f.I ft 1539( )S 2884 Y 1509 X(U)s 2899 Y 0.0 8 8 f.R ft 1547(22)S 2803 Y 1509 X 0.0 11 11 f.I ft(U)s 2818 Y 0.0 8 8 f.R ft 1547(12)S 2965 Y 0.0 11 11 f.I ft 1602(U)S 2980 Y 0.0 8 8 f.R ft 1640(33)S 2884 Y 1602 X 0.0 11 11 f.I ft(U)s 2899 Y 0.0 8 8 f.R ft 1640(23)S 2803 Y 1602 X 0.0 11 11 f.I ft(U)s 2818 Y 0.0 8 8 f.R ft 1640(13)S 2794 Y 1688 X 0.0 11 11 f.S2 ft(J)s 2840 Y 1688 X(A)s 2886 Y 1688 X(A)s 2932 Y 1688 X(A)s 2978 Y 1688 X(K)s 3300 Y showpage PageState23724 restore %%PageFonts: Times-Bold Times-Roman Times-Italic BracketFont %%Page: label 2 %%PageFonts: (atend) /PageState23724 save def home %%IncludeFont: Times-Italic /Times-Italic /Times-Italic-8 roman-8-mappings AddRoman-8 /f.I /Times-Italic-8 findfont def 0.0 11 11 f.I ft 1.0000 setlinewidth 300 X 200 Y 1165 X %%IncludeFont: Times-Roman /Times-Roman /Times-Roman-8 roman-8-mappings AddRoman-8 /f.R /Times-Roman-8 findfont def 0.0 10 10 f.R ft(- 2 -)s 300 X 350 Y 0.0 11 11 f.R ft(In)s 352(the)S 423(left-looking)S 655(algorithm,)S 0.0 11 11 f.I ft 859(L)S 365 Y 0.0 8 8 f.R ft 889(11)S 350 Y 0.0 11 11 f.R ft 923(,)S 0.0 11 11 f.I ft 948(L)S 365 Y 0.0 8 8 f.R ft 978(21)S 350 Y 0.0 11 11 f.R ft 1012(,)S 1037(and)S 0.0 11 11 f.I ft 1118(L)S 365 Y 0.0 8 8 f.R ft 1148(31)S 350 Y 0.0 11 11 f.R ft 1196(are)S 1267(already)S 1418(known)S 1557(and)S 1638(we)S 1706(want)S 1811(to)S 1862(solve)S 1975(for)S 2043(the)S 300 X 403 Y(next)s 392(block)S 507(column)S 657(of)S 707(width)S 0.0 11 11 f.I ft 824(NB)S 0.0 11 11 f.R ft 902(in)S 0.0 11 11 f.I ft 950(L)S 0.0 11 11 f.R ft 994(and)S 0.0 11 11 f.I ft 1073(U)S 0.0 11 11 f.R ft 1113(.)S 1147(If)S 1189(we)S 1255(equate)S 1389(the)S 1458(second)S 1599(column)S 1749(of)S 1798(the)S 1866(product)S 2018(with)S 300 X 453 Y(the)s 368(second)S 508(column)S 657(of)S 0.0 11 11 f.I ft 706(A)S 0.0 11 11 f.R ft 741(,)S 763(we)S 828(obtain)S 955(the)S 1023(two)S 1103(matrix)S 1234(e)S 1255(q)S 1278(u)S 1301(a)S 1322(t)S 
1335(i)S 1348(o)S 1371(n)S 1394(s)S 300 X 582 Y 728 X 0.0 11 11 f.I ft(A)s 597 Y 0.0 8 8 f.R ft 761(12)S 582 Y 802 X 0.0 11 11 f.I ft(=)s 840(L)S 597 Y 0.0 8 8 f.R ft 870(11)S 582 Y 0.0 11 11 f.I ft 904(U)S 597 Y 0.0 8 8 f.R ft 942(12)S 582 Y 0.0 11 11 f.I ft 976( )S 0.0 11 11 f.R ft 987(;)S 0.0 11 11 f.I ft 1000( )S 530 Y 1040 X %%IncludeFont: BracketFont /f.S2 /BracketFont findfont def 0.0 11 11 f.S2 ft(H)s 576 Y 1040 X(A)s 622 Y 1040 X(I)s 615 Y 0.0 11 11 f.I ft 1063(A)S 630 Y 0.0 8 8 f.R ft 1096(32)S 534 Y 1063 X 0.0 11 11 f.I ft(A)s 549 Y 0.0 8 8 f.R ft 1096(22)S 530 Y 1137 X 0.0 11 11 f.S2 ft(J)s 576 Y 1137 X(A)s 622 Y 1137 X(K)s 582 Y 0.0 11 11 f.I ft 1160( )S 1171 X(=)s 1202( )S 530 Y 1220 X 0.0 11 11 f.S2 ft(H)s 576 Y 1220 X(A)s 622 Y 1220 X(I)s 615 Y 0.0 11 11 f.I ft 1243(L)S 630 Y 0.0 8 8 f.R ft 1273(31)S 534 Y 1243 X 0.0 11 11 f.I ft(L)s 549 Y 0.0 8 8 f.R ft 1273(21)S 530 Y 1314 X 0.0 11 11 f.S2 ft(J)s 576 Y 1314 X(A)s 622 Y 1314 X(K)s 582 Y 0.0 11 11 f.I ft 1337( U)S 597 Y 0.0 8 8 f.R ft 1386(12)S 582 Y 1427 X 0.0 11 11 f.I ft(+)s 530 Y 1472 X 0.0 11 11 f.S2 ft(H)s 576 Y 1472 X(A)s 622 Y 1472 X(I)s 615 Y 0.0 11 11 f.I ft 1495(L)S 630 Y 0.0 8 8 f.R ft 1525(32)S 534 Y 1495 X 0.0 11 11 f.I ft(L)s 549 Y 0.0 8 8 f.R ft 1525(22)S 530 Y 1566 X 0.0 11 11 f.S2 ft(J)s 576 Y 1566 X(A)s 622 Y 1566 X(K)s 582 Y 0.0 11 11 f.I ft 1589( U)S 597 Y 0.0 8 8 f.R ft 1638(22)S 300 X 711 Y 0.0 11 11 f.R ft(Solving)s 458(for)S 0.0 11 11 f.I ft 526(U)S 726 Y 0.0 8 8 f.R ft 564(12)S 711 Y 0.0 11 11 f.R ft 613(in)S 664(the)S 736 X(\256)s 761(rst)S 822(equation)S 997(requires)S 1161(a)S 1197(solve)S 1311(\(with)S 1424(multiple)S 1594(right)S 1697(hand)S 1803(sides\))S 1927(using)S 2043(the)S 300 X 764 Y(lower)s 417(triangular)S 609(matrix)S 0.0 11 11 f.I ft 740(L)S 779 Y 0.0 8 8 f.R ft 770(11)S 764 Y 0.0 11 11 f.R ft 804(.)S 837(A)S 881(m)S 916(a)S 937(t)S 950(r)S 965(i)S 978(x)S 1001(-)S 1016(m)S 1051(a)S 1072(t)S 1085(r)S 1100(i)S 1113(x)S 1147(multiply)S 1314(is)S 1356(then)S 
1447(used)S 1543(to)S 1590(compute)S 1760(the)S 1828(term)S 1923(i)S 1936(n)S 1959(v)S 1982(o)S 2005(l)S 2018(v)S 2041(i)S 2054(n)S 2077(g)S 300 X 817 Y 0.0 11 11 f.I ft(U)s 832 Y 0.0 8 8 f.R ft 338(12)S 817 Y 0.0 11 11 f.R ft 385(in)S 434(the)S 504(second)S 646(equation)S 820(and)S 901(subtract)S 1062(it)S 1102(from)S 1204(the)S 1275(left)S 1351(hand)S 1455(side.)S 1566(An)S 1636(unblocked)S 0.0 11 11 f.I ft 1843(LU)S 0.0 11 11 f.R ft 1922(factoriza)S 2085 X(-)s 300 X 870 Y(tion)s 386(is)S 431(then)S 525(applied)S 676(to)S 726(the)S 797(rectangular)S 1020(column)S 1172(of)S 1224(width)S 0.0 11 11 f.I ft 1343(NB)S 0.0 11 11 f.R ft 1423(to)S 1473(compute)S 0.0 11 11 f.I ft 1645(L)S 885 Y 0.0 8 8 f.R ft 1675(22)S 870 Y 0.0 11 11 f.R ft 1709(,)S 0.0 11 11 f.I ft 1733(L)S 885 Y 0.0 8 8 f.R ft 1763(32)S 870 Y 0.0 11 11 f.R ft 1797(,)S 1821(and)S 0.0 11 11 f.I ft 1901(U)S 885 Y 0.0 8 8 f.R ft 1939(22)S 870 Y 0.0 11 11 f.R ft 1973(,)S 1997(along)S 300 X 923 Y(with)s 394(the)S 463(pivot)S 570(indices.)S 736(Block)S 859(routines)S 1020(have)S 1120(been)S 1221(written)S 1365(for)S 1431(the)S 1501(dense)S 1620(and)S 1700(banded)S 1847(f)S 1862(a)S 1883(c)S 1904(t)S 1917(o)S 1940(r)S 1955(i)S 1968(z)S 1989(a)S 2010(t)S 2023(i)S 2036(o)S 2059(n)S 2082(s)S 300 X 973 Y(for)s 368(solving)S 519(linear)S 640(systems,)S 812(the)S 884(reductions)S 1092(using)S 1206(orthogonal)S 1420(t)S 1433(r)S 1448(a)S 1469(n)S 1492(s)S 1510(f)S 1525(o)S 1548(r)S 1563(m)S 1598(a)S 1619(t)S 1632(i)S 1645(o)S 1668(n)S 1691(s)S 1723(for)S 1790(eigenvalue)S 2006(com)S 2085 X(-)s 300 X 1023 Y(putations,)s 497(and)S 580(selected)S 747(other)S 858(operations.)S 1078(In)S 1133(this)S 1217(section,)S 1377(we)S 1448(provide)S 1606(details)S 1745(on)S 1808(the)S 1882(variants)S 2046(we)S 300 X 1073 Y(have)s 399(implemented)S 651(for)S 715(the)S 783(f)S 798(a)S 819(c)S 840(t)S 853(o)S 876(r)S 891(i)S 904(z)S 925(a)S 946(t)S 959(i)S 972(o)S 995(n)S 1029(of)S 1078(dense)S 1195(m)S 1230(a)S 1251(t)S 1264(r)S 
1279(i)S 1292(c)S 1313(e)S 1334(s)S 1352(.)S 300 X 1753 Y 569 X(Figure)s 700(1:)S 758(Memory)S 927(access)S 1058(patterns)S 1216(for)S 1280(variants)S 1438(of)S 1487(LU)S 1559(d)S 1582(e)S 1603(c)S 1624(o)S 1647(m)S 1682(p)S 1705(o)S 1728(s)S 1746(i)S 1759(t)S 1772(i)S 1785(o)S 1808(n)S 300 X 1803 Y(The)s 386(three)S 493(block)S 610(variants)S 771(we)S 839(have)S 941(implemented)S 1197(for)S 1265(the)S 0.0 11 11 f.I ft 1337(LU)S 0.0 11 11 f.R ft 1417(f)S 1432(a)S 1453(c)S 1474(t)S 1487(o)S 1510(r)S 1525(i)S 1538(z)S 1559(a)S 1580(t)S 1593(i)S 1606(o)S 1629(n)S 1667(of)S 1720(a)S 1756(general)S 1908(matrix)S 2043(are)S 300 X 1853 Y(shown)s 436(in)S 488(Figure)S 623(1.)S 683(The)S 770(shaded)S 914(parts)S 1019(indicate)S 1182(the)S 1254(matrix)S 1389(elements)S 1569(accessed)S 1748(in)S 1799(forming)S 1961(a)S 1997(block)S 300 X 1903 Y(row)s 384(or)S 435(column,)S 597(and)S 677(the)S 747(darker)S 878(shading)S 1036(indicates)S 1216(the)S 1287(block)S 1404(row)S 1489(or)S 1541(column)S 1693(being)S 1810(computed.)S 2028(The)S 300 X 1953 Y(left-looking)s 531(variant)S 673(\(described)S 879(in)S 928(Section)S 1080(2\))S 1131(computes)S 1321(a)S 1355(block)S 1470(column)S 1620(at)S 1666(a)S 1699(time)S 1793(using)S 1905(p)S 1928(r)S 1943(e)S 1964(v)S 1987(i)S 2000(o)S 2023(u)S 2046(s)S 2064(l)S 2077(y)S 300 X 2003 Y(computed)s 503(columns.)S 702(The)S 795(r)S 810(i)S 823(g)S 846(h)S 869(t)S 882(-)S 897(l)S 910(o)S 933(o)S 956(k)S 979(i)S 992(n)S 1015(g)S 1059(variant)S 1210(\(the)S 1304(familiar)S 1472(recursive)S 1664(algorithm\))S 1880(computes)S 2079(a)S 300 X 2053 Y(block)s 419(row)S 506(and)S 589(column)S 743(at)S 793(each)S 895(step)S 986(and)S 1069(uses)S 1165(them)S 1273(to)S 1325(update)S 1465(the)S 1538(trailing)S 1687(submatrix.)S 1908(The)S 1995(Crout)S 300 X 2103 Y(variant)s 444(is)S 490(a)S 526(hybrid)S 661(algorithm)S 855(in)S 906(which)S 1034(a)S 1070(block)S 1188(row)S 1275(and)S 1358(column)S 1512(is)S 1559(computed)S 1757(at)S 1807(each)S 1909(step)S 
2000(using)S 300 X 2153 Y(previously)s 508(computed)S 703(rows)S 804(and)S 883(previously)S 1090(computed)S 1284(columns.)S 1474(All)S 1545(of)S 1595(the)S 1664(c)S 1685(o)S 1708(m)S 1743(p)S 1766(u)S 1789(t)S 1802(a)S 1823(t)S 1836(i)S 1849(o)S 1872(n)S 1895(a)S 1916(l)S 1941(work)S 2047(for)S 300 X 2203 Y(the)s 0.0 11 11 f.I ft 369(LU)S 0.0 11 11 f.R ft 446(variants)S 606(is)S 650(contained)S 844(in)S 893(three)S 999(routines:)S 1185(the)S 1255(m)S 1290(a)S 1311(t)S 1324(r)S 1339(i)S 1352(x)S 1375(-)S 1390(m)S 1425(a)S 1446(t)S 1459(r)S 1474(i)S 1487(x)S 1523(multiply)S 1692(SGEMM,)S 1884(the)S 1954(triangu)S 2085 X(-)s 300 X 2253 Y(lar)s 364(solve)S 477(with)S 573(multiple)S 741(right)S 842(hand)S 946(sides)S 1053(STRSM,)S 1228(and)S 1309(the)S 1380(unblocked)S 0.0 11 11 f.I ft 1587(LU)S 0.0 11 11 f.R ft 1666(f)S 1681(a)S 1702(c)S 1723(t)S 1736(o)S 1759(r)S 1774(i)S 1787(z)S 1808(a)S 1829(t)S 1842(i)S 1855(o)S 1878(n)S 1915(for)S 1982(opera)S 2085 X(-)s 300 X 2303 Y(tions)s 403(within)S 534(a)S 568(block)S 684(column.)S 857(T)S 882(a)S 903(b)S 926(l)S 939(e)S 973(1)S 1009(shows)S 1137(the)S 1208(distribution)S 1435(of)S 1487(work)S 1595(among)S 1734(these)S 1844(three)S 1951(r)S 1966(o)S 1989(u)S 2012(t)S 2025(i)S 2038(n)S 2061(e)S 2082(s)S 300 X 2353 Y(and)s 380(the)S 450(average)S 608(performance)S 853(rates)S 953(on)S 1011(one)S 1090(processor)S 1279(of)S 1329(a)S 1362(Cray)S 1464(2)S 1499(for)S 1564(a)S 1597(sample)S 1740(matrix)S 1872(of)S 1922(order)S 2031(500)S 300 X 2403 Y(using)s 413(a)S 447(blocksize)S 636(of)S 687(64.)S 768(Each)S 874(variant)S 1016(calls)S 1116(its)S 1174(own)S 1267(unblocked)S 1474(variant,)S 1628(and)S 1709(the)S 1780(row)S 1865(i)S 1878(n)S 1901(t)S 1914(e)S 1935(r)S 1950(c)S 1971(h)S 1994(a)S 2015(n)S 2038(g)S 2061(e)S 2082(s)S 300 X 2453 Y(use)s 373(about)S 487(2%)S 560(of)S 609(the)S 677(total)S 771(time.)S 886(The)S 969(average)S 1125(speed)S 1242(of)S 1291(SGEMM)S 1470(is)S 1512(over)S 1605(400)S 1685(mega)S 1785 X(\257)s 
1810(ops)S 1885(for)S 1949(all)S 2007(three)S 300 X 2503 Y(variants,)s 469(but)S 539(the)S 607(average)S 763(speed)S 880(of)S 929(STRSM)S 1091(depends)S 1255(on)S 1313(the)S 1382(size)S 1467(of)S 1517(the)S 1586(triangular)S 1778(matrices.)S 1969(F)S 1993(o)S 2016(r)S 2043(the)S 300 X 2553 Y(left-looking)s 530(variant,)S 682(the)S 751(triangular)S 943(matrices)S 1112(at)S 1158(each)S 1256(step)S 1343(range)S 1458(in)S 1506(size)S 1591(from)S 0.0 11 11 f.I ft 1690(NB)S 0.0 11 11 f.R ft 1767(to)S 0.0 11 11 f.I ft 1814(N)S 1852 X %%IncludeFont: Symbol /f.S /Symbol findfont def 0.0 11 11 f.S ft(-)s 0.0 11 11 f.I ft 1877(NB)S 0.0 11 11 f.R ft 1943(,)S 1965(and)S 2043(the)S 300 X 2603 Y(average)s 462(performance)S 712(is)S 760(268)S 846(mega)S 946 X(\257)s 971(ops,)S 1063(while)S 1183(for)S 1253(the)S 1327(r)S 1342(i)S 1355(g)S 1378(h)S 1401(t)S 1414(-)S 1429(l)S 1442(o)S 1465(o)S 1488(k)S 1511(i)S 1524(n)S 1547(g)S 1587(and)S 1671(Crout)S 1793(variants,)S 1969(the)S 2044(tri)S 2085 X(-)s 300 X 2653 Y(angular)s 454(matrices)S 626(are)S 697(always)S 840(of)S 892(order)S 0.0 11 11 f.I ft 1003(NB)S 0.0 11 11 f.R ft 1083(and)S 1164(the)S 1235(average)S 1394(speed)S 1514(is)S 1559(only)S 1655(105)S 1738(mega)S 1838 X(\257)s 1863(ops.)S 1963(C)S 1994(l)S 2007(e)S 2028(a)S 2049(r)S 2064(l)S 2077(y)S 300 X 2703 Y(the)s 368(average)S 524(performance)S 768(of)S 817(the)S 885(Level)S 1002(3)S 1036(BLAS)S 1164(routines)S 1324(in)S 1372(a)S 1405(blocked)S 1564(routine)S 1707(is)S 1750(as)S 1801(important)S 1992(as)S 2043(the)S 300 X 2753 Y(percent)s 448(of)S 497(Level)S 614(3)S 648(BLAS)S 776(work.)S 3300 Y showpage PageState23724 restore %%PageFonts: Times-Italic Times-Roman BracketFont Symbol %%Page: label 3 %%PageFonts: (atend) /PageState23724 save def home %%IncludeFont: Times-Roman /Times-Roman /Times-Roman-8 roman-8-mappings AddRoman-8 /f.R /Times-Roman-8 findfont def 0.0 11 11 f.R ft 1.0000 setlinewidth 300 X 200 Y 1165 X 0.0 10 10 f.R ft(- 3 -)s 300 X 309 Y 515 X 
%%IncludeFont: Symbol /f.S /Symbol findfont def 0.0 10 10 f.S ft(_)s 519 X(_)s 540 X(_)s 561 X(_)s 582 X(_)s 603 X(_)s 624 X(_)s 645 X(_)s 666 X(_)s 687 X(_)s 708 X(_)s 729 X(_)s 750 X(_)s 771 X(_)s 792 X(_)s 813 X(_)s 834 X(_)s 855 X(_)s 876 X(_)s 897 X(_)s 918 X(_)s 939 X(_)s 960 X(_)s 981 X(_)s 1002 X(_)s 1023 X(_)s 1044 X(_)s 1065 X(_)s 1086 X(_)s 1107 X(_)s 1128 X(_)s 1149 X(_)s 1170 X(_)s 1191 X(_)s 1212 X(_)s 1233 X(_)s 1254 X(_)s 1275 X(_)s 1296 X(_)s 1317 X(_)s 1338 X(_)s 1359 X(_)s 1380 X(_)s 1401 X(_)s 1422 X(_)s 1443 X(_)s 1464 X(_)s 1485 X(_)s 1506 X(_)s 1527 X(_)s 1548 X(_)s 1569 X(_)s 1590 X(_)s 1611 X(_)s 1632 X(_)s 1653 X(_)s 1674 X(_)s 1695 X(_)s 1716 X(_)s 1737 X(_)s 1758 X(_)s 1779 X(_)s 1800 X(_)s 1821 X(_)s 1842 X(_)s 1863 X(_)s 300 X 359 Y 536 X 0.0 10 10 f.R ft(V)s 563(a)S 582(r)S 596(i)S 608(a)S 627(n)S 648(t)S 836 X(Routine)s 1140 X(%)s 1185(o)S 1206(p)S 1227(e)S 1246(r)S 1260(a)S 1279(t)S 1291(i)S 1303(o)S 1324(n)S 1345(s)S 1424(%)S 1469(t)S 1481(i)S 1493(m)S 1525(e)S 1608(a)S 1627(v)S 1648(g)S 1669(.)S 1689(mega)S 1780 X(\257)s 1803(ops)S 300 X 364 Y 515 X 0.0 10 10 f.S ft(_)s 519 X(_)s 540 X(_)s 561 X(_)s 582 X(_)s 603 X(_)s 624 X(_)s 645 X(_)s 666 X(_)s 687 X(_)s 708 X(_)s 729 X(_)s 750 X(_)s 771 X(_)s 792 X(_)s 813 X(_)s 834 X(_)s 855 X(_)s 876 X(_)s 897 X(_)s 918 X(_)s 939 X(_)s 960 X(_)s 981 X(_)s 1002 X(_)s 1023 X(_)s 1044 X(_)s 1065 X(_)s 1086 X(_)s 1107 X(_)s 1128 X(_)s 1149 X(_)s 1170 X(_)s 1191 X(_)s 1212 X(_)s 1233 X(_)s 1254 X(_)s 1275 X(_)s 1296 X(_)s 1317 X(_)s 1338 X(_)s 1359 X(_)s 1380 X(_)s 1401 X(_)s 1422 X(_)s 1443 X(_)s 1464 X(_)s 1485 X(_)s 1506 X(_)s 1527 X(_)s 1548 X(_)s 1569 X(_)s 1590 X(_)s 1611 X(_)s 1632 X(_)s 1653 X(_)s 1674 X(_)s 1695 X(_)s 1716 X(_)s 1737 X(_)s 1758 X(_)s 1779 X(_)s 1800 X(_)s 1821 X(_)s 1842 X(_)s 1863 X(_)s 372 Y 515 X(_)s 519 X(_)s 540 X(_)s 561 X(_)s 582 X(_)s 603 X(_)s 624 X(_)s 645 X(_)s 666 X(_)s 687 X(_)s 708 X(_)s 729 X(_)s 750 X(_)s 771 X(_)s 792 X(_)s 813 X(_)s 834 X(_)s 855 X(_)s 
876 X(_)s 897 X(_)s 918 X(_)s 939 X(_)s 960 X(_)s 981 X(_)s 1002 X(_)s 1023 X(_)s 1044 X(_)s 1065 X(_)s 1086 X(_)s 1107 X(_)s 1128 X(_)s 1149 X(_)s 1170 X(_)s 1191 X(_)s 1212 X(_)s 1233 X(_)s 1254 X(_)s 1275 X(_)s 1296 X(_)s 1317 X(_)s 1338 X(_)s 1359 X(_)s 1380 X(_)s 1401 X(_)s 1422 X(_)s 1443 X(_)s 1464 X(_)s 1485 X(_)s 1506 X(_)s 1527 X(_)s 1548 X(_)s 1569 X(_)s 1590 X(_)s 1611 X(_)s 1632 X(_)s 1653 X(_)s 1674 X(_)s 1695 X(_)s 1716 X(_)s 1737 X(_)s 1758 X(_)s 1779 X(_)s 1800 X(_)s 1821 X(_)s 1842 X(_)s 1863 X(_)s 300 X 418 Y 536 X 0.0 10 10 f.R ft(Left-looking)s 836 X(unblocked)s 1022(LU)S 1229 X(10)s 1463 X(20)s 1703 X(146)s 300 X 468 Y 536 X(\(SLUBL\))s 836 X(SGEMM)s 1229 X(49)s 1463 X(32)s 1703 X(438)s 300 X 518 Y 836 X(STRSM)s 1229 X(41)s 1463 X(45)s 1703 X(268)s 300 X 527 Y 515 X 0.0 10 10 f.S ft(_)s 519 X(_)s 540 X(_)s 561 X(_)s 582 X(_)s 603 X(_)s 624 X(_)s 645 X(_)s 666 X(_)s 687 X(_)s 708 X(_)s 729 X(_)s 750 X(_)s 771 X(_)s 792 X(_)s 813 X(_)s 834 X(_)s 855 X(_)s 876 X(_)s 897 X(_)s 918 X(_)s 939 X(_)s 960 X(_)s 981 X(_)s 1002 X(_)s 1023 X(_)s 1044 X(_)s 1065 X(_)s 1086 X(_)s 1107 X(_)s 1128 X(_)s 1149 X(_)s 1170 X(_)s 1191 X(_)s 1212 X(_)s 1233 X(_)s 1254 X(_)s 1275 X(_)s 1296 X(_)s 1317 X(_)s 1338 X(_)s 1359 X(_)s 1380 X(_)s 1401 X(_)s 1422 X(_)s 1443 X(_)s 1464 X(_)s 1485 X(_)s 1506 X(_)s 1527 X(_)s 1548 X(_)s 1569 X(_)s 1590 X(_)s 1611 X(_)s 1632 X(_)s 1653 X(_)s 1674 X(_)s 1695 X(_)s 1716 X(_)s 1737 X(_)s 1758 X(_)s 1779 X(_)s 1800 X(_)s 1821 X(_)s 1842 X(_)s 1863 X(_)s 300 X 577 Y 536 X 0.0 10 10 f.R ft(R)s 564(i)S 576(g)S 597(h)S 618(t)S 630(-)S 644(l)S 656(o)S 677(o)S 698(k)S 719(i)S 731(n)S 752(g)S 836(u)S 857(n)S 878(b)S 899(l)S 911(o)S 932(c)S 951(k)S 972(e)S 991(d)S 1022(LU)S 1229 X(10)s 1463 X(19)s 1703 X(151)s 300 X 627 Y 536 X(\(SLUBR\))s 836 X(SGEMM)s 1229 X(82)s 1463 X(56)s 1703 X(414)s 300 X 677 Y 836 X(STRSM)s 1250 X(8)s 1463 X(23)s 1703 X(105)s 300 X 686 Y 515 X 0.0 10 10 f.S ft(_)s 519 X(_)s 540 X(_)s 561 X(_)s 582 X(_)s 603 X(_)s 
624 X(_)s 645 X(_)s 666 X(_)s 687 X(_)s 708 X(_)s 729 X(_)s 750 X(_)s 771 X(_)s 792 X(_)s 813 X(_)s 834 X(_)s 855 X(_)s 876 X(_)s 897 X(_)s 918 X(_)s 939 X(_)s 960 X(_)s 981 X(_)s 1002 X(_)s 1023 X(_)s 1044 X(_)s 1065 X(_)s 1086 X(_)s 1107 X(_)s 1128 X(_)s 1149 X(_)s 1170 X(_)s 1191 X(_)s 1212 X(_)s 1233 X(_)s 1254 X(_)s 1275 X(_)s 1296 X(_)s 1317 X(_)s 1338 X(_)s 1359 X(_)s 1380 X(_)s 1401 X(_)s 1422 X(_)s 1443 X(_)s 1464 X(_)s 1485 X(_)s 1506 X(_)s 1527 X(_)s 1548 X(_)s 1569 X(_)s 1590 X(_)s 1611 X(_)s 1632 X(_)s 1653 X(_)s 1674 X(_)s 1695 X(_)s 1716 X(_)s 1737 X(_)s 1758 X(_)s 1779 X(_)s 1800 X(_)s 1821 X(_)s 1842 X(_)s 1863 X(_)s 300 X 736 Y 536 X 0.0 10 10 f.R ft(Crout)s 836 X(unblocked)s 1022(LU)S 1229 X(10)s 1463 X(16)s 1703 X(189)s 300 X 786 Y 536 X(\(SLUBC\))s 836 X(SGEMM)s 1229 X(82)s 1463 X(57)s 1703 X(438)s 300 X 836 Y 836 X(STRSM)s 1250 X(8)s 1463 X(24)s 1703 X(105)s 300 X 845 Y 515 X 0.0 10 10 f.S ft(_)s 519 X(_)s 540 X(_)s 561 X(_)s 582 X(_)s 603 X(_)s 624 X(_)s 645 X(_)s 666 X(_)s 687 X(_)s 708 X(_)s 729 X(_)s 750 X(_)s 771 X(_)s 792 X(_)s 813 X(_)s 834 X(_)s 855 X(_)s 876 X(_)s 897 X(_)s 918 X(_)s 939 X(_)s 960 X(_)s 981 X(_)s 1002 X(_)s 1023 X(_)s 1044 X(_)s 1065 X(_)s 1086 X(_)s 1107 X(_)s 1128 X(_)s 1149 X(_)s 1170 X(_)s 1191 X(_)s 1212 X(_)s 1233 X(_)s 1254 X(_)s 1275 X(_)s 1296 X(_)s 1317 X(_)s 1338 X(_)s 1359 X(_)s 1380 X(_)s 1401 X(_)s 1422 X(_)s 1443 X(_)s 1464 X(_)s 1485 X(_)s 1506 X(_)s 1527 X(_)s 1548 X(_)s 1569 X(_)s 1590 X(_)s 1611 X(_)s 1632 X(_)s 1653 X(_)s 1674 X(_)s 1695 X(_)s 1716 X(_)s 1737 X(_)s 1758 X(_)s 1779 X(_)s 1800 X(_)s 1821 X(_)s 1842 X(_)s 1863 X(_)s 300 X 515 X %%IncludeFont: BracketFont /f.S2 /BracketFont findfont def 0.0 10 10 f.S2 ft(L)s 813 Y 515 X(L)s 771 Y 515 X(L)s 729 Y 515 X(L)s 687 Y 515 X(L)s 645 Y 515 X(L)s 603 Y 515 X(L)s 561 Y 515 X(L)s 519 Y 515 X(L)s 477 Y 515 X(L)s 435 Y 515 X(L)s 393 Y 515 X(L)s 351 Y 515 X(L)s 300 X 845 Y 804 X(L)s 813 Y 804 X(L)s 771 Y 804 X(L)s 729 Y 804 X(L)s 687 Y 804 X(L)s 645 
Y 804 X(L)s 603 Y 804 X(L)s 561 Y 804 X(L)s 519 Y 804 X(L)s 477 Y 804 X(L)s 435 Y 804 X(L)s 393 Y 804 X(L)s 351 Y 804 X(L)s 300 X 845 Y 1108 X(L)s 813 Y 1108 X(L)s 771 Y 1108 X(L)s 729 Y 1108 X(L)s 687 Y 1108 X(L)s 645 Y 1108 X(L)s 603 Y 1108 X(L)s 561 Y 1108 X(L)s 519 Y 1108 X(L)s 477 Y 1108 X(L)s 435 Y 1108 X(L)s 393 Y 1108 X(L)s 351 Y 1108 X(L)s 300 X 845 Y 1392 X(L)s 813 Y 1392 X(L)s 771 Y 1392 X(L)s 729 Y 1392 X(L)s 687 Y 1392 X(L)s 645 Y 1392 X(L)s 603 Y 1392 X(L)s 561 Y 1392 X(L)s 519 Y 1392 X(L)s 477 Y 1392 X(L)s 435 Y 1392 X(L)s 393 Y 1392 X(L)s 351 Y 1392 X(L)s 300 X 845 Y 1575 X(L)s 813 Y 1575 X(L)s 771 Y 1575 X(L)s 729 Y 1575 X(L)s 687 Y 1575 X(L)s 645 Y 1575 X(L)s 603 Y 1575 X(L)s 561 Y 1575 X(L)s 519 Y 1575 X(L)s 477 Y 1575 X(L)s 435 Y 1575 X(L)s 393 Y 1575 X(L)s 351 Y 1575 X(L)s 300 X 845 Y 1884 X(L)s 813 Y 1884 X(L)s 771 Y 1884 X(L)s 729 Y 1884 X(L)s 687 Y 1884 X(L)s 645 Y 1884 X(L)s 603 Y 1884 X(L)s 561 Y 1884 X(L)s 519 Y 1884 X(L)s 477 Y 1884 X(L)s 435 Y 1884 X(L)s 393 Y 1884 X(L)s 351 Y 1884 X(L)s 300 X 920 Y 690 X 0.0 10 10 f.R ft(T)s 712(a)S 731(b)S 752(l)S 764(e)S 793(1:)S 846(Breakdown)S 1050(of)S 1095(operations)S 1281(and)S 1352(times)S 1453(for)S %%IncludeFont: Times-Italic /Times-Italic /Times-Italic-8 roman-8-mappings AddRoman-8 /f.I /Times-Italic-8 findfont def 0.0 10 10 f.I ft 1512(LU)S 0.0 10 10 f.R ft 1575(v)S 1596(a)S 1615(r)S 1629(i)S 1641(a)S 1660(n)S 1681(t)S 1693(s)S 300 X 970 Y 820 X(for)s 879(N)S 919(=)S 953(500,)S 1036(NB)S 1104(=)S 1138(64)S 1190(\(Cray)S 1296(2-S,)S 1374(1)S 1405(p)S 1426(r)S 1440(o)S 1461(c)S 1480(e)S 1499(s)S 1515(s)S 1531(o)S 1552(r)S 1566(\))S 300 X 1070 Y(Despite)s 439(the)S 501(di)S 534 X /Cff{ (f) show xsiz pt 20 div neg 0 rmoveto (f) s }bind def Cff 560(erences)S 697(in)S 740(the)S 803(performance)S 1027(rates)S 1118(of)S 1164(their)S 1253(components,)S 1477(the)S 1540(block)S 1645(variants)S 1790(of)S 1836(the)S 0.0 11 11 f.I ft 1899(LU)S 0.0 10 10 f.R ft 1975(factori)S 2086 X(-)s 300 X 1120 
Y(zation)s 419(tend)S 507(to)S 555(show)S 658(similar)S 790(overall)S 923(performance,)S 1161(with)S 1251(a)S 1285(slight)S 1393(advantage)S 1579(to)S 1626(the)S 1692(r)S 1706(i)S 1718(g)S 1739(h)S 1760(t)S 1772(-)S 1786(l)S 1798(o)S 1819(o)S 1840(k)S 1861(i)S 1873(n)S 1894(g)S 1929(and)S 2004(Crout)S 300 X 1170 Y(variants)s 454(because)S 608(more)S 715(of)S 771(the)S 844(operations)S 1041(are)S 1114(in)S 1168(SGEMM.)S 1361(Figure)S 1492(2)S 1534(shows)S 1659(the)S 1732(performance)S 1966(rates)S 2067(in)S 300 X 1220 Y(mega)s 391 X(\257)s 414(ops)S 486(of)S 535(these)S 636(three)S 735(variants)S 883(for)S 946(di)S 979 X Cff 1005(erent)S 1104(matrix)S 1228(sizes)S 1324(on)S 1380(an)S 1434(8-processor)S 1644(Cray)S 1740(Y)S 1770(M)S 1807(P)S 1829(,)S 1853(along)S 1960(with)S 2048(the)S 300 X 1270 Y(performance)s 528(of)S 578(the)S 645(L)S 670(I)S 684(N)S 714(P)S 736(A)S 766(C)S 794(K)S 839(routine)S 974(S)S 997(G)S 1027(E)S 1052(F)S 1074(A)S 1104(.)S 1139(The)S 1219(optimal)S 1363(blocksize)S 1538(on)S 1595(the)S 1662(Cray)S 1759(computers)S 1949(is)S 1993(64)S 2051(for)S 300 X 1320 Y(most)s 396(matrix)S 521(sizes,)S 628(but)S 697(the)S 764(performance)S 992(varies)S 1108(less)S 1186(than)S 1274(10%)S 1365(over)S 1454(a)S 1487(wide)S 1583(range)S 1691(of)S 1740(blocksizes.)S 1950(W)S 1987(e)S 2020(have)S 300 X 1370 Y(considered)s 493(three)S 588(block)S 692(variants)S 836(for)S 896(the)S 959(Cholesky)S 1129(f)S 1143(a)S 1162(c)S 1181(t)S 1193(o)S 1214(r)S 1228(i)S 1240(z)S 1259(a)S 1278(t)S 1290(i)S 1302(o)S 1323(n)S 1355(of)S 1401(a)S 1431(symmetric)S 1619(positive)S 1764(de)S 1804 X(\256)s 1827(nite)S 1902(matrix.)S 2043(F)S 2065(o)S 2086(r)S 300 X 1420 Y(the)s 368(purpose)S 517(of)S 568(discussion,)S 769(we)S 834(consider)S 993(the)S 1061(f)S 1075(a)S 1094(c)S 1113(t)S 1125(o)S 1146(r)S 1160(i)S 1172(z)S 1191(a)S 1210(t)S 1222(i)S 1234(o)S 1255(n)S 0.0 11 11 f.I ft 1292(A)S 1327 X(=)s 1358(LL)S 1393 Y 0.0 8 8 f.I ft 1413(T)S 1420 Y 0.0 10 10 f.R ft 1438(.)S 
1474(In)S 1525(the)S 1593(I-variant,)S 1765(also)S 1848(called)S 1965(the)S 2032(top-)S 300 X 1470 Y(looking)s 441(algorithm,)S 627(a)S 658(block)S 764(row)S 841(is)S 881(computed)S 1059(at)S 1102(each)S 1192(step)S 1272(using)S 1375(previously)S 1565(computed)S 1743(rows.)S 1846(The)S 1923(major)S 2034(part)S 300 X 1520 Y(of)s 348(the)S 413(computation)S 636(is)S 676(in)S 721(updating)S 881(the)S 945(current)S 1077(block)S 1183(row)S 1260(using)S 1363(a)S 1394(triangular)S 1571(solve)S 1672(with)S 1759(the)S 1823(leading)S 1960(t)S 1972(r)S 1986(i)S 1998(a)S 2017(n)S 2038(g)S 2059(l)S 2071(e)S 2090(,)S 300 X 1570 Y(which)s 419(involves)S 578(the)S 646(Level)S 758(3)S 795(BLAS)S 917(routine)S 1053(STRSM.)S 1225(In)S 1276(the)S 1345(J-variant)S 1510(or)S 1562(left-looking)S 1779(algorithm,)S 1970(a)S 2006(block)S 300 X 1620 Y(column)s 439(is)S 480(computed)S 659(at)S 703(a)S 735(time)S 823(using)S 927(previously)S 1118(computed)S 1297(columns.)S 1472(The)S 1550(major)S 1661(operation)S 1834(is)S 1875(the)S 1940(update)S 2065(of)S 300 X 1670 Y(the)s 368(block)S 478(column)S 620(using)S 727(the)S 795(m)S 827(a)S 846(t)S 858(r)S 872(i)S 884(x)S 905(-)S 919(m)S 951(a)S 970(t)S 982(r)S 996(i)S 1008(x)S 1045(multiply)S 1204(routine)S 1340(SGEMM.)S 1528(In)S 1579(the)S 1647(K-variant)S 1825(or)S 1877(r)S 1891(i)S 1903(g)S 1924(h)S 1945(t)S 1957(-)S 1971(l)S 1983(o)S 2004(o)S 2025(k)S 2046(i)S 2058(n)S 2079(g)S 300 X 1720 Y(algorithm,)s 487(a)S 519(block)S 626(column)S 765(is)S 806(factored)S 958(at)S 1002(each)S 1093(step)S 1174(and)S 1248(used)S 1338(to)S 1384(update)S 1510(the)S 1575(trailing)S 1710(submatrix.)S 1910(The)S 1987(u)S 2008(p)S 2029(d)S 2050(a)S 2069(t)S 2081(e)S 300 X 1770 Y(using)s 401(SSYRK)S 545(is)S 583(the)S 645(dominant)S 814(operation)S 985(in)S 1029(this)S 1101(case.)S 1205(Similar)S 1340(performance)S 1564(is)S 1603(observed)S 1766(for)S 1826(these)S 1924(three)S 2020(vari)S 2086 X(-)s 300 X 1820 Y(ants)s 382(when)S 486(the)S 551(three)S 
649(dominant)S 821(Level)S 930(3)S 964(BLAS)S 1083(routines)S 1232(are)S 1297(implemented)S 1530(equally)S 1668(well.)S 1774(Figure)S 1897(2)S 1931(shows)S 2048(the)S 300 X 1870 Y(performance)s 533(in)S 586(mega)S 677 X(\257)s 700(ops)S 778(vs.)S 0.0 11 11 f.I ft 845(N)S 0.0 10 10 f.R ft 903(for)S 972(the)S 1044(three)S 1149(block)S 1263(variants)S 1417(of)S 0.0 11 11 f.I ft 1473(A)S 1508 X(=)s 1539(LL)S 1843 Y 0.0 8 8 f.I ft 1594(T)S 1870 Y 0.0 10 10 f.R ft 1619(,)S 1650(named)S 1783(S)S 1806(L)S 1831(L)S 1853(T)S 1878(B)S 1906(I)S 1920(,)S 1951(S)S 1974(L)S 1999(L)S 2021(T)S 2046(B)S 2074(J)S 2090(,)S 300 X 1920 Y(S)s 323(L)S 348(L)S 370(T)S 395(B)S 423(K)S 453(,)S 481(along)S 593(with)S 686(the)S 755(performance)S 985(of)S 1037(the)S 1106(L)S 1131(I)S 1145(N)S 1175(P)S 1197(A)S 1227(C)S 1255(K)S 1302(f)S 1316(a)S 1335(c)S 1354(t)S 1366(o)S 1387(r)S 1401(i)S 1413(z)S 1432(a)S 1451(t)S 1463(i)S 1475(o)S 1496(n)S 1534(S)S 1557(P)S 1580(O)S 1610(F)S 1632(A)S 1662(,)S 1689(on)S 1748(an)S 1805(8-processor)S 2018(Cray)S 300 X 1970 Y(Y)s 330(M)S 367(P)S 389(.)S 419(As)S 475(in)S 518(the)S 580(case)S 663(of)S 708(the)S 0.0 11 11 f.I ft 770(LU)S 0.0 10 10 f.R ft 846(d)S 867(e)S 886(c)S 905(o)S 926(m)S 958(p)S 979(o)S 1000(s)S 1016(i)S 1028(t)S 1040(i)S 1052(o)S 1073(n)S 1094(,)S 1115(the)S 1178(r)S 1192(i)S 1204(g)S 1225(h)S 1246(t)S 1258(-)S 1272(l)S 1284(o)S 1305(o)S 1326(k)S 1347(i)S 1359(n)S 1380(g)S 1412(variant)S 1541(\()S 1555(S)S 1578(L)S 1603(L)S 1625(T)S 1650(B)S 1678(K)S 1708(\))S 1733(and)S 1805(the)S 1868(variant)S 1997(which)S 300 X 2020 Y(calls)s 391(SGEMM)S 556(for)S 618(most)S 712(of)S 759(its)S 811(Level)S 919(3)S 952(BLAS)S 1070(work)S 1168(\()S 1182(S)S 1205(L)S 1230(L)S 1252(T)S 1277(B)S 1305(J)S 1321(\))S 1347(are)S 1411(slightly)S 1550(better)S 1659(than)S 1744(the)S 1808(left-looking)S 2020(vari)S 2086 X(-)s 300 X 2070 Y(ant,)s 372(which)S 485(does)S 572(more)S 668(of)S 713(its)S 763(Level)S 869(3)S 900(BLAS)S 1016(work)S 1112(in)S 1155(S)S 
1178(T)S 1203(R)S 1231(S)S 1254(M)S 1291(.)S 3300 Y showpage PageState23724 restore %%PageFonts: Times-Roman Symbol BracketFont Times-Italic %%Page: label 4 %%PageFonts: (atend) /PageState23724 save def home %%IncludeFont: Times-Roman /Times-Roman /Times-Roman-8 roman-8-mappings AddRoman-8 /f.R /Times-Roman-8 findfont def 0.0 10 10 f.R ft 1.0000 setlinewidth 300 X 200 Y 1165 X(- 4 -)s 300 X 1250 Y 531 X(Figure)s 651(2:)S 704(P)S 726(e)S 745(r)S 759(f)S 773(o)S 794(r)S 808(m)S 840(a)S 859(n)S 880(c)S 899(e)S 928(of)S %%IncludeFont: Times-Italic /Times-Italic /Times-Italic-8 roman-8-mappings AddRoman-8 /f.I /Times-Italic-8 findfont def 0.0 11 11 f.I ft 973(LU)S 0.0 10 10 f.R ft 1048(and)S 1119(Cholesky)S 1288(variants)S 1432(\(Cray)S 1538(Y)S 1568(M)S 1605(P)S 1627(,)S 1647(8)S 1678(p)S 1699(r)S 1713(o)S 1734(c)S 1753(e)S 1772(s)S 1788(s)S 1804(o)S 1825(r)S 1839(s)S 1855(\))S 300 X 1300 Y(W)s 337(e)S 371(have)S 466(considered)S 664(two)S 742(f)S 756(a)S 775(c)S 794(t)S 806(o)S 827(r)S 841(i)S 853(z)S 872(a)S 891(t)S 903(i)S 915(o)S 936(n)S 957(s)S 988(for)S 1052(symmetric)S 1245(inde)S 1318 X(\256)s 1341(nite)S 1421(matrices,)S 1590(the)S 1658(B)S 1686(u)S 1707(n)S 1728(c)S 1747(h)S 1768(-)S 1782(K)S 1812(a)S 1831(u)S 1852(f)S 1866(m)S 1898(a)S 1917(n)S 1954(d)S 1975(i)S 1987(a)S 2006(g)S 2027(o)S 2048(n)S 2069(a)S 2088(l)S 300 X 1350 Y(pivoting)s 452(method,)S 599(which)S 713(was)S 789(used)S 877(in)S 920(L)S 945(I)S 959(N)S 989(P)S 1011(A)S 1041(C)S 1069(K)S 1099(,)S 1119(and)S 1190(A)S 1220(a)S 1239(s)S 1255(e)S 1274(n)S 1293(')S 1306(s)S 1332(method)S 1468([5].)S 1547(The)S 1622(form)S 1713(of)S 1758(the)S 1820(B)S 1848(u)S 1869(n)S 1890(c)S 1909(h)S 1930(-)S 1944(K)S 1974(a)S 1993(u)S 2014(f)S 2028(m)S 2060(a)S 2079(n)S 300 X 1400 Y(factorization)s 525(is)S 300 X 1475 Y 1060 X 0.0 11 11 f.I ft(P)s 1087(A)S 1115(P)S 1448 Y 0.0 8 8 f.I ft 1148(T)S 1475 Y 0.0 11 11 f.I ft 1173( )S 1184 X(=)s 1215( LDL)S 1448 Y 0.0 8 8 f.I ft 1314(T)S 300 X 1550 Y 0.0 10 10 f.R 
ft(where)s 0.0 11 11 f.I ft 417(P)S 0.0 10 10 f.R ft 466(is)S 508(a)S 541(permutation)S 759(matrix,)S 0.0 11 11 f.I ft 893(L)S 0.0 10 10 f.R ft 939(is)S 981(unit)S 1061(lower)S 1171(triangular,)S 1360(and)S 0.0 11 11 f.I ft 1435(D)S 0.0 10 10 f.R ft 1490(is)S 1533(block)S 1642(diagonal)S 1803(with)S 0.0 11 11 f.R ft 1893(1)S 1916 X %%IncludeFont: Symbol /f.S /Symbol findfont def 0.0 11 11 f.S ft(\264)s 0.0 11 11 f.R ft 1941(1)S 0.0 10 10 f.R ft 1979(or)S 0.0 11 11 f.R ft 2029(2)S 2052 X 0.0 11 11 f.S ft(\264)s 0.0 11 11 f.R ft 2077(2)S 300 X 1600 Y 0.0 10 10 f.R ft(diagonal)s 457(blocks.)S 598(A)S 0.0 11 11 f.R ft 639(2)S 662 X 0.0 11 11 f.S ft(\264)s 0.0 11 11 f.R ft 687(2)S 0.0 10 10 f.R ft 721(pivot)S 818(block)S 922(is)S 960(chosen)S 1087(in)S 1130(order)S 1229(to)S 1272(avoid)S 1376(large)S 1471(entries)S 1594(in)S 1637(the)S 1699(factor)S 0.0 11 11 f.I ft 1808(L)S 0.0 10 10 f.R ft 1850(when)S 1951(the)S 2013(diag)S 2086 X(-)s 300 X 1650 Y(onal)s 389(entries)S 518(are)S 586(small)S 693(in)S 742(magnitude)S 936(relative)S 1080(to)S 1129(the)S 1197(o)S 1218 X /Cff{ (f) show xsiz pt 20 div neg 0 rmoveto (f) s }bind def Cff 1244(diagonal)S 1406(entries.)S 1556(The)S 1638(f)S 1652(a)S 1671(c)S 1690(t)S 1702(o)S 1723(r)S 1737(i)S 1749(z)S 1768(a)S 1787(t)S 1799(i)S 1811(o)S 1832(n)S 1870(from)S 1968(A)S 1998(a)S 2017(s)S 2033(e)S 2052(n)S 2071(')S 2084(s)S 300 X 1700 Y(method)s 436(has)S 502(the)S 564(form)S 300 X 1775 Y 1066 X 0.0 11 11 f.I ft(P)s 1093(A)S 1121(P)S 1748 Y 0.0 8 8 f.I ft 1154(T)S 1775 Y 0.0 11 11 f.I ft 1179( )S 1190 X(=)s 1221( )S 1232(L)S 1254(T)S 1279(L)S 1748 Y 0.0 8 8 f.I ft 1309(T)S 300 X 1850 Y 0.0 10 10 f.R ft(where)s 0.0 11 11 f.I ft 413(T)S 0.0 10 10 f.R ft 455(is)S 493(tridiagonal.)S 707(Block)S 818(versions)S 968(of)S 1013(each)S 1101(of)S 1146(these)S 1243(f)S 1257(a)S 1276(c)S 1295(t)S 1307(o)S 1328(r)S 1342(i)S 1354(z)S 1373(a)S 1392(t)S 1404(i)S 1416(o)S 1437(n)S 1458(s)S 1484(have)S 1574(been)S 1665(developed)S 1850([6].)S 
1930(The)S 2006(block)S 300 X 1900 Y(versions)s 457(accumulate)S 667(the)S 736(elementary)S 941(t)S 953(r)S 967(a)S 986(n)S 1007(s)S 1023(f)S 1037(o)S 1058(r)S 1072(m)S 1104(a)S 1123(t)S 1135(i)S 1147(o)S 1168(n)S 1189(s)S 1222(and)S 1300(apply)S 1411(them)S 1512(as)S 1564(a)S 1599(rank-)S 0.0 11 11 f.I ft 1688(k)S 0.0 10 10 f.R ft 1732(update,)S 1871(in)S 1920(one)S 1997(of)S 2048(the)S 300 X 1950 Y(forms)s 300 X 2025 Y 897 X 0.0 11 11 f.I ft(A)s 932 X 0.0 11 11 f.S ft(\254)s 0.0 11 11 f.I ft 977(A)S 1012 X 0.0 11 11 f.S ft(-)s 0.0 11 11 f.I ft 1037(XDX)S 1998 Y 0.0 8 8 f.I ft 1131(T)S 2025 Y 0.0 11 11 f.R ft 1156(\(Bunch)S 1292 X 0.0 11 11 f.S ft(-)s 0.0 11 11 f.R ft 1317(K)S 1350(a)S 1371(u)S 1394(f)S 1409(m)S 1444(a)S 1465(n)S 1488(\))S 300 X 2100 Y 1001 X 0.0 11 11 f.I ft(A)s 1036 X 0.0 11 11 f.S ft(\254)s 0.0 11 11 f.I ft 1081(A)S 1116 X 0.0 11 11 f.S ft(-)s 0.0 11 11 f.I ft 1141(XTX)S 2073 Y 0.0 8 8 f.I ft 1227(T)S 2100 Y 0.0 11 11 f.R ft 1252(\()S 1267(A)S 1300(a)S 1321(s)S 1339(e)S 1360(n)S 1383(\))S 300 X 2175 Y 0.0 10 10 f.R ft(T)s 322(a)S 341(b)S 362(l)S 374(e)S 403(2)S 434(compares)S 605(the)S 667(performance)S 890(of)S 935(the)S 997(unblocked)S 1183(\(Level)S 1303(2)S 1334(BLAS\))S 1464(and)S 1535(blocked)S 1679(\(Level)S 1799(3)S 1830(BLAS\))S 1960(v)S 1981(e)S 2000(r)S 2014(s)S 2030(i)S 2042(o)S 2063(n)S 2084(s)S 300 X 2225 Y(of)s 347(each)S 437(method)S 575(on)S 629(one)S 702(processor)S 875(of)S 922(a)S 953(Cray)S 1047(2.)S 1090(W)S 1127(e)S 1158(see)S 1224(that)S 1299(while)S 1404(the)S 1467(unblocked)S 1654(form)S 1746(of)S 1792(A)S 1822(a)S 1841(s)S 1857(e)S 1876(n)S 1895(')S 1908(s)S 1935(method)S 2072(is)S 300 X 2275 Y(better)s 412(than)S 501(the)S 569(unblocked)S 761(B)S 789(u)S 810(n)S 831(c)S 850(h)S 871(-)S 885(K)S 915(a)S 934(u)S 955(f)S 969(m)S 1001(a)S 1020(n)S 1057(f)S 1071(a)S 1090(c)S 1109(t)S 1121(o)S 1142(r)S 1156(i)S 1168(z)S 1187(a)S 1206(t)S 1218(i)S 1230(o)S 1251(n)S 1288(for)S 1353(large)S 1454(matrices,)S 1623(blocking)S 
1787(favors)S 1908(the)S 1976(B)S 2004(u)S 2025(n)S 2046(c)S 2065(h)S 2086(-)S 300 X 2325 Y(Kaufmann)s 487(f)S 501(a)S 520(c)S 539(t)S 551(o)S 572(r)S 586(i)S 598(z)S 617(a)S 636(t)S 648(i)S 660(o)S 681(n)S 712(for)S 771(all)S 824(matrix)S 944(s)S 960(i)S 972(z)S 991(e)S 1010(s)S 1026(.)S 300 X 2359 Y 757 X 0.0 10 10 f.S ft(_)s 761 X(_)s 782 X(_)s 803 X(_)s 824 X(_)s 845 X(_)s 866 X(_)s 887 X(_)s 908 X(_)s 929 X(_)s 950 X(_)s 971 X(_)s 992 X(_)s 1013 X(_)s 1034 X(_)s 1055 X(_)s 1076 X(_)s 1097 X(_)s 1118 X(_)s 1139 X(_)s 1160 X(_)s 1181 X(_)s 1202 X(_)s 1223 X(_)s 1244 X(_)s 1265 X(_)s 1286 X(_)s 1307 X(_)s 1328 X(_)s 1349 X(_)s 1370 X(_)s 1391 X(_)s 1412 X(_)s 1433 X(_)s 1454 X(_)s 1475 X(_)s 1496 X(_)s 1517 X(_)s 1538 X(_)s 1559 X(_)s 1580 X(_)s 1601 X(_)s 1622 X(_)s 300 X 2409 Y 1031 X 0.0 10 10 f.R ft(Aasen)s 1323 X(B)s 1351(u)S 1372(n)S 1393(c)S 1412(h)S 1433(-)S 1447(K)S 1477(a)S 1496(u)S 1517(f)S 1531(m)S 1563(a)S 1582(n)S 300 X 2418 Y 893 X 0.0 10 10 f.S ft(_)s 908 X(_)s 929 X(_)s 950 X(_)s 971 X(_)s 992 X(_)s 1013 X(_)s 1034 X(_)s 1055 X(_)s 1076 X(_)s 1097 X(_)s 1118 X(_)s 1139 X(_)s 1160 X(_)s 1181 X(_)s 1202 X(_)s 1223 X(_)s 1244 X(_)s 1265 X(_)s 1286 X(_)s 1307 X(_)s 1328 X(_)s 1349 X(_)s 1370 X(_)s 1391 X(_)s 1412 X(_)s 1433 X(_)s 1454 X(_)s 1475 X(_)s 1496 X(_)s 1517 X(_)s 1538 X(_)s 1559 X(_)s 1580 X(_)s 1601 X(_)s 1622 X(_)s 300 X 2438 Y 805 X 0.0 10 10 f.R ft(N)s 2468 Y 925 X(Level)s 1031(2)S 1115(L)S 1140(e)S 1159(v)S 1180(e)S 1199(l)S 1221(3)S 1305(L)S 1330(e)S 1349(v)S 1370(e)S 1389(l)S 1411(2)S 1495(L)S 1520(e)S 1539(v)S 1560(e)S 1579(l)S 1601(3)S 300 X 2473 Y 757 X 0.0 10 10 f.S ft(_)s 761 X(_)s 782 X(_)s 803 X(_)s 824 X(_)s 845 X(_)s 866 X(_)s 887 X(_)s 908 X(_)s 929 X(_)s 950 X(_)s 971 X(_)s 992 X(_)s 1013 X(_)s 1034 X(_)s 1055 X(_)s 1076 X(_)s 1097 X(_)s 1118 X(_)s 1139 X(_)s 1160 X(_)s 1181 X(_)s 1202 X(_)s 1223 X(_)s 1244 X(_)s 1265 X(_)s 1286 X(_)s 1307 X(_)s 1328 X(_)s 1349 X(_)s 1370 X(_)s 1391 X(_)s 1412 X(_)s 1433 X(_)s 1454 X(_)s 
1475 X(_)s 1496 X(_)s 1517 X(_)s 1538 X(_)s 1559 X(_)s 1580 X(_)s 1601 X(_)s 1622 X(_)s 2481 Y 757 X(_)s 761 X(_)s 782 X(_)s 803 X(_)s 824 X(_)s 845 X(_)s 866 X(_)s 887 X(_)s 908 X(_)s 929 X(_)s 950 X(_)s 971 X(_)s 992 X(_)s 1013 X(_)s 1034 X(_)s 1055 X(_)s 1076 X(_)s 1097 X(_)s 1118 X(_)s 1139 X(_)s 1160 X(_)s 1181 X(_)s 1202 X(_)s 1223 X(_)s 1244 X(_)s 1265 X(_)s 1286 X(_)s 1307 X(_)s 1328 X(_)s 1349 X(_)s 1370 X(_)s 1391 X(_)s 1412 X(_)s 1433 X(_)s 1454 X(_)s 1475 X(_)s 1496 X(_)s 1517 X(_)s 1538 X(_)s 1559 X(_)s 1580 X(_)s 1601 X(_)s 1622 X(_)s 300 X 2527 Y 799 X 0.0 10 10 f.R ft(100)s 978 X(37)s 1168 X(40)s 1358 X(44)s 1548 X(48)s 300 X 2577 Y 799 X(200)s 978 X(79)s 1168 X(92)s 1358 X(88)s 1527 X(109)s 300 X 2627 Y 799 X(300)s 957 X(114)s 1147 X(147)s 1337 X(115)s 1527 X(168)s 300 X 2677 Y 799 X(400)s 957 X(142)s 1147 X(180)s 1337 X(132)s 1527 X(198)s 300 X 2727 Y 799 X(500)s 957 X(165)s 1147 X(215)s 1337 X(140)s 1527 X(238)s 300 X 2777 Y 799 X(700)s 957 X(201)s 1147 X(257)s 1337 X(158)s 1527 X(265)s 300 X 2827 Y 778 X(1000)s 957 X(236)s 1147 X(292)s 1337 X(171)s 1527 X(314)s 300 X 2836 Y 757 X 0.0 10 10 f.S ft(_)s 761 X(_)s 782 X(_)s 803 X(_)s 824 X(_)s 845 X(_)s 866 X(_)s 887 X(_)s 908 X(_)s 929 X(_)s 950 X(_)s 971 X(_)s 992 X(_)s 1013 X(_)s 1034 X(_)s 1055 X(_)s 1076 X(_)s 1097 X(_)s 1118 X(_)s 1139 X(_)s 1160 X(_)s 1181 X(_)s 1202 X(_)s 1223 X(_)s 1244 X(_)s 1265 X(_)s 1286 X(_)s 1307 X(_)s 1328 X(_)s 1349 X(_)s 1370 X(_)s 1391 X(_)s 1412 X(_)s 1433 X(_)s 1454 X(_)s 1475 X(_)s 1496 X(_)s 1517 X(_)s 1538 X(_)s 1559 X(_)s 1580 X(_)s 1601 X(_)s 1622 X(_)s 300 X 757 X %%IncludeFont: BracketFont /f.S2 /BracketFont findfont def 0.0 10 10 f.S2 ft(L)s 2821 Y 757 X(L)s 2779 Y 757 X(L)s 2737 Y 757 X(L)s 2695 Y 757 X(L)s 2653 Y 757 X(L)s 2611 Y 757 X(L)s 2569 Y 757 X(L)s 2527 Y 757 X(L)s 2485 Y 757 X(L)s 2443 Y 757 X(L)s 2401 Y 757 X(L)s 300 X 2836 Y 893 X(L)s 2821 Y 893 X(L)s 2779 Y 893 X(L)s 2737 Y 893 X(L)s 2695 Y 893 X(L)s 2653 Y 893 X(L)s 2611 Y 893 X(L)s 2569 Y 
893 X(L)s 2527 Y 893 X(L)s 2485 Y 893 X(L)s 2443 Y 893 X(L)s 2401 Y 893 X(L)s 300 X 2836 Y 1083 X(L)s 2795 Y 1083 X(L)s 2753 Y 1083 X(L)s 2711 Y 1083 X(L)s 2669 Y 1083 X(L)s 2627 Y 1083 X(L)s 2585 Y 1083 X(L)s 2543 Y 1083 X(L)s 2501 Y 1083 X(L)s 2459 Y 1083 X(L)s 300 X 2836 Y 1273 X(L)s 2821 Y 1273 X(L)s 2779 Y 1273 X(L)s 2737 Y 1273 X(L)s 2695 Y 1273 X(L)s 2653 Y 1273 X(L)s 2611 Y 1273 X(L)s 2569 Y 1273 X(L)s 2527 Y 1273 X(L)s 2485 Y 1273 X(L)s 2443 Y 1273 X(L)s 2401 Y 1273 X(L)s 300 X 2836 Y 1463 X(L)s 2795 Y 1463 X(L)s 2753 Y 1463 X(L)s 2711 Y 1463 X(L)s 2669 Y 1463 X(L)s 2627 Y 1463 X(L)s 2585 Y 1463 X(L)s 2543 Y 1463 X(L)s 2501 Y 1463 X(L)s 2459 Y 1463 X(L)s 300 X 2836 Y 1643 X(L)s 2821 Y 1643 X(L)s 2779 Y 1643 X(L)s 2737 Y 1643 X(L)s 2695 Y 1643 X(L)s 2653 Y 1643 X(L)s 2611 Y 1643 X(L)s 2569 Y 1643 X(L)s 2527 Y 1643 X(L)s 2485 Y 1643 X(L)s 2443 Y 1643 X(L)s 2401 Y 1643 X(L)s 300 X 2911 Y 0.0 11 11 f.R ft 322(T)S 347(a)S 368(b)S 391(l)S 404(e)S 436(2:)S 494(P)S 518(e)S 539(r)S 554(f)S 569(o)S 592(r)S 607(m)S 642(a)S 663(n)S 686(c)S 707(e)S 739(in)S 786(mega)S 886 X(\257)s 911(ops)S 986(of)S 1035(symmetric)S 1240(inde)S 1320 X(\256)s 1345(nite)S 1426(f)S 1441(a)S 1462(c)S 1483(t)S 1496(o)S 1519(r)S 1534(i)S 1547(z)S 1568(a)S 1589(t)S 1602(i)S 1615(o)S 1638(n)S 1661(s)S 1690(\(Cray)S 1806(2,)S 1851(1)S 1885(p)S 1908(r)S 1923(o)S 1946(c)S 1967(e)S 1988(s)S 2006(s)S 2024(o)S 2047(r)S 2062(\))S 3300 Y showpage PageState23724 restore %%PageFonts: Times-Roman Times-Italic Symbol BracketFont %%Page: label 5 %%PageFonts: (atend) /PageState23724 save def home %%IncludeFont: Times-Roman /Times-Roman /Times-Roman-8 roman-8-mappings AddRoman-8 /f.R /Times-Roman-8 findfont def 0.0 11 11 f.R ft 1.0000 setlinewidth 300 X 200 Y 1165 X 0.0 10 10 f.R ft(- 5 -)s 300 X 350 Y 0.0 11 11 f.R ft(The)s 384(QR)S 460(d)S 483(e)S 504(c)S 525(o)S 548(m)S 583(p)S 606(o)S 629(s)S 647(i)S 660(t)S 673(i)S 686(o)S 709(n)S 732(,)S 755(used)S 852(in)S 901(solving)S 1050(linear)S 1169(least)S 
1268(squares)S 1420(problems,)S 1615(factors)S 1754(a)S 1788(matrix)S %%IncludeFont: Times-Italic /Times-Italic /Times-Italic-8 roman-8-mappings AddRoman-8 /f.I /Times-Italic-8 findfont def 0.0 11 11 f.I ft 1921(A)S 0.0 11 11 f.R ft 1969(as)S 0.0 11 11 f.I ft 2021(QR)S 0.0 11 11 f.R ft 2089(,)S 300 X 400 Y(where)s 0.0 11 11 f.I ft 427(Q)S 0.0 11 11 f.R ft 481(is)S 526(orthogonal)S 740(and)S 0.0 11 11 f.I ft 821(R)S 0.0 11 11 f.R ft 870(is)S 915(upper)S 1034(triangular.)S 1249(The)S 1334(matrix)S 0.0 11 11 f.I ft 1467(Q)S 0.0 11 11 f.R ft 1520(is)S 1564(a)S 1598(product)S 1752(of)S 0.0 11 11 f.I ft 1803(n)S 1833 X %%IncludeFont: Symbol /f.S /Symbol findfont def 0.0 11 11 f.S ft(-)s 0.0 11 11 f.R ft 1858(1)S 1894(e)S 1915(l)S 1928(e)S 1949(m)S 1984(e)S 2005(n)S 2028(t)S 2041(a)S 2062(r)S 2077(y)S 300 X 450 Y(re)s 336 X(\257)s 361(ectors)S 507(\(or)S 595(Householder)S 866(t)S 879(r)S 894(a)S 915(n)S 938(s)S 956(f)S 971(o)S 994(r)S 1009(m)S 1044(a)S 1065(t)S 1078(i)S 1091(o)S 1114(n)S 1137(s)S 1155(\))S 0.0 11 11 f.I ft 1206(Q)S 1246 X(=)s 1277(H)S 465 Y 0.0 8 8 f.R ft 1315(1)S 450 Y 0.0 11 11 f.I ft 1332(H)S 465 Y 0.0 8 8 f.R ft 1370(2)S 437 Y 0.0 11 11 f.I ft 1387( . . . )S 450 Y 1464(H)S 465 Y 0.0 8 8 f.I ft 1497(n)S 1519 X 0.0 8 8 f.S ft(-)s 0.0 8 8 f.R ft 1537(1)S 450 Y 0.0 11 11 f.R ft 1554(,)S 1601(where)S 0.0 11 11 f.I ft 1750(H)S 465 Y 0.0 8 8 f.I ft 1783(i)S 450 Y 1800 X 0.0 11 11 f.I ft(=)s 1831(I)S 1853 X 0.0 11 11 f.S ft(-)s 1878 X(t)s 465 Y 0.0 8 8 f.I ft 1898(i)S 450 Y 0.0 11 11 f.I ft 1915(v)S 465 Y 0.0 8 8 f.I ft 1936(i)S 450 Y 0.0 11 11 f.I ft 1953(v)S 465 Y 0.0 8 8 f.I ft 1974(i)S 423 Y 1979(T)S 450 Y 0.0 11 11 f.R ft 2033(and)S 300 X 503 Y 0.0 11 11 f.I ft(v)s 518 Y 0.0 8 8 f.I ft 321(i)S 503 Y 338 X 0.0 11 11 f.I ft(=)s 0.0 11 11 f.R ft 369([0)S 490 Y 0.0 11 11 f.I ft 407( . . . 
)S 503 Y 0.0 11 11 f.R ft 484(0)S 514(1)S 0.0 11 11 f.I ft 544(x)S 476 Y 0.0 8 8 f.I ft 570(T)S 503 Y 0.0 11 11 f.R ft 595(])S 476 Y 0.0 8 8 f.I ft 610(T)S 503 Y 0.0 11 11 f.R ft 635(.)S 671(In)S 723(the)S 794(unblocked)S 1001(QR)S 1079(d)S 1102(e)S 1123(c)S 1144(o)S 1167(m)S 1202(p)S 1225(o)S 1248(s)S 1266(i)S 1279(t)S 1292(i)S 1305(o)S 1328(n)S 1351(,)S 1376(a)S 1410(re)S 1446 X(\257)s 1471(ector)S 0.0 11 11 f.I ft 1577(H)S 518 Y 0.0 8 8 f.I ft 1610(i)S 503 Y 0.0 11 11 f.R ft 1640(is)S 1684(computed)S 1879(at)S 1926(each)S 2025(step)S 300 X 556 Y(and)s 378(then)S 469(applied)S 617(to)S 665(the)S 734(matrix)S 0.0 11 11 f.I ft 866(A)S 0.0 11 11 f.R ft 901(.)S 935(A)S 980(block)S 1095(form)S 1195(of)S 1245(the)S 1314(QR)S 1390(d)S 1413(e)S 1434(c)S 1455(o)S 1478(m)S 1513(p)S 1536(o)S 1559(s)S 1577(i)S 1590(t)S 1603(i)S 1616(o)S 1639(n)S 1674(is)S 1717(obtained)S 1889(by)S 1947(combin)S 2085 X(-)s 300 X 606 Y(ing)s 375(several)S 523(elementary)S 745(Householder)S 997(matrices)S 1170(into)S 1257(a)S 1293(block)S 1411(Householder)S 1662(matrix.)S 1819(The)S 1906(product)S 2062(of)S 300 X 656 Y(elementary)s 517(matrices)S 685(can)S 761(be)S 816(written)S 958(as)S 300 X 833 Y 961 X 0.0 8 8 f.I ft(i)s 976 X(=)s 0.0 8 8 f.R ft 998(1)S 780 Y 962 X 0.0 16 16 f.S ft(P)s 701 Y 0.0 8 8 f.I ft 980(k)S 767 Y 0.0 11 11 f.R ft 1015(\()S 0.0 11 11 f.I ft 1030(I)S 1052 X 0.0 11 11 f.S ft(-)s 1077 X(t)s 782 Y 0.0 8 8 f.I ft 1097(i)S 767 Y 0.0 11 11 f.I ft 1114(v)S 782 Y 0.0 8 8 f.I ft 1135(i)S 767 Y 0.0 11 11 f.I ft 1152(v)S 782 Y 0.0 8 8 f.I ft 1173(i)S 740 Y 1190(T)S 767 Y 0.0 11 11 f.R ft 1215(\))S 0.0 11 11 f.I ft 1230( )S 1241 X(=)s 1272( I)S 1305 X 0.0 11 11 f.S ft(-)s 0.0 11 11 f.I ft 1330(VSV)S 740 Y 0.0 8 8 f.I ft 1414(T)S 300 X 896 Y 0.0 11 11 f.R ft(where)s 0.0 11 11 f.I ft 427(V)S 462 X(=)s 0.0 11 11 f.R ft 493([)S 0.0 11 11 f.I ft 508(v)S 911 Y 0.0 8 8 f.R ft 534(1)S 896 Y 0.0 11 11 f.I ft 551(v)S 911 Y 0.0 8 8 f.R ft 577(2)S 883 Y 0.0 11 11 f.I ft 594( . . . 
)S 896 Y 671(v)S 911 Y 0.0 8 8 f.I ft 692(k)S 896 Y 0.0 11 11 f.R ft 714(])S 743(and)S 0.0 11 11 f.I ft 824(S)S 0.0 11 11 f.R ft 868(is)S 913(a)S 0.0 11 11 f.I ft 948(k)S 976 X 0.0 11 11 f.S ft(\264)s 0.0 11 11 f.I ft 1001(k)S 0.0 11 11 f.R ft 1043(upper)S 1162(triangular)S 1356(matrix.)S 1513(Some)S 1632(extra)S 1740(work)S 1849(is)S 1895(required)S 2064(to)S 300 X 949 Y(compute)s 0.0 11 11 f.I ft 476(S)S 0.0 11 11 f.R ft 506(,)S 534(so)S 592(the)S 666(optimal)S 824(block)S 944(size)S 1034(is)S 1082(usually)S 1232(smaller)S 1384(than)S 1480(for)S 0.0 11 11 f.I ft 1549(LU)S 0.0 11 11 f.R ft 1630(or)S 1684(Cholesky.)S 1897(In)S 1951(order)S 2064(to)S 300 X 999 Y(obtain)s 432(a)S 469(fair)S 549(comparison)S 780(of)S 834(the)S 907(variants,)S 1081(we)S 1152(always)S 1298(use)S 1377(the)S 1451(operation)S 1643(count)S 1763(for)S 1833(the)S 1907(u)S 1930(n)S 1953(b)S 1976(l)S 1989(o)S 2012(c)S 2033(k)S 2056(e)S 2077(d)S 300 X 1049 Y(algorithm.)s 516(T)S 541(a)S 562(b)S 585(l)S 598(e)S 634(3)S 672(shows)S 802(the)S 874(performance)S 1122(in)S 1173(mega)S 1273 X(\257)s 1298(ops)S 1377(of)S 1430(four)S 1521(variants)S 1683(of)S 1736(the)S 1807(QR)S 1885(decomposi)S 2085 X(-)s 300 X 1099 Y(tion)s 383(on)S 441(one)S 520(processor)S 709(of)S 759(a)S 792(Cray)S 894(2.)S 951(The)S 1035(two)S 1116(block)S 1231(variants)S 1390(are)S 1459(SQRR,)S 1602(a)S 1635(block)S 1750(r)S 1765(i)S 1778(g)S 1801(h)S 1824(t)S 1837(-)S 1852(l)S 1865(o)S 1888(o)S 1911(k)S 1934(i)S 1947(n)S 1970(g)S 2005(algo)S 2085 X(-)s 300 X 1149 Y(rithm)s 415(in)S 466(which)S 594(a)S 630(block)S 748(Householder)S 999(matrix)S 1134(is)S 1180(computed)S 1377(and)S 1459(immediately)S 1705(applied)S 1857(to)S 1908(the)S 1980(rest)S 2062(of)S 300 X 1199 Y(the)s 375(matrix)S 513(as)S 570(a)S 609(rank-)S 0.0 11 11 f.I ft 706(k)S 0.0 11 11 f.R ft 752(update,)S 905(and)S 990(SQRL,)S 1136(a)S 1175(block)S 1296(left-looking)S 1532(variant)S 1679(in)S 1733(which)S 1865(the)S 1941(p)S 1964(r)S 1979(e)S 2000(v)S 2023(i)S 
2036(o)S 2059(u)S 2082(s)S 300 X 1249 Y(updates)s 455(are)S 525 X(\256)s 550(rst)S 609(applied)S 758(to)S 806(the)S 875(current)S 1018(block)S 1133(column)S 1283(before)S 1413(the)S 1482(next)S 1574(block)S 1689(Householder)S 1937(matrix)S 2069(is)S 300 X 1299 Y(computed.)s 518(SGEQR2)S 705(is)S 750(the)S 821(unblocked)S 1028(Level)S 1148(2)S 1185(BLAS)S 1316(variant)S 1459(and)S 1540(SQRDC)S 1707(is)S 1752(the)S 1824(Level)S 1945(1)S 1983(BLAS)S 300 X 1349 Y(variant)s 442(from)S 543(L)S 571(I)S 586(N)S 619(P)S 643(A)S 676(C)S 707(K)S 740(.)S 786(W)S 827(e)S 861(see)S 934(that)S 1017(the)S 1087(blocked)S 1247(variants)S 1407(only)S 1501(surpass)S 1649(the)S 1718(unblocked)S 1923(variant)S 2064(in)S 300 X 1399 Y(performance)s 544(for)S 608(matrices)S 776(of)S 825(order)S 933(greater)S 1073(than)S 1164(200.)S 300 X 1483 Y 718 X 0.0 10 10 f.S ft(_)s 736 X(_)s 757 X(_)s 778 X(_)s 799 X(_)s 820 X(_)s 841 X(_)s 862 X(_)s 883 X(_)s 904 X(_)s 925 X(_)s 946 X(_)s 967 X(_)s 988 X(_)s 1009 X(_)s 1030 X(_)s 1051 X(_)s 1072 X(_)s 1093 X(_)s 1114 X(_)s 1135 X(_)s 1156 X(_)s 1177 X(_)s 1198 X(_)s 1219 X(_)s 1240 X(_)s 1261 X(_)s 1282 X(_)s 1303 X(_)s 1324 X(_)s 1345 X(_)s 1366 X(_)s 1387 X(_)s 1408 X(_)s 1429 X(_)s 1450 X(_)s 1471 X(_)s 1492 X(_)s 1513 X(_)s 1534 X(_)s 1555 X(_)s 1576 X(_)s 1597 X(_)s 1618 X(_)s 1639 X(_)s 1660 X(_)s 300 X 1533 Y 1220 X 0.0 10 10 f.R ft(Matrix)s 1345(size)S 1421(M)S 1468(=)S 1502(N)S 300 X 1542 Y 1062 X 0.0 10 10 f.S ft(_)s 1072 X(_)s 1093 X(_)s 1114 X(_)s 1135 X(_)s 1156 X(_)s 1177 X(_)s 1198 X(_)s 1219 X(_)s 1240 X(_)s 1261 X(_)s 1282 X(_)s 1303 X(_)s 1324 X(_)s 1345 X(_)s 1366 X(_)s 1387 X(_)s 1408 X(_)s 1429 X(_)s 1450 X(_)s 1471 X(_)s 1492 X(_)s 1513 X(_)s 1534 X(_)s 1555 X(_)s 1576 X(_)s 1597 X(_)s 1618 X(_)s 1639 X(_)s 1660 X(_)s 300 X 1562 Y 739 X 0.0 10 10 f.R ft(QR)s 807(variant)S 1592 Y 1093 X(1)s 1114(0)S 1135(0)S 1219(2)S 1240(0)S 1261(0)S 1345(3)S 1366(0)S 1387(0)S 1471(4)S 1492(0)S 1513(0)S 1597(5)S 1618(0)S 1639(0)S 300 X 1597 Y 
718 X 0.0 10 10 f.S ft(_)s 736 X(_)s 757 X(_)s 778 X(_)s 799 X(_)s 820 X(_)s 841 X(_)s 862 X(_)s 883 X(_)s 904 X(_)s 925 X(_)s 946 X(_)s 967 X(_)s 988 X(_)s 1009 X(_)s 1030 X(_)s 1051 X(_)s 1072 X(_)s 1093 X(_)s 1114 X(_)s 1135 X(_)s 1156 X(_)s 1177 X(_)s 1198 X(_)s 1219 X(_)s 1240 X(_)s 1261 X(_)s 1282 X(_)s 1303 X(_)s 1324 X(_)s 1345 X(_)s 1366 X(_)s 1387 X(_)s 1408 X(_)s 1429 X(_)s 1450 X(_)s 1471 X(_)s 1492 X(_)s 1513 X(_)s 1534 X(_)s 1555 X(_)s 1576 X(_)s 1597 X(_)s 1618 X(_)s 1639 X(_)s 1660 X(_)s 1605 Y 718 X(_)s 736 X(_)s 757 X(_)s 778 X(_)s 799 X(_)s 820 X(_)s 841 X(_)s 862 X(_)s 883 X(_)s 904 X(_)s 925 X(_)s 946 X(_)s 967 X(_)s 988 X(_)s 1009 X(_)s 1030 X(_)s 1051 X(_)s 1072 X(_)s 1093 X(_)s 1114 X(_)s 1135 X(_)s 1156 X(_)s 1177 X(_)s 1198 X(_)s 1219 X(_)s 1240 X(_)s 1261 X(_)s 1282 X(_)s 1303 X(_)s 1324 X(_)s 1345 X(_)s 1366 X(_)s 1387 X(_)s 1408 X(_)s 1429 X(_)s 1450 X(_)s 1471 X(_)s 1492 X(_)s 1513 X(_)s 1534 X(_)s 1555 X(_)s 1576 X(_)s 1597 X(_)s 1618 X(_)s 1639 X(_)s 1660 X(_)s 300 X 1651 Y 739 X 0.0 10 10 f.R ft(SQRR)s 858(\(NB)S 940(=)S 974(3)S 995(2)S 1016(\))S 1093(1)S 1114(0)S 1135(6)S 1219(2)S 1240(0)S 1261(9)S 1345(2)S 1366(6)S 1387(9)S 1471(3)S 1492(0)S 1513(6)S 1597(3)S 1618(2)S 1639(8)S 300 X 1701 Y 739 X(SQRL)s 855(\(NB)S 937(=)S 971(4)S 992(8)S 1013(\))S 1093(1)S 1114(0)S 1135(2)S 1219(1)S 1240(9)S 1261(8)S 1345(2)S 1366(5)S 1387(8)S 1471(2)S 1492(9)S 1513(3)S 1597(3)S 1618(1)S 1639(6)S 300 X 1751 Y 739 X(SGEQR2)s 1093 X(1)s 1114(4)S 1135(4)S 1219(2)S 1240(1)S 1261(5)S 1345(2)S 1366(4)S 1387(2)S 1471(2)S 1492(5)S 1513(1)S 1597(2)S 1618(5)S 1639(5)S 300 X 1801 Y 739 X(SQRDC)s 1114 X(24)s 1240 X(41)s 1366 X(55)s 1492 X(66)s 1618 X(76)s 300 X 1810 Y 718 X 0.0 10 10 f.S ft(_)s 736 X(_)s 757 X(_)s 778 X(_)s 799 X(_)s 820 X(_)s 841 X(_)s 862 X(_)s 883 X(_)s 904 X(_)s 925 X(_)s 946 X(_)s 967 X(_)s 988 X(_)s 1009 X(_)s 1030 X(_)s 1051 X(_)s 1072 X(_)s 1093 X(_)s 1114 X(_)s 1135 X(_)s 1156 X(_)s 1177 X(_)s 1198 X(_)s 1219 X(_)s 1240 X(_)s 1261 
X(_)s 1282 X(_)s 1303 X(_)s 1324 X(_)s 1345 X(_)s 1366 X(_)s 1387 X(_)s 1408 X(_)s 1429 X(_)s 1450 X(_)s 1471 X(_)s 1492 X(_)s 1513 X(_)s 1534 X(_)s 1555 X(_)s 1576 X(_)s 1597 X(_)s 1618 X(_)s 1639 X(_)s 1660 X(_)s 300 X 718 X %%IncludeFont: BracketFont /f.S2 /BracketFont findfont def 0.0 10 10 f.S2 ft(L)s 1777 Y 718 X(L)s 1735 Y 718 X(L)s 1693 Y 718 X(L)s 1651 Y 718 X(L)s 1609 Y 718 X(L)s 1567 Y 718 X(L)s 1525 Y 718 X(L)s 300 X 1810 Y 1061 X(L)s 1777 Y 1061 X(L)s 1735 Y 1061 X(L)s 1693 Y 1061 X(L)s 1651 Y 1061 X(L)s 1609 Y 1061 X(L)s 1567 Y 1061 X(L)s 1525 Y 1061 X(L)s 300 X 1810 Y 1681 X(L)s 1777 Y 1681 X(L)s 1735 Y 1681 X(L)s 1693 Y 1681 X(L)s 1651 Y 1681 X(L)s 1609 Y 1681 X(L)s 1567 Y 1681 X(L)s 1525 Y 1681 X(L)s 300 X 1885 Y 569 X 0.0 10 10 f.R ft(T)s 591(a)S 610(b)S 631(l)S 643(e)S 672(3:)S 725(P)S 747(e)S 766(r)S 780(f)S 794(o)S 815(r)S 829(m)S 861(a)S 880(n)S 901(c)S 920(e)S 949(in)S 992(mega)S 1083 X(\257)s 1106(ops)S 1174(of)S 0.0 11 11 f.I ft 1219(QR)S 0.0 10 10 f.R ft 1297(variants)S 1441(\(Cray)S 1547(2-S,)S 1625(1)S 1656(p)S 1677(r)S 1691(o)S 1712(c)S 1731(e)S 1750(s)S 1766(s)S 1782(o)S 1803(r)S 1817(\))S 300 X 1985 Y(Block)s 413(algorithms)S 605(have)S 697(also)S 777(been)S 869(developed)S 1055(for)S 1116(the)S 1180(Cholesky)S 1351(and)S 0.0 11 11 f.I ft 1424(LU)S 0.0 10 10 f.R ft 1501(f)S 1515(a)S 1534(c)S 1553(t)S 1565(o)S 1586(r)S 1600(i)S 1612(z)S 1631(a)S 1650(t)S 1662(i)S 1674(o)S 1695(n)S 1716(s)S 1744(of)S 1791(band)S 1885(matrices)S 2041([7].)S 300 X 2035 Y(The)s 378(idea)S 462(is)S 503(to)S 549(factor)S 661(a)S 693(small)S 797(diagonal)S 956(block)S 1063(using)S 1167(an)S 1220(unblocked)S 1409(algorithm,)S 1596(and)S 1670(then)S 1756(to)S 1801(update)S 1926(the)S 1990(m)S 2022(a)S 2041(t)S 2053(r)S 2067(i)S 2079(x)S 300 X 2085 Y(within)s 419(the)S 482(band)S 576(using)S 679(Level)S 787(3)S 820(BLAS.)S 958(The)S 1035(use)S 1103(of)S 1150(Level)S 1258(3)S 1291(BLAS)S 1409(in)S 1454(the)S 1518(f)S 1532(a)S 1551(c)S 1570(t)S 1582(o)S 1603(r)S 
1617(i)S 1629(z)S 1648(a)S 1667(t)S 1679(i)S 1691(o)S 1712(n)S 1745(of)S 1792(band)S 1886(matrices)S 2041(can)S 300 X 2135 Y(improve)s 452(the)S 516(performance)S 741(of)S 788(the)S 852(f)S 866(a)S 885(c)S 904(t)S 916(o)S 937(r)S 951(i)S 963(z)S 982(a)S 1001(t)S 1013(i)S 1025(o)S 1046(n)S 1079(as)S 1126(long)S 1212(as)S 1258(the)S 1321(bandwidth)S 1510(is)S 1549(not)S 1614(too)S 1679(small.)S 1801(Other)S 1908(block)S 2013(algo)S 2086 X(-)s 300 X 2185 Y(rithms)s 422(in)S 470(L)S 495(A)S 525(P)S 547(A)S 577(C)S 605(K)S 650(include)S 790(the)S 858(routines)S 1010(for)S 1075(reducing)S 1239(a)S 1274(general)S 1415(rectangular)S 1622(matrix)S 1748(to)S 1797(Hessenberg)S 2009(form,)S 300 X 2235 Y(reducing)s 466(a)S 503(symmetric)S 698(matrix)S 826(to)S 877(tridiagonal)S 1078(form,)S 1186(generating)S 1382(and)S 1460(multiplying)S 1674(by)S 1733(an)S 1790(orthogonal)S 1990(m)S 2022(a)S 2041(t)S 2053(r)S 2067(i)S 2079(x)S 300 X 2285 Y(represented)s 505(as)S 550(a)S 579(product)S 719(of)S 765(Householder)S 991(t)S 1003(r)S 1017(a)S 1036(n)S 1057(s)S 1073(f)S 1087(o)S 1108(r)S 1122(m)S 1154(a)S 1173(t)S 1185(i)S 1197(o)S 1218(n)S 1239(s)S 1255(,)S 1276(and)S 1348(computing)S 1539(the)S 1602(inverse)S 1735(of)S 1781(a)S 1811(square)S 1932(nonsingu)S 2086 X(-)s 300 X 2335 Y(lar)s 358(matrix.)S 491(The)S 569(reductions)S 758(follow)S 881(the)S 946(blocking)S 1107(strategy)S 1254(outlined)S 1406(in)S 1452([8],)S 1524(and)S 1598(as)S 1646(for)S 1708(the)S 1772(QR)S 1842(d)S 1863(e)S 1882(c)S 1901(o)S 1922(m)S 1954(p)S 1975(o)S 1996(s)S 2012(i)S 2024(t)S 2036(i)S 2048(o)S 2069(n)S 2090(,)S 300 X 2385 Y(extra)s 396(work)S 493(is)S 532(required)S 684(to)S 728(combine)S 884(the)S 948(updates)S 1089(to)S 1134(use)S 1202(Level)S 1310(3)S 1343(BLAS.)S 1481(As)S 1539(a)S 1570(result,)S 1686(improvements)S 1938(in)S 1983(perfor)S 2086 X(-)s 300 X 2435 Y(mance)s 422(compared)S 600(to)S 645(the)S 709(unblocked)S 896(routines)S 1043(are)S 1106(typically)S 1266(observed)S 1429(only)S 
1515(for)S 1575(relatively)S 1747(large)S 1843(matrices)S 1997(\()S 2011(o)S 2032(r)S 2046(d)S 2067(e)S 2086(r)S 300 X 2485 Y(300)s 373(or)S 418(higher\).)S 570(F)S 592(o)S 613(r)S 637(each)S 725(f)S 739(a)S 758(c)S 777(t)S 789(o)S 810(r)S 824(i)S 836(z)S 855(a)S 874(t)S 886(i)S 898(o)S 919(n)S 950(or)S 996(reduction)S 1167(we)S 1227(have)S 1318(considered,)S 1522(the)S 1585(percentage)S 1780(of)S 1826(BLAS)S 1943(3)S 1975(work)S 2072(is)S 300 X 2535 Y(the)s 368(same)S 470(in)S 519(the)S 586(di)S 619 X /Cff{ (f) show xsiz pt 20 div neg 0 rmoveto (f) s }bind def Cff 645(erent)S 745(block)S 854(variants,)S 1013(but)S 1082(this)S 1158(is)S 1201(not)S 1270(the)S 1337(only)S 1427(c)S 1446(o)S 1467(n)S 1488(s)S 1504(i)S 1516(d)S 1537(e)S 1556(r)S 1570(a)S 1589(t)S 1601(i)S 1613(o)S 1634(n)S 1655(.)S 1690(The)S 1770(performance)S 1998(of)S 2048(the)S 300 X 2585 Y(Level)s 407(3)S 439(BLAS)S 556(routines)S 703(is)S 742(dependent)S 927(on)S 980(the)S 1043(matrix)S 1164(shapes)S 1287(on)S 1340(which)S 1454(they)S 1538(operate,)S 1684(and)S 1756(in)S 1800(a)S 1830(block)S 1936(a)S 1955(l)S 1967(g)S 1988(o)S 2009(r)S 2023(i)S 2035(t)S 2047(h)S 2068(m)S 300 X 2635 Y(one)s 372(of)S 418(the)S 481(dimensions)S 683(is)S 722(always)S 850(on)S 903(the)S 966(order)S 1066(of)S 1112(the)S 1175(blocksize.)S 1366(Small)S 1475(blocksizes)S 1662(will)S 1739(result)S 1844(in)S 1887(p)S 1908(e)S 1927(r)S 1941(f)S 1955(o)S 1976(r)S 1990(m)S 2022(a)S 2041(n)S 2062(c)S 2081(e)S 300 X 2685 Y(similar)s 433(to)S 482(the)S 550(unblocked)S 742(Level)S 854(2)S 891(BLAS)S 1013(algorithms,)S 1219(for)S 1284(which)S 1403(the)S 1472(choice)S 1600(of)S 1652(variant)S 1787(is)S 1832(highly)S 1957(m)S 1989(a)S 2008(c)S 2027(h)S 2048(i)S 2060(n)S 2081(e)S 300 X 2735 Y(dependent.)s 505(However,)S 680(blocking)S 839(tends)S 939(to)S 983(smooth)S 1117(out)S 1182(the)S 1245(di)S 1278 X Cff 1304(erences)S 1442(in)S 1486(the)S 1548(Level)S 1654(2)S 1685(BLAS)S 1801(algorithms,)S 2001(so)S 2048(the)S 300 X 2785 
Y(choice)s 427(of)S 478(a)S 513(block)S 623(variant)S 757(on)S 815(computers)S 1006(such)S 1099(as)S 1151(Crays)S 1266(is)S 1311(not)S 1382(so)S 1436(critical.)S 1592(Although)S 1768(we)S 1834(are)S 1903(c)S 1922(o)S 1943(n)S 1964(s)S 1980(i)S 1992(d)S 2013(e)S 2032(r)S 2046(i)S 2058(n)S 2079(g)S 300 X 2835 Y(di)s 333 X Cff 359(erent)S 456(block)S 562(variants)S 708(now,)S 802(the)S 866(public)S 984(release)S 1114(of)S 1161(L)S 1186(A)S 1216(P)S 1238(A)S 1268(C)S 1296(K)S 1337(in)S 1381(1991)S 1476(will)S 1553(contain)S 1689(only)S 1775(one)S 1847(variant)S 1976(of)S 2022(each)S 300 X 2885 Y(algorithm.)s 487(F)S 509(o)S 530(r)S 557(shared)S 680(memory)S 833(machines,)S 1016(our)S 1086(current)S 1220(policy)S 1340(is)S 1382(to)S 1429(choose)S 1560(the)S 1626(unblocked)S 1816(variant)S 1948(based)S 2058(on)S 300 X 2935 Y(the)s 364(m)S 396(a)S 415(t)S 427(r)S 441(i)S 453(x)S 474(-)S 488(v)S 509(e)S 528(c)S 547(t)S 559(o)S 580(r)S 606(multiply)S 761(and)S 834(the)S 898(blocked)S 1044(variant)S 1174(for)S 1235(which)S 1349(the)S 1412(dominant)S 1582(Level)S 1689(3)S 1721(BLAS)S 1838(operation)S 2009(is)S 2048(the)S 300 X 2985 Y(matrix-matrix)s 546(multiply.)S 721(F)S 743(o)S 764(r)S 790(distributed)S 983(memory)S 1134(machines,)S 1315(we)S 1377(expect)S 1501(the)S 1566(r)S 1580(i)S 1592(g)S 1613(h)S 1634(t)S 1646(-)S 1660(l)S 1672(o)S 1693(o)S 1714(k)S 1735(i)S 1747(n)S 1768(g)S 1802(variants)S 1949(based)S 2058(on)S 300 X 3035 Y(the)s 364(rank-)S 0.0 11 11 f.I ft 453(k)S 0.0 10 10 f.R ft 493(update)S 618(to)S 663(be)S 715(most)S 808(useful.)S 943(F)S 965(u)S 986(r)S 1000(t)S 1012(h)S 1033(e)S 1052(r)S 1078(data)S 1161(is)S 1201(needed)S 1333(before)S 1453(a)S 1483 X(\256)s 1506(nal)S 1569(decision)S 1721(can)S 1791(be)S 1842(made)S 1944(on)S 1997(which)S 3300 Y showpage PageState23724 restore %%PageFonts: Times-Roman Times-Italic Symbol BracketFont %%Page: label 6 %%PageFonts: (atend) /PageState23724 save def home %%IncludeFont: Times-Roman /Times-Roman /Times-Roman-8 
roman-8-mappings AddRoman-8 /f.R /Times-Roman-8 findfont def 0.0 10 10 f.R ft 1.0000 setlinewidth 300 X 200 Y 1165 X(- 6 -)s 300 X 350 Y(variants)s 444(will)S 520(give)S 603(the)S 665(best)S 743(performance)S 966(over)S 1051(the)S 1113(widest)S 1233(range)S 1337(of)S 1382(h)S 1403(i)S 1415(g)S 1436(h)S 1457(-)S 1471(p)S 1492(e)S 1511(r)S 1525(f)S 1539(o)S 1560(r)S 1574(m)S 1606(a)S 1625(n)S 1646(c)S 1665(e)S 1694(c)S 1713(o)S 1734(m)S 1766(p)S 1787(u)S 1808(t)S 1820(e)S 1839(r)S 1853(s)S 1869(.)S 300 X 450 Y 1070 X(R)s 1098(E)S 1123(F)S 1146(E)S 1171(R)S 1199(E)S 1224(N)S 1254(C)S 1282(E)S 1307(S)S 300 X 500 Y(C.)s 358(Bischof,)S 519(J.)S 565(Demmel,)S 739(J.)S 786(Dongarra,)S 976(J.)S 1023(Du)S 1095(Croz,)S 1208(A.)S 1269(Greenbaum,)S 1496(S.)S 1550(Hammarling,)S 1793(and)S 1875(D.)S 1936(S)S 1959(o)S 1980(r)S 1994(e)S 2013(n)S 2034(s)S 2050(e)S 2069(n)S 2090(,)S 300 X 550 Y %%IncludeFont: Times-Italic /Times-Italic /Times-Italic-8 roman-8-mappings AddRoman-8 /f.I /Times-Italic-8 findfont def 0.0 10 10 f.I ft(L)s 323(A)S 348(P)S 372(A)S 397(C)S 425(K)S 465(W)S 498(o)S 519(r)S 535(k)S 554(i)S 566(n)S 587(g)S 620(Note)S 712(#5:)S 790(P)S 814(r)S 830(o)S 851(v)S 870(i)S 882(s)S 898(i)S 910(o)S 931(n)S 952(a)S 973(l)S 997(Contents)S 0.0 10 10 f.R ft 1147(,)S 1169(Argonne)S 1328(National)S 1486(Lab.,)S 1583(ANL-88-38,)S 1802(Sept.)S 1899(1988.)S 2015(C.)S 2065(L.)S 300 X 600 Y(Lawson,)s 457(R.)S 510(J.)S 551(Hanson,)S 704(D.)S 759(R.)S 812(Kincaid,)S 971(and)S 1047(F)S 1069(.)S 1094(T)S 1116(.)S 1142(Krogh,)S 0.0 10 10 f.I ft 1275(Basic)S 1384(Linear)S 1512(Algebra)S 1663(Subprograms)S 1904(for)S 1969(F)S 1993(o)S 2014(r)S 2030(t)S 2042(r)S 2058(a)S 2079(n)S 300 X 650 Y(Usage)s 0.0 10 10 f.R ft 407(,)S 428(ACM)S 534(T)S 556(r)S 570(a)S 589(n)S 610(s)S 626(.)S 647(Math.)S 757(Soft.,)S 858(5)S 890(\(Sept.)S 1000(1979\),)S 1118(pp.)S 1180(308-323.)S 1350(J.)S 1386(Dongarra,)S 1565(J.)S 1601(Du)S 1662(Croz,)S 1764(S.)S 1807(Hammarling,)S 2039(and)S 300 X 700 Y(R.)s 
351(Hanson,)S 0.0 10 10 f.I ft 502(An)S 561(Extended)S 731(Set)S 796(of)S 842(F)S 866(o)S 887(r)S 903(t)S 915(r)S 931(a)S 952(n)S 986(Basic)S 1092(Linear)S 1218(Algebra)S 1367(Subprograms)S 0.0 10 10 f.R ft 1592(,)S 1616(ACM)S 1725(T)S 1747(r)S 1761(a)S 1780(n)S 1801(s)S 1817(.)S 1841(Math.)S 1954(Soft.,)S 2058(14)S 300 X 750 Y(\(Mar.)s 406(1988\),)S 526(pp.)S 590(1-17.)S 699(J.)S 737(Dongarra,)S 918(J.)S 956(Du)S 1019(Croz,)S 1123(I.)S 1159(Du)S 1210 X /Cff{ (f) show xsiz pt 20 div neg 0 rmoveto (f) s }bind def Cff 1236(,)S 1257(and)S 1329(S.)S 1373(Hammarling,)S 0.0 10 10 f.I ft 1606(A)S 1642(Set)S 1705(of)S 1749(Level)S 1852(3)S 1884(Basic)S 1988(L)S 2011(i)S 2023(n)S 2044(e)S 2063(a)S 2084(r)S 300 X 800 Y(Algebra)s 452(Subprograms)S 0.0 10 10 f.R ft 677(,)S 704(to)S 754(appear)S 884(in)S 934(ACM)S 1046(T)S 1068(r)S 1082(a)S 1101(n)S 1122(s)S 1138(.)S 1165(Math.)S 1281(Soft.,)S 1388(Mar.)S 1485(1990.)S 1606(G.)S 1663(Golub)S 1785(and)S 1863(C.)S 1919(V)S 1946(a)S 1965(n)S 2004(Loan,)S 300 X 850 Y 0.0 10 10 f.I ft(Matrix)s 426(Computations)S 0.0 10 10 f.R ft 662(,)S 683(Johns)S 789(Hopkins,)S 952(Baltimore,)S 1142(1989.)S 1257(D.)S 1308(Sorensen)S 1473(and)S 1544(C.)S 1592(V)S 1619(a)S 1638(n)S 1669(Loan,)S 1775(personal)S 1928(communi)S 2086 X(-)s 300 X 900 Y(cation.)s 439(P)S 461(.)S 486(Mayes)S 613(and)S 690(G.)S 746(Radicati,)S 0.0 10 10 f.I ft 914(L)S 937(A)S 962(P)S 986(A)S 1011(C)S 1039(K)S 1083(W)S 1116(o)S 1137(r)S 1153(k)S 1172(i)S 1184(n)S 1205(g)S 1242(Note)S 1338(#12:)S 1441(Banded)S 1585(Cholesky)S 1756(F)S 1780(a)S 1801(c)S 1820(t)S 1832(o)S 1853(r)S 1869(i)S 1881(z)S 1897(a)S 1918(t)S 1930(i)S 1942(o)S 1963(n)S 2000(Using)S 300 X 950 Y(Level)s 405(3)S 439(BLAS)S 0.0 10 10 f.R ft 533(,)S 556(Argonne)S 716(National)S 874(Lab.,)S 971(A)S 1001(N)S 1031(L)S 1056(/)S 1068(M)S 1105(C)S 1133(S)S 1156(-)S 1170(T)S 1195(M)S 1232(-)S 1246(1)S 1267(3)S 1288(4)S 1309(,)S 1331(Aug.)S 1425(1989.)S 1541(J.)S 1579(Dongarra,)S 1760(S.)S 1805(Hammarling,)S 
2039(and)S 300 X 1000 Y(D.)s 350(Sorensen,)S 0.0 10 10 f.I ft 524(L)S 547(A)S 572(P)S 596(A)S 621(C)S 649(K)S 687(W)S 720(o)S 741(r)S 757(k)S 776(i)S 788(n)S 809(g)S 840(Note)S 930(#2:)S 996(Block)S 1103(Reduction)S 1285(of)S 1329(Matrices)S 1490(to)S 1534(Condensed)S 1732(F)S 1756(o)S 1777(r)S 1793(m)S 1823(s)S 1850(for)S 1910(E)S 1935(i)S 1947(g)S 1968(e)S 1987(n)S 2008(v)S 2027(a)S 2048(l)S 2060(u)S 2081(e)S 300 X 1050 Y(Computations)s 0.0 10 10 f.R ft 536(,)S 556(Argonne)S 713(National)S 869(Lab.,)S 964(A)S 994(N)S 1024(L)S 1049(/)S 1061(M)S 1098(C)S 1126(S)S 1149(-)S 1163(T)S 1188(M)S 1225(-)S 1239(9)S 1260(9)S 1281(,)S 1301(Sept.)S 1396(1987.)S 300 X 1150 Y %%IncludeFont: Times-Bold /Times-Bold /Times-Bold-8 roman-8-mappings AddRoman-8 /f.B /Times-Bold-8 findfont def 0.0 10 10 f.B ft(R)s 330(e)S 349(f)S 363(e)S 382(r)S 401(e)S 420(n)S 443(c)S 462(e)S 481(s)S 3300 Y showpage PageState23724 restore %%PageFonts: Times-Roman Times-Italic Times-Bold %%Page: label 7 %%PageFonts: (atend) /PageState23724 save def home %%IncludeFont: Times-Bold /Times-Bold /Times-Bold-8 roman-8-mappings AddRoman-8 /f.B /Times-Bold-8 findfont def 0.0 10 10 f.B ft 1.0000 setlinewidth PageState23724 restore %%Trailer %%DocumentFonts: Times-Bold Times-Roman Times-Italic BracketFont Symbol %%Pages: 6 DocState23724 restore %%EOF .