*********************************************************************
*
*  Your init code comes here.
*
*********************************************************************

;****************************************************************************
;**
;*  Speed-optimized chunky circledraw routine, A
;*
;*  This is one of my contributions to #amycoders circledrawing-compo. This
;*  routine will participate in speed-optimizing competition. This code works
;*  with 68020+ processors, with any cache setup.
;*
;*  This is a simple bresenham circle algorithm implementation with
;*  optimizing drawcode generator for points of all circle sizes.
;*
;*  Code size without rts: 448 bytes as originally published (this revision
;*  drops one redundant 2-byte load from the code generator). ~520K buffer
;*  Average time (dec): very fast (030@59.220MHz)
;*
;*  Coded by Harry "Piru" Sintonen.
;*  This code is copyright C 1997 by Harry Sintonen, and was written 24th
;*  september & 11th october 1997.
;*
;****************************************************************************
;*

;----------------------------------------------------------------------------
; _initcode - build one draw routine per radius and hook them up to _circle.
;
; For every radius 0..NUMCIRCLES (inclusive) makecircle generates a routine
; into its own CITEMS-byte slot of codetable.  The table base is rounded
; down to a 256-byte boundary (the 256 pad bytes in front of codetable make
; room for that) and is patched into the "lea" of _circle through
; codetablep.  The routine leaves through exec CacheClearU so that the
; freshly written code is what the CPU executes.
;
; OUT: d0-d7/a0-a2/a6 trashed here; CacheClearU may trash more.
;
; NOTE(review): the generator stores each offset as a 16-bit displacement.
; For radius 128 the first step already yields y*256 = 32768, which does
; not fit a signed word, so the routine built for r=128 looks wrong.
; Confirm that callers only use radii below 128.
;----------------------------------------------------------------------------

	CNOP	0,4
_initcode
a

MAXRAD		EQU	128
CITEMS		EQU	(MAXRAD-0)*8*4		; (maxrad-minrad)*8*sizeof(long)
NUMCIRCLES	EQU	128

	move.l	#codetable,d0
	clr.b	d0				; round down to a 256-byte boundary
	move.l	d0,codetablep			; patch the lea operand in _circle
	move.l	d0,a0				; a0 = slot of radius 0
	moveq	#0,d7				; d7 = current radius
.loop	move.l	d7,d2
	movem.l	d7/a0,-(sp)			; makecircle trashes both
	bsr	makecircle
	movem.l	(sp)+,d7/a0
	lea	(CITEMS,a0),a0			; next slot
	addq.l	#1,d7
	cmp.w	#NUMCIRCLES,d7			; NUMCIRCLES circles
	bls.b	.loop				; radius NUMCIRCLES itself is built too

	move.l	(4).w,a6
	jmp	(-$27C,a6)			; CacheClearU

;----------------------------------------------------------------------------
; makecircle - generate the draw routine for one radius, in place.
;
; IN:  d2.l=radius
;      a0=codetable slot for this circle
; OUT: the slot starts with a run of "move.b/.w/.l d3,(d16,a0)"
;      instructions ending in rts.
;      d0-d7/a0-a2 trashed
;
; Steps: 1) walk the bresenham circle and store 8 long offsets
;           (y*256+x and its mirrors) per step behind a long count,
;        2) sort the offsets (isort) and drop duplicates (strip),
;        3) turn the offsets into code, merging 2 or 4 consecutive
;           offsets into one move.w / move.l.
; The bare numbers at the start of some comments are the instruction
; sizes in bytes.
;----------------------------------------------------------------------------

	CNOP	0,4
makecircle
	tst.l	d2
	bne.b	.not_zerop
	move.w	(coda,pc),(a0)+			; opcode word of "move.b d3,(d16,a0)"
	move.l	#$00004E75,(a0)+		; displacement 0, followed by rts
	rts

.not_zerop
	move.l	d2,d5				; 2  y=r (NOTE: .l!)
	add.l	d2,d2				; 2
	subq.l	#3,d2				; 2
	neg.l	d2				; 2  d=3-(2*r)
	moveq	#0,d4				; 2  x=0
	move.l	a0,a1				; a1 = ptr to offset count
	clr.l	(a0)+				; count = 0, a0 = first offset
.loop	addq.l	#8,(a1)				; 8 offsets per step

	move.l	d5,d6				; 2
	lsl.l	#8,d6				; 2
	add.l	d4,d6				; 2  (y*256)+x
	move.l	d6,(a0)+
	;move.b	d3,0(a0,d6.l)			; 4

	move.l	d5,d6				; 2
	lsl.l	#8,d6				; 2
	neg.l	d6				; 2
	add.l	d4,d6				; 2  -(y*256)+x
	move.l	d6,(a0)+
	;move.b	d3,0(a0,d6.l)			; 4

	move.l	d5,d6				; 2
	lsl.l	#8,d6				; 2
	sub.l	d4,d6				; 2  (y*256)-x
	move.l	d6,(a0)+
	;move.b	d3,0(a0,d6.l)			; 4

	move.l	d5,d6				; 2
	lsl.l	#8,d6				; 2
	neg.l	d6				; 2
	sub.l	d4,d6				; 2  -(y*256)-x
	move.l	d6,(a0)+
	;move.b	d3,0(a0,d6.l)			; 4

	move.l	d4,d6				; 2
	lsl.l	#8,d6				; 2
	add.l	d5,d6				; 2  (x*256)+y
	move.l	d6,(a0)+
	;move.b	d3,0(a0,d6.l)			; 4

	move.l	d4,d6				; 2
	lsl.l	#8,d6				; 2
	neg.l	d6				; 2
	add.l	d5,d6				; 2  -(x*256)+y
	move.l	d6,(a0)+
	;move.b	d3,0(a0,d6.l)			; 4

	move.l	d4,d6				; 2
	lsl.l	#8,d6				; 2
	sub.l	d5,d6				; 2  (x*256)-y
	move.l	d6,(a0)+
	;move.b	d3,0(a0,d6.l)			; 4

	move.l	d4,d6				; 2
	lsl.l	#8,d6				; 2
	neg.l	d6				; 2
	sub.l	d5,d6				; 2  -(x*256)-y
	move.l	d6,(a0)+
	;move.b	d3,0(a0,d6.l)			; 4

	tst.l	d2				; 2
	bpl.b	.skipa				; 2
	move.l	d4,d6				; 2
	lsl.l	#2,d6				; 2
	addq.l	#6,d6				; 2
	add.l	d6,d2				; 2  d=d+(4*x)+6
	bra.b	.skipb				; 2
.skipa	move.l	d4,d6				; 2
	sub.l	d5,d6				; 2
	add.l	d6,d6				; 2
	addq.l	#5,d6				; 2
	add.l	d6,d6				; 2
	add.l	d6,d2				; 2  d=d+4*(x-y)+10
	subq.l	#1,d5				; 2  y=y-1
.skipb	addq.l	#1,d4				; 2  x=x+1
	cmp.l	d4,d5				; 2
	bhs	.loop				; 2  while y >= x

	move.l	#$0BADF00D,(a0)+		; These are here to ensure that
	move.l	#$0CAEF00D,(a0)+		; optimizer doesn't get endpart
	move.l	#$0DAFF00D,(a0)+		; wrong.
	move.l	#$0EACF00D,(a0)+

; offset table done... lets sort it (urgh)

	move.l	a1,a0				; a1=ptr to count
	move.l	(a0)+,d0			; d0 = count, a0 = first offset
	bsr	isort

; ok, offset table sorted now. lets strip clones.

	move.l	a1,a0
	bsr	strip

; offset table sorted&stripped now. lets generate the code
; out of it. also generate move.w & move.l optimizations.
;
; a0 reads offsets, a1 writes code over the same memory, starting on the
; count long.  Every emitted instruction is 4 bytes and consumes at least
; one 4-byte offset, so the writer never catches up with the reader.

	lea	coda,a2
	movem.l	(a2),d5-d7			; the three template instructions
	swap	d5				; d5.w = move.b opcode word
	swap	d6				; d6.w = move.w opcode word
	swap	d7				; d7.w = move.l opcode word
	move.l	a1,a0
	move.l	(a0)+,d4			; d4 = offsets left (sets flags for .next)
.next	ble.b	.alldone			; flags come from move.l / subq on d4
	movem.l	(a0),d0-d3			; next four offsets
	subq.l	#3,d3
	cmp.l	d0,d3				; fourth == first+3: four in a row
	beq.b	.put_long
	subq.l	#1,d1
	cmp.l	d0,d1				; second == first+1: two in a row
	beq.b	.put_word
.put_byte
	move.w	d5,(a1)+
	move.w	d0,(a1)+			; low word of offset = displacement
	addq.l	#4,a0
	subq.l	#1,d4
	bra.b	.next
.put_long
	move.w	d7,(a1)+
	move.w	d0,(a1)+
	lea	(16,a0),a0
	subq.l	#4,d4
	bra.b	.next
.put_word
	move.w	d6,(a1)+
	move.w	d0,(a1)+
	addq.l	#8,a0
	subq.l	#2,d4
	bra.b	.next
.alldone
	move.w	#$4E75,(a1)			; rts ends the generated routine
	rts

; Template instructions. Only their opcode words are copied by makecircle;
; the $100 displacement is replaced by the real offset.
coda	move.b	d3,($100,a0)			; put one pixel
	move.w	d3,($100,a0)			; put two pixels
	move.l	d3,($100,a0)			; put four pixels

;----------------------------------------------------------------------------
; isort - insertion sort of signed longs, ascending. yea this sucks, but
; who cares :)
;
; IN:  a0=a (array of longs), d0=n (number of longs)
; OUT: d1-d3 trashed
;
; a[0] and a[1] are compared before n is looked at, so n is expected to be
; at least 2 (makecircle always passes a multiple of 8).
;----------------------------------------------------------------------------

	CNOP	0,4
isort	moveq	#1,d1				; d1 = i, first unsorted index
.oloop	move.l	d1,d2				; d2 = j, walks down from i
.iloop	move.l	-4(a0,d2.l*4),d3		; d3 = a[j-1]
	cmp.l	0(a0,d2.l*4),d3
	ble.b	.placed				; a[j-1] <= a[j]: a[0..i] is sorted
	move.l	0(a0,d2.l*4),-4(a0,d2.l*4)
	move.l	d3,0(a0,d2.l*4)			; swap a[j-1] and a[j]
	subq.l	#1,d2
	bne.b	.iloop
.placed	addq.l	#1,d1
	cmp.l	d1,d0
	bhi.b	.oloop				; while i < n
	rts

;----------------------------------------------------------------------------
; strip - remove duplicates from a sorted offset table, in place.
;
; IN:  a0=table (long count followed by that many sorted longs)
; OUT: table compacted to unique values, count long updated
;      d0-d1 trashed, a0-a1 preserved
;
; The last entry is compared against the long that follows the table, so
; that long must differ from it (makecircle appends sentinel longs).
;----------------------------------------------------------------------------

	CNOP	0,4
strip	movem.l	a0-a1,-(sp)
	move.l	(a0)+,d0			; d0 = entries left to read
	move.l	a0,a1				; a1 = write pointer
.loop	move.l	(a0)+,d1			; d1 = current value
.loop2	cmp.l	(a0)+,d1
	bne.b	.normal
	subq.l	#1,d0				; skip a clone of d1
	bne.b	.loop2
	move.l	d1,(a1)+
	bra.b	.done
.normal	subq.l	#4,a0				; un-read the differing entry
	move.l	d1,(a1)+
	subq.l	#1,d0
	bne.b	.loop
.done	move.l	(sp),a0				; saved table pointer
	addq.l	#4,a0				; a0 = first entry
	move.l	a1,d0
	sub.l	a0,d0
	lsr.l	#2,d0				; bytes written / 4 = new count
	move.l	d0,-(a0)
	movem.l	(sp)+,a0-a1
	rts

b	rts

*********************************************************************
*
*  Your circle routine.
*  Every time you are given:   A0.L : Chunky Screen (256x256x8)
*
*                              d0.l/d1.l x/y
*                              d2.l      radius
*                              d3.b      color
*
*  other registers are in undefined state !!
*  you may trash all regs except a7
*
*********************************************************************

;----------------------------------------------------------------------------
; _circle - draw one circle by jumping into its pre-generated routine.
;
; IN:  d0.l=xc, d1.l=yc, d2.l=radius, d3.b=color
;      a0=chunky buffer (256x256x8)
; OUT: circle :)   d0-d4/a0-a1 trashed
;
; The color byte is replicated into all four bytes of d3 so the generated
; move.b/move.w/move.l instructions can all store from d3.  a0 is moved to
; the circle center and control is passed to codetable+r*CITEMS; the
; generated routine ends in its own rts, which returns to our caller.
; The two computations (color in d3/d4, addresses in d1/d2) are interleaved
; instruction by instruction; the order is kept exactly as written.
; No clipping is visible here: every plotted offset is relative to the
; center and is written as is.
;----------------------------------------------------------------------------

	CNOP	0,8
_circle
c
	move.l	d3,d4				; 000c
	add.l	d2,d2				; d2=r*2
	lsl.l	#8,d3				; 00c0
	lsl.l	#8,d1				; offs=yc*256
	or.b	d4,d3				; 00cc
	lsl.l	#8,d2				; d2=r*512 (=r*CITEMS/8, scaled *8 by the jmp)
	move.l	d3,d4				; 00cc
	add.l	d0,d1				; offs=yc*256+xc
	swap	d3				; cc00
	lea	$BADF00D,a1			; placeholder address, overwritten by _initcode
codetablep	EQU	*-4			; address of the long operand of the lea above
	move.w	d4,d3				; cccc
	add.l	d1,a0				; a0=circle center
	jmp	0(a1,d2.l*8)			; draw circle (codetable+r*CITEMS)
d	rts					; not reached by fall-through; d marks the end for (d-c)

; Comment this out on Asmone for auto length-output
;	printt	"Length of your code:"
;	printv	((b-a)+(d-c))
;len	dc.l	((b-a)+(d-c))

**********************************************************************
*
*  Put your tables here
*
**********************************************************************

	section	yourbss,bss

	ds.b	256				; slack so _initcode can round codetable down to a 256-byte boundary
codetable	ds.b	CITEMS*(NUMCIRCLES+1)	; one CITEMS-byte slot per radius 0..NUMCIRCLES