*********************************************************************
*
* Your init code comes here.
*
*********************************************************************

;****************************************************************************
;**
;*       Speed-optimized chunky circledraw routine, A
;*
;*  This is one of my contributions to #amycoders circledrawing-compo. This
;*  routine will participate in speed-optimizing competition. This code works
;*  with 68020+ processors, with any cache setup.
;*
;*  This is a simple bresenham circle algorithm implementation with
;*  optimizing drawcode generator for points of all circle sizes.
;*
;*  Code size without rts: 448 bytes. ~520K buffer
;*  Average time (dec): very fast (030@59.220MHz)
;*
;*  Coded by Harry "Piru" Sintonen <sintonen@jyu.fi>.
;*  This code is copyright C 1997 by Harry Sintonen, and was written 24th
;*  september & 11th october 1997.
;*
;****************************************************************************
;*

	CNOP	0,4
_initcode
a

MAXRAD	EQU	128			; largest radius a table slot is sized for
CITEMS	EQU	(MAXRAD-0)*8*4		; bytes per slot: (maxrad-minrad)*8*sizeof(long)
NUMCIRCLES	EQU	128		; routines are generated for radii 0..NUMCIRCLES

; One-time setup: patch the code table address into _circle, generate the
; draw routine for every radius, then flush the CPU caches.
; Leaves through CacheClearU, whose rts returns to our caller.

	move.l	#codetable,d0
	clr.b	d0			; round down to a 256-byte boundary
	movea.l	d0,a0			; a0 = slot for radius 0
	move.l	d0,codetablep		; becomes the operand of the lea in _circle
	moveq	#0,d7			; d7 = current radius

.nextrad	move.l	a0,-(sp)		; makecircle trashes both a0 and d7
	move.l	d7,-(sp)
	move.l	d7,d2			; d2 = radius argument
	bsr	makecircle
	move.l	(sp)+,d7
	movea.l	(sp)+,a0
	addq.l	#1,d7
	lea	(CITEMS,a0),a0		; advance to the next radius' slot
	cmp.w	#NUMCIRCLES,d7
	bls.b	.nextrad		; inclusive: radius NUMCIRCLES is generated too

	movea.l	(4).w,a6		; long at address 4 (ExecBase on AmigaOS)
	jmp	(-636,a6)		; CacheClearU (-$27C): generated code must be visible


	CNOP	0,4
makecircle

; Build the draw routine for one radius, in place, inside its table slot.
;
;  IN: d2.l = radius
;      a0   = start of this radius' slot in the code table
; OUT: slot holds a run of "move.b/.w/.l d3,(d16,a0)" stores ending in rts
;      d0-d7/a0-a1 trashed
;
; Stages:
;   1. Bresenham walk; every step stores the eight symmetric pixel offsets
;      (dy*256+dx, signed longs) behind a leading count long.
;   2. isort  - sort the offsets ascending.
;   3. strip  - drop duplicate offsets.
;   4. Emit one store per offset, merging 2 or 4 consecutive offsets into a
;      single move.w / move.l. The code overwrites the table it is read from;
;      the write pointer (a1) always stays behind the read pointer (a0).
;
; NOTE(review): only the low 16 bits of each offset are emitted as the
; displacement, so +32768 (radius 128, rows at dy=+128) appears to wrap to
; -32768 - confirm whether radius 128 is ever drawn.

	tst.l	d2
	bne.b	.not_zerop

	; radius 0: a single "move.b d3,(0,a0)" followed by rts
	move.w	(coda,pc),(a0)+		; opcode word of the byte-store template
	move.l	#$00004E75,(a0)+	; displacement 0, then rts
	rts
.not_zerop
	move.l	d2,d5			; y = r (full 32 bits are used below)
	add.l	d2,d2
	subq.l	#3,d2
	neg.l	d2			; d = 3-(2*r)
	moveq	#0,d4			; x = 0

	move.l	a0,a1			; a1 -> offset count
	clr.l	(a0)+			; count = 0, a0 -> first offset

.loop	addq.l	#8,(a1)			; eight offsets are added per step

	move.l	d5,d6
	lsl.l	#8,d6
	add.l	d4,d6
	move.l	d6,(a0)+		; +y*256 + x
	move.l	d5,d6
	lsl.l	#8,d6
	neg.l	d6
	add.l	d4,d6
	move.l	d6,(a0)+		; -y*256 + x
	move.l	d5,d6
	lsl.l	#8,d6
	sub.l	d4,d6
	move.l	d6,(a0)+		; +y*256 - x
	move.l	d5,d6
	lsl.l	#8,d6
	neg.l	d6
	sub.l	d4,d6
	move.l	d6,(a0)+		; -y*256 - x

	move.l	d4,d6
	lsl.l	#8,d6
	add.l	d5,d6
	move.l	d6,(a0)+		; +x*256 + y
	move.l	d4,d6
	lsl.l	#8,d6
	neg.l	d6
	add.l	d5,d6
	move.l	d6,(a0)+		; -x*256 + y
	move.l	d4,d6
	lsl.l	#8,d6
	sub.l	d5,d6
	move.l	d6,(a0)+		; +x*256 - y
	move.l	d4,d6
	lsl.l	#8,d6
	neg.l	d6
	sub.l	d5,d6
	move.l	d6,(a0)+		; -x*256 - y

	tst.l	d2
	bpl.b	.skipa

	move.l	d4,d6
	lsl.l	#2,d6
	addq.l	#6,d6
	add.l	d6,d2			; d<0:  d=d+(4*x)+6
	bra.b	.skipb
.skipa
	move.l	d4,d6
	sub.l	d5,d6
	add.l	d6,d6
	addq.l	#5,d6
	add.l	d6,d6
	add.l	d6,d2			; d>=0: d=d+4*(x-y)+10
	subq.l	#1,d5			;       y=y-1

.skipb	addq.l	#1,d4			; x=x+1
	cmp.l	d4,d5
	bhs	.loop			; continue while y >= x

	; Guard longs behind the table: strip compares the last entry with the
	; long that follows it, and the code generator below reads up to three
	; longs ahead of the current entry. The values are presumably chosen
	; so that they never look like a real offset.
	move.l	#$0BADF00D,(a0)+
	move.l	#$0CAEF00D,(a0)+
	move.l	#$0DAFF00D,(a0)+
	move.l	#$0EACF00D,(a0)+

	; offset table done... sort it

	move.l	a1,a0			; a1 = ptr to count
	move.l	(a0)+,d0		; d0 = n, a0 = first offset
	bsr	isort

	; offset table sorted now. strip clones (updates the count).

	move.l	a1,a0
	bsr	strip

	; offset table sorted & stripped now. generate the code out of it,
	; including the move.w & move.l optimizations.

	movem.l	(coda,pc),d5-d7		; the three template instructions
	swap	d5			; d5.w = opcode of move.b d3,(d16,a0)
	swap	d6			; d6.w = opcode of move.w d3,(d16,a0)
	swap	d7			; d7.w = opcode of move.l d3,(d16,a0)

	move.l	a1,a0
	move.l	(a0)+,d4		; d4 = offsets left; sets the flags for .next
.next
	ble.b	.alldone		; flags come from the load above or a subq below
	movem.l	(a0),d0-d3		; next four offsets (movem leaves flags alone)
	subq.l	#3,d3
	cmp.l	d0,d3			; strictly ascending, so o[3]==o[0]+3 means
	beq.b	.put_long		; four consecutive pixels
	subq.l	#1,d1
	cmp.l	d0,d1			; o[1]==o[0]+1: two consecutive pixels
	beq.b	.put_word

.put_byte	move.w	d5,(a1)+
	move.w	d0,(a1)+		; displacement = low word of the offset
	addq.l	#4,a0
	subq.l	#1,d4
	bra.b	.next

.put_long	move.w	d7,(a1)+
	move.w	d0,(a1)+
	lea	(16,a0),a0
	subq.l	#4,d4
	bra.b	.next

.put_word	move.w	d6,(a1)+
	move.w	d0,(a1)+
	addq.l	#8,a0
	subq.l	#2,d4
	bra.b	.next

.alldone	move.w	#$4E75,(a1)		; terminate the generated routine with rts
	rts


; Template instructions for the code generator in makecircle. They are read
; as data: only the first (opcode) word of each 4-byte instruction is copied
; into the generated code, and the displacement word is replaced by the
; pixel offset. The non-zero $100 displacement presumably keeps the
; assembler from shortening these to the 2-byte (a0) form - TODO confirm.
coda	move.b	d3,($100,a0)		put one pixel
	move.w	d3,($100,a0)		put two pixels
	move.l	d3,($100,a0)		put four pixels


	CNOP	0,4
; Insertion sort of signed longwords, ascending, in place.
;  IN: a0 = array, d0.l = number of entries (n)
; OUT: array sorted; d1-d3 trashed, d0/a0 preserved
;
; Entries 0..i-1 are already sorted when entry i is inserted, so the inner
; loop stops as soon as the element has reached its place.
isort	moveq	#1,d1			; d1 = i, index of the element to insert
	cmp.l	d1,d0
	bls.b	.done			; n <= 1: nothing to sort, touch nothing
.oloop	move.l	d1,d2			; d2 = j, walks down from i
.iloop	move.l	-4(a0,d2.l*4),d3	; d3 = a[j-1]
	cmp.l	0(a0,d2.l*4),d3
	ble.b	.placed			; a[j-1] <= a[j]: element is in place
	move.l	0(a0,d2.l*4),-4(a0,d2.l*4)	; swap a[j-1] and a[j]
	move.l	d3,0(a0,d2.l*4)
	subq.l	#1,d2
	bne.b	.iloop			; stop at j = 0 (front of the array)
.placed	addq.l	#1,d1
	cmp.l	d1,d0
	bhi.b	.oloop			; while i < n
.done	rts


	CNOP	0,4
; Collapse runs of equal adjacent entries in an offset table, in place.
;  IN: a0 = table: a count long followed by that many longs. The long right
;           after the last entry is read as well, so it must be present.
; OUT: table compacted and its count long updated
;      d0 = new count, d1 trashed, a0-a1 preserved
strip	movem.l	a0-a1,-(sp)
	move.l	(a0)+,d0		; d0 = input entries still to consume
	movea.l	a0,a1			; a1 = output cursor (never passes a0)

.take	move.l	(a0)+,d1		; d1 = value to keep
	bra.b	.peek
.dup	subq.l	#1,d0			; a duplicate has been consumed
	beq.b	.flush			; input ran out while skipping duplicates
	addq.l	#4,a0			; step over the duplicate
.peek	cmp.l	(a0),d1
	beq.b	.dup
	move.l	d1,(a1)+		; next value differs: emit the kept one
	subq.l	#1,d0
	bne.b	.take
	bra.b	.finish

.flush	move.l	d1,(a1)+

.finish	move.l	a1,d0
	movea.l	(sp),a0			; saved a0 = address of the count long
	sub.l	a0,d0
	subq.l	#4,d0			; bytes written behind the count long
	lsr.l	#2,d0			; -> number of entries kept
	move.l	d0,(a0)			; store the new count

	movem.l	(sp)+,a0-a1
	rts


; End marker of the init code: `b` pairs with the `a` label at _initcode in
; the (commented-out) length expression (b-a)+(d-c). The rts is not reached
; by fall-through, since the routine above ends in its own rts.
b
	rts
		
	
*********************************************************************
*
* Your circle routine.
* Every time you are given: A0.L : Chunky Screen (256x256x8)
*
* d0.l/d1.l  x/y
* d2.l       radius
* d3.b       color
*
* other registers are in undefined state !!
* you may trash all regs except a7
*
*********************************************************************

	CNOP	0,8
_circle
c

; Draw one circle by jumping into the routine generated for its radius.
;
;  IN: d0.l=xc, d1.l=yc, d2.l=radius, d3.b=color
;      a0=chunky buffer (256x256x8)
; OUT: circle :) d0-d4/a0-a1 trashed
;
; The color byte is replicated into all four bytes of d3 so the generated
; move.w/move.l stores write 2 or 4 pixels at once. The generated code ends
; in rts, which returns straight to our caller.
; The color setup and the address arithmetic are interleaved; keep the
; instruction order as it is.

	move.l	d3,d4			000c
	add.l	d2,d2			d2=r*2
	lsl.l	#8,d3			00c0
	lsl.l	#8,d1			offs=yc*256
	or.b	d4,d3			00cc
; NOTE: after the next shift d2 = r*512, i.e. r*CITEMS/8 - the "/16 (=256)"
; in the trailing comment is off by a factor of two. The *8 index scale in
; the jmp below brings it to r*CITEMS.
	lsl.l	#8,d2			d2=r*CITEMS/16 (=256)
	move.l	d3,d4			00cc
	add.l	d0,d1			offs=yc*256+xc
	swap	d3			cc00
; Self-modified: the 32-bit operand of this lea is overwritten by the init
; code with the aligned code table address. $BADF00D is only a placeholder
; (presumably picked so the assembler has to use the long absolute form).
	lea	$BADF00D,a1
; codetablep = address of the lea's operand; must stay directly after it.
codetablep	EQU	*-4
	move.w	d4,d3			cccc
	add.l	d1,a0			a0=circle center
	jmp	0(a1,d2.l*8)		draw circle (codetable+r*CITEMS)
; End marker for the length expression (d-c); the rts below is never
; reached because the jmp above is unconditional.
d
	rts



;	Uncomment the lines below on AsmOne for automatic length output

;	printt	"Length of your code:"
;	printv	((b-a)+(d-c))

;len	dc.l	((b-a)+(d-c))


**********************************************************************
*
* Put your tables here
*
**********************************************************************
	section yourbss,bss

; Slack ahead of the table: the init code rounds the table address DOWN to a
; 256-byte boundary (clr.b on its low byte), which can move the start back
; by up to 255 bytes.
	ds.b	256
; One CITEMS-byte slot of generated code per radius 0..NUMCIRCLES.
codetable	ds.b	CITEMS*(NUMCIRCLES+1)

.