URI: 
       tdraw.c - plan9port - [fork] Plan 9 from user space
  HTML git clone git://src.adamsgaard.dk/plan9port
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
       tdraw.c (56850B)
       ---
            1 #include <u.h>
            2 #include <libc.h>
            3 #include <draw.h>
            4 #include <memdraw.h>
            5 
            /* When nonzero, the drawing code prints trace output with print(). */
            int drawdebug;
            /* Set to 1 by mktables() once the conversion tables have been filled in. */
            static int        tablesbuilt;
            8 
            /*
             * Grey value of an (r,g,b) triple using the NTSC weights
             * .299r+.587g+.114b in 19-bit fixed point; a perfect
             * approximation when 0 ≤ r,g,b < 256.
             */
            #define RGB2K(r,g,b)        ((156763*(r)+307758*(g)+59769*(b))>>19)

            /*
             * For 16-bit values, x / 255 == (t = x+1, (t+(t>>8)) >> 8).
             * We add another 127 to round to the nearest value rather
             * than truncate.
             *
             * CALCxy does x bytewise calculations on y input images (x=1,4; y=1,2).
             * CALC2x does two parallel 16-bit calculations on y input images (y=1,2).
             *
             * The tmp arguments are scratch lvalues that the macros assign to.
             * Every value argument is parenthesized inside the expansions, so
             * callers may pass arbitrary expressions (e.g. x|y).  CALC41 and
             * CALC42 expand their alpha and pixel arguments twice, so those
             * arguments must be free of side effects.
             */
            #define CALC11(a, v, tmp) \
                    (tmp=(a)*(v)+128, (tmp+(tmp>>8))>>8)

            #define CALC12(a1, v1, a2, v2, tmp) \
                    (tmp=(a1)*(v1)+(a2)*(v2)+128, (tmp+(tmp>>8))>>8)

            /* selects bytes 0 and 2 of a 32-bit word: the two lanes used by CALC2x */
            #define MASK 0xFF00FF

            #define CALC21(a, vvuu, tmp) \
                    (tmp=(a)*(vvuu)+0x00800080, ((tmp+((tmp>>8)&MASK))>>8)&MASK)

            /* scale all four bytes of rgba by a/255: even bytes and odd bytes in two passes */
            #define CALC41(a, rgba, tmp1, tmp2) \
                    (CALC21(a, (rgba) & MASK, tmp1) | \
                     (CALC21(a, ((rgba)>>8)&MASK, tmp2)<<8))

            #define CALC22(a1, vvuu1, a2, vvuu2, tmp) \
                    (tmp=(a1)*(vvuu1)+(a2)*(vvuu2)+0x00800080, ((tmp+((tmp>>8)&MASK))>>8)&MASK)

            /* (a1*rgba1 + a2*rgba2)/255 on all four bytes, two lanes per pass */
            #define CALC42(a1, rgba1, a2, rgba2, tmp1, tmp2) \
                    (CALC22(a1, (rgba1) & MASK, a2, (rgba2) & MASK, tmp1) | \
                     (CALC22(a1, ((rgba1)>>8) & MASK, a2, ((rgba2)>>8) & MASK, tmp2)<<8))
           41 
           42 static void mktables(void);
           43 typedef int Subdraw(Memdrawparam*);
           44 static Subdraw chardraw, alphadraw, memoptdraw;
           45 
           46 static Memimage*        memones;
           47 static Memimage*        memzeros;
           48 Memimage *memwhite;
           49 Memimage *memblack;
           50 Memimage *memtransparent;
           51 Memimage *memopaque;
           52 
           53 int        __ifmt(Fmt*);
           54 
           55 void
           56 memimageinit(void)
           57 {
           58         static int didinit = 0;
           59 
           60         if(didinit)
           61                 return;
           62 
           63         didinit = 1;
           64 
           65         mktables();
           66         _memmkcmap();
           67 
           68         fmtinstall('R', Rfmt);
           69         fmtinstall('P', Pfmt);
           70         fmtinstall('b', __ifmt);
           71 
           72         memones = allocmemimage(Rect(0,0,1,1), GREY1);
           73         memones->flags |= Frepl;
           74         memones->clipr = Rect(-0x3FFFFFF, -0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF);
           75         *byteaddr(memones, ZP) = ~0;
           76 
           77         memzeros = allocmemimage(Rect(0,0,1,1), GREY1);
           78         memzeros->flags |= Frepl;
           79         memzeros->clipr = Rect(-0x3FFFFFF, -0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF);
           80         *byteaddr(memzeros, ZP) = 0;
           81 
           82         if(memones == nil || memzeros == nil)
           83                 assert(0 /*cannot initialize memimage library */);        /* RSC BUG */
           84 
           85         memwhite = memones;
           86         memblack = memzeros;
           87         memopaque = memones;
           88         memtransparent = memzeros;
           89 }
           90 
           91 u32int _imgtorgba(Memimage*, u32int);
           92 u32int _rgbatoimg(Memimage*, u32int);
           93 u32int _pixelbits(Memimage*, Point);
           94 
           95 #define DBG if(drawdebug)
           96 static Memdrawparam par;
           97 
           98 Memdrawparam*
           99 _memimagedrawsetup(Memimage *dst, Rectangle r, Memimage *src, Point p0, Memimage *mask, Point p1, int op)
          100 {
          101         if(mask == nil)
          102                 mask = memopaque;
          103 
          104 DBG        print("memimagedraw %p/%luX %R @ %p %p/%luX %P %p/%luX %P... ", dst, dst->chan, r, dst->data->bdata, src, src->chan, p0, mask, mask->chan, p1);
          105 
          106         if(drawclip(dst, &r, src, &p0, mask, &p1, &par.sr, &par.mr) == 0){
          107 /*                if(drawdebug) */
          108 /*                        iprint("empty clipped rectangle\n"); */
          109                 return nil;
          110         }
          111 
          112         if(op < Clear || op > SoverD){
          113 /*                if(drawdebug) */
          114 /*                        iprint("op out of range: %d\n", op); */
          115                 return nil;
          116         }
          117 
          118         par.op = op;
          119         par.dst = dst;
          120         par.r = r;
          121         par.src = src;
          122         /* par.sr set by drawclip */
          123         par.mask = mask;
          124         /* par.mr set by drawclip */
          125 
          126         par.state = 0;
          127         if(src->flags&Frepl){
          128                 par.state |= Replsrc;
          129                 if(Dx(src->r)==1 && Dy(src->r)==1){
          130                         par.sval = pixelbits(src, src->r.min);
          131                         par.state |= Simplesrc;
          132                         par.srgba = _imgtorgba(src, par.sval);
          133                         par.sdval = _rgbatoimg(dst, par.srgba);
          134                         if((par.srgba&0xFF) == 0 && (op&DoutS)){
          135 /*                                if (drawdebug) iprint("fill with transparent source\n"); */
          136                                 return nil;        /* no-op successfully handled */
          137                         }
          138                         if((par.srgba&0xFF) == 0xFF)
          139                                 par.state |= Fullsrc;
          140                 }
          141         }
          142 
          143         if(mask->flags & Frepl){
          144                 par.state |= Replmask;
          145                 if(Dx(mask->r)==1 && Dy(mask->r)==1){
          146                         par.mval = pixelbits(mask, mask->r.min);
          147                         if(par.mval == 0 && (op&DoutS)){
          148 /*                                if(drawdebug) iprint("fill with zero mask\n"); */
          149                                 return nil;        /* no-op successfully handled */
          150                         }
          151                         par.state |= Simplemask;
          152                         if(par.mval == ~0)
          153                                 par.state |= Fullmask;
          154                         par.mrgba = _imgtorgba(mask, par.mval);
          155                 }
          156         }
          157 
          158 /*        if(drawdebug) */
          159 /*                iprint("dr %R sr %R mr %R...", r, par.sr, par.mr); */
          160 DBG print("draw dr %R sr %R mr %R %lux\n", r, par.sr, par.mr, par.state);
          161 
          162         return &par;
          163 }
          164 
          165 void
          166 _memimagedraw(Memdrawparam *par)
          167 {
          168         /*
          169          * Now that we've clipped the parameters down to be consistent, we
          170          * simply try sub-drawing routines in order until we find one that was able
          171          * to handle us.  If the sub-drawing routine returns zero, it means it was
          172          * unable to satisfy the request, so we do not return.
          173          */
          174 
          175         /*
          176          * Hardware support.  Each video driver provides this function,
          177          * which checks to see if there is anything it can help with.
          178          * There could be an if around this checking to see if dst is in video memory.
          179          */
          180 DBG print("test hwdraw\n");
          181         if(hwdraw(par)){
          182 /*if(drawdebug) iprint("hw handled\n"); */
          183 DBG print("hwdraw handled\n");
          184                 return;
          185         }
          186         /*
          187          * Optimizations using memmove and memset.
          188          */
          189 DBG print("test memoptdraw\n");
          190         if(memoptdraw(par)){
          191 /*if(drawdebug) iprint("memopt handled\n"); */
          192 DBG print("memopt handled\n");
          193                 return;
          194         }
          195 
          196         /*
          197          * Character drawing.
          198          * Solid source color being painted through a boolean mask onto a high res image.
          199          */
          200 DBG print("test chardraw\n");
          201         if(chardraw(par)){
          202 /*if(drawdebug) iprint("chardraw handled\n"); */
          203 DBG print("chardraw handled\n");
          204                 return;
          205         }
          206 
          207         /*
          208          * General calculation-laden case that does alpha for each pixel.
          209          */
          210 DBG print("do alphadraw\n");
          211         alphadraw(par);
          212 /*if(drawdebug) iprint("alphadraw handled\n"); */
          213 DBG print("alphadraw handled\n");
          214 }
          215 #undef DBG
          216 
          217 /*
          218  * Clip the destination rectangle further based on the properties of the
          219  * source and mask rectangles.  Once the destination rectangle is properly
          220  * clipped, adjust the source and mask rectangles to be the same size.
          221  * Then if source or mask is replicated, move its clipped rectangle
          222  * so that its minimum point falls within the repl rectangle.
          223  *
          224  * Return zero if the final rectangle is null.
          225  */
          226 int
          227 drawclip(Memimage *dst, Rectangle *r, Memimage *src, Point *p0, Memimage *mask, Point *p1, Rectangle *sr, Rectangle *mr)
          228 {
          229         Point rmin, delta;
          230         int splitcoords;
          231         Rectangle omr;
          232 
          233         if(r->min.x>=r->max.x || r->min.y>=r->max.y)
          234                 return 0;
          235         splitcoords = (p0->x!=p1->x) || (p0->y!=p1->y);
          236         /* clip to destination */
          237         rmin = r->min;
          238         if(!rectclip(r, dst->r) || !rectclip(r, dst->clipr))
          239                 return 0;
          240         /* move mask point */
          241         p1->x += r->min.x-rmin.x;
          242         p1->y += r->min.y-rmin.y;
          243         /* move source point */
          244         p0->x += r->min.x-rmin.x;
          245         p0->y += r->min.y-rmin.y;
          246         /* map destination rectangle into source */
          247         sr->min = *p0;
          248         sr->max.x = p0->x+Dx(*r);
          249         sr->max.y = p0->y+Dy(*r);
          250         /* sr is r in source coordinates; clip to source */
          251         if(!(src->flags&Frepl) && !rectclip(sr, src->r))
          252                 return 0;
          253         if(!rectclip(sr, src->clipr))
          254                 return 0;
          255         /* compute and clip rectangle in mask */
          256         if(splitcoords){
          257                 /* move mask point with source */
          258                 p1->x += sr->min.x-p0->x;
          259                 p1->y += sr->min.y-p0->y;
          260                 mr->min = *p1;
          261                 mr->max.x = p1->x+Dx(*sr);
          262                 mr->max.y = p1->y+Dy(*sr);
          263                 omr = *mr;
          264                 /* mr is now rectangle in mask; clip it */
          265                 if(!(mask->flags&Frepl) && !rectclip(mr, mask->r))
          266                         return 0;
          267                 if(!rectclip(mr, mask->clipr))
          268                         return 0;
          269                 /* reflect any clips back to source */
          270                 sr->min.x += mr->min.x-omr.min.x;
          271                 sr->min.y += mr->min.y-omr.min.y;
          272                 sr->max.x += mr->max.x-omr.max.x;
          273                 sr->max.y += mr->max.y-omr.max.y;
          274                 *p1 = mr->min;
          275         }else{
          276                 if(!(mask->flags&Frepl) && !rectclip(sr, mask->r))
          277                         return 0;
          278                 if(!rectclip(sr, mask->clipr))
          279                         return 0;
          280                 *p1 = sr->min;
          281         }
          282 
          283         /* move source clipping back to destination */
          284         delta.x = r->min.x - p0->x;
          285         delta.y = r->min.y - p0->y;
          286         r->min.x = sr->min.x + delta.x;
          287         r->min.y = sr->min.y + delta.y;
          288         r->max.x = sr->max.x + delta.x;
          289         r->max.y = sr->max.y + delta.y;
          290 
          291         /* move source rectangle so sr->min is in src->r */
          292         if(src->flags&Frepl) {
          293                 delta.x = drawreplxy(src->r.min.x, src->r.max.x, sr->min.x) - sr->min.x;
          294                 delta.y = drawreplxy(src->r.min.y, src->r.max.y, sr->min.y) - sr->min.y;
          295                 sr->min.x += delta.x;
          296                 sr->min.y += delta.y;
          297                 sr->max.x += delta.x;
          298                 sr->max.y += delta.y;
          299         }
          300         *p0 = sr->min;
          301 
          302         /* move mask point so it is in mask->r */
          303         *p1 = drawrepl(mask->r, *p1);
          304         mr->min = *p1;
          305         mr->max.x = p1->x+Dx(*sr);
          306         mr->max.y = p1->y+Dy(*sr);
          307 
          308         assert(Dx(*sr) == Dx(*mr) && Dx(*mr) == Dx(*r));
          309         assert(Dy(*sr) == Dy(*mr) && Dy(*mr) == Dy(*r));
          310         assert(ptinrect(*p0, src->r));
          311         assert(ptinrect(*p1, mask->r));
          312         assert(ptinrect(r->min, dst->r));
          313 
          314         return 1;
          315 }
          316 
          317 /*
          318  * Conversion tables.
          319  */
          320 static uchar replbit[1+8][256];                /* replbit[x][y] is the replication of the x-bit quantity y to 8-bit depth */
          321 static uchar conv18[256][8];                /* conv18[x][y] is the yth pixel in the depth-1 pixel x */
          322 static uchar conv28[256][4];                /* ... */
          323 static uchar conv48[256][2];
          324 
          /*
           * bitmap of how to replicate n bits to fill 8, for 1 ≤ n ≤ 8.
           * the X's are where to put the bottom (ones) bit of the n-bit pattern.
           * only the top 8 bits of the result are actually used.
           * (the lower 8 bits are needed to get bits in the right place
           * when n is not a divisor of 8.)
           *
           * Each row is a 16-bit multiplier spelled out bit by bit, most
           * significant bit first: X is a one bit, _ is a zero bit.
           * mktables computes (v*replmul[n])>>8 to replicate the n-bit value v.
           * The table is read-only, hence const.
           *
           * Should check to see if it's easier to just refer to replmul than
           * use the precomputed values in replbit.  On PCs it may well
           * be; on machines with slow multiply instructions it probably isn't.
           */
          #define a ((((((((((((((((0
          #define X *2+1)
          #define _ *2)
          static const int replmul[1+8] = {
                  0,
                  a X X X X X X X X X X X X X X X X,        /* 0xFFFF */
                  a _ X _ X _ X _ X _ X _ X _ X _ X,        /* 0x5555 */
                  a _ _ X _ _ X _ _ X _ _ X _ _ X _,        /* 0x2492 */
                  a _ _ _ X _ _ _ X _ _ _ X _ _ _ X,        /* 0x1111 */
                  a _ _ _ _ X _ _ _ _ X _ _ _ _ X _,        /* 0x0842 */
                  a _ _ _ _ _ X _ _ _ _ _ X _ _ _ _,        /* 0x0410 */
                  a _ _ _ _ _ _ X _ _ _ _ _ _ X _ _,        /* 0x0204 */
                  a _ _ _ _ _ _ _ X _ _ _ _ _ _ _ X,        /* 0x0101 */
          };
          #undef a
          #undef X
          #undef _
          353 
          354 static void
          355 mktables(void)
          356 {
          357         int i, j, mask, sh, small;
          358 
          359         if(tablesbuilt)
          360                 return;
          361 
          362         fmtinstall('R', Rfmt);
          363         fmtinstall('P', Pfmt);
          364         tablesbuilt = 1;
          365 
          366         /* bit replication up to 8 bits */
          367         for(i=0; i<256; i++){
          368                 for(j=0; j<=8; j++){        /* j <= 8 [sic] */
          369                         small = i & ((1<<j)-1);
          370                         replbit[j][i] = (small*replmul[j])>>8;
          371                 }
          372         }
          373 
          374         /* bit unpacking up to 8 bits, only powers of 2 */
          375         for(i=0; i<256; i++){
          376                 for(j=0, sh=7, mask=1; j<8; j++, sh--)
          377                         conv18[i][j] = replbit[1][(i>>sh)&mask];
          378 
          379                 for(j=0, sh=6, mask=3; j<4; j++, sh-=2)
          380                         conv28[i][j] = replbit[2][(i>>sh)&mask];
          381 
          382                 for(j=0, sh=4, mask=15; j<2; j++, sh-=4)
          383                         conv48[i][j] = replbit[4][(i>>sh)&mask];
          384         }
          385 }
          386 
          387 static uchar ones = 0xff;
          388 
          389 /*
          390  * General alpha drawing case.  Can handle anything.
          391  */
          392 typedef struct        Buffer        Buffer;
          393 struct Buffer {
          394         /* used by most routines */
          395         uchar        *red;
          396         uchar        *grn;
          397         uchar        *blu;
          398         uchar        *alpha;
          399         uchar        *grey;
          400         u32int        *rgba;
          401         int        delta;        /* number of bytes to add to pointer to get next pixel to the right */
          402 
          403         /* used by boolcalc* for mask data */
          404         uchar        *m;                /* ptr to mask data r.min byte; like p->bytermin */
          405         int                mskip;        /* no. of left bits to skip in *m */
          406         uchar        *bm;                /* ptr to mask data img->r.min byte; like p->bytey0s */
          407         int                bmskip;        /* no. of left bits to skip in *bm */
          408         uchar        *em;                /* ptr to mask data img->r.max.x byte; like p->bytey0e */
          409         int                emskip;        /* no. of right bits to skip in *em */
          410 };
          411 
          412 typedef struct        Param        Param;
          413 typedef Buffer        Readfn(Param*, uchar*, int);
          414 typedef void        Writefn(Param*, uchar*, Buffer);
          415 typedef Buffer        Calcfn(Buffer, Buffer, Buffer, int, int, int);
          416 
          417 enum {
          418         MAXBCACHE = 16
          419 };
          420 
          421 /* giant rathole to customize functions with */
          422 struct Param {
          423         Readfn        *replcall;
          424         Readfn        *greymaskcall;
          425         Readfn        *convreadcall;
          426         Writefn        *convwritecall;
          427 
          428         Memimage *img;
          429         Rectangle        r;
          430         int        dx;        /* of r */
          431         int        needbuf;
          432         int        convgrey;
          433         int        alphaonly;
          434 
          435         uchar        *bytey0s;                /* byteaddr(Pt(img->r.min.x, img->r.min.y)) */
          436         uchar        *bytermin;        /* byteaddr(Pt(r.min.x, img->r.min.y)) */
          437         uchar        *bytey0e;                /* byteaddr(Pt(img->r.max.x, img->r.min.y)) */
          438         int                bwidth;
          439 
          440         int        replcache;        /* if set, cache buffers */
          441         Buffer        bcache[MAXBCACHE];
          442         u32int        bfilled;
          443         uchar        *bufbase;
          444         int        bufoff;
          445         int        bufdelta;
          446 
          447         int        dir;
          448 
          449         int        convbufoff;
          450         uchar        *convbuf;
          451         Param        *convdpar;
          452         int        convdx;
          453 };
          454 
          455 static uchar *drawbuf;
          456 static int        ndrawbuf;
          457 static int        mdrawbuf;
          458 static Param spar, mpar, dpar;        /* easier on the stacks */
          459 static Readfn        greymaskread, replread, readptr;
          460 static Writefn        nullwrite;
          461 static Calcfn        alphacalc0, alphacalc14, alphacalc2810, alphacalc3679, alphacalc5, alphacalc11, alphacalcS;
          462 static Calcfn        boolcalc14, boolcalc236789, boolcalc1011;
          463 
          464 static Readfn*        readfn(Memimage*);
          465 static Readfn*        readalphafn(Memimage*);
          466 static Writefn*        writefn(Memimage*);
          467 
          468 static Calcfn*        boolcopyfn(Memimage*, Memimage*);
          469 static Readfn*        convfn(Memimage*, Param*, Memimage*, Param*);
          470 
          471 static Calcfn *alphacalc[Ncomp] =
          472 {
          473         alphacalc0,                /* Clear */
          474         alphacalc14,                /* DoutS */
          475         alphacalc2810,                /* SoutD */
          476         alphacalc3679,                /* DxorS */
          477         alphacalc14,                /* DinS */
          478         alphacalc5,                /* D */
          479         alphacalc3679,                /* DatopS */
          480         alphacalc3679,                /* DoverS */
          481         alphacalc2810,                /* SinD */
          482         alphacalc3679,                /* SatopD */
          483         alphacalc2810,                /* S */
          484         alphacalc11,                /* SoverD */
          485 };
          486 
          487 static Calcfn *boolcalc[Ncomp] =
          488 {
          489         alphacalc0,                /* Clear */
          490         boolcalc14,                /* DoutS */
          491         boolcalc236789,                /* SoutD */
          492         boolcalc236789,                /* DxorS */
          493         boolcalc14,                /* DinS */
          494         alphacalc5,                /* D */
          495         boolcalc236789,                /* DatopS */
          496         boolcalc236789,                /* DoverS */
          497         boolcalc236789,                /* SinD */
          498         boolcalc236789,                /* SatopD */
          499         boolcalc1011,                /* S */
          500         boolcalc1011,                /* SoverD */
          501 };
          502 
          503 static int
          504 allocdrawbuf(void)
          505 {
          506         uchar *p;
          507 
          508         if(ndrawbuf > mdrawbuf){
          509                 p = realloc(drawbuf, ndrawbuf);
          510                 if(p == nil){
          511                         werrstr("memimagedraw out of memory");
          512                         return -1;
          513                 }
          514                 drawbuf = p;
          515                 mdrawbuf = ndrawbuf;
          516         }
          517         return 0;
          518 }
          519 
          520 static void
          521 getparam(Param *p, Memimage *img, Rectangle r, int convgrey, int needbuf)
          522 {
          523         int nbuf;
          524 
          525         memset(p, 0, sizeof *p);
          526 
          527         p->img = img;
          528         p->r = r;
          529         p->dx = Dx(r);
          530         p->needbuf = needbuf;
          531         p->convgrey = convgrey;
          532 
          533         assert(img->r.min.x <= r.min.x && r.min.x < img->r.max.x);
          534 
          535         p->bytey0s = byteaddr(img, Pt(img->r.min.x, img->r.min.y));
          536         p->bytermin = byteaddr(img, Pt(r.min.x, img->r.min.y));
          537         p->bytey0e = byteaddr(img, Pt(img->r.max.x, img->r.min.y));
          538         p->bwidth = sizeof(u32int)*img->width;
          539 
          540         assert(p->bytey0s <= p->bytermin && p->bytermin <= p->bytey0e);
          541 
          542         if(p->r.min.x == p->img->r.min.x)
          543                 assert(p->bytermin == p->bytey0s);
          544 
          545         nbuf = 1;
          546         if((img->flags&Frepl) && Dy(img->r) <= MAXBCACHE && Dy(img->r) < Dy(r)){
          547                 p->replcache = 1;
          548                 nbuf = Dy(img->r);
          549         }
          550         p->bufdelta = 4*p->dx;
          551         p->bufoff = ndrawbuf;
          552         ndrawbuf += p->bufdelta*nbuf;
          553 }
          554 
          555 static void
          556 clipy(Memimage *img, int *y)
          557 {
          558         int dy;
          559 
          560         dy = Dy(img->r);
          561         if(*y == dy)
          562                 *y = 0;
          563         else if(*y == -1)
          564                 *y = dy-1;
          565         assert(0 <= *y && *y < dy);
          566 }
          567 
          568 static void
          569 dumpbuf(char *s, Buffer b, int n)
          570 {
          571         int i;
          572         uchar *p;
          573 
          574         print("%s", s);
          575         for(i=0; i<n; i++){
          576                 print(" ");
          577                 if(p=b.grey){
          578                         print(" k%.2uX", *p);
          579                         b.grey += b.delta;
          580                 }else{
          581                         if(p=b.red){
          582                                 print(" r%.2uX", *p);
          583                                 b.red += b.delta;
          584                         }
          585                         if(p=b.grn){
          586                                 print(" g%.2uX", *p);
          587                                 b.grn += b.delta;
          588                         }
          589                         if(p=b.blu){
          590                                 print(" b%.2uX", *p);
          591                                 b.blu += b.delta;
          592                         }
          593                 }
          594                 if((p=b.alpha) != &ones){
          595                         print(" α%.2uX", *p);
          596                         b.alpha += b.delta;
          597                 }
          598         }
          599         print("\n");
          600 }
          601 
          602 /*
          603  * For each scan line, we expand the pixels from source, mask, and destination
          604  * into byte-aligned red, green, blue, alpha, and grey channels.  If buffering is not
          605  * needed and the channels were already byte-aligned (grey8, rgb24, rgba32, rgb32),
          606  * the readers need not copy the data: they can simply return pointers to the data.
          607  * If the destination image is grey and the source is not, it is converted using the NTSC
          608  * formula.
          609  *
          610  * Once we have all the channels, we call either rgbcalc or greycalc, depending on
          611  * whether the destination image is color.  This is allowed to overwrite the dst buffer (perhaps
          612  * the actual data, perhaps a copy) with its result.  It should only overwrite the dst buffer
          613  * with the same format (i.e. red bytes with red bytes, etc.)  A new buffer is returned from
          614  * the calculator, and that buffer is passed to a function to write it to the destination.
          615  * If the buffer is already pointing at the destination, the writing function is a no-op.
          616  */
          617 #define DBG if(drawdebug)
          618 static int
          619 alphadraw(Memdrawparam *par)
          620 {
          621         int isgrey, starty, endy, op;
          622         int needbuf, dsty, srcy, masky;
          623         int y, dir, dx, dy;
          624         Buffer bsrc, bdst, bmask;
          625         Readfn *rdsrc, *rdmask, *rddst;
          626         Calcfn *calc;
          627         Writefn *wrdst;
          628         Memimage *src, *mask, *dst;
          629         Rectangle r, sr, mr;
          630 
          631         if(drawdebug)
          632                 print("alphadraw %R\n", par->r);
          633         r = par->r;
          634         dx = Dx(r);
          635         dy = Dy(r);
          636 
          637         ndrawbuf = 0;
          638 
          639         src = par->src;
          640         mask = par->mask;
          641         dst = par->dst;
          642         sr = par->sr;
          643         mr = par->mr;
          644         op = par->op;
          645 
          646         isgrey = dst->flags&Fgrey;
          647 
          648         /*
          649          * Buffering when src and dst are the same bitmap is sufficient but not
          650          * necessary.  There are stronger conditions we could use.  We could
          651          * check to see if the rectangles intersect, and if simply moving in the
          652          * correct y direction can avoid the need to buffer.
          653          */
          654         needbuf = (src->data == dst->data);
          655 
          656         getparam(&spar, src, sr, isgrey, needbuf);
          657         getparam(&dpar, dst, r, isgrey, needbuf);
          658         getparam(&mpar, mask, mr, 0, needbuf);
          659 
          660         dir = (needbuf && byteaddr(dst, r.min) > byteaddr(src, sr.min)) ? -1 : 1;
          661         spar.dir = mpar.dir = dpar.dir = dir;
          662 
          663         /*
          664          * If the mask is purely boolean, we can convert from src to dst format
          665          * when we read src, and then just copy it to dst where the mask tells us to.
          666          * This requires a boolean (1-bit grey) mask and lack of a source alpha channel.
          667          *
          668          * The computation is accomplished by assigning the function pointers as follows:
          669          *        rdsrc - read and convert source into dst format in a buffer
          670          *         rdmask - convert mask to bytes, set pointer to it
          671          *         rddst - fill with pointer to real dst data, but do no reads
          672          *        calc - copy src onto dst when mask says to.
          673          *        wrdst - do nothing
          674          * This is slightly sleazy, since things aren't doing exactly what their names say,
          675          * but it avoids a fair amount of code duplication to make this a case here
          676          * rather than have a separate booldraw.
          677          */
          678 /*if(drawdebug) iprint("flag %lud mchan %lux=?%x dd %d\n", src->flags&Falpha, mask->chan, GREY1, dst->depth); */
          679         if(!(src->flags&Falpha) && mask->chan == GREY1 && dst->depth >= 8 && op == SoverD){
          680 /*if(drawdebug) iprint("boolcopy..."); */
          681                 rdsrc = convfn(dst, &dpar, src, &spar);
          682                 rddst = readptr;
          683                 rdmask = readfn(mask);
          684                 calc = boolcopyfn(dst, mask);
          685                 wrdst = nullwrite;
          686         }else{
          687                 /* usual alphadraw parameter fetching */
          688                 rdsrc = readfn(src);
          689                 rddst = readfn(dst);
          690                 wrdst = writefn(dst);
          691                 calc = alphacalc[op];
          692 
          693                 /*
          694                  * If there is no alpha channel, we'll ask for a grey channel
          695                  * and pretend it is the alpha.
          696                  */
          697                 if(mask->flags&Falpha){
          698                         rdmask = readalphafn(mask);
          699                         mpar.alphaonly = 1;
          700                 }else{
          701                         mpar.greymaskcall = readfn(mask);
          702                         mpar.convgrey = 1;
          703                         rdmask = greymaskread;
          704 
          705                         /*
          706                          * Should really be above, but then boolcopyfns would have
          707                          * to deal with bit alignment, and I haven't written that.
          708                          *
          709                          * This is a common case for things like ellipse drawing.
          710                          * When there's no alpha involved and the mask is boolean,
          711                          * we can avoid all the division and multiplication.
          712                          */
          713                         if(mask->chan == GREY1 && !(src->flags&Falpha))
          714                                 calc = boolcalc[op];
          715                         else if(op == SoverD && !(src->flags&Falpha))
          716                                 calc = alphacalcS;
          717                 }
          718         }
          719 
          720         /*
          721          * If the image has a small enough repl rectangle,
          722          * we can just read each line once and cache them.
          723          */
          724         if(spar.replcache){
          725                 spar.replcall = rdsrc;
          726                 rdsrc = replread;
          727         }
          728         if(mpar.replcache){
          729                 mpar.replcall = rdmask;
          730                 rdmask = replread;
          731         }
          732 
          733         if(allocdrawbuf() < 0)
          734                 return 0;
          735 
          736         /*
          737          * Before we were saving only offsets from drawbuf in the parameter
          738          * structures; now that drawbuf has been grown to accommodate us,
          739          * we can fill in the pointers.
          740          */
          741         spar.bufbase = drawbuf+spar.bufoff;
          742         mpar.bufbase = drawbuf+mpar.bufoff;
          743         dpar.bufbase = drawbuf+dpar.bufoff;
          744         spar.convbuf = drawbuf+spar.convbufoff;
          745 
          746         if(dir == 1){
          747                 starty = 0;
          748                 endy = dy;
          749         }else{
          750                 starty = dy-1;
          751                 endy = -1;
          752         }
          753 
          754         /*
          755          * srcy, masky, and dsty are offsets from the top of their
          756          * respective Rectangles.  they need to be contained within
          757          * the rectangles, so clipy can keep them there without division.
          758           */
          759         srcy = (starty + sr.min.y - src->r.min.y)%Dy(src->r);
          760         masky = (starty + mr.min.y - mask->r.min.y)%Dy(mask->r);
          761         dsty = starty + r.min.y - dst->r.min.y;
          762 
          763         assert(0 <= srcy && srcy < Dy(src->r));
          764         assert(0 <= masky && masky < Dy(mask->r));
          765         assert(0 <= dsty && dsty < Dy(dst->r));
          766 
          767         if(drawdebug)
          768                 print("alphadraw: rdsrc=%p rdmask=%p rddst=%p calc=%p wrdst=%p\n",
          769                         rdsrc, rdmask, rddst, calc, wrdst);
          770         for(y=starty; y!=endy; y+=dir, srcy+=dir, masky+=dir, dsty+=dir){
          771                 clipy(src, &srcy);
          772                 clipy(dst, &dsty);
          773                 clipy(mask, &masky);
          774 
          775                 bsrc = rdsrc(&spar, spar.bufbase, srcy);
          776 DBG print("[");
          777                 bmask = rdmask(&mpar, mpar.bufbase, masky);
          778 DBG print("]\n");
          779                 bdst = rddst(&dpar, dpar.bufbase, dsty);
          780 DBG                dumpbuf("src", bsrc, dx);
          781 DBG                dumpbuf("mask", bmask, dx);
          782 DBG                dumpbuf("dst", bdst, dx);
          783                 bdst = calc(bdst, bsrc, bmask, dx, isgrey, op);
          784 DBG                dumpbuf("bdst", bdst, dx);
          785                 wrdst(&dpar, dpar.bytermin+dsty*dpar.bwidth, bdst);
          786         }
          787 
          788         return 1;
          789 }
          790 #undef DBG
          791 
          792 /* Zero dx pixels (dx*bdst.delta bytes) starting at bdst.rgba; b1 and b2 go unused. */
          793 static Buffer
          794 alphacalc0(Buffer bdst, Buffer b1, Buffer b2, int dx, int grey, int op)
          795 {
          796         USED(op); USED(grey);
          797         memset(bdst.rgba, 0, bdst.delta*dx);
          798         return bdst;
          799 }
          800 
          801 /*
          802  * Report whether two pixel buffers are laid out alike closely
          803  * enough for a pixel to be combined as one whole rgba word
          804  * rather than a channel at a time: 1 if so, 0 if not.
          805  */
          806 static int
          807 chanmatch(Buffer *bdst, Buffer *bsrc)
          808 {
          809         uchar *dbase, *sbase;
          810         int samergb;
          811 
          812         dbase = (uchar*)bdst->rgba;
          813         sbase = (uchar*)bsrc->rgba;
          814 
          815         /*
          816          * Red, green and blue have to sit at identical byte
          817          * offsets from the start of the rgba word in both buffers.
          818          */
          819         samergb = bdst->red - dbase == bsrc->red - sbase
          820                 && bdst->blu - dbase == bsrc->blu - sbase
          821                 && bdst->grn - dbase == bsrc->grn - sbase;
          822         if(!samergb)
          823                 return 0;
          824 
          825         /*
          826          * With the colours lined up, an alpha byte can only be in
          827          * the one slot left over.  A destination whose alpha is
          828          * &ones keeps no alpha of its own, so it does not matter
          829          * what a word-wide result puts in that slot.
          830          */
          831         if(bdst->alpha == &ones)
          832                 return 1;
          833 
          834         /*
          835          * The destination does keep alpha.  A source whose alpha
          836          * is &ones would contribute a meaningless byte of its rgba
          837          * word as alpha, so the answer is no; otherwise both alphas
          838          * share the same slot and the answer is yes.
          839          */
          840         if(bsrc->alpha == &ones)
          841                 return 0;
          842         return 1;
          843 }
          844 
          845 /*
          846  * Scale the destination by fd = sa*ma/255 (255 minus that when
          847  * op is DoutS).  Source colours are never read.
          848  */
          849 static Buffer
          850 alphacalc14(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
          851 {
          852         Buffer ret;
          853         int n, srca, maska, keep, sastep, wordwise;
          854         u32int t, t1;
          855 
          856         ret = bdst;
          857         sastep = bsrc.alpha == &ones ? 0 : bsrc.delta;
          858         wordwise = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);
          859 
          860         for(n=dx; n>0; n--){
          861                 srca = *bsrc.alpha;
          862                 maska = *bmask.alpha;
          863                 keep = CALC11(srca, maska, t);
          864                 if(op == DoutS)
          865                         keep = 255-keep;
          866                 if(!grey && wordwise){        /* all four bytes of the pixel at once */
          867                         *bdst.rgba = CALC41(keep, *bdst.rgba, t, t1);
          868                         bsrc.rgba++;
          869                         bdst.rgba++;
          870                 }else{
          871                         if(grey){
          872                                 *bdst.grey = CALC11(keep, *bdst.grey, t);
          873                                 bsrc.grey += bsrc.delta;
          874                                 bdst.grey += bdst.delta;
          875                         }else{
          876                                 *bdst.red = CALC11(keep, *bdst.red, t);
          877                                 *bdst.grn = CALC11(keep, *bdst.grn, t);
          878                                 *bdst.blu = CALC11(keep, *bdst.blu, t);
          879                                 bsrc.red += bsrc.delta;
          880                                 bsrc.blu += bsrc.delta;
          881                                 bsrc.grn += bsrc.delta;
          882                                 bdst.red += bdst.delta;
          883                                 bdst.blu += bdst.delta;
          884                                 bdst.grn += bdst.delta;
          885                         }
          886                         if(bdst.alpha != &ones){
          887                                 *bdst.alpha = CALC11(keep, *bdst.alpha, t);
          888                                 bdst.alpha += bdst.delta;
          889                         }
          890                 }
          891                 bmask.alpha += bmask.delta;
          892                 bsrc.alpha += sastep;
          893         }
          894         return ret;
          895 }
          896 
          897 /* dst = fs*src/255, where fs is ma for op S and otherwise ma*da/255 (da is 255-da for SoutD). */
          898 static Buffer
          899 alphacalc2810(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
          900 {
          901         Buffer obdst;
          902         int i, ma, da, fs, q, sadelta, dadelta;
          903         u32int t, t1;
          904 
          905         obdst = bdst;
          906         /* an alpha pointer equal to &ones addresses a single byte, so its step must be 0 */
          907         sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
          908         dadelta = bdst.alpha == &ones ? 0 : bdst.delta;
          909         q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);
          910 
          911         for(i=0; i<dx; i++){
          912                 ma = *bmask.alpha;
          913                 da = *bdst.alpha;
          914                 if(op == SoutD)
          915                         da = 255-da;
          916                 fs = ma;
          917                 if(op != S)
          918                         fs = CALC11(fs, da, t);
          919                 if(grey){
          920                         *bdst.grey = CALC11(fs, *bsrc.grey, t);
          921                         bsrc.grey += bsrc.delta;
          922                         bdst.grey += bdst.delta;
          923                 }else if(q){
          924                         *bdst.rgba = CALC41(fs, *bsrc.rgba, t, t1);
          925                         bsrc.rgba++;
          926                         bdst.rgba++;
          927                         bmask.alpha += bmask.delta;
          928                         bdst.alpha += dadelta;        /* stays on ones if dst has no alpha */
          929                         continue;
          930                 }else{
          931                         *bdst.red = CALC11(fs, *bsrc.red, t);
          932                         *bdst.grn = CALC11(fs, *bsrc.grn, t);
          933                         *bdst.blu = CALC11(fs, *bsrc.blu, t);
          934                         bsrc.red += bsrc.delta;
          935                         bsrc.blu += bsrc.delta;
          936                         bsrc.grn += bsrc.delta;
          937                         bdst.red += bdst.delta;
          938                         bdst.blu += bdst.delta;
          939                         bdst.grn += bdst.delta;
          940                 }
          941                 if(bdst.alpha != &ones){
          942                         *bdst.alpha = CALC11(fs, *bsrc.alpha, t);
          943                         bdst.alpha += bdst.delta;
          944                 }
          945                 bmask.alpha += bmask.delta;
          946                 bsrc.alpha += sadelta;
          947         }
          948         return obdst;
          949 }
          950 
          951 /* dst = (fs*src + fd*dst)/255; op decides how fs and fd are formed from ma, da and sa. */
          952 static Buffer
          953 alphacalc3679(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
          954 {
          955         Buffer obdst;
          956         int i, sa, ma, da, fs, fd, q, sadelta, dadelta;
          957         u32int t, t1;
          958 
          959         obdst = bdst;
          960         /* an alpha pointer equal to &ones addresses a single byte, so its step must be 0 */
          961         sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
          962         dadelta = bdst.alpha == &ones ? 0 : bdst.delta;
          963         q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);
          964 
          965         for(i=0; i<dx; i++){
          966                 sa = *bsrc.alpha;
          967                 ma = *bmask.alpha;
          968                 da = *bdst.alpha;
          969                 if(op == SatopD)
          970                         fs = CALC11(ma, da, t);
          971                 else
          972                         fs = CALC11(ma, 255-da, t);
          973                 if(op == DoverS)
          974                         fd = 255;
          975                 else{
          976                         fd = CALC11(sa, ma, t);
          977                         if(op != DatopS)
          978                                 fd = 255-fd;
          979                 }
          980                 if(grey){
          981                         *bdst.grey = CALC12(fs, *bsrc.grey, fd, *bdst.grey, t);
          982                         bsrc.grey += bsrc.delta;
          983                         bdst.grey += bdst.delta;
          984                 }else if(q){
          985                         *bdst.rgba = CALC42(fs, *bsrc.rgba, fd, *bdst.rgba, t, t1);
          986                         bsrc.rgba++;
          987                         bdst.rgba++;
          988                         bsrc.alpha += sadelta;
          989                         bmask.alpha += bmask.delta;
          990                         bdst.alpha += dadelta;        /* stays on ones if dst has no alpha */
          991                         continue;
          992                 }else{
          993                         *bdst.red = CALC12(fs, *bsrc.red, fd, *bdst.red, t);
          994                         *bdst.grn = CALC12(fs, *bsrc.grn, fd, *bdst.grn, t);
          995                         *bdst.blu = CALC12(fs, *bsrc.blu, fd, *bdst.blu, t);
          996                         bsrc.red += bsrc.delta;
          997                         bsrc.blu += bsrc.delta;
          998                         bsrc.grn += bsrc.delta;
          999                         bdst.red += bdst.delta;
         1000                         bdst.blu += bdst.delta;
         1001                         bdst.grn += bdst.delta;
         1002                 }
         1003                 if(bdst.alpha != &ones){
         1004                         *bdst.alpha = CALC12(fs, sa, fd, da, t);
         1005                         bdst.alpha += bdst.delta;
         1006                 }
         1007                 bmask.alpha += bmask.delta;
         1008                 bsrc.alpha += sadelta;
         1009         }
         1010         return obdst;
         1011 }
         1012 
         1013 /* Leave the destination as it is: bdst is handed straight back, nothing is written. */
         1014 static Buffer
         1015 alphacalc5(Buffer bdst, Buffer b1, Buffer b2, int dx, int grey, int op)
         1016 {
         1017         USED(op); USED(grey);
         1018         USED(dx);
         1019         return bdst;
         1020 }
         1021 
         1022 /*
         1023  * dst = (ma*src + fd*dst)/255 with fd = 255 - sa*ma/255;
         1024  * op is not examined.
         1025  */
         1026 static Buffer
         1027 alphacalc11(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
         1028 {
         1029         Buffer ret;
         1030         int n, srca, maska, rest, sastep, wordwise;
         1031         u32int t, t1;
         1032 
         1033         USED(op);
         1034         ret = bdst;
         1035         sastep = bsrc.alpha == &ones ? 0 : bsrc.delta;
         1036         wordwise = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);
         1037 
         1038         for(n=dx; n>0; n--){
         1039                 srca = *bsrc.alpha;
         1040                 maska = *bmask.alpha;
         1041                 rest = 255-CALC11(srca, maska, t);
         1042                 if(!grey && wordwise){        /* all four bytes of the pixel at once */
         1043                         *bdst.rgba = CALC42(maska, *bsrc.rgba, rest, *bdst.rgba, t, t1);
         1044                         bsrc.rgba++;
         1045                         bdst.rgba++;
         1046                 }else{
         1047                         if(grey){
         1048                                 *bdst.grey = CALC12(maska, *bsrc.grey, rest, *bdst.grey, t);
         1049                                 bsrc.grey += bsrc.delta;
         1050                                 bdst.grey += bdst.delta;
         1051                         }else{
         1052                                 *bdst.red = CALC12(maska, *bsrc.red, rest, *bdst.red, t);
         1053                                 *bdst.grn = CALC12(maska, *bsrc.grn, rest, *bdst.grn, t);
         1054                                 *bdst.blu = CALC12(maska, *bsrc.blu, rest, *bdst.blu, t);
         1055                                 bsrc.red += bsrc.delta;
         1056                                 bsrc.blu += bsrc.delta;
         1057                                 bsrc.grn += bsrc.delta;
         1058                                 bdst.red += bdst.delta;
         1059                                 bdst.blu += bdst.delta;
         1060                                 bdst.grn += bdst.delta;
         1061                         }
         1062                         if(bdst.alpha != &ones){
         1063                                 *bdst.alpha = CALC12(maska, srca, rest, *bdst.alpha, t);
         1064                                 bdst.alpha += bdst.delta;
         1065                         }
         1066                 }
         1067                 bmask.alpha += bmask.delta;
         1068                 bsrc.alpha += sastep;
         1069         }
         1070         return ret;
         1071 }
         1072 
         1073 /*
         1074 not used yet
         1075 source and mask alpha 1
         1076 static Buffer
         1077 alphacalcS0(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
         1078 {
         1079         Buffer obdst;
         1080         int i;
         1081 
         1082         USED(op);
         1083         obdst = bdst;
         1084         if(bsrc.delta == bdst.delta){
         1085                 memmove(bdst.rgba, bsrc.rgba, dx*bdst.delta);
         1086                 return obdst;
         1087         }
         1088         for(i=0; i<dx; i++){
         1089                 if(grey){
         1090                         *bdst.grey = *bsrc.grey;
         1091                         bsrc.grey += bsrc.delta;
         1092                         bdst.grey += bdst.delta;
         1093                 }else{
         1094                         *bdst.red = *bsrc.red;
         1095                         *bdst.grn = *bsrc.grn;
         1096                         *bdst.blu = *bsrc.blu;
         1097                         bsrc.red += bsrc.delta;
         1098                         bsrc.blu += bsrc.delta;
         1099                         bsrc.grn += bsrc.delta;
         1100                         bdst.red += bdst.delta;
         1101                         bdst.blu += bdst.delta;
         1102                         bdst.grn += bdst.delta;
         1103                 }
         1104                 if(bdst.alpha != &ones){
         1105                         *bdst.alpha = 255;
         1106                         bdst.alpha += bdst.delta;
         1107                 }
         1108         }
         1109         return obdst;
         1110 }
         1111 */
         1112 
         1113 /*
         1114  * Source alpha is 1: dst = (ma*src + (255-ma)*dst)/255 and dst alpha = ma + (255-ma)*da/255.
         1115  */
         1116 static Buffer
         1117 alphacalcS(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
         1118 {
         1119         Buffer ret;
         1120         int n, maska, rest;
         1121         u32int t;
         1122 
         1123         USED(op);
         1124         ret = bdst;
         1125 
         1126         for(n=dx; n>0; n--){
         1127                 maska = *bmask.alpha;
         1128                 rest = 255-maska;
         1129                 if(!grey){
         1130                         *bdst.red = CALC12(maska, *bsrc.red, rest, *bdst.red, t);
         1131                         *bdst.grn = CALC12(maska, *bsrc.grn, rest, *bdst.grn, t);
         1132                         *bdst.blu = CALC12(maska, *bsrc.blu, rest, *bdst.blu, t);
         1133                         bsrc.red += bsrc.delta;
         1134                         bsrc.blu += bsrc.delta;
         1135                         bsrc.grn += bsrc.delta;
         1136                         bdst.red += bdst.delta;
         1137                         bdst.blu += bdst.delta;
         1138                         bdst.grn += bdst.delta;
         1139                 }else{
         1140                         *bdst.grey = CALC12(maska, *bsrc.grey, rest, *bdst.grey, t);
         1141                         bsrc.grey += bsrc.delta;
         1142                         bdst.grey += bdst.delta;
         1143                 }
         1144                 if(bdst.alpha != &ones){
         1145                         *bdst.alpha = maska+CALC11(rest, *bdst.alpha, t);
         1146                         bdst.alpha += bdst.delta;
         1147                 }
         1148                 bmask.alpha += bmask.delta;
         1149         }
         1150         return ret;
         1151 }
         1152 
         1153 /* Zero the dst pixel where the mask is set and op is DoutS, or clear and op is DinS. */
         1154 static Buffer
         1155 boolcalc14(Buffer bdst, Buffer b1, Buffer bmask, int dx, int grey, int op)
         1156 {
         1157         Buffer ret;
         1158         int n, wipe;
         1159 
         1160         ret = bdst;
         1161 
         1162         for(n=dx; n>0; n--){
         1163                 wipe = *bmask.alpha ? op == DoutS : op == DinS;
         1164                 if(wipe){
         1165                         if(grey)
         1166                                 *bdst.grey = 0;
         1167                         else
         1168                                 *bdst.red = *bdst.grn = *bdst.blu = 0;
         1169                         if(bdst.alpha != &ones)
         1170                                 *bdst.alpha = 0;
         1171                 }
         1172                 if(grey)
         1173                         bdst.grey += bdst.delta;
         1174                 else{
         1175                         bdst.red += bdst.delta;
         1176                         bdst.blu += bdst.delta;
         1177                         bdst.grn += bdst.delta;
         1178                 }
         1179                 if(bdst.alpha != &ones)
         1180                         bdst.alpha += bdst.delta;
         1181                 bmask.alpha += bmask.delta;
         1182         }
         1183         return ret;
         1184 }
         1185 
         1186 /*
         1187  * Boolean mask; bsrc.alpha is never read.  Where the mask is set,
         1188  * dst = (fs*src + fd*dst)/255 with fs = da (255-da if op&2) and
         1189  * fd = 255 if op&4, else 0; where it is clear, dst is zeroed when
         1190  * op&1 is 0 and otherwise left alone.
         1191  */
         1192 static Buffer
         1193 boolcalc236789(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
         1194 {
         1195         Buffer ret;
         1196         int n, maska, dsta, srcf, dstf, clear;
         1197         u32int t;
         1198 
         1199         ret = bdst;
         1200         clear = !(op&1);
         1201         dstf = (op&4) ? 255 : 0;
         1202 
         1203         for(n=dx; n>0; n--){
         1204                 maska = *bmask.alpha;
         1205                 dsta = *bdst.alpha;
         1206                 srcf = (op&2) ? 255-dsta : dsta;
         1207 
         1208                 if(grey){
         1209                         if(maska)
         1210                                 *bdst.grey = CALC12(srcf, *bsrc.grey, dstf, *bdst.grey, t);
         1211                         else if(clear)
         1212                                 *bdst.grey = 0;
         1213                         bsrc.grey += bsrc.delta;
         1214                         bdst.grey += bdst.delta;
         1215                 }else{
         1216                         if(maska){
         1217                                 *bdst.red = CALC12(srcf, *bsrc.red, dstf, *bdst.red, t);
         1218                                 *bdst.grn = CALC12(srcf, *bsrc.grn, dstf, *bdst.grn, t);
         1219                                 *bdst.blu = CALC12(srcf, *bsrc.blu, dstf, *bdst.blu, t);
         1220                         }else if(clear)
         1221                                 *bdst.red = *bdst.grn = *bdst.blu = 0;
         1222                         bsrc.red += bsrc.delta;
         1223                         bsrc.blu += bsrc.delta;
         1224                         bsrc.grn += bsrc.delta;
         1225                         bdst.red += bdst.delta;
         1226                         bdst.blu += bdst.delta;
         1227                         bdst.grn += bdst.delta;
         1228                 }
         1229                 bmask.alpha += bmask.delta;
         1230                 if(bdst.alpha == &ones)
         1231                         continue;
         1232                 if(maska)
         1233                         *bdst.alpha = srcf+CALC11(dstf, dsta, t);
         1234                 else if(clear)
         1235                         *bdst.alpha = 0;
         1236                 bdst.alpha += bdst.delta;
         1237         }
         1238         return ret;
         1239 }
         1240 
         1241 /*
         1242  * Boolean mask: where it is set dst = src with alpha 255; where clear, dst = 0 unless op&1.
         1243  */
         1244 static Buffer
         1245 boolcalc1011(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
         1246 {
         1247         Buffer ret;
         1248         int n, maska, clear;
         1249 
         1250         ret = bdst;
         1251         clear = !(op&1);
         1252         for(n=dx; n>0; n--){
         1253                 maska = *bmask.alpha;
         1254                 if(grey){
         1255                         if(maska)
         1256                                 *bdst.grey = *bsrc.grey;
         1257                         else if(clear)
         1258                                 *bdst.grey = 0;
         1259                         bsrc.grey += bsrc.delta;
         1260                         bdst.grey += bdst.delta;
         1261                 }else{
         1262                         if(maska){
         1263                                 *bdst.red = *bsrc.red;
         1264                                 *bdst.grn = *bsrc.grn;
         1265                                 *bdst.blu = *bsrc.blu;
         1266                         }else if(clear)
         1267                                 *bdst.red = *bdst.grn = *bdst.blu = 0;
         1268                         bsrc.red += bsrc.delta;
         1269                         bsrc.blu += bsrc.delta;
         1270                         bsrc.grn += bsrc.delta;
         1271                         bdst.red += bdst.delta;
         1272                         bdst.blu += bdst.delta;
         1273                         bdst.grn += bdst.delta;
         1274                 }
         1275                 bmask.alpha += bmask.delta;
         1276                 if(bdst.alpha == &ones)
         1277                         continue;
         1278                 if(maska)
         1279                         *bdst.alpha = 255;
         1280                 else if(clear)
         1281                         *bdst.alpha = 0;
         1282                 bdst.alpha += bdst.delta;
         1283         }
         1284         return ret;
         1285 }
         1286 /*
         1287  * Scan line reader for replicated images: run the reader saved in p->replcall at
         1288  * most once per line y, keeping its Buffer in p->bcache[y] for later requests.
         1289  */
         1290 static Buffer
         1291 replread(Param *p, uchar *s, int y)
         1292 {
         1293         Buffer *slot;
         1294 
         1295         USED(s);
         1296         slot = &p->bcache[y];
         1297         if(p->bfilled & (1<<y))                /* line y was read earlier */
         1298                 return *slot;
         1299         p->bfilled |= 1<<y;
         1300         *slot = p->replcall(p, p->bufbase+y*p->bufdelta, y);
         1301         return *slot;
         1302 }
         1303 
         1304 /*
         1305  * Mask reader for masks without alpha: fetch the line through
         1306  * p->greymaskcall and present its grey bytes as the alpha channel.
         1307  */
         1308 static Buffer
         1309 greymaskread(Param *p, uchar *buf, int y)
         1310 {
         1311         Buffer line = p->greymaskcall(p, buf, y);
         1312 
         1313         line.alpha = line.grey;
         1314         return line;
         1315 }
         1316 
         1317 #define DBG if(0)
         1318 static Buffer
         1319 readnbit(Param *p, uchar *buf, int y)
         1320 {
         1321         Buffer b;
         1322         Memimage *img;
         1323         uchar *repl, *r, *w, *ow, bits;
         1324         int i, n, sh, depth, x, dx, npack, nbits;
         1325 
         1326         memset(&b, 0, sizeof b);
         1327         b.rgba = (u32int*)buf;
         1328         b.grey = w = buf;
         1329         b.red = b.blu = b.grn = w;
         1330         b.alpha = &ones;
         1331         b.delta = 1;
         1332 
         1333         dx = p->dx;
         1334         img = p->img;
         1335         depth = img->depth;
         1336         repl = &replbit[depth][0];
         1337         npack = 8/depth;
         1338         sh = 8-depth;
         1339 
         1340         /* copy from p->r.min.x until end of repl rectangle */
         1341         x = p->r.min.x;
         1342         n = dx;
         1343         if(n > p->img->r.max.x - x)
         1344                 n = p->img->r.max.x - x;
         1345 
         1346         r = p->bytermin + y*p->bwidth;
         1347 DBG print("readnbit dx %d %p=%p+%d*%d, *r=%d fetch %d ", dx, r, p->bytermin, y, p->bwidth, *r, n);
         1348         bits = *r++;
         1349         nbits = 8;
         1350         if(i=x&(npack-1)){
         1351 DBG print("throwaway %d...", i);
         1352                 bits <<= depth*i;
         1353                 nbits -= depth*i;
         1354         }
         1355         for(i=0; i<n; i++){
         1356                 if(nbits == 0){
         1357 DBG print("(%.2ux)...", *r);
         1358                         bits = *r++;
         1359                         nbits = 8;
         1360                 }
         1361                 *w++ = repl[bits>>sh];
         1362 DBG print("bit %x...", repl[bits>>sh]);
         1363                 bits <<= depth;
         1364                 nbits -= depth;
         1365         }
         1366         dx -= n;
         1367         if(dx == 0)
         1368                 return b;
         1369 
         1370         assert(x+i == p->img->r.max.x);
         1371 
         1372         /* copy from beginning of repl rectangle until where we were before. */
         1373         x = p->img->r.min.x;
         1374         n = dx;
         1375         if(n > p->r.min.x - x)
         1376                 n = p->r.min.x - x;
         1377 
         1378         r = p->bytey0s + y*p->bwidth;
         1379 DBG print("x=%d r=%p...", x, r);
         1380         bits = *r++;
         1381         nbits = 8;
         1382         if(i=x&(npack-1)){
         1383                 bits <<= depth*i;
         1384                 nbits -= depth*i;
         1385         }
         1386 DBG print("nbits=%d...", nbits);
         1387         for(i=0; i<n; i++){
         1388                 if(nbits == 0){
         1389                         bits = *r++;
         1390                         nbits = 8;
         1391                 }
         1392                 *w++ = repl[bits>>sh];
         1393 DBG print("bit %x...", repl[bits>>sh]);
         1394                 bits <<= depth;
         1395                 nbits -= depth;
         1396 DBG print("bits %x nbits %d...", bits, nbits);
         1397         }
         1398         dx -= n;
         1399         if(dx == 0)
         1400                 return b;
         1401 
         1402         assert(dx > 0);
         1403         /* now we have exactly one full scan line: just replicate the buffer itself until we are done */
         1404         ow = buf;
         1405         while(dx--)
         1406                 *w++ = *ow++;
         1407 
         1408         return b;
         1409 }
         1410 #undef DBG
         1411 
         1412 #define DBG if(0)
         1413 static void
         1414 writenbit(Param *p, uchar *w, Buffer src)
         1415 {
         1416         uchar *r;
         1417         u32int bits;
         1418         int i, sh, depth, npack, nbits, x, ex;
         1419 
         1420         assert(src.grey != nil && src.delta == 1);
         1421 
         1422         x = p->r.min.x;
         1423         ex = x+p->dx;
         1424         depth = p->img->depth;
         1425         npack = 8/depth;
         1426 
         1427         i=x&(npack-1);
         1428         bits = i ? (*w >> (8-depth*i)) : 0;
         1429         nbits = depth*i;
         1430         sh = 8-depth;
         1431         r = src.grey;
         1432 
         1433         for(; x<ex; x++){
         1434                 bits <<= depth;
         1435 DBG print(" %x", *r);
         1436                 bits |= (*r++ >> sh);
         1437                 nbits += depth;
         1438                 if(nbits == 8){
         1439                         *w++ = bits;
         1440                         nbits = 0;
         1441                 }
         1442         }
         1443 
         1444         if(nbits){
         1445                 sh = 8-nbits;
         1446                 bits <<= sh;
         1447                 bits |= *w & ((1<<sh)-1);
         1448                 *w = bits;
         1449         }
         1450 DBG print("\n");
         1451         return;
         1452 }
         1453 #undef DBG
         1454 
         1455 static Buffer
         1456 readcmap(Param *p, uchar *buf, int y)
         1457 {
         1458         Buffer b;
         1459         int a, convgrey, copyalpha, dx, i, m;
         1460         uchar *q, *cmap, *begin, *end, *r, *w;
         1461 
         1462         memset(&b, 0, sizeof b);
         1463         begin = p->bytey0s + y*p->bwidth;
         1464         r = p->bytermin + y*p->bwidth;
         1465         end = p->bytey0e + y*p->bwidth;
         1466         cmap = p->img->cmap->cmap2rgb;
         1467         convgrey = p->convgrey;
         1468         copyalpha = (p->img->flags&Falpha) ? 1 : 0;
         1469 
         1470         w = buf;
         1471         dx = p->dx;
         1472         if(copyalpha){
         1473                 b.alpha = buf++;
         1474                 a = p->img->shift[CAlpha]/8;
         1475                 m = p->img->shift[CMap]/8;
         1476                 for(i=0; i<dx; i++){
         1477                         *w++ = r[a];
         1478                         q = cmap+r[m]*3;
         1479                         r += 2;
         1480                         if(r == end)
         1481                                 r = begin;
         1482                         if(convgrey){
         1483                                 *w++ = RGB2K(q[0], q[1], q[2]);
         1484                         }else{
         1485                                 *w++ = q[2];        /* blue */
         1486                                 *w++ = q[1];        /* green */
         1487                                 *w++ = q[0];        /* red */
         1488                         }
         1489                 }
         1490         }else{
         1491                 b.alpha = &ones;
         1492                 for(i=0; i<dx; i++){
         1493                         q = cmap+*r++*3;
         1494                         if(r == end)
         1495                                 r = begin;
         1496                         if(convgrey){
         1497                                 *w++ = RGB2K(q[0], q[1], q[2]);
         1498                         }else{
         1499                                 *w++ = q[2];        /* blue */
         1500                                 *w++ = q[1];        /* green */
         1501                                 *w++ = q[0];        /* red */
         1502                         }
         1503                 }
         1504         }
         1505 
         1506         b.rgba = (u32int*)(buf-copyalpha);
         1507 
         1508         if(convgrey){
         1509                 b.grey = buf;
         1510                 b.red = b.blu = b.grn = buf;
         1511                 b.delta = 1+copyalpha;
         1512         }else{
         1513                 b.blu = buf;
         1514                 b.grn = buf+1;
         1515                 b.red = buf+2;
         1516                 b.grey = nil;
         1517                 b.delta = 3+copyalpha;
         1518         }
         1519         return b;
         1520 }
         1521 
         1522 static void
         1523 writecmap(Param *p, uchar *w, Buffer src)
         1524 {
         1525         uchar *cmap, *red, *grn, *blu;
         1526         int i, dx, delta;
         1527 
         1528         cmap = p->img->cmap->rgb2cmap;
         1529 
         1530         delta = src.delta;
         1531         red= src.red;
         1532         grn = src.grn;
         1533         blu = src.blu;
         1534 
         1535         dx = p->dx;
         1536         for(i=0; i<dx; i++, red+=delta, grn+=delta, blu+=delta)
         1537                 *w++ = cmap[(*red>>4)*256+(*grn>>4)*16+(*blu>>4)];
         1538 }
         1539 
         1540 #define DBG if(drawdebug)
         1541 static Buffer
         1542 readbyte(Param *p, uchar *buf, int y)
         1543 {
         1544         Buffer b;
         1545         Memimage *img;
         1546         int dx, isgrey, convgrey, alphaonly, copyalpha, i, nb;
         1547         uchar *begin, *end, *r, *w, *rrepl, *grepl, *brepl, *arepl, *krepl;
         1548         uchar ured, ugrn, ublu;
         1549         u32int u;
         1550 
         1551         img = p->img;
         1552         begin = p->bytey0s + y*p->bwidth;
         1553         r = p->bytermin + y*p->bwidth;
         1554         end = p->bytey0e + y*p->bwidth;
         1555 
         1556         w = buf;
         1557         dx = p->dx;
         1558         nb = img->depth/8;
         1559 
         1560         convgrey = p->convgrey;        /* convert rgb to grey */
         1561         isgrey = img->flags&Fgrey;
         1562         alphaonly = p->alphaonly;
         1563         copyalpha = (img->flags&Falpha) ? 1 : 0;
         1564 
         1565         /* if we can, avoid processing everything */
         1566         if(!(img->flags&Frepl) && !convgrey && (img->flags&Fbytes)){
         1567                 memset(&b, 0, sizeof b);
         1568                 if(p->needbuf){
         1569                         memmove(buf, r, dx*nb);
         1570                         r = buf;
         1571                 }
         1572                 b.rgba = (u32int*)r;
         1573                 if(copyalpha)
         1574                         b.alpha = r+img->shift[CAlpha]/8;
         1575                 else
         1576                         b.alpha = &ones;
         1577                 if(isgrey){
         1578                         b.grey = r+img->shift[CGrey]/8;
         1579                         b.red = b.grn = b.blu = b.grey;
         1580                 }else{
         1581                         b.red = r+img->shift[CRed]/8;
         1582                         b.grn = r+img->shift[CGreen]/8;
         1583                         b.blu = r+img->shift[CBlue]/8;
         1584                 }
         1585                 b.delta = nb;
         1586                 return b;
         1587         }
         1588 
         1589         rrepl = replbit[img->nbits[CRed]];
         1590         grepl = replbit[img->nbits[CGreen]];
         1591         brepl = replbit[img->nbits[CBlue]];
         1592         arepl = replbit[img->nbits[CAlpha]];
         1593         krepl = replbit[img->nbits[CGrey]];
         1594 
         1595         for(i=0; i<dx; i++){
         1596                 u = r[0] | (r[1]<<8) | (r[2]<<16) | (r[3]<<24);
         1597                 if(copyalpha)
         1598                         *w++ = arepl[(u>>img->shift[CAlpha]) & img->mask[CAlpha]];
         1599 
         1600                 if(isgrey)
         1601                         *w++ = krepl[(u >> img->shift[CGrey]) & img->mask[CGrey]];
         1602                 else if(!alphaonly){
         1603                         ured = rrepl[(u >> img->shift[CRed]) & img->mask[CRed]];
         1604                         ugrn = grepl[(u >> img->shift[CGreen]) & img->mask[CGreen]];
         1605                         ublu = brepl[(u >> img->shift[CBlue]) & img->mask[CBlue]];
         1606                         if(convgrey){
         1607                                 *w++ = RGB2K(ured, ugrn, ublu);
         1608                         }else{
         1609                                 *w++ = brepl[(u >> img->shift[CBlue]) & img->mask[CBlue]];
         1610                                 *w++ = grepl[(u >> img->shift[CGreen]) & img->mask[CGreen]];
         1611                                 *w++ = rrepl[(u >> img->shift[CRed]) & img->mask[CRed]];
         1612                         }
         1613                 }
         1614                 r += nb;
         1615                 if(r == end)
         1616                         r = begin;
         1617         }
         1618 
         1619         b.alpha = copyalpha ? buf : &ones;
         1620         b.rgba = (u32int*)buf;
         1621         if(alphaonly){
         1622                 b.red = b.grn = b.blu = b.grey = nil;
         1623                 if(!copyalpha)
         1624                         b.rgba = nil;
         1625                 b.delta = 1;
         1626         }else if(isgrey || convgrey){
         1627                 b.grey = buf+copyalpha;
         1628                 b.red = b.grn = b.blu = buf+copyalpha;
         1629                 b.delta = copyalpha+1;
         1630         }else{
         1631                 b.blu = buf+copyalpha;
         1632                 b.grn = buf+copyalpha+1;
         1633                 b.grey = nil;
         1634                 b.red = buf+copyalpha+2;
         1635                 b.delta = copyalpha+3;
         1636         }
         1637         return b;
         1638 }
         1639 #undef DBG
         1640 
         1641 #define DBG if(drawdebug)
         1642 static void
         1643 writebyte(Param *p, uchar *w, Buffer src)
         1644 {
         1645         Memimage *img;
         1646         int i, isalpha, isgrey, nb, delta, dx, adelta;
         1647         uchar ff, *red, *grn, *blu, *grey, *alpha;
         1648         u32int u, mask;
         1649 
         1650         img = p->img;
         1651 
         1652         red = src.red;
         1653         grn = src.grn;
         1654         blu = src.blu;
         1655         alpha = src.alpha;
         1656         delta = src.delta;
         1657         grey = src.grey;
         1658         dx = p->dx;
         1659 
         1660         nb = img->depth/8;
         1661         mask = (nb==4) ? 0 : ~((1<<img->depth)-1);
         1662 
         1663         isalpha = img->flags&Falpha;
         1664         isgrey = img->flags&Fgrey;
         1665         adelta = src.delta;
         1666 
         1667         if(isalpha && (alpha == nil || alpha == &ones)){
         1668                 ff = 0xFF;
         1669                 alpha = &ff;
         1670                 adelta = 0;
         1671         }
         1672 
         1673         for(i=0; i<dx; i++){
         1674                 u = w[0] | (w[1]<<8) | (w[2]<<16) | (w[3]<<24);
         1675 DBG print("u %.8lux...", u);
         1676                 u &= mask;
         1677 DBG print("&mask %.8lux...", u);
         1678                 if(isgrey){
         1679                         u |= ((*grey >> (8-img->nbits[CGrey])) & img->mask[CGrey]) << img->shift[CGrey];
         1680 DBG print("|grey %.8lux...", u);
         1681                         grey += delta;
         1682                 }else{
         1683                         u |= ((*red >> (8-img->nbits[CRed])) & img->mask[CRed]) << img->shift[CRed];
         1684                         u |= ((*grn >> (8-img->nbits[CGreen])) & img->mask[CGreen]) << img->shift[CGreen];
         1685                         u |= ((*blu >> (8-img->nbits[CBlue])) & img->mask[CBlue]) << img->shift[CBlue];
         1686                         red += delta;
         1687                         grn += delta;
         1688                         blu += delta;
         1689 DBG print("|rgb %.8lux...", u);
         1690                 }
         1691 
         1692                 if(isalpha){
         1693                         u |= ((*alpha >> (8-img->nbits[CAlpha])) & img->mask[CAlpha]) << img->shift[CAlpha];
         1694                         alpha += adelta;
         1695 DBG print("|alpha %.8lux...", u);
         1696                 }
         1697 
         1698                 w[0] = u;
         1699                 w[1] = u>>8;
         1700                 w[2] = u>>16;
         1701                 w[3] = u>>24;
         1702 DBG print("write back %.8lux...", u);
         1703                 w += nb;
         1704         }
         1705 }
         1706 #undef DBG
         1707 
         1708 static Readfn*
         1709 readfn(Memimage *img)
         1710 {
         1711         if(img->depth < 8)
         1712                 return readnbit;
         1713         if(img->nbits[CMap] == 8)
         1714                 return readcmap;
         1715         return readbyte;
         1716 }
         1717 
         1718 static Readfn*
         1719 readalphafn(Memimage *m)
         1720 {
         1721         USED(m);
         1722         return readbyte;
         1723 }
         1724 
         1725 static Writefn*
         1726 writefn(Memimage *img)
         1727 {
         1728         if(img->depth < 8)
         1729                 return writenbit;
         1730         if(img->chan == CMAP8)
         1731                 return writecmap;
         1732         return writebyte;
         1733 }
         1734 
         1735 static void
         1736 nullwrite(Param *p, uchar *s, Buffer b)
         1737 {
         1738         USED(p);
         1739         USED(s);
         1740 }
         1741 
         1742 static Buffer
         1743 readptr(Param *p, uchar *s, int y)
         1744 {
         1745         Buffer b;
         1746         uchar *q;
         1747 
         1748         USED(s);
         1749         memset(&b, 0, sizeof b);
         1750         q = p->bytermin + y*p->bwidth;
         1751         b.red = q;        /* ptr to data */
         1752         b.grn = b.blu = b.grey = b.alpha = nil;
         1753         b.rgba = (u32int*)q;
         1754         b.delta = p->img->depth/8;
         1755         return b;
         1756 }
         1757 
         1758 static Buffer
         1759 boolmemmove(Buffer bdst, Buffer bsrc, Buffer b1, int dx, int i, int o)
         1760 {
         1761         USED(i);
         1762         USED(o);
         1763         memmove(bdst.red, bsrc.red, dx*bdst.delta);
         1764         return bdst;
         1765 }
         1766 
         1767 static Buffer
         1768 boolcopy8(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
         1769 {
         1770         uchar *m, *r, *w, *ew;
         1771 
         1772         USED(i);
         1773         USED(o);
         1774         m = bmask.grey;
         1775         w = bdst.red;
         1776         r = bsrc.red;
         1777         ew = w+dx;
         1778         for(; w < ew; w++,r++)
         1779                 if(*m++)
         1780                         *w = *r;
         1781         return bdst;        /* not used */
         1782 }
         1783 
         1784 static Buffer
         1785 boolcopy16(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
         1786 {
         1787         uchar *m;
         1788         ushort *r, *w, *ew;
         1789 
         1790         USED(i);
         1791         USED(o);
         1792         m = bmask.grey;
         1793         w = (ushort*)bdst.red;
         1794         r = (ushort*)bsrc.red;
         1795         ew = w+dx;
         1796         for(; w < ew; w++,r++)
         1797                 if(*m++)
         1798                         *w = *r;
         1799         return bdst;        /* not used */
         1800 }
         1801 
         1802 static Buffer
         1803 boolcopy24(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
         1804 {
         1805         uchar *m;
         1806         uchar *r, *w, *ew;
         1807 
         1808         USED(i);
         1809         USED(o);
         1810         m = bmask.grey;
         1811         w = bdst.red;
         1812         r = bsrc.red;
         1813         ew = w+dx*3;
         1814         while(w < ew){
         1815                 if(*m++){
         1816                         *w++ = *r++;
         1817                         *w++ = *r++;
         1818                         *w++ = *r++;
         1819                 }else{
         1820                         w += 3;
         1821                         r += 3;
         1822                 }
         1823         }
         1824         return bdst;        /* not used */
         1825 }
         1826 
         1827 static Buffer
         1828 boolcopy32(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
         1829 {
         1830         uchar *m;
         1831         u32int *r, *w, *ew;
         1832 
         1833         USED(i);
         1834         USED(o);
         1835         m = bmask.grey;
         1836         w = (u32int*)bdst.red;
         1837         r = (u32int*)bsrc.red;
         1838         ew = w+dx;
         1839         for(; w < ew; w++,r++)
         1840                 if(*m++)
         1841                         *w = *r;
         1842         return bdst;        /* not used */
         1843 }
         1844 
         1845 static Buffer
         1846 genconv(Param *p, uchar *buf, int y)
         1847 {
         1848         Buffer b;
         1849         int nb;
         1850         uchar *r, *w, *ew;
         1851 
         1852         /* read from source into RGB format in convbuf */
         1853         b = p->convreadcall(p, p->convbuf, y);
         1854 
         1855         /* write RGB format into dst format in buf */
         1856         p->convwritecall(p->convdpar, buf, b);
         1857 
         1858         if(p->convdx){
         1859                 nb = p->convdpar->img->depth/8;
         1860                 r = buf;
         1861                 w = buf+nb*p->dx;
         1862                 ew = buf+nb*p->convdx;
         1863                 while(w<ew)
         1864                         *w++ = *r++;
         1865         }
         1866 
         1867         b.red = buf;
         1868         b.blu = b.grn = b.grey = b.alpha = nil;
         1869         b.rgba = (u32int*)buf;
         1870         b.delta = 0;
         1871 
         1872         return b;
         1873 }
         1874 
         1875 static Readfn*
         1876 convfn(Memimage *dst, Param *dpar, Memimage *src, Param *spar)
         1877 {
         1878         if(dst->chan == src->chan && !(src->flags&Frepl)){
         1879 /*if(drawdebug) iprint("readptr..."); */
         1880                 return readptr;
         1881         }
         1882 
         1883         if(dst->chan==CMAP8 && (src->chan==GREY1||src->chan==GREY2||src->chan==GREY4)){
         1884                 /* cheat because we know the replicated value is exactly the color map entry. */
         1885 /*if(drawdebug) iprint("Readnbit..."); */
         1886                 return readnbit;
         1887         }
         1888 
         1889         spar->convreadcall = readfn(src);
         1890         spar->convwritecall = writefn(dst);
         1891         spar->convdpar = dpar;
         1892 
         1893         /* allocate a conversion buffer */
         1894         spar->convbufoff = ndrawbuf;
         1895         ndrawbuf += spar->dx*4;
         1896 
         1897         if(spar->dx > Dx(spar->img->r)){
         1898                 spar->convdx = spar->dx;
         1899                 spar->dx = Dx(spar->img->r);
         1900         }
         1901 
         1902 /*if(drawdebug) iprint("genconv..."); */
         1903         return genconv;
         1904 }
         1905 
         1906 /*
         1907  * Do NOT call this directly.  pixelbits is a wrapper
         1908  * around this that fetches the bits from the X server
         1909  * when necessary.
         1910  */
         1911 u32int
         1912 _pixelbits(Memimage *i, Point pt)
         1913 {
         1914         uchar *p;
         1915         u32int val;
         1916         int off, bpp, npack;
         1917 
         1918         val = 0;
         1919         p = byteaddr(i, pt);
         1920         switch(bpp=i->depth){
         1921         case 1:
         1922         case 2:
         1923         case 4:
         1924                 npack = 8/bpp;
         1925                 off = pt.x%npack;
         1926                 val = p[0] >> bpp*(npack-1-off);
         1927                 val &= (1<<bpp)-1;
         1928                 break;
         1929         case 8:
         1930                 val = p[0];
         1931                 break;
         1932         case 16:
         1933                 val = p[0]|(p[1]<<8);
         1934                 break;
         1935         case 24:
         1936                 val = p[0]|(p[1]<<8)|(p[2]<<16);
         1937                 break;
         1938         case 32:
         1939                 val = p[0]|(p[1]<<8)|(p[2]<<16)|(p[3]<<24);
         1940                 break;
         1941         }
         1942         while(bpp<32){
         1943                 val |= val<<bpp;
         1944                 bpp *= 2;
         1945         }
         1946         return val;
         1947 }
         1948 
         1949 static Calcfn*
         1950 boolcopyfn(Memimage *img, Memimage *mask)
         1951 {
         1952         if(mask->flags&Frepl && Dx(mask->r)==1 && Dy(mask->r)==1 && pixelbits(mask, mask->r.min)==~0)
         1953                 return boolmemmove;
         1954 
         1955         switch(img->depth){
         1956         case 8:
         1957                 return boolcopy8;
         1958         case 16:
         1959                 return boolcopy16;
         1960         case 24:
         1961                 return boolcopy24;
         1962         case 32:
         1963                 return boolcopy32;
         1964         default:
         1965                 assert(0 /* boolcopyfn */);
         1966         }
         1967         return 0;
         1968 }
         1969 
         1970 /*
         1971  * Optimized draw for filling and scrolling; uses memset and memmove.
         1972  */
         1973 static void
         1974 memsets(void *vp, ushort val, int n)
         1975 {
         1976         ushort *p, *ep;
         1977 
         1978         p = vp;
         1979         ep = p+n;
         1980         while(p<ep)
         1981                 *p++ = val;
         1982 }
         1983 
         1984 static void
         1985 memsetl(void *vp, u32int val, int n)
         1986 {
         1987         u32int *p, *ep;
         1988 
         1989         p = vp;
         1990         ep = p+n;
         1991         while(p<ep)
         1992                 *p++ = val;
         1993 }
         1994 
         1995 static void
         1996 memset24(void *vp, u32int val, int n)
         1997 {
         1998         uchar *p, *ep;
         1999         uchar a,b,c;
         2000 
         2001         p = vp;
         2002         ep = p+3*n;
         2003         a = val;
         2004         b = val>>8;
         2005         c = val>>16;
         2006         while(p<ep){
         2007                 *p++ = a;
         2008                 *p++ = b;
         2009                 *p++ = c;
         2010         }
         2011 }
         2012 
         2013 u32int
         2014 _imgtorgba(Memimage *img, u32int val)
         2015 {
         2016         uchar r, g, b, a;
         2017         int nb, ov, v;
         2018         u32int chan;
         2019         uchar *p;
         2020 
         2021         a = 0xFF;
         2022         r = g = b = 0xAA;        /* garbage */
         2023         for(chan=img->chan; chan; chan>>=8){
         2024                 nb = NBITS(chan);
         2025                 ov = v = val&((1<<nb)-1);
         2026                 val >>= nb;
         2027 
         2028                 while(nb < 8){
         2029                         v |= v<<nb;
         2030                         nb *= 2;
         2031                 }
         2032                 v >>= (nb-8);
         2033 
         2034                 switch(TYPE(chan)){
         2035                 case CRed:
         2036                         r = v;
         2037                         break;
         2038                 case CGreen:
         2039                         g = v;
         2040                         break;
         2041                 case CBlue:
         2042                         b = v;
         2043                         break;
         2044                 case CAlpha:
         2045                         a = v;
         2046                         break;
         2047                 case CGrey:
         2048                         r = g = b = v;
         2049                         break;
         2050                 case CMap:
         2051                         p = img->cmap->cmap2rgb+3*ov;
         2052                         r = *p++;
         2053                         g = *p++;
         2054                         b = *p;
         2055                         break;
         2056                 }
         2057         }
         2058         return (r<<24)|(g<<16)|(b<<8)|a;
         2059 }
         2060 
         2061 u32int
         2062 _rgbatoimg(Memimage *img, u32int rgba)
         2063 {
         2064         u32int chan;
         2065         int d, nb;
         2066         u32int v;
         2067         uchar *p, r, g, b, a, m;
         2068 
         2069         v = 0;
         2070         r = rgba>>24;
         2071         g = rgba>>16;
         2072         b = rgba>>8;
         2073         a = rgba;
         2074         d = 0;
         2075         for(chan=img->chan; chan; chan>>=8){
         2076                 nb = NBITS(chan);
         2077                 switch(TYPE(chan)){
         2078                 case CRed:
         2079                         v |= (r>>(8-nb))<<d;
         2080                         break;
         2081                 case CGreen:
         2082                         v |= (g>>(8-nb))<<d;
         2083                         break;
         2084                 case CBlue:
         2085                         v |= (b>>(8-nb))<<d;
         2086                         break;
         2087                 case CAlpha:
         2088                         v |= (a>>(8-nb))<<d;
         2089                         break;
         2090                 case CMap:
         2091                         p = img->cmap->rgb2cmap;
         2092                         m = p[(r>>4)*256+(g>>4)*16+(b>>4)];
         2093                         v |= (m>>(8-nb))<<d;
         2094                         break;
         2095                 case CGrey:
         2096                         m = RGB2K(r,g,b);
         2097                         v |= (m>>(8-nb))<<d;
         2098                         break;
         2099                 }
         2100                 d += nb;
         2101         }
         2102 /*        print("rgba2img %.8lux = %.*lux\n", rgba, 2*d/8, v); */
         2103         return v;
         2104 }
         2105 /* memoptdraw: try to carry out the draw described by par with memset, memmove or bytewise bit operations. */
         2106 #define DBG if(0)        /* the debug prints below sit behind a constant-false test */
         2107 static int                /* 1 if one of the fast paths did the drawing, 0 if none applied */
         2108 memoptdraw(Memdrawparam *par)
         2109 {
         2110         int m, y, dy, dx, op;
         2111         u32int v;
         2112         u16int u16;
         2113         Memimage *src;
         2114         Memimage *dst;
         2115 
         2116         dx = Dx(par->r);
         2117         dy = Dy(par->r);
         2118         src = par->src;
         2119         dst = par->dst;
         2120         op = par->op;
         2121 
         2122 DBG print("state %lux mval %lux dd %d\n", par->state, par->mval, dst->depth);
         2123         /*
         2124          * Fast path 1: if we have an opaque mask and source is one opaque pixel (low byte of
         2125          * srgba is 0xFF) we replicate par->sdval, presumably already in dst format, with memset.
         2126          */
         2127         m = Simplesrc|Simplemask|Fullmask;
         2128         if((par->state&m)==m && (par->srgba&0xFF) == 0xFF && (op ==S || op == SoverD)){
         2129                 uchar *dp, p[4];
         2130                 int d, dwid, ppb, np, nb;
         2131                 uchar lm, rm;
         2132 
         2133 DBG print("memopt, dst %p, dst->data->bdata %p\n", dst, dst->data->bdata);
         2134                 dwid = dst->width*sizeof(u32int);        /* destination row stride in bytes */
         2135                 dp = byteaddr(dst, par->r.min);
         2136                 v = par->sdval;
         2137 DBG print("sdval %lud, depth %d\n", v, dst->depth);
         2138                 switch(dst->depth){
         2139                 case 1:
         2140                 case 2:
         2141                 case 4:
         2142                         for(d=dst->depth; d<8; d*=2)        /* replicate the pixel until it fills a byte */
         2143                                 v |= (v<<d);
         2144                         ppb = 8/dst->depth;        /* pixels per byte */
         2145                         m = ppb-1;        /* x&m is a pixel's index within its byte */
         2146                         /* left edge */
         2147                         np = par->r.min.x&m;                /* no. pixels unused on left side of word */
         2148                         dx -= (ppb-np);        /* discount the pixels that live in the first byte */
         2149                         nb = 8 - np * dst->depth;                /* no. bits used on right side of word */
         2150                         lm = (1<<nb)-1;        /* nb is at least dst->depth, so lm is never 0 */
         2151 DBG print("np %d x %d nb %d lm %ux ppb %d m %ux\n", np, par->r.min.x, nb, lm, ppb, m);
         2152 
         2153                         /* right edge */
         2154                         np = par->r.max.x&m;        /* no. pixels used on left side of word */
         2155                         dx -= np;        /* discount the pixels that live in the last byte */
         2156                         nb = 8 - np * dst->depth;                /* no. bits unused on right side of word */
         2157                         rm = ~((1<<nb)-1);        /* 0 when r.max.x falls on a byte boundary */
         2158 DBG print("np %d x %d nb %d rm %ux ppb %d m %ux\n", np, par->r.max.x, nb, rm, ppb, m);
         2159 
         2160 DBG print("dx %d Dx %d\n", dx, Dx(par->r));
         2161                         /* lm, rm are masks that are 1 where we should touch the bits */
         2162                         if(dx < 0){        /* just one byte */
         2163                                 lm &= rm;
         2164                                 for(y=0; y<dy; y++, dp+=dwid)
         2165                                         *dp ^= (v ^ *dp) & lm;        /* take v's bits under lm, keep the rest of *dp */
         2166                         }else if(dx == 0){        /* no full bytes */
         2167                                 if(lm)
         2168                                         dwid--;        /* the loop body itself advances dp by one */
         2169 
         2170                                 for(y=0; y<dy; y++, dp+=dwid){
         2171                                         if(lm){
         2172 DBG print("dp %p v %lux lm %ux (v ^ *dp) & lm %lux\n", dp, v, lm, (v^*dp)&lm);
         2173                                                 *dp ^= (v ^ *dp) & lm;
         2174                                                 dp++;
         2175                                         }
         2176                                         *dp ^= (v ^ *dp) & rm;        /* NOTE(review): with rm==0 this still reads and rewrites the byte after the rectangle */
         2177                                 }
         2178                         }else{                /* full bytes in middle */
         2179                                 dx /= ppb;        /* dx becomes the number of whole middle bytes */
         2180                                 if(lm)
         2181                                         dwid--;
         2182                                 dwid -= dx;        /* the loop body advances dp past the middle bytes */
         2183 
         2184                                 for(y=0; y<dy; y++, dp+=dwid){
         2185                                         if(lm){
         2186                                                 *dp ^= (v ^ *dp) & lm;
         2187                                                 dp++;
         2188                                         }
         2189                                         memset(dp, v, dx);
         2190                                         dp += dx;
         2191                                         *dp ^= (v ^ *dp) & rm;        /* same rm==0 caveat as in the branch above */
         2192                                 }
         2193                         }
         2194                         return 1;
         2195                 case 8:
         2196                         for(y=0; y<dy; y++, dp+=dwid)        /* one byte per pixel: plain memset per row */
         2197                                 memset(dp, v, dx);
         2198                         return 1;
         2199                 case 16:
         2200                         p[0] = v;                /* make little endian */
         2201                         p[1] = v>>8;
         2202                         memmove(&u16, p, 2);        /* reload so a native store writes low byte first */
         2203                         v = u16;
         2204 DBG print("dp=%p; dx=%d; for(y=0; y<%d; y++, dp+=%d)\nmemsets(dp, v, dx);\n",
         2205         dp, dx, dy, dwid);
         2206                         for(y=0; y<dy; y++, dp+=dwid)
         2207                                 memsets(dp, v, dx);        /* presumably stores dx 16-bit values; defined elsewhere */
         2208                         return 1;
         2209                 case 24:
         2210                         for(y=0; y<dy; y++, dp+=dwid)
         2211                                 memset24(dp, v, dx);        /* presumably stores dx 3-byte pixels; defined elsewhere */
         2212                         return 1;
         2213                 case 32:
         2214                         p[0] = v;                /* make little endian */
         2215                         p[1] = v>>8;
         2216                         p[2] = v>>16;
         2217                         p[3] = v>>24;
         2218                         memmove(&v, p, 4);
         2219                         for(y=0; y<dy; y++, dp+=dwid)
         2220                                 memsetl(dp, v, dx);        /* presumably stores dx 32-bit values; defined elsewhere */
         2221                         return 1;
         2222                 default:
         2223                         assert(0 /* bad dest depth in memoptdraw */);
         2224                 }
         2225         }
         2226 
         2227         /*
         2228          * If no source alpha, an opaque mask, we can just copy the
         2229          * source onto the destination.  If the channels are the same and
         2230          * the source is not replicated, memmove suffices.
         2231          */
         2232         m = Simplemask|Fullmask;
         2233         if((par->state&(m|Replsrc))==m && src->depth >= 8        /* both mask flags set, Replsrc clear */
         2234         && src->chan == dst->chan && !(src->flags&Falpha) && (op == S || op == SoverD)){
         2235                 uchar *sp, *dp;
         2236                 long swid, dwid, nb;
         2237                 int dir;
         2238 
         2239                 if(src->data == dst->data && byteaddr(dst, par->r.min) > byteaddr(src, par->sr.min))
         2240                         dir = -1;        /* same storage, dst after src: copy rows last to first */
         2241                 else
         2242                         dir = 1;
         2243 
         2244                 swid = src->width*sizeof(u32int);        /* row strides in bytes */
         2245                 dwid = dst->width*sizeof(u32int);
         2246                 sp = byteaddr(src, par->sr.min);
         2247                 dp = byteaddr(dst, par->r.min);
         2248                 if(dir == -1){
         2249                         sp += (dy-1)*swid;        /* start at the last row and step backwards */
         2250                         dp += (dy-1)*dwid;
         2251                         swid = -swid;
         2252                         dwid = -dwid;
         2253                 }
         2254                 nb = (dx*src->depth)/8;        /* bytes to copy per row */
         2255                 for(y=0; y<dy; y++, sp+=swid, dp+=dwid)
         2256                         memmove(dp, sp, nb);
         2257                 return 1;
         2258         }
         2259 
         2260         /*
         2261          * If we have a 1-bit mask, 1-bit source, and 1-bit destination, and
         2262          * they're all bit aligned, we can just use bit operators.  This happens
         2263          * when we're manipulating boolean masks, e.g. in the arc code.
         2264          */
         2265         if((par->state&(Simplemask|Simplesrc|Replmask|Replsrc))==0
         2266         && dst->chan==GREY1 && src->chan==GREY1 && par->mask->chan==GREY1
         2267         && (par->r.min.x&7)==(par->sr.min.x&7) && (par->r.min.x&7)==(par->mr.min.x&7)){
         2268                 uchar *sp, *dp, *mp;
         2269                 uchar lm, rm;
         2270                 long swid, dwid, mwid;
         2271                 int i, x, dir;
         2272 
         2273                 sp = byteaddr(src, par->sr.min);
         2274                 dp = byteaddr(dst, par->r.min);
         2275                 mp = byteaddr(par->mask, par->mr.min);
         2276                 swid = src->width*sizeof(u32int);
         2277                 dwid = dst->width*sizeof(u32int);
         2278                 mwid = par->mask->width*sizeof(u32int);
         2279 
         2280                 if(src->data == dst->data && byteaddr(dst, par->r.min) > byteaddr(src, par->sr.min)){
         2281                         dir = -1;        /* same storage, dst after src: walk backwards */
         2282                 }else
         2283                         dir = 1;
         2284 
         2285                 lm = 0xFF>>(par->r.min.x&7);        /* bits of the first byte that lie inside r */
         2286                 rm = 0xFF<<(8-(par->r.max.x&7));        /* bits of the last byte inside r; 0 if r.max.x is a multiple of 8 */
         2287                 dx -= (8-(par->r.min.x&7)) + (par->r.max.x&7);        /* pixels left for whole middle bytes */
         2288 
         2289                 if(dx < 0){        /* one byte wide */
         2290                         lm &= rm;
         2291                         if(dir == -1){
         2292                                 dp += dwid*(dy-1);
         2293                                 sp += swid*(dy-1);
         2294                                 mp += mwid*(dy-1);
         2295                                 dwid = -dwid;
         2296                                 swid = -swid;
         2297                                 mwid = -mwid;
         2298                         }
         2299                         for(y=0; y<dy; y++){
         2300                                 *dp ^= (*dp ^ *sp) & *mp & lm;        /* take source bits where mask and lm are 1, keep dst elsewhere */
         2301                                 dp += dwid;
         2302                                 sp += swid;
         2303                                 mp += mwid;
         2304                         }
         2305                         return 1;
         2306                 }
         2307 
         2308                 dx /= 8;        /* dx becomes the number of whole middle bytes */
         2309                 if(dir == 1){
         2310                         i = (lm!=0)+dx+(rm!=0);        /* bytes visited in each row */
         2311                         mwid -= i;        /* the inner code advances each pointer by i per row */
         2312                         swid -= i;
         2313                         dwid -= i;
         2314                         for(y=0; y<dy; y++, dp+=dwid, sp+=swid, mp+=mwid){
         2315                                 if(lm){
         2316                                         *dp ^= (*dp ^ *sp++) & *mp++ & lm;
         2317                                         dp++;
         2318                                 }
         2319                                 for(x=0; x<dx; x++){
         2320                                         *dp ^= (*dp ^ *sp++) & *mp++;
         2321                                         dp++;
         2322                                 }
         2323                                 if(rm){
         2324                                         *dp ^= (*dp ^ *sp++) & *mp++ & rm;
         2325                                         dp++;
         2326                                 }
         2327                         }
         2328                         return 1;
         2329                 }else{
         2330                 /* dir == -1 */
         2331                         i = (lm!=0)+dx+(rm!=0);
         2332                         dp += dwid*(dy-1)+i-1;        /* start on the last visited byte of the last row */
         2333                         sp += swid*(dy-1)+i-1;
         2334                         mp += mwid*(dy-1)+i-1;
         2335                         dwid = -dwid+i;        /* step up one row, undoing the i bytes walked leftwards */
         2336                         swid = -swid+i;
         2337                         mwid = -mwid+i;
         2338                         for(y=0; y<dy; y++, dp+=dwid, sp+=swid, mp+=mwid){
         2339                                 if(rm){
         2340                                         *dp ^= (*dp ^ *sp--) & *mp-- & rm;
         2341                                         dp--;
         2342                                 }
         2343                                 for(x=0; x<dx; x++){
         2344                                         *dp ^= (*dp ^ *sp--) & *mp--;
         2345                                         dp--;
         2346                                 }
         2347                                 if(lm){
         2348                                         *dp ^= (*dp ^ *sp--) & *mp-- & lm;
         2349                                         dp--;
         2350                                 }
         2351                         }
         2352                 }
         2353                 return 1;        /* reached only from the dir == -1 branch */
         2354         }
         2355         return 0;        /* no fast path matched */
         2356 }
         2357 #undef DBG
         2358 
         2359 /*
         2360  * Boolean character drawing: store the solid color par->sdval into dst wherever
         2361  * the 1-bit greyscale mask has a 1 bit.  Returns 1 if it drew, 0 if par does not qualify.
         2362  */
         2363 #define DBG if(0)        /* the debug prints below sit behind a constant-false test */
         2364 static int
         2365 chardraw(Memdrawparam *par)
         2366 {
         2367         u32int bits;
         2368         int i, ddepth, dy, dx, x, bx, ex, y, npack, bsh, depth, op;
         2369         u32int v, maskwid, dstwid;
         2370         uchar *wp, *rp, *q, *wc;
         2371         ushort *ws;
         2372         u32int *wl;
         2373         uchar sp[4];
         2374         Rectangle r, mr;
         2375         Memimage *mask, *src, *dst;
         2376         union {
         2377                 // black box to hide pointer conversions from gcc.
         2378                 // we'll see how long this works.
         2379                 uchar *u8;
         2380                 u16int *u16;
         2381                 u32int *u32;
         2382         } gcc_black_box;
         2383 
         2384 if(0) if(drawdebug) iprint("chardraw? mf %lux md %d sf %lux dxs %d dys %d dd %d ddat %p sdat %p\n",
         2385                 par->mask->flags, par->mask->depth, par->src->flags,
         2386                 Dx(par->src->r), Dy(par->src->r), par->dst->depth, par->dst->data, par->src->data);
         2387 
         2388         mask = par->mask;
         2389         src = par->src;
         2390         dst = par->dst;
         2391         r = par->r;
         2392         mr = par->mr;
         2393         op = par->op;
         2394         /* qualify only if: Replsrc, Simplesrc, Fullsrc set and Replmask clear; 1-bit mask; dst of 8+ bits not sharing src's data; op SoverD */
         2395         if((par->state&(Replsrc|Simplesrc|Fullsrc|Replmask)) != (Replsrc|Simplesrc|Fullsrc)
         2396         || mask->depth != 1 || dst->depth<8 || dst->data==src->data
         2397         || op != SoverD)
         2398                 return 0;
         2399 
         2400 /*if(drawdebug) iprint("chardraw..."); */
         2401 
         2402         depth = mask->depth;        /* always 1 here, checked above */
         2403         maskwid = mask->width*sizeof(u32int);        /* mask row stride in bytes */
         2404         rp = byteaddr(mask, mr.min);
         2405         npack = 8/depth;        /* mask pixels per byte */
         2406         bsh = (mr.min.x % npack) * depth;        /* bit offset of the first mask pixel within its byte */
         2407 
         2408         wp = byteaddr(dst, r.min);
         2409         dstwid = dst->width*sizeof(u32int);        /* destination row stride in bytes */
         2410 DBG print("bsh %d\n", bsh);
         2411         dy = Dy(r);
         2412         dx = Dx(r);
         2413 
         2414         ddepth = dst->depth;
         2415 
         2416         /*
         2417          * for loop counts from bsh to bsh+dx
         2418          *
         2419          * we want the bottom bits to be the amount
         2420          * to shift the pixels down, so for n≡0 (mod 8) we want
         2421          * bottom bits 7.  for n≡1, 6, etc.
         2422          * the bits come from -n-1.
         2423          */
         2424 
         2425         bx = -bsh-1;        /* x runs downward from bx to ex+1: dx steps per row */
         2426         ex = -bsh-1-dx;
         2427         SET(bits);        /* presumably marks bits as set for the compiler; macro defined elsewhere — TODO confirm */
         2428         v = par->sdval;
         2429 
         2430         /* make little endian */
         2431         sp[0] = v;
         2432         sp[1] = v>>8;
         2433         sp[2] = v>>16;
         2434         sp[3] = v>>24;
         2435 
         2436 /*print("sp %x %x %x %x\n", sp[0], sp[1], sp[2], sp[3]); */
         2437         for(y=0; y<dy; y++, rp+=maskwid, wp+=dstwid){
         2438                 q = rp;
         2439                 if(bsh)        /* row starts mid-byte, so the loops below will not fetch this byte themselves */
         2440                         bits = *q++;
         2441                 switch(ddepth){
         2442                 case 8:
         2443 /*if(drawdebug) iprint("8loop..."); */
         2444                         wc = wp;
         2445                         for(x=bx; x>ex; x--, wc++){
         2446                                 i = x&7;        /* bit of the mask byte to test; 7 is the byte's first pixel */
         2447                                 if(i == 8-1)        /* starting a new mask byte: fetch it */
         2448                                         bits = *q++;
         2449 DBG print("bits %lux sh %d...", bits, i);
         2450                                 if((bits>>i)&1)
         2451                                         *wc = v;        /* stores the low byte of v */
         2452                         }
         2453                         break;
         2454                 case 16:
         2455                         gcc_black_box.u8 = wp;        /* view the destination row as 16-bit values */
         2456                         ws = gcc_black_box.u16;
         2457                         gcc_black_box.u8 = sp;        /* reload v from sp so a native store writes low byte first */
         2458                         v = *gcc_black_box.u16;
         2459                         for(x=bx; x>ex; x--, ws++){
         2460                                 i = x&7;
         2461                                 if(i == 8-1)
         2462                                         bits = *q++;
         2463 DBG print("bits %lux sh %d...", bits, i);
         2464                                 if((bits>>i)&1)
         2465                                         *ws = v;
         2466                         }
         2467                         break;
         2468                 case 24:
         2469                         wc = wp;
         2470                         for(x=bx; x>ex; x--, wc+=3){        /* three bytes per pixel, copied one at a time */
         2471                                 i = x&7;
         2472                                 if(i == 8-1)
         2473                                         bits = *q++;
         2474 DBG print("bits %lux sh %d...", bits, i);
         2475                                 if((bits>>i)&1){
         2476                                         wc[0] = sp[0];
         2477                                         wc[1] = sp[1];
         2478                                         wc[2] = sp[2];
         2479                                 }
         2480                         }
         2481                         break;
         2482                 case 32:
         2483                         gcc_black_box.u8 = wp;        /* view the destination row as 32-bit values */
         2484                         wl = gcc_black_box.u32;
         2485                         gcc_black_box.u8 = sp;        /* reload v from sp so a native store writes low byte first */
         2486                         v = *gcc_black_box.u32;
         2487                         for(x=bx; x>ex; x--, wl++){
         2488                                 i = x&7;
         2489                                 if(i == 8-1)
         2490                                         bits = *q++;
         2491 DBG iprint("bits %lux sh %d...", bits, i);
         2492                                 if((bits>>i)&1)
         2493                                         *wl = v;
         2494                         }
         2495                         break;
         2496                 }
         2497         }
         2498 
         2499 DBG print("\n");
         2500         return 1;
         2501 }
         2502 #undef DBG
         2503 
         2504 
         2505 /*
         2506  * Fill entire byte with replicated (if necessary) copy of source pixel,
         2507  * assuming destination ldepth is >= source ldepth.
         2508  *
         2509  * This code is just plain wrong for >8bpp.
         2510  *
         2511 u32int
         2512 membyteval(Memimage *src)
         2513 {
         2514         int i, val, bpp;
         2515         uchar uc;
         2516 
         2517         unloadmemimage(src, src->r, &uc, 1);
         2518         bpp = src->depth;
         2519         uc <<= (src->r.min.x&(7/src->depth))*src->depth;
         2520         uc &= ~(0xFF>>bpp);
         2521         * pixel value is now in high part of byte. repeat throughout byte
         2522         val = uc;
         2523         for(i=bpp; i<8; i<<=1)
         2524                 val |= val>>i;
         2525         return val;
         2526 }
         2527  *
         2528  */
         2529 
         2530 /* Fill image i with color val, converted by _rgbatoimg; DNofill leaves i untouched. */
         2531 void
         2532 _memfillcolor(Memimage *i, u32int val)
         2533 {
         2534         u32int pix;
         2535         int shift, row;
         2536         uchar le[4];
         2537 
         2538         if(val == DNofill)
         2539                 return;
         2540         pix = _rgbatoimg(i, val);
         2541         if(i->depth == 24){        /* 3-byte pixels: fill i->r one row at a time */
         2542                 for(row=i->r.min.y; row<i->r.max.y; row++)
         2543                         memset24(byteaddr(i, Pt(i->r.min.x, row)), pix, Dx(i->r));
         2544                 return;
         2545         }
         2546 
         2547         /* depths 1, 2, 4, 8, 16, 32: replicate the pixel until it fills 32 bits */
         2548         for(shift=i->depth; shift<32; shift*=2)
         2549                 pix |= pix<<shift;
         2550         /* rebuild the word from little-endian bytes, then store it i->width*Dy(i->r) times */
         2551         le[0] = pix;
         2552         le[1] = pix>>8;
         2553         le[2] = pix>>16;
         2554         le[3] = pix>>24;
         2555         memmove(&pix, le, sizeof le);
         2556         memsetl(wordaddr(i, i->r.min), pix, i->width*Dy(i->r));
         2557 }