/* Copyright (C) 2001-2006 Artifex Software, Inc.
   All Rights Reserved.
  
   This software is provided AS-IS with no warranty, either express or
   implied.

   This software is distributed under license and may not be copied, modified
   or distributed except as expressly authorized under the terms of that
   license.  Refer to licensing information at http://www.artifex.com/
   or contact Artifex Software, Inc.,  7 Mt. Lassen Drive - Suite A-134,
   San Rafael, CA  94903, U.S.A., +1(415)492-9861, for further information.
*/
/*$Id: gswts.c 8250 2007-09-25 13:31:24Z giles $ */
/* Screen generation for Well Tempered Screening. */
#include "stdpre.h"
#include <stdlib.h> /* for malloc */
#include "gx.h"
#include "gp.h" /* for persistent cache */
#include "gxstate.h"
#include "gsht.h"
#include "math_.h"
#include "string_.h"
#include "gserrors.h"
#include "gxfrac.h"
#include "gxwts.h"
#include "gswts.h"

#define noDUMP_WTS
#ifdef DUMP_WTS
#include "fcntl_.h"
#endif

#define noVERBOSE

#ifdef UNIT_TEST
/**
 * This file can be compiled independently for unit testing purposes.
 * Try this invocation:
 *
 * gcc -I../obj -DUNIT_TEST gswts.c gxwts.c -o test_gswts -lm
 * ./test_gswts | sed '/P5/,$!d' | xv -
 **/
#undef printf
#undef stdout
#undef dlprintf1
#define dlprintf1 printf
#undef dlprintf2
#define dlprintf2 printf
#undef dlprintf3
#define dlprintf3 printf
#undef dlprintf4
#define dlprintf4 printf
#undef dlprintf5
#define dlprintf5 printf
#undef dlprintf6
#define dlprintf6 printf
#undef dlprintf7
#define dlprintf7 printf

#endif

/* A datatype used for representing the product of two 32 bit integers.
   If a 64 bit integer type is available, it may be a better choice. */
typedef double wts_bigint;

typedef struct gx_wts_cell_params_j_s gx_wts_cell_params_j_t;
typedef struct gx_wts_cell_params_h_s gx_wts_cell_params_h_t;

struct gx_wts_cell_params_j_s {
    gx_wts_cell_params_t base;
    int shift;

    double ufast_a;
    double vfast_a;
    double uslow_a;
    double vslow_a;

    /* Probabilities of "jumps". A and B jumps can happen when moving
       one pixel to the right. C and D can happen when moving one pixel
       down. */
    double pa;
    double pb;
    double pc;
    double pd;

    int xa;
    int ya;
    int xb;
    int yb;
    int xc;
    int yc;
    int xd;
    int yd;
};

struct gx_wts_cell_params_h_s {
    gx_wts_cell_params_t base;

    /* This is the exact value that x1 and (width-x1) approximates. */
    double px;
    /* Ditto y1 and (height-y1). */
    double py;

    int x1;
    int y1;
};

#define WTS_EXTRA_SORT_BITS 9
#define WTS_SORTED_MAX (((frac_1) << (WTS_EXTRA_SORT_BITS)) - 1)

typedef struct {
    int u;
    int v;
    int k;
    int l;
} wts_vec_t;

static void
wts_vec_set(wts_vec_t *wv, int u, int v, int k, int l)
{
    wv->u = u;
    wv->v = v;
    wv->k = k;
    wv->l = l;
}

static wts_bigint
wts_vec_smag(const wts_vec_t *a)
{
    wts_bigint u = a->u;
    wts_bigint v = a->v;
    return u * u + v * v;
}

static void
wts_vec_add(wts_vec_t *a, const wts_vec_t *b, const wts_vec_t *c)
{
    a->u = b->u + c->u;
    a->v = b->v + c->v;
    a->k = b->k + c->k;
    a->l = b->l + c->l;
}

static void
wts_vec_sub(wts_vec_t *a, const wts_vec_t *b, const wts_vec_t *c)
{
    a->u = b->u - c->u;
    a->v = b->v - c->v;
    a->k = b->k - c->k;
    a->l = b->l - c->l;
}

/**
 * wts_vec_gcd3: Compute 3-way "gcd" of three vectors.
 * @a, @b, @c: Vectors.
 *
 * Compute pair of vectors satisfying three constraints:
 * They are integer combinations of the source vectors.
 * The source vectors are integer combinations of the results.
 * The magnitude of the vectors is minimized.
 *
 * The algorithm for this computation is quite reminiscent of the
 * classic Euclid GCD algorithm for integers.
 *
 * On return, @a and @b contain the result. @c is destroyed.
 **/
static void
wts_vec_gcd3(wts_vec_t *a, wts_vec_t *b, wts_vec_t *c)
{
    wts_vec_t d, e;
    double ra, rb, rc, rd, re;

    wts_vec_set(&d, 0, 0, 0, 0);
    wts_vec_set(&e, 0, 0, 0, 0);
    for (;;) {
	ra = wts_vec_smag(a);
	rb = wts_vec_smag(b);
	rc = wts_vec_smag(c);
	wts_vec_sub(&d, a, b);
	wts_vec_add(&e, a, b);
	rd = wts_vec_smag(&d);
	re = wts_vec_smag(&e);
	if (re && re < rd) {
	    d = e;
	    rd = re;
	}
	if (rd && rd < ra && ra >= rb) *a = d;
	else if (rd < rb && rb > ra) *b = d;
	else {
	    wts_vec_sub(&d, a, c);
	    wts_vec_add(&e, a, c);
	    rd = wts_vec_smag(&d);
	    re = wts_vec_smag(&e);
	    if (re < rd) {
		d = e;
		rd = re;
	    }
	    if (rd && rd < ra && ra >= rc) *a = d;
	    else if (rd < rc && rc > ra) *c = d;
	    else {
		wts_vec_sub(&d, b, c);
		wts_vec_add(&e, b, c);
		rd = wts_vec_smag(&d);
		re = wts_vec_smag(&e);
		if (re < rd) {
		    d = e;
		    rd = re;
		}
		if (rd && rd < rb && rb >= rc) *b = d;
		else if (rd < rc && rc > rb) *c = d;
		else
		    break;
	    }
	}
    }
}

static wts_bigint
wts_vec_cross(const wts_vec_t *a, const wts_vec_t *b)
{
    wts_bigint au = a->u;
    wts_bigint av = a->v;
    wts_bigint bu = b->u;
    wts_bigint bv = b->v;
    return au * bv - av * bu;
}

static void
wts_vec_neg(wts_vec_t *a)
{
    a->u = -a->u;
    a->v = -a->v;
    a->k = -a->k;
    a->l = -a->l;
}

/* compute k mod m */
static void
wts_vec_modk(wts_vec_t *a, int m)
{
    while (a->k < 0) a->k += m;
    while (a->k >= m) a->k -= m;
}

/* Compute modulo in rational cell. */
static void
wts_vec_modkls(wts_vec_t *a, int m, int i, int s)
{
    while (a->l < 0) {
	a->l += i;
	a->k -= s;
    }
    while (a->l >= i) {
	a->l -= i;
	a->k += s;
    }
    while (a->k < 0) a->k += m;
    while (a->k >= m) a->k -= m;
}

static void
wts_set_mat(gx_wts_cell_params_t *wcp, double sratiox, double sratioy,
	    double sangledeg)
{
    double sangle = sangledeg * M_PI / 180;

    wcp->ufast = cos(sangle) / sratiox;
    wcp->vfast = -sin(sangle) / sratiox;
    wcp->uslow = sin(sangle) / sratioy;
    wcp->vslow = cos(sangle) / sratioy;
}


/**
 * Calculate Screen H cell sizes.
 **/
static void
wts_cell_calc_h(double inc, int *px1, int *pxwidth, double *pp1, double memw)
{
    double minrep = pow(2, memw) * 50 / pow(2, 7.5);
    int m1 = 0, m2 = 0;
    double e1, e2;

    int uacc;

    e1 = 1e5;
    e2 = 1e5;
    for (uacc = (int)ceil(minrep * inc); uacc <= floor(2 * minrep * inc); uacc++) {
	int mt;
	double et;

	mt = (int)floor(uacc / inc + 1e-5);
	et = uacc / inc - mt + mt * 0.001;
	if (et < e1) {
	    e1 = et;
	    m1 = mt;
	}
	mt = (int)ceil(uacc / inc - 1e-5);
	et = mt - uacc / inc + mt * 0.001;
	if (et < e2) {
	    e2 = et;
	    m2 = mt;
	}
    }
    if (m1 == m2) {
	*px1 = m1;
	*pxwidth = m1;
	*pp1 = 1.0;
    } else {
	*px1 = m1;
	*pxwidth = m1 + m2;
	e1 = fabs(m1 * inc - floor(0.5 + m1 * inc));
	e2 = fabs(m2 * inc - floor(0.5 + m2 * inc));
	*pp1 = e2 / (e1 + e2);
    }
}

/* Implementation for Screen H. This is optimized for 0 and 45 degree
   rotations. */
static gx_wts_cell_params_t *
wts_pick_cell_size_h(double sratiox, double sratioy, double sangledeg,
		     double memw)
{
    gx_wts_cell_params_h_t *wcph;
    double xinc, yinc;

    wcph = malloc(sizeof(gx_wts_cell_params_h_t));
    if (wcph == NULL)
	return NULL;

    wcph->base.t = WTS_SCREEN_H;
    wts_set_mat(&wcph->base, sratiox, sratioy, sangledeg);

    xinc = fabs(wcph->base.ufast);
    if (xinc == 0)
	xinc = fabs(wcph->base.vfast);

    wts_cell_calc_h(xinc, &wcph->x1, &wcph->base.width, &wcph->px, memw);

    yinc = fabs(wcph->base.uslow);
    if (yinc == 0)
	yinc = fabs(wcph->base.vslow);
    wts_cell_calc_h(yinc, &wcph->y1, &wcph->base.height, &wcph->py, memw);

    return &wcph->base;
}

static double
wts_qart(double r, double rbase, double p, double pbase)
{
   if (p < 1e-5) {
      return ((r + rbase) * p);
   } else {
      return ((r + rbase) * (p + pbase) - rbase * pbase);
   }
}

#ifdef VERBOSE
static void
wts_print_j_jump(const gx_wts_cell_params_j_t *wcpj, const char *name,
		 double pa, int xa, int ya)
{
    dlprintf6("jump %s: (%d, %d) %f, actual (%f, %f)\n",
	      name, xa, ya, pa,
	      wcpj->ufast_a * xa + wcpj->uslow_a * ya,
	      wcpj->vfast_a * xa + wcpj->vslow_a * ya);
}

static void
wts_j_add_jump(const gx_wts_cell_params_j_t *wcpj, double *pu, double *pv, 
	       double pa, int xa, int ya)
{
    double jump_u = wcpj->ufast_a * xa + wcpj->uslow_a * ya;
    double jump_v = wcpj->vfast_a * xa + wcpj->vslow_a * ya;
    jump_u -= floor(jump_u + 0.5);
    jump_v -= floor(jump_v + 0.5);
    *pu += pa * jump_u;
    *pv += pa * jump_v;
}

static void
wts_print_j(const gx_wts_cell_params_j_t *wcpj)
{
    double uf, vf;
    double us, vs;

    dlprintf3("cell = %d x %d, shift = %d\n",
	      wcpj->base.width, wcpj->base.height, wcpj->shift);
    wts_print_j_jump(wcpj, "a", wcpj->pa, wcpj->xa, wcpj->ya);
    wts_print_j_jump(wcpj, "b", wcpj->pb, wcpj->xb, wcpj->yb);
    wts_print_j_jump(wcpj, "c", wcpj->pc, wcpj->xc, wcpj->yc);
    wts_print_j_jump(wcpj, "d", wcpj->pd, wcpj->xd, wcpj->yd);
    uf = wcpj->ufast_a;
    vf = wcpj->vfast_a;
    us = wcpj->uslow_a;
    vs = wcpj->vslow_a;
    wts_j_add_jump(wcpj, &uf, &vf, wcpj->pa, wcpj->xa, wcpj->ya);
    wts_j_add_jump(wcpj, &uf, &vf, wcpj->pb, wcpj->xb, wcpj->yb);
    wts_j_add_jump(wcpj, &us, &vs, wcpj->pc, wcpj->xc, wcpj->yc);
    wts_j_add_jump(wcpj, &us, &vs, wcpj->pd, wcpj->xd, wcpj->yd);
    dlprintf6("d: %f, %f; a: %f, %f; err: %g, %g\n",
	    wcpj->base.ufast, wcpj->base.vfast,
	    wcpj->ufast_a, wcpj->vfast_a,
	    wcpj->base.ufast - uf, wcpj->base.vfast - vf);
    dlprintf6("d: %f, %f; a: %f, %f; err: %g, %g\n",
	    wcpj->base.uslow, wcpj->base.vslow,
	    wcpj->uslow_a, wcpj->vslow_a,
	    wcpj->base.uslow - us, wcpj->base.vslow - vs);
}
#endif

/**
 * wts_set_scr_jxi_try: Try a width for setting Screen J parameters.
 * @wcpj: Screen J parameters.
 * @m: Width to try.
 * @qb: Best quality score seen so far.
 * @jmem: Weight given to memory usage.
 *
 * Evaluate the quality for width @i. If the quality is better than
 * @qb, then set the rest of the parameters in @wcpj.
 *
 * This routine corresponds to SetScrJXITrySP in the WTS source.
 *
 * Return value: Quality score for parameters chosen.
 **/
static double
wts_set_scr_jxi_try(gx_wts_cell_params_j_t *wcpj, int m, double qb,
		    double jmem)
{
    const double uf = wcpj->base.ufast;
    const double vf = wcpj->base.vfast;
    const double us = wcpj->base.uslow;
    const double vs = wcpj->base.vslow;
    wts_vec_t a, b, c;
    double ufj, vfj;
    const double rbase = 0.1;
    const double pbase = 0.001;
    double ug, vg;
    double pp, pa, pb;
    double rf;
    double xa, ya, ra, xb, yb, rb;
    double q, qx, qw, qxl, qbi;
    double pya, pyb;
    int i;
    bool jumpok;

    wts_vec_set(&a, (int)floor(uf * m + 0.5), (int)floor(vf * m + 0.5), 1, 0);
    if (a.u == 0 && a.v == 0)
	return qb + 1;
	
    ufj = a.u / (double)m;
    vfj = a.v / (double)m;
    /* (ufj, vfj) = movement in UV space from (0, 1) in XY space */

    wts_vec_set(&b, m, 0, 0, 0);
    wts_vec_set(&c, 0, m, 0, 0);
    wts_vec_gcd3(&a, &b, &c);
    ug = (uf - ufj) * m;
    vg = (vf - vfj) * m;
    pp = 1.0 / wts_vec_cross(&b, &a);
    pa = (b.u * vg - ug * b.v) * pp;
    pb = (ug * a.v - a.u * vg) * pp;
    if (pa < 0) {
	wts_vec_neg(&a);
	pa = -pa;
    }
    if (pb < 0) {
	wts_vec_neg(&b);
	pb = -pb;
    }
    wts_vec_modk(&a, m);
    wts_vec_modk(&b, m);

    /* Prefer nontrivial jumps. */
    jumpok = (wcpj->xa == 0 && wcpj->ya == 0) ||
      (wcpj->xb == 0 && wcpj->yb == 0) ||
      (a.k != 0 && b.k != 0);

    rf = (uf * uf + vf * vf) * m;
    xa = (a.u * uf + a.v * vf) / rf;
    ya = (a.v * uf - a.u * vf) / rf;
    xb = (b.u * uf + b.v * vf) / rf;
    yb = (b.v * uf - b.u * vf) / rf;
    ra = sqrt(xa * xa + 0.0625 * ya * ya);
    rb = sqrt(xb * xb + 0.0625 * yb * yb);
    qx = 0.5 * (wts_qart(ra, rbase, pa, pbase) +
		wts_qart(rb, rbase, pb, pbase));
    if (qx < 1.0 / 4000.0)
	qx *= 0.25;
    else
	qx -= 0.75 / 4000.0;
    if (m < 7500)
	qw = 0;
    else
	qw = 0.00025; /* cache penalty */
    qxl = qx + qw;
    if (qxl > qb)
	return qxl;

    /* width is ok, now try heights */

    pp = m / (double)wts_vec_cross(&b, &a);
    pya = (b.u * vs - us * b.v) * pp;
    pyb = (us * a.v - a.u * vs) * pp;

    qbi = qb;
    for (i = 1; qxl + m * i * jmem < qbi; i++) {
	int s = m * i;
	int ca, cb;
	double usj, vsj;
	double usg, vsg;
	wts_vec_t a1, b1, a2, b2;
	double pc, pd;
	int ck;
	double qy, qm;

	ca = (int)floor(i * pya + 0.5);
	cb = (int)floor(i * pyb + 0.5);
	wts_vec_set(&c, ca * a.u + cb * b.u, ca * a.v + cb * b.v, 0, 1);
	usj = c.u / (double)s;
	vsj = c.v / (double)s;
	usg = (us - usj);
	vsg = (vs - vsj);

	a1 = a;
	b1 = b;
	a1.u *= i;
	a1.v *= i;
	b1.u *= i;
	b1.v *= i;
	wts_vec_gcd3(&a1, &b1, &c);
	a2 = a1;
	b2 = b1;
	pp = s / (double)wts_vec_cross(&b1, &a1);
	pc = (b1.u * vsg - usg * b1.v) * pp;
	pd = (usg * a1.v - a1.u * vsg) * pp;
	if (pc < 0) {
	    wts_vec_neg(&a1);
	    pc = -pc;
	}
	if (pd < 0) {
	    wts_vec_neg(&b1);
	    pd = -pd;
	}
	ck = ca * a.k + cb * b.k;
	while (ck < 0) ck += m;
	while (ck >= m) ck -= m;
	wts_vec_modkls(&a1, m, i, ck);
	wts_vec_modkls(&b1, m, i, ck);
	rf = (us * us - vs * vs) * s;
	xa = (a1.u * us + a1.v * vs) / rf;
	ya = (a1.v * us - a1.u * vs) / rf;
	xb = (b1.u * us + b1.v * vs) / rf;
	yb = (b1.v * us - b1.u * vs) / rf;
	ra = sqrt(xa * xa + 0.0625 * ya * ya);
	rb = sqrt(xb * xb + 0.0625 * yb * yb);
	qy = 0.5 * (wts_qart(ra, rbase, pc, pbase) +
		    wts_qart(rb, rbase, pd, pbase));
	if (qy < 1.0 / 100.0)
	    qy *= 0.025;
	else
	    qy -= 0.75 / 100.0;
	qm = s * jmem;

	/* first try a and b jumps within the scanline */
	q = qm + qw + qx + qy;
	if (q < qbi && jumpok) {
#ifdef VERBOSE
	    dlprintf7("m = %d, n = %d, q = %d, qx = %d, qy = %d, qm = %d, qw = %d\n",
		      m, i, (int)(q * 1e6), (int)(qx * 1e6), (int)(qy * 1e6), (int)(qm * 1e6), (int)(qw * 1e6));
#endif
	    qbi = q;
	    wcpj->base.width = m;
	    wcpj->base.height = i;
	    wcpj->shift = ck;
	    wcpj->ufast_a = ufj;
	    wcpj->vfast_a = vfj;
	    wcpj->uslow_a = usj;
	    wcpj->vslow_a = vsj;
	    wcpj->xa = a.k;
	    wcpj->ya = 0;
	    wcpj->xb = b.k;
	    wcpj->yb = 0;
	    wcpj->xc = a1.k;
	    wcpj->yc = a1.l;
	    wcpj->xd = b1.k;
	    wcpj->yd = b1.l;
	    wcpj->pa = pa;
	    wcpj->pb = pb;
	    wcpj->pc = pc;
	    wcpj->pd = pd;
#ifdef VERBOSE
	    wts_print_j(wcpj);
#endif
	}

	/* then try unconstrained a and b jumps */
	if (i > 1) {
	    double pa2, pb2, pp2;
	    double qx2, qw2, q2;

	    pp2 = pp;
	    pa2 = (b2.u * vg - ug * b2.v) * pp2;
	    pb2 = (ug * a2.v - a2.u * vg) * pp2;
	    rf = (uf * uf + vf * vf) * s;
	    xa = (a2.u * uf + a2.v * vf) / rf;
	    ya = (a2.v * uf - a2.u * vf) / rf;
	    xb = (b2.u * uf + b2.v * vf) / rf;
	    yb = (b2.v * uf - b2.u * vf) / rf;
	    ra = sqrt(xa * xa + 0.0625 * ya * ya);
	    rb = sqrt(xb * xb + 0.0625 * yb * yb);
	    if (pa2 < 0) {
		pa2 = -pa2;
		wts_vec_neg(&a2);
	    }
	    if (pb2 < 0) {
		pb2 = -pb2;
		wts_vec_neg(&b2);
	    }
	    wts_vec_modkls(&a2, m, i, ck);
	    wts_vec_modkls(&b2, m, i, ck);
	    qx2 = 0.5 * (wts_qart(ra, rbase, pa2, pbase) +
			 wts_qart(rb, rbase, pb2, pbase));
	    if (qx2 < 1.0 / 4000.0)
		qx2 *= 0.25;
	    else
		qx2 -= 0.75 / 4000.0;
	    if (s < 7500)
		qw2 = 0;
	    else
		qw2 = 0.00025; /* cache penalty */
	    q2 = qm + qw2 + qx2 + qy;
	    if (q2 < qbi) {
#ifdef VERBOSE
		dlprintf7("m = %d, n = %d, q = %d, qx2 = %d, qy = %d, qm = %d, qw2 = %d\n",
			  m, i, (int)(q * 1e6), (int)(qx * 1e6), (int)(qy * 1e6), (int)(qm * 1e6), (int)(qw2 * 1e6));
#endif
		if (qxl > qw2 + qx2)
		    qxl = qw2 + qx2;
		qbi = q2;
		wcpj->base.width = m;
		wcpj->base.height = i;
		wcpj->shift = ck;
		wcpj->ufast_a = ufj;
		wcpj->vfast_a = vfj;
		wcpj->uslow_a = usj;
		wcpj->vslow_a = vsj;
		wcpj->xa = a2.k;
		wcpj->ya = a2.l;
		wcpj->xb = b2.k;
		wcpj->yb = a2.l;
		wcpj->xc = a1.k;
		wcpj->yc = a1.l;
		wcpj->xd = b1.k;
		wcpj->yd = b1.l;
		wcpj->pa = pa2;
		wcpj->pb = pb2;
		wcpj->pc = pc;
		wcpj->pd = pd;
#ifdef VERBOSE
		wts_print_j(wcpj);
#endif
	    }
	} /* if (i > 1) */
	if (qx > 10 * qy)
	    break;
    }
    return qbi;
}

static int
wts_double_to_int_cap(double d)
{
    if (d > 0x7fffffff)
	return 0x7fffffff;
    else return (int)d;
}

/**
 * wts_set_scr_jxi: Choose Screen J parameters.
 * @wcpj: Screen J parameters.
 * @jmem: Weight given to memory usage.
 *
 * Chooses a cell size based on the [uv]{fast,slow} parameters,
 * setting the rest of the parameters in @wcpj. Essentially, this
 * algorithm iterates through all plausible widths for the cell.
 *
 * This routine corresponds to SetScrJXISP in the WTS source.
 *
 * Return value: Quality score for parameters chosen.
 **/
static double
wts_set_scr_jxi(gx_wts_cell_params_j_t *wcpj, double jmem)
{
    int i, imax;
    double q, qb;

    wcpj->xa = 0;
    wcpj->ya = 0;
    wcpj->xb = 0;
    wcpj->yb = 0;
    wcpj->xc = 0;
    wcpj->yc = 0;
    wcpj->xd = 0;
    wcpj->yd = 0;

    qb = 1.0;
    imax = wts_double_to_int_cap(qb / jmem);
    for (i = 1; i <= imax; i++) {
	if (i > 1) {
	    q = wts_set_scr_jxi_try(wcpj, i, qb, jmem);
	    if (q < qb) {
		qb = q;
		imax = wts_double_to_int_cap(q / jmem);
		if (imax >= 7500)
		    imax = wts_double_to_int_cap((q - 0.00025) / jmem);
		if (imax < 7500) {
		    imax = 7500;
		}
	    }
	}
    }
    return qb;
}

/* Implementation for Screen J. This is optimized for general angles. */
static gx_wts_cell_params_t *
wts_pick_cell_size_j(double sratiox, double sratioy, double sangledeg,
		     double memw)
{
    gx_wts_cell_params_j_t *wcpj;
    double code;

    wcpj = malloc(sizeof(gx_wts_cell_params_j_t));
    if (wcpj == NULL)
	return NULL;

    wcpj->base.t = WTS_SCREEN_J;
    wts_set_mat(&wcpj->base, sratiox, sratioy, sangledeg);

    code = wts_set_scr_jxi(wcpj, pow(0.1, memw));
    if (code < 0) {
	free(wcpj);
	return NULL;
    }

    return &wcpj->base;
}

typedef struct {
    double sratiox;
    double sratioy;
    double sangledeg;
    double memw;
} wts_cell_size_key;

static void *
wts_cache_alloc_callback(void *data, int bytes)
{
    return malloc(bytes);
}

static void
store_be32(byte *ptr, int x)
{
    ptr[0] = (x >> 24) & 0xff;
    ptr[1] = (x >> 16) & 0xff;
    ptr[2] = (x >> 8) & 0xff;
    ptr[3] = x & 0xff;
}

/**
 * wts_pick_cell_size: Choose cell size for WTS screen.
 * @ph: The halftone parameters.
 * @pmat: Transformation from 1/72" Y-up coords to device coords.
 *
 * Return value: The WTS cell parameters, or NULL on error.
 **/
gx_wts_cell_params_t *
wts_pick_cell_size(gs_screen_halftone *ph, const gs_matrix *pmat)
{
    gx_wts_cell_params_t *result;

    /* todo: deal with landscape and mirrored coordinate systems */
    double sangledeg = ph->angle;
    double sratiox = 72.0 * fabs(pmat->xx) / ph->frequency;
    double sratioy = 72.0 * fabs(pmat->yy) / ph->frequency;
    double octangle;
    double memw = 8.0;
    wts_cell_size_key key;
    int len;
    byte *keyptr = NULL;
    byte intkey[12];
    int keysize;

    octangle = sangledeg;
    while (octangle >= 45.0) octangle -= 45.0;
    while (octangle < -45.0) octangle += 45.0;

    /* try persistent cache */
    if (sangledeg == 45.0) {
	int srxi, sryi;

	srxi = (int)floor(sratiox / sqrt(2) + 0.5);
	sryi = (int)floor(sratioy / sqrt(2) + 0.5);
	if (fabs(srxi * sqrt(2) - sratiox) < 2e-6 &&
	    fabs(sryi * sqrt(2) - sratioy) < 2e-6) {
	    store_be32(intkey, (int)sangledeg);
	    store_be32(intkey + 4, srxi);
	    store_be32(intkey + 8, sryi);
	    keyptr = intkey;
	    keysize = sizeof(intkey);
	}
    }
    if (keyptr == NULL) {
	key.sratiox = sratiox;
	key.sratioy = sratioy;
	key.sangledeg = sangledeg;
	key.memw = memw;
	keyptr = (byte *)&key;
	keysize = sizeof(key);
    }
    len = gp_cache_query(GP_CACHE_TYPE_WTS_SIZE, keyptr, keysize,
			 (void **)&result, wts_cache_alloc_callback, NULL);
    if (len >= 0)
	return result;

    if (fabs(octangle) < 1e-4)
	result = wts_pick_cell_size_h(sratiox, sratioy, sangledeg, memw);
    else
	result = wts_pick_cell_size_j(sratiox, sratioy, sangledeg, memw);

    if (result != NULL) {
	int resultsize = 0;

	/* insert computed cell size into cache */
	if (result->t == WTS_SCREEN_H)
	    resultsize = sizeof(gx_wts_cell_params_h_t);
	else if (result->t == WTS_SCREEN_J)
	    resultsize = sizeof(gx_wts_cell_params_j_t);
	if (resultsize)
	    gp_cache_insert(GP_CACHE_TYPE_WTS_SIZE, (byte *)&key, sizeof(key),
			    (void *)result, resultsize);

	ph->actual_frequency = ph->frequency;
	ph->actual_angle = ph->angle;
    }
    return result;
}

struct gs_wts_screen_enum_s {
    wts_screen_type t;
    bits32 *cell;
    int width;
    int height;
    int size;

    int idx;
};

typedef struct {
    gs_wts_screen_enum_t base;
    const gx_wts_cell_params_j_t *wcpj;
} gs_wts_screen_enum_j_t;

typedef struct {
    gs_wts_screen_enum_t base;
    const gx_wts_cell_params_h_t *wcph;

    /* an argument can be made that these should be in the params */
    double ufast1, vfast1;
    double ufast2, vfast2;
    double uslow1, vslow1;
    double uslow2, vslow2;
} gs_wts_screen_enum_h_t;

static gs_wts_screen_enum_t *
gs_wts_screen_enum_j_new(gx_wts_cell_params_t *wcp)
{
    gs_wts_screen_enum_j_t *wsej;

    wsej = malloc(sizeof(gs_wts_screen_enum_j_t));
    wsej->base.t = WTS_SCREEN_J;
    wsej->wcpj = (const gx_wts_cell_params_j_t *)wcp;
    wsej->base.width = wcp->width;
    wsej->base.height = wcp->height;
    wsej->base.size = wcp->width * wcp->height;
    wsej->base.cell = malloc(wsej->base.size * sizeof(wsej->base.cell[0]));
    wsej->base.idx = 0;

    return (gs_wts_screen_enum_t *)wsej;
}

static int
gs_wts_screen_enum_j_currentpoint(gs_wts_screen_enum_t *self,
				  gs_point *ppt)
{
    gs_wts_screen_enum_j_t *z = (gs_wts_screen_enum_j_t *)self;
    const gx_wts_cell_params_j_t *wcpj = z->wcpj;
    
    int x, y;
    double u, v;

    if (z->base.idx == z->base.size) {
	return 1;
    }
    x = z->base.idx % wcpj->base.width;
    y = z->base.idx / wcpj->base.width;
    u = wcpj->ufast_a * x + wcpj->uslow_a * y;
    v = wcpj->vfast_a * x + wcpj->vslow_a * y;
    u -= floor(u);
    v -= floor(v);
    ppt->x = 2 * u - 1;
    ppt->y = 2 * v - 1;
    return 0;
}

static gs_wts_screen_enum_t *
gs_wts_screen_enum_h_new(gx_wts_cell_params_t *wcp)
{
    gs_wts_screen_enum_h_t *wseh;
    const gx_wts_cell_params_h_t *wcph = (const gx_wts_cell_params_h_t *)wcp;
    int x1 = wcph->x1;
    int x2 = wcp->width - x1;
    int y1 = wcph->y1;
    int y2 = wcp->height - y1;

    wseh = malloc(sizeof(gs_wts_screen_enum_h_t));
    wseh->base.t = WTS_SCREEN_H;
    wseh->wcph = wcph;
    wseh->base.width = wcp->width;
    wseh->base.height = wcp->height;
    wseh->base.size = wcp->width * wcp->height;
    wseh->base.cell = malloc(wseh->base.size * sizeof(wseh->base.cell[0]));
    wseh->base.idx = 0;

    wseh->ufast1 = floor(0.5 + wcp->ufast * x1) / x1;
    wseh->vfast1 = floor(0.5 + wcp->vfast * x1) / x1;
    if (x2 > 0) {
	wseh->ufast2 = floor(0.5 + wcp->ufast * x2) / x2;
	wseh->vfast2 = floor(0.5 + wcp->vfast * x2) / x2;
    }
    wseh->uslow1 = floor(0.5 + wcp->uslow * y1) / y1;
    wseh->vslow1 = floor(0.5 + wcp->vslow * y1) / y1;
    if (y2 > 0) {
	wseh->uslow2 = floor(0.5 + wcp->uslow * y2) / y2;
	wseh->vslow2 = floor(0.5 + wcp->vslow * y2) / y2;
    }

    return &wseh->base;
}

static int
gs_wts_screen_enum_h_currentpoint(gs_wts_screen_enum_t *self,
				  gs_point *ppt)
{
    gs_wts_screen_enum_h_t *z = (gs_wts_screen_enum_h_t *)self;
    const gx_wts_cell_params_h_t *wcph = z->wcph;
    
    int x, y;
    double u, v;

    if (self->idx == self->size) {
	return 1;
    }
    x = self->idx % wcph->base.width;
    y = self->idx / wcph->base.width;
    if (x < wcph->x1) {
	u = z->ufast1 * x;
	v = z->vfast1 * x;
    } else {
	u = z->ufast2 * (x - wcph->x1);
	v = z->vfast2 * (x - wcph->x1);
    }
    if (y < wcph->y1) {
	u += z->uslow1 * y;
	v += z->vslow1 * y;
    } else {
	u += z->uslow2 * (y - wcph->y1);
	v += z->vslow2 * (y - wcph->y1);
    }
    u -= floor(u);
    v -= floor(v);
    ppt->x = 2 * u - 1;
    ppt->y = 2 * v - 1;
    return 0;
}

gs_wts_screen_enum_t *
gs_wts_screen_enum_new(gx_wts_cell_params_t *wcp)
{
    if (wcp->t == WTS_SCREEN_J)
	return gs_wts_screen_enum_j_new(wcp);
    else if (wcp->t == WTS_SCREEN_H)
	return gs_wts_screen_enum_h_new(wcp);
    else
	return NULL;
}

int
gs_wts_screen_enum_currentpoint(gs_wts_screen_enum_t *wse, gs_point *ppt)
{
    if (wse->t == WTS_SCREEN_J)
	return gs_wts_screen_enum_j_currentpoint(wse, ppt);
    if (wse->t == WTS_SCREEN_H)
	return gs_wts_screen_enum_h_currentpoint(wse, ppt);
    else
	return -1;
}

int
gs_wts_screen_enum_next(gs_wts_screen_enum_t *wse, floatp value)
{
    bits32 sample;

    if (value < -1.0 || value > 1.0)
	return_error(gs_error_rangecheck);
    sample = (bits32) ((value + 1) * 0x7fffffff);
    wse->cell[wse->idx] = sample;
    wse->idx++;
    return 0;
}

/* Run the enum with a square dot. This is useful for testing. */
#ifdef UNIT_TEST
static void
wts_run_enum_squaredot(gs_wts_screen_enum_t *wse)
{
    int code;
    gs_point pt;
    floatp spot;

    for (;;) {
	code = gs_wts_screen_enum_currentpoint(wse, &pt);
	if (code)
	    break;
	spot = 0.5 * (cos(pt.x * M_PI) + cos(pt.y * M_PI));
	gs_wts_screen_enum_next(wse, spot);
    }
}
#endif /* UNIT_TEST */

static int
wts_sample_cmp(const void *av, const void *bv) {
    const bits32 *const *a = (const bits32 *const *)av;
    const bits32 *const *b = (const bits32 *const *)bv;

    if (**a > **b) return 1;
    if (**a < **b) return -1;
    return 0;
}

/* This implementation simply sorts the threshold values (evening the
   distribution), without applying any moire reduction. */
int
wts_sort_cell(gs_wts_screen_enum_t *wse)
{
    int size = wse->width * wse->height;
    bits32 *cell = wse->cell;
    bits32 **pcell;
    int i;

    pcell = malloc(size * sizeof(bits32 *));
    if (pcell == NULL)
	return -1;
    for (i = 0; i < size; i++)
	pcell[i] = &cell[i];
    qsort(pcell, size, sizeof(bits32 *), wts_sample_cmp);
    for (i = 0; i < size; i++)
	*pcell[i] = (bits32)floor(WTS_SORTED_MAX * (i + 0.5) / size);
    free(pcell);
    return 0;
}

/**
 * wts_blue_bump: Generate bump function for BlueDot.
 *
 * Return value: newly allocated bump.
 **/
static bits32 *
wts_blue_bump(const gs_wts_screen_enum_t *wse)
{
    const gx_wts_cell_params_t *wcp;
    int width = wse->width;
    int height = wse->height;
    int shift;
    int size = width * height;
    bits32 *bump;
    int i;
    double uf, vf;
    double am, eg;
    int z;
    int x0, y0;
    int x, y;

    if (wse->t == WTS_SCREEN_J) {
	gs_wts_screen_enum_j_t *wsej = (gs_wts_screen_enum_j_t *)wse;
	shift = wsej->wcpj->shift;
	wcp = &wsej->wcpj->base;
    } else if (wse->t == WTS_SCREEN_H) {
	gs_wts_screen_enum_h_t *wseh = (gs_wts_screen_enum_h_t *)wse;
	shift = 0;
	wcp = &wseh->wcph->base;
    } else
	return NULL;

    bump = (bits32 *)malloc(size * sizeof(bits32));
    if (bump == NULL)
	return NULL;

    for (i = 0; i < size; i++)
	bump[i] = 0;
    /* todo: more intelligence for anisotropic scaling */
    uf = wcp->ufast;
    vf = wcp->vfast;

    am = uf * uf + vf * vf;
    eg = (1 << 24) * 2.0 * sqrt (am);
    z = (int)(5 / sqrt (am));

    x0 = -(z / width) * shift - z;
    y0 = -(z % width);
    while (y0 < 0) {
	x0 -= shift;
	y0 += height;
    }
    while (x0 < 0) x0 += width;
    for (y = -z; y <= z; y++) {
	int x1 = x0;
	for (x = -z; x <= z; x++) {
	    bump[y0 * width + x1] += (bits32)(eg * exp (-am * (x * x + y * y)));
	    x1++;
	    if (x1 == width)
		x1 = 0;
	}
	y0++;
	if (y0 == height) {
	    x0 += shift;
	    if (x0 >= width) x0 -= width;
	    y0 = 0;
	}
    }
    return bump;
}

/**
 * wts_sort_blue: Sort cell using BlueDot.
 **/
static int
wts_sort_blue(const gs_wts_screen_enum_t *wse)
{
    bits32 *cell = wse->cell;
    int width = wse->width;
    int height = wse->height;
    int shift;
    int size = width * height;
    bits32 *ref;
    bits32 **pcell;
    bits32 *bump;
    int i;

    if (wse->t == WTS_SCREEN_J) {
	gs_wts_screen_enum_j_t *wsej = (gs_wts_screen_enum_j_t *)wse;
	shift = wsej->wcpj->shift;
    } else
	shift = 0;

    ref = (bits32 *)malloc(size * sizeof(bits32));
    pcell = (bits32 **)malloc(size * sizeof(bits32 *));
    bump = wts_blue_bump(wse);
    if (ref == NULL || pcell == NULL || bump == NULL) {
	free(ref);
	free(pcell);
	free(bump);
	return -1;
    }
    for (i = 0; i < size; i++)
	pcell[i] = &cell[i];
    qsort(pcell, size, sizeof(bits32 *), wts_sample_cmp);
    /* set ref to sorted cell; pcell will now point to ref */
    for (i = 0; i < size; i++) {
	pcell[i] = (pcell[i] - cell) + ref;
	*pcell[i] = (bits32)floor((1 << 24) * (i + 0.5) / size);
	cell[i] = 0;
    }

    for (i = 0; i < size; i++) {
	bits32 gmin = *pcell[i];
	int j;
	int j_end = i + 5000;
	int jmin = i;
	int ix;
	int x0, y0;
	int x, y;
	int ref_ix, bump_ix;

	/* find minimum cell value, but prune search */
	if (j_end > size) j_end = size;
	for (j = i + 1; j < j_end; j++) {
	    if (*pcell[j] < gmin) {
		gmin = *pcell[j];
		jmin = j;
	    }
	}
	ix = pcell[jmin] - ref;
	pcell[jmin] = pcell[i];
	cell[ix] = (bits32)floor(WTS_SORTED_MAX * (i + 0.5) / size);

	x0 = ix % width;
	y0 = ix / width;

	/* Add in bump, centered at (x0, y0) */
	ref_ix = y0 * width;
	bump_ix = 0;
	for (y = 0; y < height; y++) {
	    for (x = x0; x < width; x++)
		ref[ref_ix + x] += bump[bump_ix++];
	    for (x = 0; x < x0; x++)
		ref[ref_ix + x] += bump[bump_ix++];
	    ref_ix += width;
	    y0++;
	    if (y0 == height) {
		x0 += shift;
		if (x0 >= width) x0 -= width;
		y0 = 0;
		ref_ix = 0;
	    }
	}

	/* Remove DC component to avoid integer overflow. */
	if ((i & 255) == 255 && i + 1 < size) {
	    bits32 gmin = *pcell[i + 1];
	    int j;

	    for (j = i + 2; j < size; j++) {
		if (*pcell[j] < gmin) {
		    gmin = *pcell[j];
		}
	    }
#ifdef VERBOSE
	    if_debug1('h', "[h]gmin = %d\n", gmin);
#endif
	    for (j = i + 1; j < size; j++)
		*pcell[j] -= gmin;
	    
	}
    }

    free(ref);
    free(pcell);
    free(bump);
    return 0;
}

static wts_screen_t *
wts_screen_from_enum_j(const gs_wts_screen_enum_t *wse)
{
    const gs_wts_screen_enum_j_t *wsej = (const gs_wts_screen_enum_j_t *)wse;
    wts_screen_j_t *wsj;
    wts_screen_sample_t *samples;
    int size;
    int i;

    wsj = malloc(sizeof(wts_screen_j_t));
    wsj->base.type = WTS_SCREEN_J;
    wsj->base.cell_width = wsej->base.width;
    wsj->base.cell_height = wsej->base.height;
    size = wsj->base.cell_width * wsj->base.cell_height;
    wsj->base.cell_shift = wsej->wcpj->shift;
    wsj->pa = (int)floor(wsej->wcpj->pa * (1 << 16) + 0.5);
    wsj->pb = (int)floor(wsej->wcpj->pb * (1 << 16) + 0.5);
    wsj->pc = (int)floor(wsej->wcpj->pc * (1 << 16) + 0.5);
    wsj->pd = (int)floor(wsej->wcpj->pd * (1 << 16) + 0.5);
    wsj->XA = wsej->wcpj->xa;
    wsj->YA = wsej->wcpj->ya;
    wsj->XB = wsej->wcpj->xb;
    wsj->YB = wsej->wcpj->yb;
    wsj->XC = wsej->wcpj->xc;
    wsj->YC = wsej->wcpj->yc;
    wsj->XD = wsej->wcpj->xd;
    wsj->YD = wsej->wcpj->yd;

    samples = malloc(sizeof(wts_screen_sample_t) * size);
    wsj->base.samples = samples;
    for (i = 0; i < size; i++) {
	samples[i] = wsej->base.cell[i] >> WTS_EXTRA_SORT_BITS;
    }

#ifdef WTS_CACHE_SIZE_X
    for (i = 0; i < WTS_CACHE_SIZE_X; i++)
	wsj->xcache[i].tag = -1;
    for (i = 0; i < WTS_CACHE_SIZE_Y; i++)
	wsj->ycache[i].tag = -1;
#endif

    return &wsj->base;
}

static wts_screen_t *
wts_screen_from_enum_h(const gs_wts_screen_enum_t *wse)
{
    const gs_wts_screen_enum_h_t *wseh = (const gs_wts_screen_enum_h_t *)wse;
    wts_screen_h_t *wsh;
    wts_screen_sample_t *samples;
    int size;
    int i;

    /* factor some of this out into a common init routine? */
    wsh = malloc(sizeof(wts_screen_h_t));
    wsh->base.type = WTS_SCREEN_H;
    wsh->base.cell_width = wseh->base.width;
    wsh->base.cell_height = wseh->base.height;
    size = wsh->base.cell_width * wsh->base.cell_height;
    wsh->base.cell_shift = 0;
    wsh->px = wseh->wcph->px;
    wsh->py = wseh->wcph->py;
    wsh->x1 = wseh->wcph->x1;
    wsh->y1 = wseh->wcph->y1;

    samples = malloc(sizeof(wts_screen_sample_t) * size);
    wsh->base.samples = samples;
    for (i = 0; i < size; i++) {
	samples[i] = wseh->base.cell[i] >> WTS_EXTRA_SORT_BITS;
    }

    return &wsh->base;
}

typedef struct {
    wts_screen_type t;
    int width;
    int height;
} wts_key_j;

typedef struct {
    wts_screen_type t;
    int width;
    int height;
} wts_key_h;

wts_screen_t *
wts_screen_from_enum(const gs_wts_screen_enum_t *wse)
{
    wts_screen_t *result = NULL;
    byte *key = NULL;
    int key_size = 0; /* A stub. Was uninitialized when wse->t != WTS_SCREEN_J */
    int cell_off;
    int cell_len = -1;
    byte *cell_result;

    if (wse->t == WTS_SCREEN_J) {
	wts_key_j k;
	k.t = wse->t;
	k.width = wse->width;
	k.height = wse->height;
	cell_off = sizeof(k);

	key_size = cell_off + wse->size * sizeof(bits32);
	key = (byte *)malloc(key_size);
	/* todo: more params */
	memcpy(key, &k, cell_off);
	memcpy(key + cell_off, wse->cell, wse->size * sizeof(bits32));
    } else if (wse->t == WTS_SCREEN_H) {
	wts_key_h k;
	k.t = wse->t;
	k.width = wse->width;
	k.height = wse->height;
	key_size = sizeof(k);
	key = (byte *)malloc(key_size);
	/* todo: more params */
	memcpy(key, &k, key_size);
    }

    if (key != NULL)
	cell_len = gp_cache_query(GP_CACHE_TYPE_WTS_CELL, key, key_size,
				  (void **)&cell_result,
				  wts_cache_alloc_callback, NULL);
    if (cell_len >= 0) {
	memcpy(wse->cell, cell_result, cell_len);
	free(cell_result);
    } else {
	wts_sort_blue(wse);
	cell_len = wse->size * sizeof(bits32);
	gp_cache_insert(GP_CACHE_TYPE_WTS_CELL, key, key_size,
			(void *)wse->cell, cell_len);
    }
    free(key);

    if (wse->t == WTS_SCREEN_J)
	result = wts_screen_from_enum_j(wse);
    else if (wse->t == WTS_SCREEN_H)
	result = wts_screen_from_enum_h(wse);

#ifdef DUMP_WTS
    {
	static int dump_idx = 0;
	char dump_fn[128];
	int dump_fd;
	byte *buf;
	int size;

	size = gs_wts_to_buf(result, &buf);
	sprintf(dump_fn, "wts_dump_%d", dump_idx++);
	dump_fd = open(dump_fn, O_WRONLY | O_CREAT | O_TRUNC, 0666);
	write(dump_fd, buf, size);
	close(dump_fd);
	free(buf);
    }
#endif

    return result;
}

void
gs_wts_free_enum(gs_wts_screen_enum_t *wse)
{
    free(wse);
}

void
gs_wts_free_screen(wts_screen_t * wts)
{
    /* todo: free cell */
    free(wts);
}

int
wts_size(const wts_screen_t *ws)
{
    int size = 0; /* A stub. Was uninitialized when none of 3 cases below. */

    if (ws->type == WTS_SCREEN_RAT) {
	size = sizeof(wts_screen_t);
    } else if (ws->type == WTS_SCREEN_J) {
	size = sizeof(wts_screen_j_t);
    } else if (ws->type == WTS_SCREEN_H) {
	size = sizeof(wts_screen_h_t);
    }
    return size;
}

wts_screen_t *
gs_wts_from_buf(const byte *buf, int bufsize)
{
    const wts_screen_t *ws = (const wts_screen_t *)buf;
    wts_screen_t *result;
    int size = wts_size(ws);
    int hdr_size;
    int cell_size; /* size of cell in bytes */

    result = (wts_screen_t *)malloc(size);
    if (result == NULL)
	return NULL;
    
#ifdef WTS_SCREEN_J_SIZE_NOCACHE	/* ??? isn't this always defined ??? */
    if (ws->type == WTS_SCREEN_J) {
	hdr_size = WTS_SCREEN_J_SIZE_NOCACHE;
	memcpy(result, ws, hdr_size);
    } else
#endif
    {
	hdr_size = sizeof(wts_screen_t);
        memcpy(result, ws, hdr_size);
    }
    cell_size = ws->cell_width * ws->cell_height * sizeof(wts_screen_sample_t);

    if (bufsize < (cell_size + hdr_size) ||
	(result->samples = (wts_screen_sample_t *)malloc(cell_size)) == NULL) {
	free(result);
	return NULL;
    }
#ifdef WTS_SCREEN_J_SIZE_NOCACHE
    if (ws->type == WTS_SCREEN_J) {
	wts_screen_j_t *wsj = (wts_screen_j_t *)result;
	int i;

	for (i = 0; i < WTS_CACHE_SIZE_X; i++)
	    wsj->xcache[i].tag = -1;
	for (i = 0; i < WTS_CACHE_SIZE_Y; i++)
	    wsj->ycache[i].tag = -1;
    }
#endif
    memcpy(result->samples, buf + hdr_size, cell_size);

    return result;
}

#if 0 /* Never called. */
/* Return value is size of buf in bytes */
static int
gs_wts_to_buf(const wts_screen_t *ws, byte **pbuf)
{
    int size = wts_size(ws);
    int cell_size = ws->cell_width * ws->cell_height * sizeof(wts_screen_sample_t);
    byte *buf;

    buf = (byte *)malloc(size + cell_size);
    if (buf == NULL)
	return -1;
    memcpy(buf, ws, size);
    ((wts_screen_t *)buf)->samples = NULL;
    memcpy(buf + size, ws->samples, cell_size);
    *pbuf = buf;

    return size + cell_size;
}
#endif

#ifdef UNIT_TEST
static int
dump_thresh(const wts_screen_t *ws, int width, int height)
{
    int x, y;
    wts_screen_sample_t *s0;
    int cx, cy
    int dummy;

    wts_get_samples(ws, 0, 0, &cx, &cy, &dummy);
    s0 = ws->samples + cy * ws->cell_width + cx;

    printf("P5\n%d %d\n255\n", width, height);
    for (y = 0; y < height; y++) {
	for (x = 0; x < width;) {
	    wts_screen_sample_t *samples;
	    int n_samples, i;

	    wts_get_samples(ws, x, y, &cx, &cy, &n_samples);
	    samples = ws->samples + cy * ws->cell_width + cx;
#if 1
	    for (i = 0; x + i < width && i < n_samples; i++)
		fputc(samples[i] >> 7, stdout);
#else
	    printf("(%d, %d): %d samples at %d\n",
		   x, y, n_samples, samples - s0);
#endif
	    x += n_samples;
	}
    }
    return 0;
}

int
main (int argc, char **argv)
{
    gs_screen_halftone h;
    gs_matrix mat;
    double xres = 1200;
    double yres = 1200;
    gx_wts_cell_params_t *wcp;
    gs_wts_screen_enum_t *wse;
    wts_screen_t *ws;

    mat.xx = xres / 72.0;
    mat.xy = 0;
    mat.yx = 0;
    mat.yy = yres / 72.0;

    h.frequency = 121;
    h.angle = 45;

    wcp = wts_pick_cell_size(&h, &mat);
    dlprintf2("cell size = %d x %d\n", wcp->width, wcp->height);
    wse = gs_wts_screen_enum_new(wcp);
    wts_run_enum_squaredot(wse);
#if 1
    wts_sort_blue(wse);
#else
    wts_sort_cell(wse);
#endif
    ws = wts_screen_from_enum(wse);

    dump_thresh(ws, 512, 512);
    return 0;
}
#endif
