Annotation of researchv10no/cmd/bcp/Text.h, revision 1.1.1.1

1.1       root        1: /* Copyright (c) 1989, 1990 AT&T --- All Rights Reserved.              */
                      2: /* THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF AT&T.                */
                      3: /* The copyright notice does not imply actual or intended publication. */
                      4: /* AUTHORS:                                            */
                      5: /*     H. S. Baird - ATT-BL MH - first versions        */
                      6: 
                      7: /* Text.h - typedefs, constants, and function declarations for document-images
                      8:        (see Text.c for companion functions.)
                      9:    INCLUDES
                     10:        requires prior:
                     11:                #include "stdocr.h"
                     12:  */
                     13: 
                     14: #define dbg_fwrb_toa (F)       /* debug switch (F is defined elsewhere; presumably false -- confirm): if true, err("%s",R_toa()) for each record written; also selects the logging variants of fwrb_label() */
                     15: #define dbg_frdb_toa (F)       /* debug switch: if true, err("%s",R_toa()) for each record read */
                     16: 
                     17: #define DIM_VERSION (0)                /* current version no. of Dim-file format */
                     18: 
                     19: #include "Bfeats.h"
                     20: 
                     21: #define Ident int      /* identification bits [MUST BE >=32 bits]; a macro for int, not a typedef */
                     22: 
                     23: #define fwri_Ident(F,V) fwri_uint4((F),(V))    /* write Ident V to file F: forwards to fwri_uint4 (defined elsewhere) */
                     24: #define frdi_Ident(F) frdi_uint4(F)    /* read an Ident from file F: forwards to frdi_uint4 (defined elsewhere) */
                     25: 
                     26: /* Ident bits */
                     27: /* identifies external file record type(s) */
                     28: #define IsPage         04000000000     /* every Is* value below is octal and is one distinct bit, so they can be OR-ed in an Ident */
                     29: #define IsBlock                02000000000
                     30: #define IsTxtln                01000000000
                     31: #define IsWord         00020000000     /* numerically between IsInterp and IsBdy (not next to IsTxtln/IsChar); still a distinct bit */
                     32: #define IsChar         00400000000
                     33: #define IsBlob         00200000000
                     34: #define IsRun          00100000000     /* name should be IsRuns or IsLag */
                     35: #define IsRuns         (IsRun)         /* alias: the same bit as IsRun */
                     36: #define IsInterp       00040000000     /* Char interpretation */
                     37: #define IsBdy          00010000000     /* used as the initial ident of a Bdy (see Init_Bdy) */
                     38: #define IsShapes       00004000000
                     39: #define IsLag          00002000000
                     40: #define IsBfeats       00001000000
                     41: #define IsSfeats       00000400000
                     42: #define IsWordInterp   00000200000     /* presumably Word interpretation, by analogy with IsInterp -- confirm */
                     43: /* Runs:  set in Blob records' ids.
                     44:    At most one of these may be set in main memory.
                     45:    Only Runs_ff or Runs_g4 may be set in peripheral file format. */
                     46: #define Runs_f         00000010000     /* Runs are in a linked list, starting at Blob.r.f */
                     47: #define Runs_ff                00000020000     /* RunFs are in an array, at Blob.r.ff */
                     48: #define Runs_seek      00000040000     /* Runs are still in the file, at offset Blob.r.seek */
                     49: #define Runs_g4                00000100000     /* Runs are in CCITT Group 4 format */
                     50: 
                     51: /* CCITT Group 4 format in peripheral files consists of (a) an unsigned long count,
                     52:    then (b) that many bytes of Group 4 encoding exactly as in CCITT
                     53:    Recommendation T.6, except that no EOFB code is appended, since the end of
                     54:    the bitmap can be detected using the bounding box of the owning Blob.
                     55:    Instead, the last scan line of the bitmap is merely padded to the nearest
                     56:    full byte with '0' bits.  This compresses the representation by about
                     57:    a factor of 8 on average, compared to RunF/RunFS encoding. */
                     58: 
                     59: #define IsALL (IsPage|IsBlock|IsTxtln|IsWord|IsChar|IsBlob|IsRun|IsBdy|IsInterp|IsShapes|IsLag|IsBfeats|IsSfeats|IsWordInterp)       /* OR of every Is* record-type bit defined above (IsRuns is an alias of IsRun); Runs_* bits are not included */
                     60: #define IsNONE         0       /* no record-type bit set */
                     61: 
                     62: 
                     63: /* Enable optional debugging-support code to maintain a count of selected record
                     64:    types that are allocated using alloc_* free_* and dup_* functions.  The counts
                     65:    are in units of records, not bytes.  By design, has no effect at all on
                     66:    correctness.  */
                     67: #define ALLOC_CENSUS (0)       /* 0: census code is compiled out and the census macros expand to nothing; nonzero: compile the counters below */
                     68: 
                     69: #if ALLOC_CENSUS
                     70: 
                     71: typedef struct Census {        /* one counter per record type, in units of records; alloc_census() adds to a counter, free_census() subtracts */
                     72:        int Page_mny;
                     73:        int Block_mny;
                     74:        int Txtln_mny;
                     75:        int Word_mny;
                     76:        int Char_mny;
                     77:        int Blob_mny;
                     78:        int Run_mny;
                     79:        int Interp_mny;
                     80:        int Bfeats_mny;
                     81:        int BMask_mny;
                     82:        } Census;
                     83: 
                     84: #define Init_Census {0,0,0,0,0,0,0,0,0,0}      /* ten zeros, one per Census field */
                     85: #if MAIN
                     86: Census empty_Census = Init_Census;
                     87: Census _CENSUS = Init_Census;  /* the running totals updated by alloc_census()/free_census() */
                     88: #else
                     89: extern Census empty_Census;
                     90: extern Census _CENSUS;
                     91: #endif
                     92: 
                     93: #define alloc_census(id,n) _CENSUS./**/id/**/_mny += (n)       /* add n to _CENSUS.<id>_mny.  NOTE(review): the field name is built by empty-comment token pasting, which only works with a pre-ANSI preprocessor; ISO C turns each comment into a space and would need the ## operator -- check before enabling ALLOC_CENSUS */
                     94: #define free_census(id,n) _CENSUS./**/id/**/_mny -= (n)        /* subtract n from _CENSUS.<id>_mny; same pasting caveat as alloc_census */
                     95: #define err_census(S,C) err("%s P%d B%d l%d w%d c%d b%d r%d i%d bf%d bm%d",\
                     96:        S,(C)->Page_mny,(C)->Block_mny,(C)->Txtln_mny,(C)->Word_mny,\
                     97:        (C)->Char_mny,(C)->Blob_mny,(C)->Run_mny,(C)->Interp_mny,\
                     98:        (C)->Bfeats_mny,(C)->BMask_mny )        /* err_census: report all ten counters of Census *C through err(), prefixed by string S */
                     99: #define err_census_all err_census("allocated: ",&(_CENSUS))    /* report the global running totals */
                    100: #define err_census_rec(rp) { \
                    101:        Census *cs;  cs = (Census *)census_rec((rp)); \
                    102:        err_census(ident_toa((rp)->ident),cs); \
                    103:        }       /* err_census_rec: brace-block macro; reports the Census returned by census_rec(rp), labelled with ident_toa(rp->ident) */
                    104: #else  /* !ALLOC_CENSUS: every census macro expands to nothing */
                    105: 
                    106: #define alloc_census(i,n)
                    107: #define free_census(i,n)
                    108: #define err_census(s,c)
                    109: #define err_census_all
                    110: #define err_census_rec(r)
                    111: 
                    112: #endif /* ALLOC_CENSUS */
                    113: 
                    114: 
                    115: /* Most record types can own an ASCII label, which is simply a
                    116:    '\0'-terminated string.  Its uses are varied. */
                    117: #define MAX_LABEL_LEN 128      /* maximum no. characters in a label string (not stated here whether the terminating NUL is counted) */
                    118: 
                    119: #if FWRI       /* FWRI true: fwrb_label(F,L) writes label L to file F through fwri_str() */
                    120: 
                    121: #if dbg_fwrb_toa
                    122: #define fwrb_label(F,L) { \
                    123:        fwri_str((F),(L)); \
                    124:        err("fwrb_label: \"%s\"",(L)); \
                    125:        }       /* debug variant: also logs the label through err() */
                    126: #else
                    127: #define fwrb_label(F,L) { \
                    128:        fwri_str((F),(L)); \
                    129:        }
                    130: #endif
                    131: 
                    132: #else  /* FWRI false: fwrb_label(F,L) writes the label bytes with fputs(), then an explicit NUL terminator */
                    133: 
                    134: #if dbg_fwrb_toa
                    135: #define fwrb_label(F,L) { \
                    136:        fputs((L),(F)); \
                    137:        fputc('\0',(F)); \
                    138:        err("fwrb_label: \"%s\"",(L)); \
                    139:        }       /* debug variant: also logs the label through err() */
                    140: #else
                    141: #define fwrb_label(F,L) { \
                    142:        fputs((L),(F)); \
                    143:        fputc('\0',(F)); \
                    144:        }       /* all four variants are brace blocks, not expressions: L and F are evaluated more than once, and a following `;' before `else' will not parse */
                    145: #endif
                    146: 
                    147: #endif /* FWRI */
                    148: 
                    149: char *frdb_label();    /* defined elsewhere; by its name the reading counterpart of fwrb_label -- confirm arguments and ownership of the returned string */
                    150: 
                    151: /* A boundary is an ordered list of vertices.
                    152:    In some uses, it is assumed to close:  in this case, the first point
                    153:    is not repeated at the end.
                    154:    `vn' counts the no. vertices.  bdy_trace omits consecutive duplicates and
                    155:    compresses horizontal and vertical runs.
                    156:    `per' counts the no. of pixels on the 8-connected boundary.  An attempt has been
                    157:    made not to count consecutive duplicate pixels, but this may be buggy.
                    158:    `ren' counts the number of run-ends touched.  Each run contributes two ends,
                    159:    even if it is one pixel long.  The sum of ren-counts among all bdys for a blob
                    160:    should equal exactly twice the no. runs.
                    161:    Note that vn<=per and vn<=2*ren, and (probably) ren<=per.
                    162:    A ``smoothed'' version (courtesy of John Hobby) may be given in s[],
                    163:    expressed in fractional pixels.
                    164:    */
                    165: typedef struct Bdy {
                    166:        Ident ident;    /* shows type of boundary: IsBdy initially (see Init_Bdy); presumably also carries the Bdy_* flags -- confirm */
                    167:        Bbx bx;         /* bounding box (usually relative to blob's bx.a) */
                    168:        long per;       /* no. pixels in 8-connected perimeter */
                    169:        int ren;        /* no. run-ends touched */
                    170:        int vn;         /* no. distinct vertices in v[] */
                    171:        Sp *v;          /* array of vn+1 vertices, v[0]==v[vn] (malloc space).  NOTE(review): the comment above this struct says the first point is not repeated at the end -- confirm which holds */
                    172:        short fr;       /* fraction of pixels used in smoothed outline (initially 1) */
                    173:        int sn;         /* no. distinct smoothed vertices in s[] */
                    174:        Sp *s;          /* array of sn+1 vertices, s[0]==s[sn] (malloc space) */
                    175:        int an;         /* no. vertices in polygonal approximation */
                    176:        Sp **ap;        /* approx'n: array of an ptrs into v[] (in malloc space) */
                    177:        float err;      /* error tolerance used for approximation */
                    178:        int hn;         /* no. vertices in convex hull of polygonal approx'n */
                    179:        Sp ***hpp;      /* convex hull: array of hn ptrs into ap[] (malloc) */
                    180:        struct Bdy *n;  /* next Bdy, for use when a member of a linked-list */
                    181:        } Bdy;
                    182: 
                    183: #define Init_Bdy {IsBdy,Init_Bbx,0,0,0,NULL,1,0,NULL,0,NULL,0.0,0,NULL,NULL}   /* one initializer per field, in declaration order: ident=IsBdy, bx=Init_Bbx, fr=1, every other field 0 or NULL */
                    184: #if MAIN
                    185: Bdy empty_Bdy = Init_Bdy;      /* defined only where MAIN is true */
                    186: #else
                    187: extern Bdy empty_Bdy;
                    188: #endif
                    189: 
                    190: #define Bdy_verts      00000000001     /* vertices */
                    191: #define Bdy_approx     00000000002     /* polygonal approx'n */
                    192: #define Bdy_hull       00000000004     /* convex hull */
                    193: #define Bdy_ALL                (Bdy_verts|Bdy_approx|Bdy_hull) /* the three bits above only; Bdy_ccw and Bdy_half are not included */
                    194: #define Bdy_ccw                00000000100     /* winding order is counter-clockwise */
                    195: #define Bdy_half       00000000200     /* uses half-pixel boundary points */
                    197: /* A boundaries-set is an ordered list of boundaries.
                    198:    In some uses, the set encloses a connected region:  in this case,
                    199:    all are closed, and the first is conventionally the exterior and the others
                    200:    are interior boundaries.  The region's interior then always lies to the left
                    201:    of each boundary:  that is, the exterior boundary is oriented counter-
                    202:    clockwise, and the interior boundaries clockwise. */
                    203: typedef struct Bdys {
                    204:        int mny;        /* no. boundaries */
                    205:        long per;       /* perimeter of all bdys */
                    206:        Bdy *b;         /* array of boundaries (malloc space) (sometimes first); NOTE(review): "sometimes first" presumably means head of a list linked through Bdy.n -- confirm */
                    207:        } Bdys;
                    208: 
                    209: #define Init_Bdys {0,0,NULL}   /* mny=0, per=0, b=NULL */
                    210: #if MAIN
                    211: Bdys empty_Bdys = Init_Bdys;   /* defined only where MAIN is true */
                    212: #else
                    213: extern Bdys empty_Bdys;
                    214: #endif
                    215: 
                    216: /* A boundary edge is an ordered pair of vertices along a boundary.  It implicitly
                    217:    describes an 8-connected sequence of pixels from a to b, inclusive.  Also
                    218:    used for straight-line approximations to the set of pixels, and the convex hull
                    219:    of such an approximation. */
                    220: typedef struct BdyEdge {
                    221:        Bdy *byp;       /* boundary to which it belongs */
                    222:        Sp *ap,*bp;     /* ptrs into byp->v: first and last vertex of the edge */
                    223:        long per;       /* perimeter:  no. 8-connected pixels */
                    224:        Pp ctr;         /* centroid */
                    225:        Radians ang;    /* ls-fitted angle (directed roughly from a to b) */
                    226:        Pp a;           /* endpoints a & b (with sub-pixel precision) */
                    227:        Pp b;
                    228:        } BdyEdge;
                    229: 
                    230: #define Init_BdyEdge {NULL,NULL,NULL,0,Init_Zero_Pp,0.0,Init_Zero_Pp,Init_Zero_Pp}     /* in field order: byp, ap, bp, per, ctr, ang, a, b */
                    231: #if MAIN
                    232: BdyEdge empty_BdyEdge = Init_BdyEdge;  /* defined only where MAIN is true */
                    233: #else
                    234: extern BdyEdge empty_BdyEdge;
                    235: #endif
                    236: 
                    237: /* Ordered set of BdyEdges */
                    238: typedef struct BdyEdges {
                    239:        int mny;        /* no. BdyEdges in pa[]; presumably not counting the NULL terminator -- confirm */
                    240:        BdyEdge **pa;   /* NULL-terminated array of pointers to BdyEdges */
                    241:        } BdyEdges;
                    242: 
                    243: #define Init_BdyEdges {0,NULL} /* empty set: mny=0, pa=NULL */
                    244: #if MAIN
                    245: BdyEdges empty_BdyEdges = Init_BdyEdges;       /* defined only where MAIN is true */
                    246: #else
                    247: extern BdyEdges empty_BdyEdges;
                    248: #endif
                    249: 
                    250: /* Moments of a region (or boundary-list) of pixels */
                    251: 
                    252: typedef struct Moments {
                    253:        /* 0th moment */
                    254:        int M00;        /* area: sum of 1 */
                    255:        /* 1st moments */
                    256:        int M10;        /* sum of xi */
                    257:        int M01;        /* sum of yi */
                    258:        Pp c;           /* centroid: M10/M00, M01/M00 */
                    259:        /* 2nd moments (relative to centroid) */
                    260:        float M20;      /* sum of rxi*rxi */
                    261:        float M11;      /* sum of rxi*ryi */
                    262:        float M02;      /* sum of ryi*ryi */
                    263:        Radians a;      /* orientation angle, in [-PI/2,PI/2) (radians) */
                    264:        Pp d;           /* directional vector of principal axis */
                    265:        } Moments;
                    266: 
                    267: #define Init_Moments {0,0,0,{0.0,0.0},0.0,0.0,0.0,0.0,{0.0,0.0}}       /* all-zero, in field order: M00, M10, M01, c, M20, M11, M02, a, d */
                    268: #if MAIN
                    269: Moments zero_Moments = Init_Moments;   /* note the name: zero_*, not empty_* as for the other records; defined only where MAIN is true */
                    270: #else
                    271: extern Moments zero_Moments;
                    272: #endif
                    273: 
                    274: /* functions in Text.c */
                    275: Bdy *alloc_bdy();      /* returns a Bdy pointer; unprototyped (K&R): arguments are not checked here */
                    276: Bdys *alloc_bdys();    /* returns a Bdys pointer */
                    277: 
                    278: /* functions in Bdy.c */
                    279: Bdys *dup_bdys_etc();
                    280: Bdy *dup_bdy_etc();
                    281: Bdys *boundaries();
                    282: char *moments_toa();
                    283: Moments *bdy_moments();
                    284: boolean fit_bdyedge();
                    285: BdyEdge *dup_bdyedge();
                    286: BdyEdge *append_bdyedge();
                    287: int remove_bdyedge();  /* return type was implicit int; spelled out because C99 and later no longer allow implicit int */
                    288: int free_bdyedges();   /* likewise: previously implicit int */
                    289: BdyEdges *dup_bdyedges();
                    290: 
                    291: /* Each Run is initially inserted into a `line set', owned by its scan Line.
                    292:    Later, as connections are discovered, it joins a `tree set'.
                    293:    When the forest of trees of which it is a part is finally complete, the Run
                    294:    is removed from its `line set', and added to a `blob set'.
                    295:    */
                    296: 
                    297: typedef struct Run {   /* internal (main memory) record */
                    298:        Scoor y, xs, xe;        /* coordinates of black interval (y,[xs,xe]) */
                    299:        struct Run *n;          /* line & blob sets: next Run; also the link used by the free list (see free_run) */
                    300:        unsigned short ad, bd;  /* tree set: above,below degrees (no. conn'd)*/
                    301:        struct Run *ac, *bc;    /* tree set: above,below leftmost connections */
                    302:        union { struct Tree *o; /* tree set: owner Tree */
                    303:                int no;         /* blob set: sequence no. 0,1,... in set */
                    304:                } u;            /* (overlain fields): which member is valid depends on the set the Run is in */
                    305:        } Run;
                    306: 
                    307: #define Init_Run {0,0,0,NULL,0,0,NULL,NULL}    /* y, xs, xe, n, ad, bd, ac, bc; the union u has no explicit initializer */
                    308: #if MAIN
                    309: Run empty_Run = Init_Run;      /* copied into each Run handed out by alloc_run(); defined only where MAIN is true */
                    310: #else
                    311: extern Run empty_Run;
                    312: #endif
                    313: 
                    314: /* Peripheral file format.  `ac' and `bc' are relative to the position
                    315:    of this run in the canonical run order. */
                    316: typedef struct RunF {  /* external (peripheral file) record (full size) */
                    317:        Scoor y, xs, xe;        /* coordinates of black interval (y,[xs,xe]) */
                    318:        unsigned short ad, bd;  /* above,below degrees */
                    319:        unsigned short ac, bc;  /* above,below leftmost connections (indices, where Run holds pointers) */
                    320:        } RunF;
                    321: 
                    322: #define Init_RunF {0,0,0,0,0,0,0}      /* all seven fields zero */
                    323: #if MAIN
                    324: RunF empty_RunF = Init_RunF;   /* defined only where MAIN is true */
                    325: #else
                    326: extern RunF empty_RunF;
                    327: #endif
                    328: 
                    329: #define fwri_RunF(F,P) { \
                    330:        fwri_Scoor((F),(P)->y); \
                    331:        fwri_Scoor((F),(P)->xs); \
                    332:        fwri_Scoor((F),(P)->xe); \
                    333:        fwri_uint2((F),(P)->ad); \
                    334:        fwri_uint2((F),(P)->bd); \
                    335:        fwri_uint2((F),(P)->ac); \
                    336:        fwri_uint2((F),(P)->bc); \
                    337:        }       /* fwri_RunF: brace-block macro; writes RunF *P to file F field by field in the order y, xs, xe, ad, bd, ac, bc; no status is produced */
                    338: 
                    339: #define frdi_RunF(F,P) ( feof(F)? 0 : ( \
                    340:        (P)->y=frdi_Scoor(F), \
                    341:        (P)->xs=frdi_Scoor(F), \
                    342:        (P)->xe=frdi_Scoor(F), \
                    343:        (P)->ad=frdi_uint2(F), \
                    344:        (P)->bd=frdi_uint2(F), \
                    345:        (P)->ac=frdi_uint2(F), \
                    346:        (P)->bc=frdi_uint2(F), \
                    347:        (ferror(F)? -errno: 1) ) )      /* frdi_RunF: expression macro; value is 0 if feof(F) is already set on entry, otherwise the seven fields are read in write order and the value is -errno if ferror(F), else 1.  feof is tested only before the first field; F and P are evaluated several times */
                    348: 
                    349: /* In `well-behaved' text, the overwhelming majority of Blobs are small
                    350:    enough that all their Runs can be encoded using character data fields,
                    351:    a factor of two saving, which is important since a dense IEEE proceedings
                    352:    page blob file would otherwise require 2.3Mbytes */
                    353: typedef struct RunFS { /* external (peripheral file) record (small size): same fields as RunF, each one unsigned char */
                    354:        unsigned char y, xs, xe;/* coordinates of black interval (y,[xs,xe]) */
                    355:        unsigned char ad, bd;   /* above,below degrees */
                    356:        unsigned char ac, bc;   /* above,below leftmost connections (indices) */
                    357:        } RunFS;
                    358: 
                    359: #define Init_RunFS {0,0,0,0,0,0,0}     /* all seven fields zero */
                    360: #if MAIN
                    361: RunFS empty_RunFS = Init_RunFS;        /* defined only where MAIN is true */
                    362: #else
                    363: extern RunFS empty_RunFS;
                    364: #endif
                    365: 
                    366: #define fwri_RunFS(F,P) { \
                    367:        fwri_uint1((F),(P)->y); \
                    368:        fwri_uint1((F),(P)->xs); \
                    369:        fwri_uint1((F),(P)->xe); \
                    370:        fwri_uint1((F),(P)->ad); \
                    371:        fwri_uint1((F),(P)->bd); \
                    372:        fwri_uint1((F),(P)->ac); \
                    373:        fwri_uint1((F),(P)->bc); \
                    374:        }       /* fwri_RunFS: brace-block macro; writes RunFS *P to file F with fwri_uint1 in the order y, xs, xe, ad, bd, ac, bc; no status is produced */
                    375: 
                    376: #define frdi_RunFS(F,P) ( feof(F)? 0: ( \
                    377:        (P)->y=frdi_uint1(F), \
                    378:        (P)->xs=frdi_uint1(F), \
                    379:        (P)->xe=frdi_uint1(F), \
                    380:        (P)->ad=frdi_uint1(F), \
                    381:        (P)->bd=frdi_uint1(F), \
                    382:        (P)->ac=frdi_uint1(F), \
                    383:        (P)->bc=frdi_uint1(F), \
                    384:        (ferror(F)? -errno: 1) ) )      /* frdi_RunFS: expression macro; value is 0 if feof(F) is already set on entry, otherwise the seven fields are read with frdi_uint1 and the value is -errno if ferror(F), else 1.  feof is tested only before the first field; F and P are evaluated several times */
                    385: 
                    386: /* Set of runs.  PROPOSED NEW FORMAT.  Not yet incorporated widely. */
                    387: typedef struct Runs {
                    388:        Ident ident;    /* IsRuns & Runs_fi, Runs_ff, Runs_fs, or Runs_sk flags.  NOTE(review): only Runs_f, Runs_ff, Runs_seek and Runs_g4 are defined in the visible part of this header -- confirm the intended flag names */
                    389:        int mny;        /* no. runs */
                    390:        union { /* access to runs */
                    391:                struct Run *fi;   /* first run of singly-linked list */
                    392:                struct RunF *ff;  /* top of RunF[mny] array */
                    393:                struct RunFS *fs; /* top of RunFS[mny] array */
                    394:                long sk;          /* file offset: seeking file F to sk (whence 0) will find them */
                    395:                } r;
                    396:        } Runs;
                    397: 
                    398: #define Init_Runs {IsRuns,0}   /* ident=IsRuns, mny=0.  NOTE: can't initialize union */
                    399: #if MAIN
                    400: Runs empty_Runs = Init_Runs;   /* defined only where MAIN is true */
                    401: #else
                    402: extern Runs empty_Runs;
                    403: #endif
                    404: 
                    405: /* INTERNAL management */
                    406: 
                    407: #if !MAIN
                    408: extern
                    409: #endif
                    410: struct {
                    411:        int incr;               /* size of each pool[i] */
                    412:        int pools;              /* no. of pools allocated */
                    413:        Run **pool;             /* malloc space Run pool[pools][0..incr-1] */
                    414:        int next;               /* the next avail Run is:  pool[pools-1][next] */
                    415:        Run *free;              /* head of free lifo list (NULL if none), linked through Run.n */
                    416:        Run *cur;               /* most-recently allocated Run */
                    417:        int total;              /* total no. ever allocated (incremented by every alloc_run(), never decremented by free_run()) */
                    418:        boolean dbg;            /* debug flag, initially F */
                    419:        } _RunPool
                    420: #if MAIN
                    421:        = {0,0,NULL,0,NULL,NULL,0,F}    /* initial state: no pools and an empty free list */
                    422: #endif
                    423: ;
                    424: 
                    425: /* Run management routines (Text.c) */
                    426: boolean        alloc_run_pool();
                    427: int    free_run_pool();        /* return type was implicit int; spelled out because C99 and later no longer allow implicit int */
                    428: Run    *hard_alloc_run();      /* called by alloc_run() when neither the free list nor the current pool can supply a Run */
                    429: int    err_run();              /* previously implicit int, as are the four declarations below */
                    430: int    err_runb();
                    431: int    err_runf();
                    432: int    err_runfs();
                    433: int    err_run_stats();
                    434: 
                    435: /* Allocate a Run from the RunPool (returns (Run *)) -- mostly inline.  Always increments _RunPool.total first.  Then: if the free list is non-empty, pops its head; else if the newest pool still has room (next<incr), takes pool[pools-1][next] and advances next; else calls hard_alloc_run().  In the two inline cases the Run is reset to empty_Run and remembered in _RunPool.cur.  With the static initial values (incr==0, free==NULL) the first call reaches hard_alloc_run(). */
                    436: #define alloc_run() ( _RunPool.total++, (_RunPool.free!=NULL)? \
                    437:        (_RunPool.cur=_RunPool.free,_RunPool.free=_RunPool.cur->n, \
                    438:                *(_RunPool.cur)=empty_Run,_RunPool.cur): \
                    439:        ( (_RunPool.next<_RunPool.incr)? \
                    440:          (_RunPool.cur=_RunPool.pool[_RunPool.pools-1]+(_RunPool.next++), \
                    441:                *(_RunPool.cur)=empty_Run,_RunPool.cur): \
                    442:          hard_alloc_run() ) )
                    443: 
                    444: /* Free a Run back into the RunPool -- entirely inline.  Pushes rp onto the head of the lifo free list through rp->n.  rp is evaluated twice; the Run is not cleared and _RunPool.total is not changed.  Brace-block macro, not an expression. */
                    445: #define free_run(rp) { (rp)->n = _RunPool.free; _RunPool.free = (rp); }
                    446: 
                    447: /* EXTERNAL file format:
                    448:    If BlobF.runs is zero, then conventionally the Runs have simply been omitted.
                    449:    The RunF.y, RunF.xs, & RunF.xe coordinates are offsets from BlobF.bx.a 
                    450:    (their blob's left-top corner).  RunF.ac & RunF.bc index into an array of
                    451:    only those RunF records belonging to the current BlobF, in ascending
                    452:    lexicographic order on (RunF.y,RunF.xs) -- so that they are in the range
                    453:    [0,BlobF.runs-1].
                    454:  IMPROVEMENTS:
                    455:   */
                    456: 
                    457: /* some subroutines are too lazy to handle indefinitely large blobs */
                    458: #define Runs_Max 10000 
                    459: 
                    460: /* A Blob is (formally) a maximal 8-connected set of black pixels.
                    461:    The connectivity algorithm finds Blobs in strictly increasing order of
                    462:    the (y,xe) of each Blob's Run with the highest (y,xe).
                    463:    */
                    464: 
                    465: typedef struct Blob {  /* internal (main memory) record */
                    466:        Ident ident;    /* identification bits: IsBlob initially (see Init_Blob), plus Blob_* and Runs_* flags */
                    467:        Seq no;         /* blob sequence no */
                    468:        Bbx bx;         /* bounding box; bx.a is the blob's left-top corner */
                    469:        long area;      /* presumably the no. of black pixels -- confirm */
                    470:        long per;       /* presumably the perimeter (cf. Bdys.per) -- confirm */
                    471:        struct Blob *n; /* free set: next blob; also the forward link used by Blobl lists */
                    472:        Merit m;        /* Only used locally (not for peripheral file) */
                    473:        Runs *rsp;      /* runs (not yet used) */
                    474:        Bdys *bdsp;     /* boundaries (in malloc space); NULL if none */
                    475:        /* presently in use (but planned to be replaced by Runs) */
                    476:        int runs;       /* no. runs reachable through r */
                    477:        union { /* to find runs; the Runs_* flag set in ident tells which member applies */
                    478:                struct Run *f;          /* blob set: first run */
                    479:                struct RunF *ff;        /* top of RunF array */
                    480:                long seek;              /* file offset: seeking the file to this offset (whence 0) will find them */
                    481:                } r;
                    482:        } Blob;
                    483: 
                    484: #define Init_Blob {IsBlob,0,Init_Bbx,0,0,NULL,0.0,NULL,NULL,0,}        /* ident, no, bx, area, per, n, m, rsp, bdsp, runs; the union r has no explicit initializer (note the trailing comma) */
                    485: #if MAIN
                    486: Blob empty_Blob = Init_Blob;   /* defined only where MAIN is true */
                    487: #else
                    488: extern Blob empty_Blob;
                    489: #endif
                    490: 
                    491: typedef struct Blobs { /* Blob set, held as an array of pointers */
                    492:        int mny;        /* the number of pointers in set (the NULL terminator is extra: the array has mny+1 slots) */
                    493:        Blob **bpa;     /* pts to NULL-terminated array[mny+1] of pointers */
                    494:        } Blobs;
                    495: 
                    496: #define Init_Blobs {0,NULL}    /* empty set: mny=0, bpa=NULL */
                    497: #if MAIN
                    498: Blobs empty_Blobs = Init_Blobs;        /* defined only where MAIN is true */
                    499: #else
                    500: extern Blobs empty_Blobs;
                    501: #endif
                    502: 
                    503: /* Singly-linked list of Blobs.  Only forward `next' links Blob.n are used. */
                    504: typedef struct Blobl { /* Blob list, linked through Blob.n */
                    505:        int mny;        /* the number in set */
                    506:        Blob *fi;       /* to first (NULL in the empty list) */
                    507:        Blob *la;       /* to last (NULL in the empty list) */
                    508:        } Blobl;
                    509: 
                    510: #define Init_Blobl {0,NULL,NULL}       /* empty list: mny=0, fi=NULL, la=NULL */
                    511: #if MAIN
                    512: Blobl empty_Blobl = Init_Blobl;        /* defined only where MAIN is true */
                    513: #else
                    514: extern Blobl empty_Blobl;
                    515: #endif
                    516: 
                    517: typedef struct BlobF { /* external file format */
                    518:        Ident ident;    /* identification bits: IsBlob must be set */
                    519:        Bbx bx;         /* bounding box; run coordinates in the file are offsets from bx.a */
                    520:        long area;      /* same-named field as in Blob */
                    521:        long per;       /* same-named field as in Blob */
                    522:        int runs;       /* no. runs to follow; zero means the runs were omitted */
                    523:        short bdys;     /* no. bdys to follow */
                    524:        } BlobF;
                    525: 
                    526: /* Blob identification bits */
                    527: #define Blob_lm                00000000001     /* touches left margin */
                    528: #define Blob_rm                00000000002     /* touches right margin */
                    529: #define Blob_tm                00000000004     /* touches top margin */
                    530: #define Blob_bm                00000000010     /* touches bottom margin */
                    531: #define Blob_chopt     00000000020     /* chopped (at the top) */
                    532: #define Blob_chopb     00000000040     /* chopped (at the bottom) */
                    533: #define Blob_chopl     00000002000     /* chopped (at the left); listed out of numeric order */
                    534: #define Blob_chopr     00000004000     /* chopped (at the right) */
                    535: #define Blob_small     00000000200     /* its runs (can be) compressed x2; presumably stored as RunFS -- confirm */
                    536: #define Blob_local     00000000400     /* unassigned:  avail for local pgm use; bits 0100 and 01000 are not assigned in this list */
                    537: 
                    538: /* INTERNAL management: */
                         /* NOTE(review): unlike the empty_* objects, the variables below are
                            declared without `extern' and without the #if MAIN guard, so every
                            file that includes this header gets its own tentative definition.
                            That links only where the toolchain merges common symbols (e.g.
                            GCC needs -fcommon); consider moving them under #if MAIN. */
                    539: 
                    540: int hi_blob_no;                /* current highest blob no */
                    541: 
                    542: /* Blobs are allocated from a pool of free ones */
                    543: int blob_max;          /* presumably the capacity of blob_pool -- confirm in conBlob.c */
                    544: Blob *blob_pool;       /* the pool itself */
                    545: Blob blob_fr;          /* head of list of free blobs */
                    546: int blob_fr_mny;       /* presumably the no. of blobs on the free list -- confirm */
                    547: int blob_hi;           /* high-water mark in blob pool */
                    548: int blob_chopped;      /* total no. of blobs that were chopped */
                    549: boolean blob_debug;    /* debug traces? */
                    550: 
                    551: /* EXTERNAL file format:
                    552:    A Blob file consists of an arbitrary number of:
                    553:        BlobF record, followed by BlobF.runs instances of:
                    554:            RunF record
                    555:    If BlobF.runs is zero, then conventionally the Runs have simply been omitted.
                    556:    The RunF.y, RunF.xs, & RunF.xe coordinates are relative offsets from BlobF.bx.a 
                    557:    (their blob's left-top corner).  RunF.ac & RunF.bc index into an array of
                    558:    only those RunF records belonging to the current BlobF, in ascending
                    559:    lexicographic order on (RunF.y,RunF.xs) -- so that they are in the range
                    560:    [0,BlobF.runs-1].  If ad (or bd)==0, then ac (or bc) is undefined (conn
                    561:    conventionally sets it to 0).
                    562:    */
                    563: 
                    564: /* Blob management routines (conBlob.c).
                            Routines formerly declared with no type specifier (implicit int)
                            now carry an explicit `int'.  The declared type is unchanged, but
                            the declarations are accepted by C99-and-later compilers, which
                            dropped implicit int.  `int' rather than `void' keeps them
                            compatible with old-style definitions that omit the return type.
                            All parameter lists are left empty (old-style), so argument types
                            are not checked at call sites. */
                    565: Blob   *alloc_blob();
                    566: int    free_blob();
                    567: boolean        alloc_blob_pool();
                    568: int    free_blob_pool();
                    569: Blob   *alloc_pool_blob();
                    570: int    free_pool_blob();
                    571: int    out_blob();
                    572: int    fwrb_blob_etc();
                    573: boolean frdb_blob_etc();
                    574: boolean        frdb_runfs();
                    575: int    err_blob();
                    576: int    err_blob_runs();
                    577: int    err_blob_runfs();
                    578: int    err_blob_briefly();
                    579: int    err_blobf();
                    580: int    err_blob_stats();
                    581: boolean        blob_small();
                    582: 
                    583: /* Compute height-above-baseline in ems of Char *cp w.r.t. Txtln *lp,
                    584:    on a page of y-resolution res.  The txtln's `basl' & `size' must be set up. */
                         /* Expands to a double expression (res is cast to double):
                              (cp->bx.b.y - lp->basl) / res * INS_PER_PT * lp->size
                            lp is evaluated twice, so pass an expression without side effects.
                            NOTE(review): the result is multiplied by INS_PER_PT and by size;
                            check against INS_PER_PT's definition (not visible here) that this
                            really yields ems as stated above. */
                    585: #define char_bhgt(cp,lp,res) \
                    586:        ((((cp)->bx.b.y - (lp)->basl)/(double)(res)*INS_PER_PT*(lp)->size))
                    587: 
                    588: /* an Interpretation of a Char; Interps can be chained through `n' */
                    589: typedef struct Interp {
                    590:        Ident ident;            /* IsInterp (set by Init_Interp) plus Interp.ident flags */
                    591:        struct Cl *clp;         /* to a struct Cl (type defined outside this section) */
                    592:        struct Class *clsp;     /* to a struct Class (type defined outside this section) */
                    593:        ClassId ci;             /* class id (font, size, name, variant) */
                    594:        Merit mshap;            /* shape merit in [0,1] */
                    595:        Pts size;               /* implied text size */
                    596:        Merit msize;            /* size merit in [0,1] */
                    597:        Scoor basl;             /* implied absolute baseline location */
                    598:        Merit mbhgt;            /* height-above-baseline merit in [0,1] */
                    599:        Merit m;                /* match merit (due to mshap, msize, & mbhgt) */
                    600:        Prob p;                 /* approximate probability */
                    601:        struct Interp *n;       /* next in singly-linked list */
                    602:        } Interp;
                    603: 
                         /* Initializer, in field order: ident=IsInterp, clp=clsp=NULL,
                            ci=Init_ClassId, all merits, size, basl and p zero, n=NULL.
                            empty_Interp is defined only where MAIN is nonzero. */
                    604: #define Init_Interp {IsInterp,NULL,NULL,Init_ClassId,0.0,0.0,0.0,0,0.0,0.0,0.0,NULL}
                    605: #if MAIN
                    606: Interp empty_Interp = Init_Interp;
                    607: #else
                    608: extern Interp empty_Interp;
                    609: #endif
                    610: 
                    611: /* Interp.ident flags (octal): */
                    612: #define Interp_spelled 00000000001     /* has passed a spelling check */
                    613: 
                         /* Release Interp *i: calls free(i), then free_census(Interp,1).
                            Expands to a brace block, not a single statement, so a use written
                            `free_interp(x);' directly before an `else' will not parse. */
                    614: #define free_interp(i) {free((i)); free_census(Interp,1); }
                    615: 
                         /* InterpF - presumably the external file record for an Interp (by
                            analogy with BlobF/CharF/WordF).  It holds the Interp fields other
                            than the pointers clp, clsp, n and the probability p. */
                    616: typedef struct InterpF {
                    617:        Ident ident;
                    618:        ClassId ci;             /* class id (font, size, name, variant) */
                    619:        Merit mshap;            /* shape merit */
                    620:        Pts size;               /* implied text size (in points) */
                    621:        Merit msize;            /* size merit */
                    622:        Scoor basl;             /* implied baseline location */
                    623:        Merit mbhgt;            /* height-above-baseline merit */
                    624:        Merit m;                /* match merit (due to shp, siz, hgt) */
                    625:        } InterpF;
                    626: 
                    627: /* a list of interpretations (presumably chained through Interp.n) */
                    628: typedef struct Interpl {
                    629:        short mny;              /* no. in list */
                    630:        struct Interp *fi;      /* first in list (mny==0 -> fi==NULL) */
                    631:        } Interpl;
                    632: 
                         /* Initializer for an empty list: mny=0, fi=NULL.  empty_Interpl is
                            defined only where MAIN is nonzero. */
                    633: #define Init_Interpl {0,NULL}
                    634: #if MAIN
                    635: Interpl empty_Interpl = Init_Interpl;
                    636: #else
                    637: extern Interpl empty_Interpl;
                    638: #endif
                    639: 
                    640: /* a set of interpretations, held as an array of pointers */
                    641: typedef struct Interps {
                    642:        short mny;              /* no. in set */
                    643:        struct Interp **pa;     /* NULL-terminated array of ptrs (malloc spc) */
                    644:        Merit m;                /* combined merit (normalized product) */
                    645:        } Interps;
                    646: 
                         /* Initializer for an empty set: mny=0, pa=NULL, m=0.0.  empty_Interps
                            is defined only where MAIN is nonzero. */
                    647: #define Init_Interps {0,NULL,0.0}
                    648: #if MAIN
                    649: Interps empty_Interps = Init_Interps;
                    650: #else
                    651: extern Interps empty_Interps;
                    652: #endif
                    653: 
                         /* Shapes - growable array of Nb_s items; see init_sh() and add_sh(). */
                    654: typedef struct Shapes {
                    655:        short mny;      /* no. items in set */
                    656:        short alloc;    /* no. items that can fit in array (alloc>=mny) */
                    657:        Nb_s *sa;       /* ptr to contiguous array (malloc space) */
                    658:        } Shapes;
                    659: 
                         /* Initializer: mny=0, alloc=0, sa=NULL (nothing allocated yet).
                            empty_Shapes is defined only where MAIN is nonzero. */
                    660: #define Init_Shapes {0,0,NULL}
                    661: #if MAIN
                    662: Shapes empty_Shapes = Init_Shapes;
                    663: #else
                    664: extern Shapes empty_Shapes;
                    665: #endif
                    666: 
                    667: #define SH_INCR (20)   /* Shapes are allocated by this increment */
                    668: 
                         /* init_sh(sh): give Shapes *sh a fresh malloc'd array of SH_INCR items
                            and set mny to 0.  On malloc failure it calls abort() with a format
                            string and argument -- presumably the project's own abort(), not the
                            C library's (confirm in stdocr.h).  Any previous sh->sa is
                            overwritten without being freed.  Expands to a brace block and
                            evaluates `sh' several times. */
                    669: #define init_sh(sh) { \
                    670:        (sh)->alloc = SH_INCR; \
                    671:        if(((sh)->sa=(Nb_s *)malloc((sh)->alloc*sizeof(Nb_s)))==NULL) \
                    672:                abort("can't alloc sh->sa[%d]",(sh)->alloc); \
                    673:        (sh)->mny = 0; \
                    674:        }
                    675: 
                         /* add_sh(s,sh): append a copy of *s to Shapes *sh.  When the array is
                            full (mny==alloc) it is first grown by SH_INCR items with realloc;
                            failure calls abort() as in init_sh.  The realloc result is stored
                            straight into sh->sa, so the old pointer is lost on failure.
                            Expands to a brace block; `sh' is evaluated several times, `s' once. */
                    676: #define add_sh(s,sh) { \
                    677:        if((sh)->mny==(sh)->alloc) { \
                    678:                (sh)->alloc += SH_INCR; \
                    679:                if(((sh)->sa=(Nb_s *)realloc((sh)->sa,(sh)->alloc*sizeof(Nb_s)))==NULL) \
                    680:                        abort("can't alloc (sh)->sa[%d]",(sh)->alloc); \
                    681:                }; \
                    682:        (sh)->sa[(sh)->mny++] = *(s); \
                    683:        }
                    684: 
                    685: /* Parameters governing the pseudo-random generation of a Char image
                    686:    using a 1st-order statistical model of imaging.
                            The -x letters in the field comments look like the command-line
                            options that set each parameter -- confirm against the programs
                            that parse them. */
                    687: typedef struct RanParms {
                    688:        short res_x;    /* -r resolution (scanner pels/inch) */
                    689:        short res_y;    /*    (for now, equal to res_x) */
                    690:        Pts size;       /* -p size of text */
                    691:        Radians skew;   /* -a skew angle */
                    692:        Ems bhgt;       /* -b height above baseline */
                    693:        float blur;     /* -e blurring std err (scanner pels) */
                    694:        float jitter;   /* -j jitter std err (scanner pels) */
                    695:        float kern;     /* -k kerning std err (scanner pels) */
                    696:        float speckle;  /* -s pel-wise additive noise std err (scanner pels) */
                    697:        float thresh;   /* -t threshold for binarization */
                    698:        float xscale;   /* -x horizontal scaling (dimensionless) */
                    699:        float yscale;   /* -y vertical scaling (dimensionless) */
                    700:        } RanParms;
                         /* Initializer, in field order.  Not all zero: blur=0.7,
                            speckle=0.125, thresh=0.25, xscale=yscale=1.0; every other field
                            is 0.  empty_RanParms is defined only where MAIN is nonzero. */
                    701: #define Init_RanParms {0,0,0.0,0.0,0.0,0.7,0.0,0.0,0.125,0.25,1.0,1.0}
                    702: #if MAIN
                    703: RanParms empty_RanParms = Init_RanParms;
                    704: #else
                    705: extern RanParms empty_RanParms;
                    706: #endif
                    707: 
                    708: RanParms *alloc_ranparms();
                    709: RanParms *dup_ranparms();
                    710: char *ranparms_toa();
                    711: RanParms *ato_ranparms();
                    712: fwrb_ranparms();
                    713: int frdb_ranparms();
                    714: 
                    715: /* Char - a character:  isolated, elementary symbol of the writing system;
                    716:    linguists might call this a `graph' */
                    717: typedef struct Char {
                    718:        Ident ident;            /* feature bits: IsChar plus Char.ident flags */
                    719:        Bbx bx;
                    720:        Scoor csp;              /* space before character in Txtln (abs. coords) */
                    721:        long area;              /* no. pixels */
                    722:        long per;               /* perimeter (all bdys) */
                    723:        Scoor basl;             /* baseline (absolute coordinates, local copy) */
                    724:        /* next should be Blobl: bmny & fi mirror Blobl.mny & Blobl.fi (no `la') */
                    725:        int bmny;               /* no. blobs in Char */
                    726:        struct Blob *fi;        /* 1st in list (p.n ptrs) (bmny==0 -> fi==NULL) */
                    727:        Pval *sfv;              /* scalar-features (SFv) */
                    728:        Shapes sh;              /* set of shapes (size- & loc'n-invariant) */
                    729:        Bfeats *bfsp;           /* binary features */
                    730:        RanParms *rp;           /* randomizing parameters */
                    731:        Interpl il;             /* interpretations */
                    732:        char *l;                /* label (ASCII string in malloc space) */
                    733:        } Char;
                    734: 
                         /* Initializer, in field order: ident=IsChar, bx=Init_Bbx, csp=0,
                            area=per=0, basl=Scoor_MIN (presumably meaning "not yet set" --
                            confirm), bmny=0, fi=sfv=NULL, sh=Init_Shapes, bfsp=rp=NULL,
                            il=Init_Interpl, l=NULL.  empty_Char is defined only where MAIN
                            is nonzero. */
                    735: #define Init_Char {IsChar,Init_Bbx,0,0L,0L,Scoor_MIN,0,NULL,NULL,Init_Shapes,NULL,NULL,Init_Interpl,NULL}
                    736: #if MAIN
                    737: Char empty_Char = Init_Char;
                    738: #else
                    739: extern Char empty_Char;
                    740: #endif
                    741: 
                    742: /* Char.ident flags (octal constants, one bit each): */
                    743: #define Char_spelled   00000000001     /* Its 1st Interp is in correct spelling */
                    744: #define Char_confused  00000000002     /* The classifier may have confused this */
                    745: #define Char_termhyp   00000000004     /* a terminal hyphen of its Word */
                    746: #define Char_omit      00000000010     /* can be omitted */
                    747: #define Char_label     00000000020     /* has an ASCII string label (see CharF) */
                    748: #define Char_ranparms  00000000040     /* has RanParms (see CharF) */
                    749: #define Char_split     00000000100     /* resulted from splitting a Char */
                    750: #define Char_merged    00000000200     /* resulted from merging Chars */
                    751: 
                         /* Chars - a set of Chars, held as an array of pointers */
                    752: typedef struct Chars {
                    753:        int mny;        /* mny==0 -> cpa==NULL */
                    754:        Char **cpa;     /* pts to NULL-term'd array of ptrs (in malloc space) */
                    755:        } Chars;
                    756: 
                         /* Initializer for an empty set: mny=0, cpa=NULL.  empty_Chars is
                            defined only where MAIN is nonzero. */
                    757: #define Init_Chars {0,NULL}
                    758: #if MAIN
                    759: Chars empty_Chars = Init_Chars;
                    760: #else
                    761: extern Chars empty_Chars;
                    762: #endif
                    763: 
                    764: /* CharF - Char external file format.
                            NOTE(review): bmny is a short here but an int in Char; Char.basl has
                            no counterpart in this record.  The order in which the variable-
                            length parts follow the record is not stated here -- see the
                            read/write routines. */
                    765: typedef struct CharF {
                    766:        Ident ident;            /* feature bits: IsChar must be set */
                    767:        Bbx bx;
                    768:        Scoor csp;              /* space before character in Txtln (abs. coords) */
                    769:        long area;
                    770:        long per;
                    771:        short bmny;             /* no. blobs to follow */
                    772:        short imny;             /* no. interpretations (follows immediately) */
                    773:        short sfmny;            /* no. scalar features (follows immediately) */
                    774:        short shmny;            /* no. shape features (follows immediately) */
                    775:        short bfmny;            /* no. binary features (follows immediately) */
                    776:        /* if ident&Char_ranparms, RanParms follows CharF */
                    777:        /* if ident&Char_label, label follows CharF ('\0'-terminated string) */
                    778:        } CharF;
                    779: 
                         /* Char and Chars routines.  These are old-style declarations: the
                            parameter lists are unspecified, so argument types are not checked
                            at call sites. */
                    780: Char *alloc_char();            /* in Text.c */
                    781: Char *append_char();
                    782: Char *insert_char();
                    783: Char *insert_char_word();
                    784: Char *dup_char();
                    785: Char *dup_char_etc();
                    786: Chars *append_chars();
                    787: Chars *dup_chars_etc();
                    788: 
                         /* Words - a set of Words, held as an array of pointers.  Declared
                            before struct Word because Word contains a Words member. */
                    789: typedef struct Words {
                    790:        int mny;                /* mny==0 -> wpa==NULL */
                    791:        struct Word **wpa;      /* pts to NULL-term'd array of ptrs */
                    792:        } Words;
                    793: 
                         /* Initializer for an empty set: mny=0, wpa=NULL.  empty_Words is
                            defined only where MAIN is nonzero. */
                    794: #define Init_Words {0,NULL}
                    795: #if MAIN
                    796: Words empty_Words = Init_Words;
                    797: #else
                    798: extern Words empty_Words;
                    799: #endif
                    800: 
                    801: /* Word - one or more Chars lying in a textline close together.
                    802:    wsp (word space) is always >=0 and is scaled by xheight (of Txtln).
                            NOTE(review): the unit of wsp is described three ways in this file
                            ("scaled by xheight" here, "multiple of wst*em" on the field, and
                            "multiple of thr" in WordF) -- confirm which is current. */
                    803: typedef struct Word {
                    804:        Ident ident;            /* feature bits: IsWord plus Word.ident flags */
                    805:        Bbx bx;
                    806:        float wsp;              /* space before word (multiple of wst*em) */
                    807:        Merit m;                /* Word merit (function of its Char's merits) */
                    808:        Prob p;                 /* probability */
                    809:        Words ws;               /* set of alternative segmentations */
                    810:        Chars cs;               /* the Word's Chars */
                    811:        Blobs bs;               /* Blobs attached to the Word */
                    812:        char *l;                /* label (ASCII string in malloc space) */
                    813:        int hash;               /* hash key for fast equality checking */
                    814:        } Word;
                    815: 
                         /* Initializer, in field order: ident=IsWord, bx=Init_Bbx,
                            wsp=m=p=0.0, empty ws/cs/bs, l=NULL, hash=0.  empty_Word is defined
                            only where MAIN is nonzero. */
                    816: #define Init_Word {IsWord,Init_Bbx,0.0,0.0,0.0,Init_Words,Init_Chars,Init_Blobs,NULL,0}
                    817: #if MAIN
                    818: Word empty_Word = Init_Word;
                    819: #else
                    820: extern Word empty_Word;
                    821: #endif
                    822: 
                    823: /* Word-interpretation (as printable ASCII string).
                    824:    All string fields must point to malloc-space strings.
                            The whole string s is the concatenation pp, by, po, ps in that order.
                    825:    */
                    826: typedef struct WordInterp {
                    827:        Ident ident;    /* identifies word type (IsWordInterp plus Word_* flags) */
                    828:        char *s;        /* entire string = pp+by+po+ps */
                    829:        char *pp;       /* punctuation prefix */
                    830:        char *by;       /* body of word */
                    831:        char *po;       /* possessive ('s 'S) or negative (n't N'T) suffix */
                    832:        char *ps;       /* punctuation suffix */
                    833:        } WordInterp;
                    834: 
                         /* Initializer: ident=IsWordInterp, all five string pointers NULL.
                            empty_WordInterp is defined only where MAIN is nonzero. */
                    835: #define Init_WordInterp {IsWordInterp,NULL,NULL,NULL,NULL,NULL}
                    836: #if MAIN
                    837: WordInterp empty_WordInterp = Init_WordInterp;
                    838: #else
                    839: extern WordInterp empty_WordInterp;
                    840: #endif
                    841: 
                    842: WordInterp *dup_wordinterp_etc();
                    843: 
                    844: /* Word.ident & WordInterp.ident flags (octal constants, one bit each).
                            In the comments, s, by and ps name the WordInterp string fields. */
                    845: #define Word_spelled   00000000001     /* by spells correctly */
                    846: #define Word_wf                00000000002     /* s is well-formed */
                    847: #define Word_ok                00000000004     /* s is ok ("acceptable") */
                    848: #define Word_numeric   00000000010     /* by is numeric */
                    849: #define Word_initcap   00000000020     /* by has initial capital */
                    850: #define Word_allcaps   00000000040     /* by is all caps */
                    851: #define Word_hyphens   00000000100     /* by has embedded hyphens */
                    852: #define Word_slashes   00000000200     /* by has embedded slashes */
                    853: #define Word_endsent   00000000400     /* end of sentence: ps has .!? */
                    854: #define Word_termhyp   00000001000     /* some interpretation ends with hyphen */
                    855: #define Word_label     00000002000     /* has an ASCII string label */
                    856: #define Word_allalp    00000004000     /* s is all alphabetic */
                    857: #define Word_bodalp    00000010000     /* by is all alphabetic */
                    858: 
                    859: /* WordF - Word external file format.
                            Word.p and Word.hash have no counterpart in this record, and the
                            counts are shorts although Words.mny and Chars.mny are ints. */
                    860: typedef struct WordF {
                    861:        Ident ident;            /* feature bits: IsWord must be set */
                    862:        Bbx bx;
                    863:        float wsp;              /* space before word (multiple of thr) */
                    864:        float m;                /* merit */
                    865:        short wmny;             /* no. Words (in Word.ws) to follow this Word */
                    866:        short cmny;             /* no. Chars to follow this Word */
                    867:        short bmny;             /* no. Blobs to follow this Word */
                    868:        /* if ident&Word_label, label follows WordF ('\0'-terminated string) */
                    869:        } WordF;
                    870: 
                    871: /* constant pitch model for a Txtln (pointed to by Txtln.cp) */
                    872: typedef struct ConstPitch {
                    873:        Ems w;          /* Character pitch */
                    874:        Scoor o;        /* origin (one of the character break points) */
                    875:        float r;        /* max/min autocorrelation ratio - the larger the better */
                    876:        } ConstPitch;
                    877: 
                         /* Initializer: w=0.0, o=0, r=0.0.  empty_ConstPitch is defined only
                            where MAIN is nonzero. */
                    878: #define Init_ConstPitch {0.0,0,0.0}
                    879: #if MAIN
                    880: ConstPitch empty_ConstPitch = Init_ConstPitch;
                    881: #else
                    882: extern ConstPitch empty_ConstPitch;
                    883: #endif
                    884: 
                         /* Txtlns - a set of Txtlns.  Declared before struct Txtln because
                            Txtln contains a Txtlns member. */
                    885: typedef struct Txtlns {
                    886:        short mny;              /* no. in set */
                    887:        struct Txtln **lpa;     /* to array of Txtln's (if mny==0, lpa==NULL) */
                    888:        } Txtlns;
                    889: 
                         /* Initializer for an empty set: mny=0, lpa=NULL.  empty_Txtlns is
                            defined only where MAIN is nonzero. */
                    890: #define Init_Txtlns {0,NULL}
                    891: #if MAIN
                    892: Txtlns empty_Txtlns = Init_Txtlns;
                    893: #else
                    894: extern Txtlns empty_Txtlns;
                    895: #endif
                    896: 
                    897: /* Text Line: Txtln.ident flags (octal, listed highest bit first) */
                    898: #define Txtln_label    00000000004     /* has an ASCII string label */
                    899: #define Txtln_size     00000000002     /* dominant text size chosen */
                    900: #define Txtln_basl     00000000001     /* dominant baseline chosen */
                    901: 
                         /* Txtln - one line of text, with its alternative segmentations and
                            the Words, Chars and Blobs that belong to it. */
                    902: typedef struct Txtln {
                    903:        Ident ident;    /* IsTxtln plus the Txtln_* flags */
                    904:        Bbx bx;
                    905:        Pts size;       /* dominant text size (0 means unknown) */
                    906:        Scoor basl;     /* dominant baseline (absolute y coordinate) */
                    907:        short *proj;    /* ptr to malloc space projection array */
                    908:        ConstPitch *cp; /* ptr to malloc space constant-pitch model */
                    909:        Merit m;        /* merit */
                    910:        Txtlns ls;      /* alternative Txtln segmentations */
                    911:        Words ws;       /* sorted asc. on x */
                    912:        Chars cs;       /* sorted asc. on bx.a.x */
                    913:        Blobs bs;       /* misc. non-char blobs */
                    914:        char *l;        /* label (ASCII string in malloc space) */
                    915:        } Txtln;
                    916: 
                         /* Initializer, in field order: ident=IsTxtln, bx=Init_Bbx, size=0.0
                            (unknown), basl=0, proj=cp=NULL, m=0.0, empty ls/ws/cs/bs, l=NULL.
                            empty_Txtln is defined only where MAIN is nonzero. */
                    917: #define Init_Txtln {IsTxtln,Init_Bbx,0.0,0,NULL,NULL,0.0,Init_Txtlns,Init_Words,Init_Chars,Init_Blobs,NULL}
                    918: #if MAIN
                    919: Txtln empty_Txtln = Init_Txtln;
                    920: #else
                    921: extern Txtln empty_Txtln;
                    922: #endif
                    923: 
                    924: /* Text Line - presumably the external file record (by analogy with
                            CharF/WordF).  Note that basl comes before size here, the reverse of
                            Txtln, and that bmny is an int while the other counts are shorts. */
                    925: typedef struct TxtlnF {
                    926:        Ident ident;    /* IsTxtln must be set */
                    927:        Bbx bx;
                    928:        Scoor basl;     /* baseline (absolute y coordinate) */
                    929:        Pts size;       /* text size (<=0.0 means none is known) */
                    930:        short pmny;     /* no. shorts in projection array to follow */
                    931:        float m;        /* merit */
                    932:        short lmny;     /* no. alternative txtlns to follow */
                    933:        short wmny;     /* no. words to follow */
                    934:        short cmny;     /* no. chars to follow */
                    935:        int bmny;       /* no. blobs to follow */
                    936:        /* if ident&Txtln_label, label follows TxtlnF ('\0'-terminated string) */
                    937:        } TxtlnF;
                    938: 
                    939: /* blocks of text: a set of Blocks, held as an array of pointers.
                            Declared before struct Block because Block contains a Blocks member. */
                    940: typedef struct Blocks {
                    941:        short mny;              /* if mny==0, bpa==NULL */
                    942:        struct Block **bpa;     /* to NULL-term'd array of ptrs */
                    943:        } Blocks;
                    944: 
                         /* Initializer for an empty set: mny=0, bpa=NULL.  empty_Blocks is
                            defined only where MAIN is nonzero. */
                    945: #define Init_Blocks {0,NULL}
                    946: #if MAIN
                    947: Blocks empty_Blocks = Init_Blocks;
                    948: #else
                    949: extern Blocks empty_Blocks;
                    950: #endif
                    951: 
                    952: /* block of text; Blocks may nest (see bks) */
                    953: typedef struct Block {
                    954:        Ident ident;    /* IsBlock plus the Block_* ident bits */
                    955:        Bbx bx;         /* bounding box of block */
                    956:        Radians skew;   /* skew angle (as correction to Page.skew) */
                    957:        Radians shear;  /* shear angle (as correction to Page.shear) */
                    958:        Ems wst;        /* word-space threshold */
                    959:        Merit m;        /* merit */
                    960:        Blocks bks;     /* Blocks nested within this one */
                    961:        Txtlns ls;
                    962:        Words ws;
                    963:        Chars cs;
                    964:        Blobs bs;
                    965:        char *l;        /* label (ASCII string in malloc space) */
                    966:        } Block;
                    967: 
                         /* Initializer, in field order: ident=IsBlock, bx=Init_Bbx,
                            skew=shear=wst=m=0.0, empty bks/ls/ws/cs/bs, l=NULL.  empty_Block
                            is defined only where MAIN is nonzero. */
                    968: #define Init_Block {IsBlock,Init_Bbx,0.0,0.0,0.0,0.0,Init_Blocks,Init_Txtlns,Init_Words,Init_Chars,Init_Blobs,NULL}
                    969: #if MAIN
                    970: Block empty_Block = Init_Block;
                    971: #else
                    972: extern Block empty_Block;
                    973: #endif
                    974: 
                    975: /* Ident bits for Blocks (octal constants) */
                    976: #define Block_wst      00000000001     /* word-space-thresh set up */
                    977: #define Block_label    00000000002     /* has an ASCII string label */
                    978: 
                         /* Compile-time switch, not an ident bit: when nonzero, BlockF
                            includes the `m' and `bkmny' fields (see #if Block_mb in BlockF). */
                    979: #define Block_mb (1)
                    980: 
                    981: /* block of text - presumably the external file record (by analogy with
                            CharF/WordF).  Its layout depends on Block_mb.  wst comes before
                            skew and shear here, unlike in Block. */
                    982: typedef struct BlockF {
                    983:        Ident ident;    /* IsBlock must be set */
                    984:        Bbx bx;
                    985:        Ems wst;        /* word-space threshold */
                    986:        float skew;
                    987:        float shear;
                    988: #if Block_mb
                    989:        float m;        /* presumably the Block's merit -- confirm */
                    990:        short bkmny;    /* presumably no. nested blocks, cf. PageF.bkmny -- confirm */
                    991: #endif
                    992:        short lmny;
                    993:        short wmny;
                    994:        int cmny;
                    995:        int bmny;
                    996:        /* if ident&Block_label, label follows BlockF ('\0'-terminated string) */
                    997:        } BlockF;
                    998: 
                    999: /* page: top of the Page > Block > Txtln > Word > Char > Blob hierarchy.
                            Each set member holds only the items not contained in a member of
                            the level above it. */
                   1000: typedef struct Page {
                   1001:        Ident ident;            /* IsPage plus Page ident bits */
                   1002:        Bbx bx;                 /* extreme indices in pixels */
                   1003:        short res_x;            /* resolution in pixels/inch:  x & y */
                   1004:        short res_y;
                   1005:        Radians skew;           /* skew angle */
                   1006:        Radians shear;          /* shear correction */
                   1007:        Blocks bks;             /* blocks */
                   1008:        Txtlns ls;              /* textlines (those not in any block) */
                   1009:        Words ws;               /* words (not in any textline) */
                   1010:        Chars cs;               /* chars (not in any word) */
                   1011:        Blobs bs;               /* blobs (not in any char) */
                   1012:        char *l;                /* label (ASCII string in malloc space) */
                   1013:        } Page;
                   1014: 
                         /* Initializer, in field order: ident=IsPage, bx=Init_Bbx,
                            res_x=res_y=0, skew=shear=0.0, empty bks/ls/ws/cs/bs, l=NULL.
                            empty_Page is defined only where MAIN is nonzero. */
                   1015: #define Init_Page {IsPage,Init_Bbx,0,0,0.0,0.0,Init_Blocks,Init_Txtlns,Init_Words,Init_Chars,Init_Blobs,NULL}
                   1016: #if MAIN
                   1017: Page empty_Page = Init_Page;
                   1018: #else
                   1019: extern Page empty_Page;
                   1020: #endif
                   1021: 
                         /* NOTE(review): Page_new, defined after PageF, has this same value
                            00000000001; if both are tested on the same ident they cannot be
                            told apart -- confirm which is intended. */
                   1022: #define Page_label     00000000001     /* has a label */
                   1023: 
                   1024: /* Pages of text: a set of Pages, held as an array of pointers */
                   1025: typedef struct Pages {
                   1026:        unsigned short mny;     /* if mny==0, pa==NULL */
                   1027:        struct Page **pa;       /* to NULL-term'd array of ptrs */
                   1028:        } Pages;
                   1029: 
                         /* Initializer for an empty set: mny=0, pa=NULL.  empty_Pages is
                            defined only where MAIN is nonzero. */
                   1030: #define Init_Pages {0,NULL}
                   1031: #if MAIN
                   1032: Pages empty_Pages = Init_Pages;
                   1033: #else
                   1034: extern Pages empty_Pages;
                   1035: #endif
                   1036: 
                         /* PageF - presumably the external file record for a Page (by analogy
                            with CharF/WordF).  res_x/res_y come before bx here, the reverse
                            of Page. */
                   1037: typedef struct PageF {
                   1038:        Ident ident;            /* IsPage bit must be set */
                   1039:        short res_x,res_y;      /* resolution in pixels/inch:  x & y */
                   1040:        Bbx bx;                 /* extreme indices in pixels */
                   1041:        float skew;             /* original page skew angle, radians */
                   1042:        float shear;            /* original page shear angle, radians */
                   1043:        short bkmny;            /* no. blocks */
                   1044:        short lmny;             /* no. textlines (not in any block) */
                   1045:        short wmny;             /* no. words (not in any textline) */
                   1046:        int cmny;               /* no. chars (not in any word) */
                   1047:        int bmny;               /* no. blobs (not in any char) */
                   1048:        /* if ident&Page_label, label follows PageF ('\0'-terminated string) */
                   1049:        } PageF;
                   1050: 
                         /* NOTE(review): same value as Page_label (00000000001), defined
                            before Pages; confirm the two are never tested on the same ident. */
                   1051: #define Page_new       00000000001     /* Page is in ``new'' format */
                   1052: 
                   1053: /* Each Dim-file begins with a Doc record */
                   1054: typedef struct Doc {
                   1055:        unsigned short version;         /* file format version number */
                   1056:        Pages ps;
                   1057:        char *l;                /* ASCII label */
                   1058:        } Doc;
                   1059: 
                   1060: #define Init_Doc {0,Init_Pages,NULL}
                   1061: #if MAIN
                   1062: Doc empty_Doc = Init_Doc;
                   1063: #else
                   1064: extern Doc empty_Doc;
                   1065: #endif
                   1066: 
                   1067: char *ident_toa();
                   1068: Ident cto_ident();
                   1069: Ident cto_flag();
                   1070: char *merit_toa();
                   1071: Page *alloc_page();
                   1072: char *page_toa();
                   1073: Page *dup_page();
                   1074: Page *dup_page_etc();
                   1075: Block *alloc_block();
                   1076: char *block_toa();
                   1077: Block *dup_block();
                   1078: Block *dup_block_etc();
                   1079: Block *append_block();
                   1080: Blocks *dup_blocks_etc();
                   1081: ConstPitch *alloc_constpitch();
                   1082: char *constpitch_toa();
                   1083: Txtln *alloc_txtln();
                   1084: char *txtln_toa();
                   1085: Word *alloc_word();
                   1086: char *word_toa();
                   1087: boolean eq_word();
                   1088: int hash_word();
                   1089: Char *alloc_char();
                   1090: char *char_toa();
                   1091: Pp *char_centroid();
                   1092: Char *char_of_blob();
                   1093: char *interp_toa();
                   1094: char *blob_toa();
                   1095: Pp *blob_centroid();
                   1096: char *runf_toa();
                   1097: char *runfs_toa();
                   1098: char *pp_toa();
                   1099: char *bdyedge_toa();
                   1100: Blob *dup_blob();
                   1101: Blob *dup_blob_etc();
                   1102: Blob *dup_blobl_etc();
                   1103: Blob *runs_to_runs();
                   1104: Blobs *dup_blobs_etc();
                   1105: Blobs *blobl_to_blobs();
                   1106: Interp *alloc_interp();
                   1107: Interp *dup_interp();
                   1108: Interpl *dup_interpl_etc();
                   1109: Interps *dup_interps_etc();
                   1110: Word *append_word();
                   1111: Word *insert_word();
                   1112: Word *insert_word_txtln();
                   1113: Word *dup_word();
                   1114: Word *dup_word_etc();
                   1115: Words *dup_words_etc();
                   1116: Txtln *dup_txtln();
                   1117: Txtln *dup_txtln_etc();
                   1118: Txtlns *dup_txtlns_etc();
                   1119: Block *dup_block();
                   1120: Block *dup_block_etc();
                   1121: Radians add_ang();
                   1122: Radians subtract_ang();
                   1123: 
/* in-line macros */
                   1125: 
                   1126: /* Merge the `source' Bbx into the `destination' Bbx, expanding the dest Bbx
                   1127:    as required.  The source Bbx is unmodified.  Usage:
                   1128:        merge_bbx(s,d)
                   1129:            Bbx *s,*d;
                   1130: */
                   1131: #define merge_bbx(s,d) { \
                   1132:        if((s)->a.x < (d)->a.x) (d)->a.x = (s)->a.x; \
                   1133:        if((s)->a.y < (d)->a.y) (d)->a.y = (s)->a.y; \
                   1134:        if((s)->b.x > (d)->b.x) (d)->b.x = (s)->b.x; \
                   1135:        if((s)->b.y > (d)->b.y) (d)->b.y = (s)->b.y; \
                   1136:        }
                   1137: 
                   1138: /* Experimental implementation of a data structure for maintaining a set
                   1139:    of distinct Words whose merit falls within a dynamically-varying range.
                   1140:    This implementation is worst-case quadratic time.
                   1141:    BUGS:  insert_wordset shouldn't duplicate the word.
                   1142:    */
                   1143: 
                   1144: #define dbg_ws (0)     /* if !=0, enable WordSet debugging tracing */
                   1145: 
                   1146: typedef struct WordSet {
                   1147:        double cut;     /* cut-fraction */
                   1148:        int cap;        /* capacity: maximum number permitted at any time */
                   1149:        double top;     /* maximum merit seen since allocation */
                   1150:        Words ws;       /* sorted by top-choice merit */
                   1151:        Word *max,*min; /* maximum/minimum entries currently in ws */
                   1152:        int high;       /* high-water:  max. no. entries in history */
                   1153:        } WordSet;
                   1154: #define Init_WordSet {1.0,INT_MAX,0.0,Init_Words,NULL,NULL,0}
                   1155: #if MAIN
                   1156: WordSet empty_WordSet = Init_WordSet;
                   1157: #else
                   1158: extern WordSet empty_WordSet;
                   1159: #endif
                   1160: 
                   1161: #define size_wordset(s) ((s)->ws.mny)
                   1162: #define top_wordset(s) ((s)->top)
                   1163: #define max_wordset(s) ((s)->max)
                   1164: #define min_wordset(s) ((s)->min)
                   1165: #define max_wordmerit(s) ((max_wordset((s))!=NULL)? (max_wordset((s)))->m: 0.0)
                   1166: #define min_wordmerit(s) ((min_wordset((s))!=NULL)? (min_wordset((s)))->m: 0.0)
                   1167: 
                   1168: Merit wordmerit();
                   1169: WordSet *alloc_wordset();
                   1170: boolean insert_wordset();
                   1171: Word *remove_wordset();
                   1172: int free_wordset_etc();
                   1173: err_wordset();

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.