mirror of http://192.168.1.51:8099/lmh188/twain3.0
1229 lines
42 KiB
C
1229 lines
42 KiB
C
|
/*====================================================================*
|
||
|
- Copyright (C) 2001 Leptonica. All rights reserved.
|
||
|
-
|
||
|
- Redistribution and use in source and binary forms, with or without
|
||
|
- modification, are permitted provided that the following conditions
|
||
|
- are met:
|
||
|
- 1. Redistributions of source code must retain the above copyright
|
||
|
- notice, this list of conditions and the following disclaimer.
|
||
|
- 2. Redistributions in binary form must reproduce the above
|
||
|
- copyright notice, this list of conditions and the following
|
||
|
- disclaimer in the documentation and/or other materials
|
||
|
- provided with the distribution.
|
||
|
-
|
||
|
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||
|
- ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||
|
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||
|
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
|
||
|
- CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||
|
- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||
|
- PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||
|
- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||
|
- OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||
|
- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||
|
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||
|
*====================================================================*/
|
||
|
|
||
|
/*!
|
||
|
* \file recogbasic.c
|
||
|
* <pre>
|
||
|
*
|
||
|
* Recog creation, destruction and access
|
||
|
* L_RECOG *recogCreateFromRecog()
|
||
|
* L_RECOG *recogCreateFromPixa()
|
||
|
* L_RECOG *recogCreateFromPixaNoFinish()
|
||
|
* L_RECOG *recogCreate()
|
||
|
* void recogDestroy()
|
||
|
*
|
||
|
* Recog accessors
|
||
|
* l_int32 recogGetCount()
|
||
|
* l_int32 recogSetParams()
|
||
|
* static l_int32 recogGetCharsetSize()
|
||
|
*
|
||
|
* Character/index lookup
|
||
|
* l_int32 recogGetClassIndex()
|
||
|
* l_int32 recogStringToIndex()
|
||
|
* l_int32 recogGetClassString()
|
||
|
* l_int32 l_convertCharstrToInt()
|
||
|
*
|
||
|
* Serialization
|
||
|
* L_RECOG *recogRead()
|
||
|
* L_RECOG *recogReadStream()
|
||
|
* L_RECOG *recogReadMem()
|
||
|
* l_int32 recogWrite()
|
||
|
* l_int32 recogWriteStream()
|
||
|
* l_int32 recogWriteMem()
|
||
|
* PIXA *recogExtractPixa()
|
||
|
* static l_int32 recogAddCharstrLabels()
|
||
|
* static l_int32 recogAddAllSamples()
|
||
|
*
|
||
|
* The recognizer functionality is split into four files:
|
||
|
* recogbasic.c: create, destroy, access, serialize
|
||
|
* recogtrain.c: training on labeled and unlabeled data
|
||
|
* recogident.c: running the recognizer(s) on input
|
||
|
* recogdid.c: running the recognizer(s) on input using a
|
||
|
* document image decoding (DID) hidden markov model
|
||
|
*
|
||
|
* This is a content-adapted (or book-adapted) recognizer (BAR) application.
|
||
|
* The recognizers here are typically assembled from data that has
|
||
|
* been labeled by a generic recognition system, such as Tesseract.
|
||
|
* The general procedure to create a recognizer (recog) from labeled data is
|
||
|
* to add the labeled character bitmaps, either one at a time or
|
||
|
* all together from a pixa with labeled pix.
|
||
|
*
|
||
|
* The suggested use for a BAR that consists of labeled templates drawn
|
||
|
* from a single source (e.g., a book) is to identify unlabeled samples
|
||
|
* by using unscaled character templates in the BAR, picking the
|
||
|
* template closest to the unlabeled sample.
|
||
|
*
|
||
|
* Outliers can be removed from a pixa of labeled pix. This is one of
|
||
|
* two methods that use averaged templates (the other is greedy splitting
|
||
|
* of characters). See recogtrain.c for a discussion and the implementation.
|
||
|
*
|
||
|
* A special bootstrap recognizer (BSR) can be used to make a BAR from
|
||
|
* unlabeled book data. This is done by comparing character images
|
||
|
* from the book with labeled templates in the BSR, where all images
|
||
|
* are scaled to h = 40. The templates can be either the scanned images
|
||
|
* or images consisting of width-normalized strokes derived from
|
||
|
* the skeleton of the character bitmaps.
|
||
|
*
|
||
|
* Two BARs of labeled character data, that have been made by
|
||
|
* different recognizers, can be joined by extracting a pixa of the
|
||
|
* labeled templates from each, joining the two pixa, and then
|
||
|
* regenerating a BAR from the joined set of templates.
|
||
|
* If all the labeled character data is from a single source (e.g., a book),
|
||
|
* identification can proceed using unscaled templates (either the input
|
||
|
* image or width-normalized lines). But if the labeled data comes from
|
||
|
* more than one source, (a "hybrid" recognizer), the templates should
|
||
|
* be scaled, and we recommend scaling to a fixed height.
|
||
|
*
|
||
|
* Suppose it is not possible to generate a BAR with a sufficient number
|
||
|
* of templates of each class taken from a single source. In that case,
|
||
|
* templates from the BSR itself can be added. This is the condition
|
||
|
* described above, where the labeled templates come from multiple
|
||
|
* sources, and it is necessary to do all character matches using
|
||
|
* templates that have been scaled to a fixed height (e.g., 40).
|
||
|
* Likewise, the samples to be identified using this hybrid recognizer
|
||
|
* must be modified in the same way. See prog/recogtest3.c for an
|
||
|
* example of the steps that can be taken in the construction of a BAR
|
||
|
* using a BSR.
|
||
|
*
|
||
|
* For training numeric input, an example set of calls that scales
|
||
|
* each training input to fixed h and will use the line templates of
|
||
|
* width linew for identifying unknown characters is:
|
||
|
* L_Recog *rec = recogCreate(0, h, linew, 128, 1);
|
||
|
* for (i = 0; i < n; i++) { // read in n training digits
|
||
|
* Pix *pix = ...
|
||
|
* recogTrainLabeled(rec, pix, NULL, text[i], 0);
|
||
|
* }
|
||
|
* recogTrainingFinished(&rec, 1, -1, -1.0); // required
|
||
|
*
|
||
|
* It is an error if any function that computes averages, removes
|
||
|
* outliers or requests identification of an unlabeled character,
|
||
|
* such as:
|
||
|
* (1) computing the sample averages: recogAverageSamples()
|
||
|
* (2) removing outliers: recogRemoveOutliers1() or recogRemoveOutliers2()
|
||
|
* (3) requesting identification of an unlabeled character:
|
||
|
* recogIdentifyPix()
|
||
|
* is called before an explicit call to finish training. Note that
|
||
|
* to do further training on a "finished" recognizer, you can set
|
||
|
* recog->train_done = FALSE;
|
||
|
* add the new training samples, and again call
|
||
|
* recogTrainingFinished(&rec, 1, -1, -1.0); // required
|
||
|
*
|
||
|
* If not scaling, using the images directly for identification, and
|
||
|
* removing outliers, do something like this:
|
||
|
* L_Recog *rec = recogCreate(0, 0, 0, 128, 1);
|
||
|
* for (i = 0; i < n; i++) { // read in n training characters
|
||
|
* Pix *pix = ...
|
||
|
* recogTrainLabeled(rec, pix, NULL, text[i], 0);
|
||
|
* }
|
||
|
* recogTrainingFinished(&rec, 1, -1, -1.0);
|
||
|
* if (!rec) ... [return]
|
||
|
* // remove outliers
|
||
|
* recogRemoveOutliers1(&rec, 0.7, 2, NULL, NULL);
|
||
|
*
|
||
|
* You can generate a recognizer from a pixa where the text field in
|
||
|
* each pix is the character string label for the pix. For example,
|
||
|
* the following recognizer will store unscaled line images:
|
||
|
* L_Recog *rec = recogCreateFromPixa(pixa, 0, 0, linew, 128, 1);
|
||
|
* and in use, it is fed unscaled line images to identify.
|
||
|
*
|
||
|
* For the following, assume that you have a pixa of labeled templates.
|
||
|
* If it is likely that some of the input templates are mislabeled,
|
||
|
* there are several things that can be done to remove them.
|
||
|
* The first is to put a size and quantity filter on them; e.g.
|
||
|
* Pixa *pixa2 = recogFilterPixaBySize(pixa1, 10, 15, 2.6);
|
||
|
* Then you can remove outliers; e.g.,
|
||
|
* Pixa *pixa3 = pixaRemoveOutliers2(pixa2, -1.0, -1, NULL, NULL);
|
||
|
*
|
||
|
* To this point, all templates are from a single source, so you
|
||
|
* can make a recognizer that uses the unscaled templates and optionally
|
||
|
* attempts to split touching characters:
|
||
|
* L_Recog *recog1 = recogCreateFromPixa(pixa3, ...);
|
||
|
* Alternatively, if you need more templates for some of the classes,
|
||
|
* you can pad with templates from a "bootstrap" recognizer (BSR).
|
||
|
* If you pad, it is necessary to scale the templates and input
|
||
|
* samples to a fixed height, and no attempt will be made to split
|
||
|
* the input sample connected components:
|
||
|
* L_Recog *recog1 = recogCreateFromPixa(pixa3, 0, 40, 0, 128, 0);
|
||
|
* recogPadDigitTrainingSet(&recog1, 40, 0);
|
||
|
*
|
||
|
* A special case is a pure BSR, that contains images scaled to a fixed
|
||
|
* height (we use 40 in these examples).
|
||
|
* For this, use either the scanned bitmap:
|
||
|
* L_Recog *recboot = recogCreateFromPixa(pixa, 0, 40, 0, 128, 1);
|
||
|
* or width-normalized lines (use width of 5 here):
|
||
|
* L_Recog *recboot = recogCreateFromPixa(pixa, 0, 40, 5, 128, 1);
|
||
|
*
|
||
|
* This can be used to train a new book-adapted recognizer (BAR) on
|
||
|
* unlabeled data from, e.g., a book. To do this, the following is required:
|
||
|
* (1) the input images from the book must be scaled in the same
|
||
|
* way as those in the BSR, and
|
||
|
* (2) both the BSR and the input images must be set up to be either
|
||
|
* input scanned images or width-normalized lines.
|
||
|
*
|
||
|
* </pre>
|
||
|
*/
|
||
|
|
||
|
#include <string.h>
|
||
|
#include "allheaders.h"
|
||
|
|
||
|
static const l_int32 MaxExamplesInClass = 256;
|
||
|
|
||
|
/* Default recog parameters that can be changed */
|
||
|
static const l_int32 DefaultCharsetType = L_ARABIC_NUMERALS;
|
||
|
static const l_int32 DefaultMinNopad = 1;
|
||
|
static const l_float32 DefaultMaxWHRatio = 3.0; /* max allowed w/h
|
||
|
ratio for a component to be split */
|
||
|
static const l_float32 DefaultMaxHTRatio = 2.6; /* max allowed ratio of
|
||
|
max/min unscaled averaged template heights */
|
||
|
static const l_int32 DefaultThreshold = 150; /* for binarization */
|
||
|
static const l_int32 DefaultMaxYShift = 1; /* for identification */
|
||
|
|
||
|
/* Static functions */
|
||
|
static l_int32 recogGetCharsetSize(l_int32 type);
|
||
|
static l_int32 recogAddCharstrLabels(L_RECOG *recog);
|
||
|
static l_int32 recogAddAllSamples(L_RECOG **precog, PIXAA *paa, l_int32 debug);
|
||
|
|
||
|
|
||
|
/*------------------------------------------------------------------------*
|
||
|
* Recog: initialization and destruction *
|
||
|
*------------------------------------------------------------------------*/
|
||
|
/*!
|
||
|
* \brief recogCreateFromRecog()
|
||
|
*
|
||
|
* \param[in] recs source recog with arbitrary input parameters
|
||
|
* \param[in] scalew scale all widths to this; use 0 otherwise
|
||
|
* \param[in] scaleh scale all heights to this; use 0 otherwise
|
||
|
* \param[in] linew width of normalized strokes; use 0 to skip
|
||
|
* \param[in] threshold for binarization; typically ~128
|
||
|
* \param[in] maxyshift from nominal centroid alignment; default is 1
|
||
|
* \return recd, or NULL on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) This is a convenience function that generates a recog using
|
||
|
* the unscaled training data in an existing recog.
|
||
|
* (2) It is recommended to use %maxyshift = 1 (the default value)
|
||
|
* (3) See recogCreate() for use of %scalew, %scaleh and %linew.
|
||
|
* </pre>
|
||
|
*/
|
||
|
L_RECOG *
|
||
|
recogCreateFromRecog(L_RECOG *recs,
|
||
|
l_int32 scalew,
|
||
|
l_int32 scaleh,
|
||
|
l_int32 linew,
|
||
|
l_int32 threshold,
|
||
|
l_int32 maxyshift)
|
||
|
{
|
||
|
L_RECOG *recd;
|
||
|
PIXA *pixa;
|
||
|
|
||
|
PROCNAME("recogCreateFromRecog");
|
||
|
|
||
|
if (!recs)
|
||
|
return (L_RECOG *)ERROR_PTR("recs not defined", procName, NULL);
|
||
|
|
||
|
pixa = recogExtractPixa(recs);
|
||
|
recd = recogCreateFromPixa(pixa, scalew, scaleh, linew, threshold,
|
||
|
maxyshift);
|
||
|
pixaDestroy(&pixa);
|
||
|
return recd;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief recogCreateFromPixa()
|
||
|
*
|
||
|
* \param[in] pixa of labeled, 1 bpp images
|
||
|
* \param[in] scalew scale all widths to this; use 0 otherwise
|
||
|
* \param[in] scaleh scale all heights to this; use 0 otherwise
|
||
|
* \param[in] linew width of normalized strokes; use 0 to skip
|
||
|
* \param[in] threshold for binarization; typically ~150
|
||
|
* \param[in] maxyshift from nominal centroid alignment; default is 1
|
||
|
* \return recog, or NULL on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) This is a convenience function for training from labeled data.
|
||
|
* The pixa can be read from file.
|
||
|
* (2) The pixa should contain the unscaled bitmaps used for training.
|
||
|
* (3) See recogCreate() for use of %scalew, %scaleh and %linew.
|
||
|
* (4) It is recommended to use %maxyshift = 1 (the default value)
|
||
|
* (5) All examples in the same class (i.e., with the same character
|
||
|
* label) should be similar. They can be made similar by invoking
|
||
|
* recogRemoveOutliers[1,2]() on %pixa before calling this function.
|
||
|
* </pre>
|
||
|
*/
|
||
|
L_RECOG *
|
||
|
recogCreateFromPixa(PIXA *pixa,
|
||
|
l_int32 scalew,
|
||
|
l_int32 scaleh,
|
||
|
l_int32 linew,
|
||
|
l_int32 threshold,
|
||
|
l_int32 maxyshift)
|
||
|
{
|
||
|
L_RECOG *recog;
|
||
|
|
||
|
PROCNAME("recogCreateFromPixa");
|
||
|
|
||
|
if (!pixa)
|
||
|
return (L_RECOG *)ERROR_PTR("pixa not defined", procName, NULL);
|
||
|
|
||
|
recog = recogCreateFromPixaNoFinish(pixa, scalew, scaleh, linew,
|
||
|
threshold, maxyshift);
|
||
|
if (!recog)
|
||
|
return (L_RECOG *)ERROR_PTR("recog not made", procName, NULL);
|
||
|
|
||
|
recogTrainingFinished(&recog, 1, -1, -1.0);
|
||
|
if (!recog)
|
||
|
return (L_RECOG *)ERROR_PTR("bad templates", procName, NULL);
|
||
|
return recog;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief recogCreateFromPixaNoFinish()
|
||
|
*
|
||
|
* \param[in] pixa of labeled, 1 bpp images
|
||
|
* \param[in] scalew scale all widths to this; use 0 otherwise
|
||
|
* \param[in] scaleh scale all heights to this; use 0 otherwise
|
||
|
* \param[in] linew width of normalized strokes; use 0 to skip
|
||
|
* \param[in] threshold for binarization; typically ~150
|
||
|
* \param[in] maxyshift from nominal centroid alignment; default is 1
|
||
|
* \return recog, or NULL on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) See recogCreateFromPixa() for details.
|
||
|
* (2) This is also used to generate a pixaa with templates
|
||
|
* in each class within a pixa. For that, all args except for
|
||
|
* %pixa are ignored.
|
||
|
* </pre>
|
||
|
*/
|
||
|
L_RECOG *
|
||
|
recogCreateFromPixaNoFinish(PIXA *pixa,
|
||
|
l_int32 scalew,
|
||
|
l_int32 scaleh,
|
||
|
l_int32 linew,
|
||
|
l_int32 threshold,
|
||
|
l_int32 maxyshift)
|
||
|
{
|
||
|
char *text;
|
||
|
l_int32 full, n, i, ntext, same, maxd;
|
||
|
PIX *pix;
|
||
|
L_RECOG *recog;
|
||
|
|
||
|
PROCNAME("recogCreateFromPixaNoFinish");
|
||
|
|
||
|
if (!pixa)
|
||
|
return (L_RECOG *)ERROR_PTR("pixa not defined", procName, NULL);
|
||
|
pixaVerifyDepth(pixa, &same, &maxd);
|
||
|
if (maxd > 1)
|
||
|
return (L_RECOG *)ERROR_PTR("not all pix are 1 bpp", procName, NULL);
|
||
|
|
||
|
pixaIsFull(pixa, &full, NULL);
|
||
|
if (!full)
|
||
|
return (L_RECOG *)ERROR_PTR("not all pix are present", procName, NULL);
|
||
|
|
||
|
n = pixaGetCount(pixa);
|
||
|
pixaCountText(pixa, &ntext);
|
||
|
if (ntext == 0)
|
||
|
return (L_RECOG *)ERROR_PTR("no pix have text strings", procName, NULL);
|
||
|
if (ntext < n)
|
||
|
L_ERROR("%d text strings < %d pix\n", procName, ntext, n);
|
||
|
|
||
|
recog = recogCreate(scalew, scaleh, linew, threshold, maxyshift);
|
||
|
if (!recog)
|
||
|
return (L_RECOG *)ERROR_PTR("recog not made", procName, NULL);
|
||
|
for (i = 0; i < n; i++) {
|
||
|
pix = pixaGetPix(pixa, i, L_CLONE);
|
||
|
text = pixGetText(pix);
|
||
|
if (!text || strlen(text) == 0) {
|
||
|
L_ERROR("pix[%d] has no text\n", procName, i);
|
||
|
pixDestroy(&pix);
|
||
|
continue;
|
||
|
}
|
||
|
recogTrainLabeled(recog, pix, NULL, text, 0);
|
||
|
pixDestroy(&pix);
|
||
|
}
|
||
|
|
||
|
return recog;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief recogCreate()
|
||
|
*
|
||
|
* \param[in] scalew scale all widths to this; use 0 otherwise
|
||
|
* \param[in] scaleh scale all heights to this; use 0 otherwise
|
||
|
* \param[in] linew width of normalized strokes; use 0 to skip
|
||
|
* \param[in] threshold for binarization; typically ~128; 0 for default
|
||
|
* \param[in] maxyshift from nominal centroid alignment; default is 1
|
||
|
* \return recog, or NULL on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) If %scalew == 0 and %scaleh == 0, no scaling is done.
|
||
|
* If one of these is 0 and the other is > 0, scaling is isotropic
|
||
|
* to the requested size. We typically do not set both > 0.
|
||
|
* (2) Use linew > 0 to convert the templates to images with fixed
|
||
|
* width strokes. linew == 0 skips the conversion.
|
||
|
* (3) The only valid values for %maxyshift are 0, 1 and 2.
|
||
|
* It is recommended to use %maxyshift == 1 (default value).
|
||
|
* Using %maxyshift == 0 is much faster than %maxyshift == 1, but
|
||
|
* it is much less likely to find the template with the best
|
||
|
* correlation. Use of anything but 1 results in a warning.
|
||
|
* (4) Scaling is used for finding outliers and for training a
|
||
|
* book-adapted recognizer (BAR) from a bootstrap recognizer (BSR).
|
||
|
* Scaling the height to a fixed value and scaling the width
|
||
|
* accordingly (e.g., %scaleh = 40, %scalew = 0) is recommended.
|
||
|
* (5) The storage for most of the arrays is allocated when training
|
||
|
* is finished.
|
||
|
* </pre>
|
||
|
*/
|
||
|
L_RECOG *
|
||
|
recogCreate(l_int32 scalew,
|
||
|
l_int32 scaleh,
|
||
|
l_int32 linew,
|
||
|
l_int32 threshold,
|
||
|
l_int32 maxyshift)
|
||
|
{
|
||
|
L_RECOG *recog;
|
||
|
|
||
|
PROCNAME("recogCreate");
|
||
|
|
||
|
if (scalew < 0 || scaleh < 0)
|
||
|
return (L_RECOG *)ERROR_PTR("invalid scalew or scaleh", procName, NULL);
|
||
|
if (linew > 10)
|
||
|
return (L_RECOG *)ERROR_PTR("invalid linew > 10", procName, NULL);
|
||
|
if (threshold == 0) threshold = DefaultThreshold;
|
||
|
if (threshold < 0 || threshold > 255) {
|
||
|
L_WARNING("invalid threshold; using default\n", procName);
|
||
|
threshold = DefaultThreshold;
|
||
|
}
|
||
|
if (maxyshift < 0 || maxyshift > 2) {
|
||
|
L_WARNING("invalid maxyshift; using default value\n", procName);
|
||
|
maxyshift = DefaultMaxYShift;
|
||
|
} else if (maxyshift == 0) {
|
||
|
L_WARNING("Using maxyshift = 0; faster, worse correlation results\n",
|
||
|
procName);
|
||
|
} else if (maxyshift == 2) {
|
||
|
L_WARNING("Using maxyshift = 2; slower\n", procName);
|
||
|
}
|
||
|
|
||
|
recog = (L_RECOG *)LEPT_CALLOC(1, sizeof(L_RECOG));
|
||
|
recog->templ_use = L_USE_ALL_TEMPLATES; /* default */
|
||
|
recog->threshold = threshold;
|
||
|
recog->scalew = scalew;
|
||
|
recog->scaleh = scaleh;
|
||
|
recog->linew = linew;
|
||
|
recog->maxyshift = maxyshift;
|
||
|
recogSetParams(recog, 1, -1, -1.0, -1.0);
|
||
|
recog->bmf = bmfCreate(NULL, 6);
|
||
|
recog->bmf_size = 6;
|
||
|
recog->maxarraysize = MaxExamplesInClass;
|
||
|
|
||
|
/* Generate the LUTs */
|
||
|
recog->centtab = makePixelCentroidTab8();
|
||
|
recog->sumtab = makePixelSumTab8();
|
||
|
recog->sa_text = sarrayCreate(0);
|
||
|
recog->dna_tochar = l_dnaCreate(0);
|
||
|
|
||
|
/* Input default values for min component size for splitting.
|
||
|
* These are overwritten when pixTrainingFinished() is called. */
|
||
|
recog->min_splitw = 6;
|
||
|
recog->max_splith = 60;
|
||
|
|
||
|
/* Allocate the paa for the unscaled training bitmaps */
|
||
|
recog->pixaa_u = pixaaCreate(recog->maxarraysize);
|
||
|
|
||
|
/* Generate the storage for debugging */
|
||
|
recog->pixadb_boot = pixaCreate(2);
|
||
|
recog->pixadb_split = pixaCreate(2);
|
||
|
return recog;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief recogDestroy()
|
||
|
*
|
||
|
* \param[in,out] precog will be set to null before returning
|
||
|
* \return void
|
||
|
*/
|
||
|
void
|
||
|
recogDestroy(L_RECOG **precog)
|
||
|
{
|
||
|
L_RECOG *recog;
|
||
|
|
||
|
PROCNAME("recogDestroy");
|
||
|
|
||
|
if (!precog) {
|
||
|
L_WARNING("ptr address is null\n", procName);
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
if ((recog = *precog) == NULL) return;
|
||
|
|
||
|
LEPT_FREE(recog->centtab);
|
||
|
LEPT_FREE(recog->sumtab);
|
||
|
sarrayDestroy(&recog->sa_text);
|
||
|
l_dnaDestroy(&recog->dna_tochar);
|
||
|
pixaaDestroy(&recog->pixaa_u);
|
||
|
pixaDestroy(&recog->pixa_u);
|
||
|
ptaaDestroy(&recog->ptaa_u);
|
||
|
ptaDestroy(&recog->pta_u);
|
||
|
numaDestroy(&recog->nasum_u);
|
||
|
numaaDestroy(&recog->naasum_u);
|
||
|
pixaaDestroy(&recog->pixaa);
|
||
|
pixaDestroy(&recog->pixa);
|
||
|
ptaaDestroy(&recog->ptaa);
|
||
|
ptaDestroy(&recog->pta);
|
||
|
numaDestroy(&recog->nasum);
|
||
|
numaaDestroy(&recog->naasum);
|
||
|
pixaDestroy(&recog->pixa_tr);
|
||
|
pixaDestroy(&recog->pixadb_ave);
|
||
|
pixaDestroy(&recog->pixa_id);
|
||
|
pixDestroy(&recog->pixdb_ave);
|
||
|
pixDestroy(&recog->pixdb_range);
|
||
|
pixaDestroy(&recog->pixadb_boot);
|
||
|
pixaDestroy(&recog->pixadb_split);
|
||
|
bmfDestroy(&recog->bmf);
|
||
|
rchDestroy(&recog->rch);
|
||
|
rchaDestroy(&recog->rcha);
|
||
|
recogDestroyDid(recog);
|
||
|
LEPT_FREE(recog);
|
||
|
*precog = NULL;
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*------------------------------------------------------------------------*
|
||
|
* Recog accessors *
|
||
|
*------------------------------------------------------------------------*/
|
||
|
/*!
|
||
|
* \brief recogGetCount()
|
||
|
*
|
||
|
* \param[in] recog
|
||
|
* \return count of classes in recog; 0 if no recog or on error
|
||
|
*/
|
||
|
l_int32
|
||
|
recogGetCount(L_RECOG *recog)
|
||
|
{
|
||
|
PROCNAME("recogGetCount");
|
||
|
|
||
|
if (!recog)
|
||
|
return ERROR_INT("recog not defined", procName, 0);
|
||
|
return recog->setsize;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief recogSetParams()
|
||
|
*
|
||
|
* \param[in] recog to be padded, if necessary
|
||
|
* \param[in] type type of char set; -1 for default;
|
||
|
* see enum in recog.h
|
||
|
* \param[in] min_nopad min number in a class without padding;
|
||
|
* use -1 for default
|
||
|
* \param[in] max_wh_ratio max width/height ratio allowed for splitting;
|
||
|
* use -1.0 for default
|
||
|
* \param[in] max_ht_ratio max of max/min averaged template height ratio;
|
||
|
* use -1.0 for default
|
||
|
* \return 0 if OK, 1 on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) This is called when a recog is created.
|
||
|
* (2) Default %min_nopad value allows for some padding.
|
||
|
* To disable padding, set %min_nopad = 0. To pad only when
|
||
|
* no samples are available for the class, set %min_nopad = 1.
|
||
|
* (3) The %max_wh_ratio limits the width/height ratio for components
|
||
|
* that we attempt to split. Splitting long components is expensive.
|
||
|
* (4) The %max_ht_ratio is a quality requirement on the training data.
|
||
|
* The recognizer will not run if the averages are computed and
|
||
|
* the templates do not satisfy it.
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_ok
|
||
|
recogSetParams(L_RECOG *recog,
|
||
|
l_int32 type,
|
||
|
l_int32 min_nopad,
|
||
|
l_float32 max_wh_ratio,
|
||
|
l_float32 max_ht_ratio)
|
||
|
{
|
||
|
PROCNAME("recogSetParams");
|
||
|
|
||
|
if (!recog)
|
||
|
return ERROR_INT("recog not defined", procName, 1);
|
||
|
|
||
|
recog->charset_type = (type >= 0) ? type : DefaultCharsetType;
|
||
|
recog->charset_size = recogGetCharsetSize(recog->charset_type);
|
||
|
recog->min_nopad = (min_nopad >= 0) ? min_nopad : DefaultMinNopad;
|
||
|
recog->max_wh_ratio = (max_wh_ratio > 0.0) ? max_wh_ratio :
|
||
|
DefaultMaxWHRatio;
|
||
|
recog->max_ht_ratio = (max_ht_ratio > 1.0) ? max_ht_ratio :
|
||
|
DefaultMaxHTRatio;
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief recogGetCharsetSize()
|
||
|
*
|
||
|
* \param[in] type of charset
|
||
|
* \return size of charset, or 0 if unknown or on error
|
||
|
*/
|
||
|
static l_int32
|
||
|
recogGetCharsetSize(l_int32 type)
|
||
|
{
|
||
|
PROCNAME("recogGetCharsetSize");
|
||
|
|
||
|
switch (type) {
|
||
|
case L_UNKNOWN:
|
||
|
return 0;
|
||
|
case L_ARABIC_NUMERALS:
|
||
|
return 10;
|
||
|
case L_LC_ROMAN_NUMERALS:
|
||
|
return 7;
|
||
|
case L_UC_ROMAN_NUMERALS:
|
||
|
return 7;
|
||
|
case L_LC_ALPHA:
|
||
|
return 26;
|
||
|
case L_UC_ALPHA:
|
||
|
return 26;
|
||
|
default:
|
||
|
L_ERROR("invalid charset_type %d\n", procName, type);
|
||
|
return 0;
|
||
|
}
|
||
|
return 0; /* shouldn't happen */
|
||
|
}
|
||
|
|
||
|
|
||
|
/*------------------------------------------------------------------------*
|
||
|
* Character/index lookup *
|
||
|
*------------------------------------------------------------------------*/
|
||
|
/*!
|
||
|
* \brief recogGetClassIndex()
|
||
|
*
|
||
|
* \param[in] recog with LUT's pre-computed
|
||
|
* \param[in] val integer value; can be up to 3 bytes for UTF-8
|
||
|
* \param[in] text text from which %val was derived; used if not found
|
||
|
* \param[out] pindex index into dna_tochar
|
||
|
* \return 0 if found; 1 if not found and added; 2 on error.
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) This is used during training. There is one entry in
|
||
|
* recog->dna_tochar (integer value, e.g., ascii) and
|
||
|
* one in recog->sa_text (e.g, ascii letter in a string)
|
||
|
* for each character class.
|
||
|
* (2) This searches the dna character array for %val. If it is
|
||
|
* not found, the template represents a character class not
|
||
|
* already seen: it increments setsize (the number of character
|
||
|
* classes) by 1, and augments both the index (dna_tochar)
|
||
|
* and text (sa_text) arrays.
|
||
|
* (3) Returns the index in &index, except on error.
|
||
|
* (4) Caller must check the function return value.
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_int32
|
||
|
recogGetClassIndex(L_RECOG *recog,
|
||
|
l_int32 val,
|
||
|
char *text,
|
||
|
l_int32 *pindex)
|
||
|
{
|
||
|
l_int32 i, n, ival;
|
||
|
|
||
|
PROCNAME("recogGetClassIndex");
|
||
|
|
||
|
if (!pindex)
|
||
|
return ERROR_INT("&index not defined", procName, 2);
|
||
|
*pindex = -1;
|
||
|
if (!recog)
|
||
|
return ERROR_INT("recog not defined", procName, 2);
|
||
|
if (!text)
|
||
|
return ERROR_INT("text not defined", procName, 2);
|
||
|
|
||
|
/* Search existing characters */
|
||
|
n = l_dnaGetCount(recog->dna_tochar);
|
||
|
for (i = 0; i < n; i++) {
|
||
|
l_dnaGetIValue(recog->dna_tochar, i, &ival);
|
||
|
if (val == ival) { /* found */
|
||
|
*pindex = i;
|
||
|
return 0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* If not found... */
|
||
|
l_dnaAddNumber(recog->dna_tochar, val);
|
||
|
sarrayAddString(recog->sa_text, text, L_COPY);
|
||
|
recog->setsize++;
|
||
|
*pindex = n;
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief recogStringToIndex()
|
||
|
*
|
||
|
* \param[in] recog
|
||
|
* \param[in] text text string for some class
|
||
|
* \param[out] pindex index for that class; -1 if not found
|
||
|
* \return 0 if OK, 1 on error not finding the string is an error
|
||
|
*/
|
||
|
l_ok
|
||
|
recogStringToIndex(L_RECOG *recog,
|
||
|
char *text,
|
||
|
l_int32 *pindex)
|
||
|
{
|
||
|
char *charstr;
|
||
|
l_int32 i, n, diff;
|
||
|
|
||
|
PROCNAME("recogStringtoIndex");
|
||
|
|
||
|
if (!pindex)
|
||
|
return ERROR_INT("&index not defined", procName, 1);
|
||
|
*pindex = -1;
|
||
|
if (!recog)
|
||
|
return ERROR_INT("recog not defined", procName, 1);
|
||
|
if (!text)
|
||
|
return ERROR_INT("text not defined", procName, 1);
|
||
|
|
||
|
/* Search existing characters */
|
||
|
n = recog->setsize;
|
||
|
for (i = 0; i < n; i++) {
|
||
|
recogGetClassString(recog, i, &charstr);
|
||
|
if (!charstr) {
|
||
|
L_ERROR("string not found for index %d\n", procName, i);
|
||
|
continue;
|
||
|
}
|
||
|
diff = strcmp(text, charstr);
|
||
|
LEPT_FREE(charstr);
|
||
|
if (diff) continue;
|
||
|
*pindex = i;
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
return 1; /* not found */
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief recogGetClassString()
|
||
|
*
|
||
|
* \param[in] recog
|
||
|
* \param[in] index into array of char types
|
||
|
* \param[out] pcharstr string representation;
|
||
|
* returns an empty string on error
|
||
|
* \return 0 if found, 1 on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) Extracts a copy of the string from sa_text, which
|
||
|
* the caller must free.
|
||
|
* (2) Caller must check the function return value.
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_int32
|
||
|
recogGetClassString(L_RECOG *recog,
|
||
|
l_int32 index,
|
||
|
char **pcharstr)
|
||
|
{
|
||
|
PROCNAME("recogGetClassString");
|
||
|
|
||
|
if (!pcharstr)
|
||
|
return ERROR_INT("&charstr not defined", procName, 1);
|
||
|
*pcharstr = stringNew("");
|
||
|
if (!recog)
|
||
|
return ERROR_INT("recog not defined", procName, 2);
|
||
|
|
||
|
if (index < 0 || index >= recog->setsize)
|
||
|
return ERROR_INT("invalid index", procName, 1);
|
||
|
LEPT_FREE(*pcharstr);
|
||
|
*pcharstr = sarrayGetString(recog->sa_text, index, L_COPY);
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief l_convertCharstrToInt()
|
||
|
*
|
||
|
* \param[in] str input string representing one UTF-8 character;
|
||
|
* not more than 4 bytes
|
||
|
* \param[out] pval integer value for the input. Think of it
|
||
|
* as a 1-to-1 hash code.
|
||
|
* \return 0 if OK, 1 on error
|
||
|
*/
|
||
|
l_ok
|
||
|
l_convertCharstrToInt(const char *str,
|
||
|
l_int32 *pval)
|
||
|
{
|
||
|
l_int32 size, val;
|
||
|
|
||
|
PROCNAME("l_convertCharstrToInt");
|
||
|
|
||
|
if (!pval)
|
||
|
return ERROR_INT("&val not defined", procName, 1);
|
||
|
*pval = 0;
|
||
|
if (!str)
|
||
|
return ERROR_INT("str not defined", procName, 1);
|
||
|
size = strlen(str);
|
||
|
if (size == 0)
|
||
|
return ERROR_INT("empty string", procName, 1);
|
||
|
if (size > 4)
|
||
|
return ERROR_INT("invalid string: > 4 bytes", procName, 1);
|
||
|
|
||
|
val = (l_int32)str[0];
|
||
|
if (size > 1)
|
||
|
val = (val << 8) + (l_int32)str[1];
|
||
|
if (size > 2)
|
||
|
val = (val << 8) + (l_int32)str[2];
|
||
|
if (size > 3)
|
||
|
val = (val << 8) + (l_int32)str[3];
|
||
|
*pval = val;
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*------------------------------------------------------------------------*
|
||
|
* Serialization *
|
||
|
*------------------------------------------------------------------------*/
|
||
|
/*!
|
||
|
* \brief recogRead()
|
||
|
*
|
||
|
* \param[in] filename
|
||
|
* \return recog, or NULL on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) When a recog is serialized, a pixaa of the templates that are
|
||
|
* actually used for correlation is saved in the pixaa_u array
|
||
|
* of the recog. These can be different from the templates that
|
||
|
* were used to generate the recog, because those original templates
|
||
|
* can be scaled and turned into normalized lines. When recog1
|
||
|
* is deserialized to recog2, these templates are put in both the
|
||
|
* unscaled array (pixaa_u) and the modified array (pixaa) in recog2.
|
||
|
* Why not put it in only the unscaled array and let
|
||
|
* recogTrainingFinalized() regenerate the modified templates?
|
||
|
* The reason is that with normalized lines, the operation of
|
||
|
* thinning to a skeleton and dilating back to a fixed width
|
||
|
* is not idempotent. Thinning to a skeleton saves pixels at
|
||
|
* the end of a line segment, and thickening the skeleton puts
|
||
|
* additional pixels at the end of the lines. This tends to
|
||
|
* close gaps.
|
||
|
* </pre>
|
||
|
*/
|
||
|
L_RECOG *
|
||
|
recogRead(const char *filename)
|
||
|
{
|
||
|
FILE *fp;
|
||
|
L_RECOG *recog;
|
||
|
|
||
|
PROCNAME("recogRead");
|
||
|
|
||
|
if (!filename)
|
||
|
return (L_RECOG *)ERROR_PTR("filename not defined", procName, NULL);
|
||
|
if ((fp = fopenReadStream(filename)) == NULL)
|
||
|
return (L_RECOG *)ERROR_PTR("stream not opened", procName, NULL);
|
||
|
|
||
|
if ((recog = recogReadStream(fp)) == NULL) {
|
||
|
fclose(fp);
|
||
|
return (L_RECOG *)ERROR_PTR("recog not read", procName, NULL);
|
||
|
}
|
||
|
|
||
|
fclose(fp);
|
||
|
return recog;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief recogReadStream()
|
||
|
*
|
||
|
* \param[in] fp file stream
|
||
|
* \return recog, or NULL on error
|
||
|
*/
|
||
|
L_RECOG *
|
||
|
recogReadStream(FILE *fp)
|
||
|
{
|
||
|
l_int32 version, setsize, threshold, scalew, scaleh, linew;
|
||
|
l_int32 maxyshift, nc;
|
||
|
L_DNA *dna_tochar;
|
||
|
PIXAA *paa;
|
||
|
L_RECOG *recog;
|
||
|
SARRAY *sa_text;
|
||
|
|
||
|
PROCNAME("recogReadStream");
|
||
|
|
||
|
if (!fp)
|
||
|
return (L_RECOG *)ERROR_PTR("stream not defined", procName, NULL);
|
||
|
|
||
|
if (fscanf(fp, "\nRecog Version %d\n", &version) != 1)
|
||
|
return (L_RECOG *)ERROR_PTR("not a recog file", procName, NULL);
|
||
|
if (version != RECOG_VERSION_NUMBER)
|
||
|
return (L_RECOG *)ERROR_PTR("invalid recog version", procName, NULL);
|
||
|
if (fscanf(fp, "Size of character set = %d\n", &setsize) != 1)
|
||
|
return (L_RECOG *)ERROR_PTR("setsize not read", procName, NULL);
|
||
|
if (fscanf(fp, "Binarization threshold = %d\n", &threshold) != 1)
|
||
|
return (L_RECOG *)ERROR_PTR("binary thresh not read", procName, NULL);
|
||
|
if (fscanf(fp, "Maxyshift = %d\n", &maxyshift) != 1)
|
||
|
return (L_RECOG *)ERROR_PTR("maxyshift not read", procName, NULL);
|
||
|
if (fscanf(fp, "Scale to width = %d\n", &scalew) != 1)
|
||
|
return (L_RECOG *)ERROR_PTR("width not read", procName, NULL);
|
||
|
if (fscanf(fp, "Scale to height = %d\n", &scaleh) != 1)
|
||
|
return (L_RECOG *)ERROR_PTR("height not read", procName, NULL);
|
||
|
if (fscanf(fp, "Normalized line width = %d\n", &linew) != 1)
|
||
|
return (L_RECOG *)ERROR_PTR("line width not read", procName, NULL);
|
||
|
if ((recog = recogCreate(scalew, scaleh, linew, threshold,
|
||
|
maxyshift)) == NULL)
|
||
|
return (L_RECOG *)ERROR_PTR("recog not made", procName, NULL);
|
||
|
|
||
|
if (fscanf(fp, "\nLabels for character set:\n") != 0) {
|
||
|
recogDestroy(&recog);
|
||
|
return (L_RECOG *)ERROR_PTR("label intro not read", procName, NULL);
|
||
|
}
|
||
|
l_dnaDestroy(&recog->dna_tochar);
|
||
|
if ((dna_tochar = l_dnaReadStream(fp)) == NULL) {
|
||
|
recogDestroy(&recog);
|
||
|
return (L_RECOG *)ERROR_PTR("dna_tochar not read", procName, NULL);
|
||
|
}
|
||
|
recog->dna_tochar = dna_tochar;
|
||
|
sarrayDestroy(&recog->sa_text);
|
||
|
if ((sa_text = sarrayReadStream(fp)) == NULL) {
|
||
|
recogDestroy(&recog);
|
||
|
return (L_RECOG *)ERROR_PTR("sa_text not read", procName, NULL);
|
||
|
}
|
||
|
recog->sa_text = sa_text;
|
||
|
|
||
|
if (fscanf(fp, "\nPixaa of all samples in the training set:\n") != 0) {
|
||
|
recogDestroy(&recog);
|
||
|
return (L_RECOG *)ERROR_PTR("pixaa intro not read", procName, NULL);
|
||
|
}
|
||
|
if ((paa = pixaaReadStream(fp)) == NULL) {
|
||
|
recogDestroy(&recog);
|
||
|
return (L_RECOG *)ERROR_PTR("pixaa not read", procName, NULL);
|
||
|
}
|
||
|
recog->setsize = setsize;
|
||
|
nc = pixaaGetCount(paa, NULL);
|
||
|
if (nc != setsize) {
|
||
|
recogDestroy(&recog);
|
||
|
pixaaDestroy(&paa);
|
||
|
L_ERROR("(setsize = %d) != (paa count = %d)\n", procName,
|
||
|
setsize, nc);
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
recogAddAllSamples(&recog, paa, 0); /* this finishes */
|
||
|
pixaaDestroy(&paa);
|
||
|
if (!recog)
|
||
|
return (L_RECOG *)ERROR_PTR("bad templates", procName, NULL);
|
||
|
return recog;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief recogReadMem()
|
||
|
*
|
||
|
* \param[in] data serialization of recog (not ascii)
|
||
|
* \param[in] size of data in bytes
|
||
|
* \return recog, or NULL on error
|
||
|
*/
|
||
|
L_RECOG *
|
||
|
recogReadMem(const l_uint8 *data,
|
||
|
size_t size)
|
||
|
{
|
||
|
FILE *fp;
|
||
|
L_RECOG *recog;
|
||
|
|
||
|
PROCNAME("recogReadMem");
|
||
|
|
||
|
if (!data)
|
||
|
return (L_RECOG *)ERROR_PTR("data not defined", procName, NULL);
|
||
|
if ((fp = fopenReadFromMemory(data, size)) == NULL)
|
||
|
return (L_RECOG *)ERROR_PTR("stream not opened", procName, NULL);
|
||
|
|
||
|
recog = recogReadStream(fp);
|
||
|
fclose(fp);
|
||
|
if (!recog) L_ERROR("recog not read\n", procName);
|
||
|
return recog;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief recogWrite()
|
||
|
*
|
||
|
* \param[in] filename
|
||
|
* \param[in] recog
|
||
|
* \return 0 if OK, 1 on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) The pixaa of templates that is written is the modified one
|
||
|
* in the pixaa field. It is the pixaa that is actually used
|
||
|
* for correlation. This is not the unscaled array of labeled
|
||
|
* bitmaps, in pixaa_u, that was used to generate the recog in the
|
||
|
* first place. See the notes in recogRead() for the rationale.
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_ok
|
||
|
recogWrite(const char *filename,
|
||
|
L_RECOG *recog)
|
||
|
{
|
||
|
l_int32 ret;
|
||
|
FILE *fp;
|
||
|
|
||
|
PROCNAME("recogWrite");
|
||
|
|
||
|
if (!filename)
|
||
|
return ERROR_INT("filename not defined", procName, 1);
|
||
|
if (!recog)
|
||
|
return ERROR_INT("recog not defined", procName, 1);
|
||
|
|
||
|
if ((fp = fopenWriteStream(filename, "wb")) == NULL)
|
||
|
return ERROR_INT("stream not opened", procName, 1);
|
||
|
ret = recogWriteStream(fp, recog);
|
||
|
fclose(fp);
|
||
|
if (ret)
|
||
|
return ERROR_INT("recog not written to stream", procName, 1);
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief recogWriteStream()
|
||
|
*
|
||
|
* \param[in] fp file stream opened for "wb"
|
||
|
* \param[in] recog
|
||
|
* \return 0 if OK, 1 on error
|
||
|
*/
|
||
|
l_ok
|
||
|
recogWriteStream(FILE *fp,
|
||
|
L_RECOG *recog)
|
||
|
{
|
||
|
PROCNAME("recogWriteStream");
|
||
|
|
||
|
if (!fp)
|
||
|
return ERROR_INT("stream not defined", procName, 1);
|
||
|
if (!recog)
|
||
|
return ERROR_INT("recog not defined", procName, 1);
|
||
|
|
||
|
fprintf(fp, "\nRecog Version %d\n", RECOG_VERSION_NUMBER);
|
||
|
fprintf(fp, "Size of character set = %d\n", recog->setsize);
|
||
|
fprintf(fp, "Binarization threshold = %d\n", recog->threshold);
|
||
|
fprintf(fp, "Maxyshift = %d\n", recog->maxyshift);
|
||
|
fprintf(fp, "Scale to width = %d\n", recog->scalew);
|
||
|
fprintf(fp, "Scale to height = %d\n", recog->scaleh);
|
||
|
fprintf(fp, "Normalized line width = %d\n", recog->linew);
|
||
|
fprintf(fp, "\nLabels for character set:\n");
|
||
|
l_dnaWriteStream(fp, recog->dna_tochar);
|
||
|
sarrayWriteStream(fp, recog->sa_text);
|
||
|
fprintf(fp, "\nPixaa of all samples in the training set:\n");
|
||
|
pixaaWriteStream(fp, recog->pixaa);
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief recogWriteMem()
|
||
|
*
|
||
|
* \param[out] pdata data of serialized recog (not ascii)
|
||
|
* \param[out] psize size of returned data
|
||
|
* \param[in] recog
|
||
|
* \return 0 if OK, 1 on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) Serializes a recog in memory and puts the result in a buffer.
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_ok
|
||
|
recogWriteMem(l_uint8 **pdata,
|
||
|
size_t *psize,
|
||
|
L_RECOG *recog)
|
||
|
{
|
||
|
l_int32 ret;
|
||
|
FILE *fp;
|
||
|
|
||
|
PROCNAME("recogWriteMem");
|
||
|
|
||
|
if (pdata) *pdata = NULL;
|
||
|
if (psize) *psize = 0;
|
||
|
if (!pdata)
|
||
|
return ERROR_INT("&data not defined", procName, 1);
|
||
|
if (!psize)
|
||
|
return ERROR_INT("&size not defined", procName, 1);
|
||
|
if (!recog)
|
||
|
return ERROR_INT("recog not defined", procName, 1);
|
||
|
|
||
|
#if HAVE_FMEMOPEN
|
||
|
if ((fp = open_memstream((char **)pdata, psize)) == NULL)
|
||
|
return ERROR_INT("stream not opened", procName, 1);
|
||
|
ret = recogWriteStream(fp, recog);
|
||
|
#else
|
||
|
L_INFO("work-around: writing to a temp file\n", procName);
|
||
|
#ifdef _WIN32
|
||
|
if ((fp = fopenWriteWinTempfile()) == NULL)
|
||
|
return ERROR_INT("tmpfile stream not opened", procName, 1);
|
||
|
#else
|
||
|
if ((fp = tmpfile()) == NULL)
|
||
|
return ERROR_INT("tmpfile stream not opened", procName, 1);
|
||
|
#endif /* _WIN32 */
|
||
|
ret = recogWriteStream(fp, recog);
|
||
|
rewind(fp);
|
||
|
*pdata = l_binaryReadStream(fp, psize);
|
||
|
#endif /* HAVE_FMEMOPEN */
|
||
|
fclose(fp);
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief recogExtractPixa()
|
||
|
*
|
||
|
* \param[in] recog
|
||
|
* \return pixa if OK, NULL on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) This generates a pixa of all the unscaled images in the
|
||
|
* recognizer, where each one has its character class label in
|
||
|
* the pix text field, by flattening pixaa_u to a pixa.
|
||
|
* </pre>
|
||
|
*/
|
||
|
PIXA *
|
||
|
recogExtractPixa(L_RECOG *recog)
|
||
|
{
|
||
|
PROCNAME("recogExtractPixa");
|
||
|
|
||
|
if (!recog)
|
||
|
return (PIXA *)ERROR_PTR("recog not defined", procName, NULL);
|
||
|
|
||
|
recogAddCharstrLabels(recog);
|
||
|
return pixaaFlattenToPixa(recog->pixaa_u, NULL, L_CLONE);
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief recogAddCharstrLabels()
|
||
|
*
|
||
|
* \param[in] recog
|
||
|
* \return 0 if OK, 1 on error
|
||
|
*/
|
||
|
static l_int32
|
||
|
recogAddCharstrLabels(L_RECOG *recog)
|
||
|
{
|
||
|
char *text;
|
||
|
l_int32 i, j, n1, n2;
|
||
|
PIX *pix;
|
||
|
PIXA *pixa;
|
||
|
PIXAA *paa;
|
||
|
|
||
|
PROCNAME("recogAddCharstrLabels");
|
||
|
|
||
|
if (!recog)
|
||
|
return ERROR_INT("recog not defined", procName, 1);
|
||
|
|
||
|
/* Add the labels to each unscaled pix */
|
||
|
paa = recog->pixaa_u;
|
||
|
n1 = pixaaGetCount(paa, NULL);
|
||
|
for (i = 0; i < n1; i++) {
|
||
|
pixa = pixaaGetPixa(paa, i, L_CLONE);
|
||
|
text = sarrayGetString(recog->sa_text, i, L_NOCOPY);
|
||
|
n2 = pixaGetCount(pixa);
|
||
|
for (j = 0; j < n2; j++) {
|
||
|
pix = pixaGetPix(pixa, j, L_CLONE);
|
||
|
pixSetText(pix, text);
|
||
|
pixDestroy(&pix);
|
||
|
}
|
||
|
pixaDestroy(&pixa);
|
||
|
}
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief recogAddAllSamples()
|
||
|
*
|
||
|
* \param[in] precog addr of recog
|
||
|
* \param[in] paa pixaa from previously trained recog
|
||
|
* \param[in] debug
|
||
|
* \return 0 if OK, 1 on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) On error, the input recog is destroyed.
|
||
|
* (2) This is used with the serialization routine recogRead(),
|
||
|
* where each pixa in the pixaa represents a set of characters
|
||
|
* in a different class. Before calling this function, we have
|
||
|
* verified that the number of character classes, given by the
|
||
|
* setsize field in %recog, equals the number of pixa in the paa.
|
||
|
* The character labels for each set are in the sa_text field.
|
||
|
* </pre>
|
||
|
*/
|
||
|
static l_int32
|
||
|
recogAddAllSamples(L_RECOG **precog,
|
||
|
PIXAA *paa,
|
||
|
l_int32 debug)
|
||
|
{
|
||
|
char *text;
|
||
|
l_int32 i, j, nc, ns;
|
||
|
PIX *pix;
|
||
|
PIXA *pixa, *pixa1;
|
||
|
L_RECOG *recog;
|
||
|
|
||
|
PROCNAME("recogAddAllSamples");
|
||
|
|
||
|
if (!precog)
|
||
|
return ERROR_INT("&recog not defined", procName, 1);
|
||
|
if ((recog = *precog) == NULL)
|
||
|
return ERROR_INT("recog not defined", procName, 1);
|
||
|
if (!paa) {
|
||
|
recogDestroy(&recog);
|
||
|
return ERROR_INT("paa not defined", procName, 1);
|
||
|
}
|
||
|
|
||
|
nc = pixaaGetCount(paa, NULL);
|
||
|
for (i = 0; i < nc; i++) {
|
||
|
pixa = pixaaGetPixa(paa, i, L_CLONE);
|
||
|
ns = pixaGetCount(pixa);
|
||
|
text = sarrayGetString(recog->sa_text, i, L_NOCOPY);
|
||
|
pixa1 = pixaCreate(ns);
|
||
|
pixaaAddPixa(recog->pixaa_u, pixa1, L_INSERT);
|
||
|
for (j = 0; j < ns; j++) {
|
||
|
pix = pixaGetPix(pixa, j, L_CLONE);
|
||
|
if (debug) fprintf(stderr, "pix[%d,%d]: text = %s\n", i, j, text);
|
||
|
pixaaAddPix(recog->pixaa_u, i, pix, NULL, L_INSERT);
|
||
|
}
|
||
|
pixaDestroy(&pixa);
|
||
|
}
|
||
|
|
||
|
recogTrainingFinished(&recog, 0, -1, -1.0); /* For second parameter,
|
||
|
see comment in recogRead() */
|
||
|
if (!recog)
|
||
|
return ERROR_INT("bad templates; recog destroyed", procName, 1);
|
||
|
return 0;
|
||
|
}
|