mirror of http://192.168.1.51:8099/lmh188/twain3.0
1014 lines
40 KiB
C
1014 lines
40 KiB
C
/*====================================================================*
|
|
- Copyright (C) 2001 Leptonica. All rights reserved.
|
|
-
|
|
- Redistribution and use in source and binary forms, with or without
|
|
- modification, are permitted provided that the following conditions
|
|
- are met:
|
|
- 1. Redistributions of source code must retain the above copyright
|
|
- notice, this list of conditions and the following disclaimer.
|
|
- 2. Redistributions in binary form must reproduce the above
|
|
- copyright notice, this list of conditions and the following
|
|
- disclaimer in the documentation and/or other materials
|
|
- provided with the distribution.
|
|
-
|
|
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
- ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
|
|
- CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
- PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
- OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*====================================================================*/
|
|
|
|
/*!
|
|
* \file binarize.c
|
|
* <pre>
|
|
*
|
|
* ===================================================================
|
|
* Image binarization algorithms are found in:
|
|
* grayquant.c: standard, simple, general grayscale quantization
|
|
* adaptmap.c: local adaptive; mostly gray-to-gray in preparation
|
|
* for binarization
|
|
* binarize.c: special binarization methods, locally adaptive and
|
|
* global.
|
|
* ===================================================================
|
|
*
|
|
* Adaptive Otsu-based thresholding
|
|
* l_int32 pixOtsuAdaptiveThreshold() 8 bpp
|
|
*
|
|
* Otsu thresholding on adaptive background normalization
|
|
* PIX *pixOtsuThreshOnBackgroundNorm() 8 bpp
|
|
*
|
|
* Masking and Otsu estimate on adaptive background normalization
|
|
* PIX *pixMaskedThreshOnBackgroundNorm() 8 bpp
|
|
*
|
|
* Sauvola local thresholding
|
|
* l_int32 pixSauvolaBinarizeTiled()
|
|
* l_int32 pixSauvolaBinarize()
|
|
* static PIX *pixSauvolaGetThreshold()
|
|
* static PIX *pixApplyLocalThreshold();
|
|
*
|
|
* Thresholding using connected components
|
|
* PIX *pixThresholdByConnComp()
|
|
*
|
|
* Notes:
|
|
* (1) pixOtsuAdaptiveThreshold() computes a global threshold over each
|
|
* tile and performs the threshold operation, resulting in a
|
|
* binary image for each tile. These are stitched into the
|
|
* final result.
|
|
* (2) pixOtsuThreshOnBackgroundNorm() and
|
|
* pixMaskedThreshOnBackgroundNorm() are binarization functions
|
|
* that use background normalization with other techniques.
|
|
* (3) Sauvola binarization computes a local threshold based on
|
|
* the local average and square average. It takes two constants:
|
|
* the window size for the measurement at each pixel and a
|
|
* parameter that determines the amount of normalized local
|
|
* standard deviation to subtract from the local average value.
|
|
* (4) pixThresholdByCC() uses the numbers of 4 and 8 connected
|
|
* components at different thresholding to determine if a
|
|
* global threshold can be used (for text or line-art) and the
|
|
* value it should have.
|
|
* </pre>
|
|
*/
|
|
|
|
#include <math.h>
|
|
#include "allheaders.h"
|
|
|
|
static PIX *pixSauvolaGetThreshold(PIX *pixm, PIX *pixms, l_float32 factor,
|
|
PIX **ppixsd);
|
|
static PIX *pixApplyLocalThreshold(PIX *pixs, PIX *pixth);
|
|
|
|
/*------------------------------------------------------------------*
|
|
* Adaptive Otsu-based thresholding *
|
|
*------------------------------------------------------------------*/
|
|
/*!
|
|
* \brief pixOtsuAdaptiveThreshold()
|
|
*
|
|
* \param[in] pixs 8 bpp
|
|
* \param[in] sx, sy desired tile dimensions; actual size may vary
|
|
* \param[in] smoothx, smoothy half-width of convolution kernel applied to
|
|
* threshold array: use 0 for no smoothing
|
|
* \param[in] scorefract fraction of the max Otsu score; typ. 0.1;
|
|
* use 0.0 for standard Otsu
|
|
* \param[out] ppixth [optional] array of threshold values
|
|
* found for each tile
|
|
* \param[out] ppixd [optional] thresholded input pixs,
|
|
* based on the threshold array
|
|
* \return 0 if OK, 1 on error
|
|
*
|
|
* <pre>
|
|
* Notes:
|
|
* (1) The Otsu method finds a single global threshold for an image.
|
|
* This function allows a locally adapted threshold to be
|
|
* found for each tile into which the image is broken up.
|
|
* (2) The array of threshold values, one for each tile, constitutes
|
|
* a highly downscaled image. This array is optionally
|
|
* smoothed using a convolution. The full width and height of the
|
|
* convolution kernel are (2 * %smoothx + 1) and (2 * %smoothy + 1).
|
|
* (3) The minimum tile dimension allowed is 16. If such small
|
|
* tiles are used, it is recommended to use smoothing, because
|
|
* without smoothing, each small tile determines the splitting
|
|
* threshold independently. A tile that is entirely in the
|
|
* image bg will then hallucinate fg, resulting in a very noisy
|
|
* binarization. The smoothing should be large enough that no
|
|
* tile is only influenced by one type (fg or bg) of pixels,
|
|
* because it will force a split of its pixels.
|
|
* (4) To get a single global threshold for the entire image, use
|
|
* input values of %sx and %sy that are larger than the image.
|
|
* For this situation, the smoothing parameters are ignored.
|
|
* (5) The threshold values partition the image pixels into two classes:
|
|
* one whose values are less than the threshold and another
|
|
* whose values are greater than or equal to the threshold.
|
|
* This is the same use of 'threshold' as in pixThresholdToBinary().
|
|
* (6) The scorefract is the fraction of the maximum Otsu score, which
|
|
* is used to determine the range over which the histogram minimum
|
|
* is searched. See numaSplitDistribution() for details on the
|
|
* underlying method of choosing a threshold.
|
|
* (7) This uses enables a modified version of the Otsu criterion for
|
|
* splitting the distribution of pixels in each tile into a
|
|
* fg and bg part. The modification consists of searching for
|
|
* a minimum in the histogram over a range of pixel values where
|
|
* the Otsu score is within a defined fraction, %scorefract,
|
|
* of the max score. To get the original Otsu algorithm, set
|
|
* %scorefract == 0.
|
|
* (8) N.B. This method is NOT recommended for images with weak text
|
|
* and significant background noise, such as bleedthrough, because
|
|
* of the problem noted in (3) above for tiling. Use Sauvola.
|
|
* </pre>
|
|
*/
|
|
l_ok
|
|
pixOtsuAdaptiveThreshold(PIX *pixs,
|
|
l_int32 sx,
|
|
l_int32 sy,
|
|
l_int32 smoothx,
|
|
l_int32 smoothy,
|
|
l_float32 scorefract,
|
|
PIX **ppixth,
|
|
PIX **ppixd)
|
|
{
|
|
l_int32 w, h, nx, ny, i, j, thresh;
|
|
l_uint32 val;
|
|
PIX *pixt, *pixb, *pixthresh, *pixth, *pixd;
|
|
PIXTILING *pt;
|
|
|
|
PROCNAME("pixOtsuAdaptiveThreshold");
|
|
|
|
if (!ppixth && !ppixd)
|
|
return ERROR_INT("neither &pixth nor &pixd defined", procName, 1);
|
|
if (ppixth) *ppixth = NULL;
|
|
if (ppixd) *ppixd = NULL;
|
|
if (!pixs || pixGetDepth(pixs) != 8)
|
|
return ERROR_INT("pixs not defined or not 8 bpp", procName, 1);
|
|
if (sx < 16 || sy < 16)
|
|
return ERROR_INT("sx and sy must be >= 16", procName, 1);
|
|
|
|
/* Compute the threshold array for the tiles */
|
|
pixGetDimensions(pixs, &w, &h, NULL);
|
|
nx = L_MAX(1, w / sx);
|
|
ny = L_MAX(1, h / sy);
|
|
smoothx = L_MIN(smoothx, (nx - 1) / 2);
|
|
smoothy = L_MIN(smoothy, (ny - 1) / 2);
|
|
pt = pixTilingCreate(pixs, nx, ny, 0, 0, 0, 0);
|
|
pixthresh = pixCreate(nx, ny, 8);
|
|
for (i = 0; i < ny; i++) {
|
|
for (j = 0; j < nx; j++) {
|
|
pixt = pixTilingGetTile(pt, i, j);
|
|
pixSplitDistributionFgBg(pixt, scorefract, 1, &thresh,
|
|
NULL, NULL, NULL);
|
|
pixSetPixel(pixthresh, j, i, thresh); /* see note (4) */
|
|
pixDestroy(&pixt);
|
|
}
|
|
}
|
|
|
|
/* Optionally smooth the threshold array */
|
|
if (smoothx > 0 || smoothy > 0)
|
|
pixth = pixBlockconv(pixthresh, smoothx, smoothy);
|
|
else
|
|
pixth = pixClone(pixthresh);
|
|
pixDestroy(&pixthresh);
|
|
|
|
/* Optionally apply the threshold array to binarize pixs */
|
|
if (ppixd) {
|
|
pixd = pixCreate(w, h, 1);
|
|
pixCopyResolution(pixd, pixs);
|
|
for (i = 0; i < ny; i++) {
|
|
for (j = 0; j < nx; j++) {
|
|
pixt = pixTilingGetTile(pt, i, j);
|
|
pixGetPixel(pixth, j, i, &val);
|
|
pixb = pixThresholdToBinary(pixt, val);
|
|
pixTilingPaintTile(pixd, i, j, pixb, pt);
|
|
pixDestroy(&pixt);
|
|
pixDestroy(&pixb);
|
|
}
|
|
}
|
|
*ppixd = pixd;
|
|
}
|
|
|
|
if (ppixth)
|
|
*ppixth = pixth;
|
|
else
|
|
pixDestroy(&pixth);
|
|
|
|
pixTilingDestroy(&pt);
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*------------------------------------------------------------------*
|
|
* Otsu thresholding on adaptive background normalization *
|
|
*------------------------------------------------------------------*/
|
|
/*!
|
|
* \brief pixOtsuThreshOnBackgroundNorm()
|
|
*
|
|
* \param[in] pixs 8 bpp grayscale; not colormapped
|
|
* \param[in] pixim [optional] 1 bpp 'image' mask; can be null
|
|
* \param[in] sx, sy tile size in pixels
|
|
* \param[in] thresh threshold for determining foreground
|
|
* \param[in] mincount min threshold on counts in a tile
|
|
* \param[in] bgval target bg val; typ. > 128
|
|
* \param[in] smoothx half-width of block convolution kernel width
|
|
* \param[in] smoothy half-width of block convolution kernel height
|
|
* \param[in] scorefract fraction of the max Otsu score; typ. 0.1
|
|
* \param[out] pthresh [optional] threshold value that was
|
|
* used on the normalized image
|
|
* \return pixd 1 bpp thresholded image, or NULL on error
|
|
*
|
|
* <pre>
|
|
* Notes:
|
|
* (1) This does background normalization followed by Otsu
|
|
* thresholding. Otsu binarization attempts to split the
|
|
* image into two roughly equal sets of pixels, and it does
|
|
* a very poor job when there are large amounts of dark
|
|
* background. By doing a background normalization first,
|
|
* to get the background near 255, we remove this problem.
|
|
* Then we use a modified Otsu to estimate the best global
|
|
* threshold on the normalized image.
|
|
* (2) See pixBackgroundNorm() for meaning and typical values
|
|
* of input parameters. For a start, you can try:
|
|
* sx, sy = 10, 15
|
|
* thresh = 100
|
|
* mincount = 50
|
|
* bgval = 255
|
|
* smoothx, smoothy = 2
|
|
* </pre>
|
|
*/
|
|
PIX *
|
|
pixOtsuThreshOnBackgroundNorm(PIX *pixs,
|
|
PIX *pixim,
|
|
l_int32 sx,
|
|
l_int32 sy,
|
|
l_int32 thresh,
|
|
l_int32 mincount,
|
|
l_int32 bgval,
|
|
l_int32 smoothx,
|
|
l_int32 smoothy,
|
|
l_float32 scorefract,
|
|
l_int32 *pthresh)
|
|
{
|
|
l_int32 w, h;
|
|
l_uint32 val;
|
|
PIX *pixn, *pixt, *pixd;
|
|
|
|
PROCNAME("pixOtsuThreshOnBackgroundNorm");
|
|
|
|
if (pthresh) *pthresh = 0;
|
|
if (!pixs || pixGetDepth(pixs) != 8)
|
|
return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL);
|
|
if (pixGetColormap(pixs))
|
|
return (PIX *)ERROR_PTR("pixs is colormapped", procName, NULL);
|
|
if (sx < 4 || sy < 4)
|
|
return (PIX *)ERROR_PTR("sx and sy must be >= 4", procName, NULL);
|
|
if (mincount > sx * sy) {
|
|
L_WARNING("mincount too large for tile size\n", procName);
|
|
mincount = (sx * sy) / 3;
|
|
}
|
|
|
|
pixn = pixBackgroundNorm(pixs, pixim, NULL, sx, sy, thresh,
|
|
mincount, bgval, smoothx, smoothy);
|
|
if (!pixn)
|
|
return (PIX *)ERROR_PTR("pixn not made", procName, NULL);
|
|
|
|
/* Just use 1 tile for a global threshold, which is stored
|
|
* as a single pixel in pixt. */
|
|
pixGetDimensions(pixn, &w, &h, NULL);
|
|
pixOtsuAdaptiveThreshold(pixn, w, h, 0, 0, scorefract, &pixt, &pixd);
|
|
pixDestroy(&pixn);
|
|
|
|
if (pixt && pthresh) {
|
|
pixGetPixel(pixt, 0, 0, &val);
|
|
*pthresh = val;
|
|
}
|
|
pixDestroy(&pixt);
|
|
|
|
if (!pixd)
|
|
return (PIX *)ERROR_PTR("pixd not made", procName, NULL);
|
|
else
|
|
return pixd;
|
|
}
|
|
|
|
|
|
|
|
/*----------------------------------------------------------------------*
|
|
* Masking and Otsu estimate on adaptive background normalization *
|
|
*----------------------------------------------------------------------*/
|
|
/*!
|
|
* \brief pixMaskedThreshOnBackgroundNorm()
|
|
*
|
|
* \param[in] pixs 8 bpp grayscale; not colormapped
|
|
* \param[in] pixim [optional] 1 bpp 'image' mask; can be null
|
|
* \param[in] sx, sy tile size in pixels
|
|
* \param[in] thresh threshold for determining foreground
|
|
* \param[in] mincount min threshold on counts in a tile
|
|
* \param[in] smoothx half-width of block convolution kernel width
|
|
* \param[in] smoothy half-width of block convolution kernel height
|
|
* \param[in] scorefract fraction of the max Otsu score; typ. ~ 0.1
|
|
* \param[out] pthresh [optional] threshold value that was
|
|
* used on the normalized image
|
|
* \return pixd 1 bpp thresholded image, or NULL on error
|
|
*
|
|
* <pre>
|
|
* Notes:
|
|
* (1) This begins with a standard background normalization.
|
|
* Additionally, there is a flexible background norm, that
|
|
* will adapt to a rapidly varying background, and this
|
|
* puts white pixels in the background near regions with
|
|
* significant foreground. The white pixels are turned into
|
|
* a 1 bpp selection mask by binarization followed by dilation.
|
|
* Otsu thresholding is performed on the input image to get an
|
|
* estimate of the threshold in the non-mask regions.
|
|
* The background normalized image is thresholded with two
|
|
* different values, and the result is combined using
|
|
* the selection mask.
|
|
* (2) Note that the numbers 255 (for bgval target) and 190 (for
|
|
* thresholding on pixn) are tied together, and explicitly
|
|
* defined in this function.
|
|
* (3) See pixBackgroundNorm() for meaning and typical values
|
|
* of input parameters. For a start, you can try:
|
|
* sx, sy = 10, 15
|
|
* thresh = 100
|
|
* mincount = 50
|
|
* smoothx, smoothy = 2
|
|
* </pre>
|
|
*/
|
|
PIX *
|
|
pixMaskedThreshOnBackgroundNorm(PIX *pixs,
|
|
PIX *pixim,
|
|
l_int32 sx,
|
|
l_int32 sy,
|
|
l_int32 thresh,
|
|
l_int32 mincount,
|
|
l_int32 smoothx,
|
|
l_int32 smoothy,
|
|
l_float32 scorefract,
|
|
l_int32 *pthresh)
|
|
{
|
|
l_int32 w, h, highthresh;
|
|
l_uint32 val;
|
|
PIX *pixn, *pixm, *pixd, *pix1, *pix2, *pix3, *pix4;
|
|
|
|
PROCNAME("pixMaskedThreshOnBackgroundNorm");
|
|
|
|
if (pthresh) *pthresh = 0;
|
|
if (!pixs || pixGetDepth(pixs) != 8)
|
|
return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL);
|
|
if (pixGetColormap(pixs))
|
|
return (PIX *)ERROR_PTR("pixs is colormapped", procName, NULL);
|
|
if (sx < 4 || sy < 4)
|
|
return (PIX *)ERROR_PTR("sx and sy must be >= 4", procName, NULL);
|
|
if (mincount > sx * sy) {
|
|
L_WARNING("mincount too large for tile size\n", procName);
|
|
mincount = (sx * sy) / 3;
|
|
}
|
|
|
|
/* Standard background normalization */
|
|
pixn = pixBackgroundNorm(pixs, pixim, NULL, sx, sy, thresh,
|
|
mincount, 255, smoothx, smoothy);
|
|
if (!pixn)
|
|
return (PIX *)ERROR_PTR("pixn not made", procName, NULL);
|
|
|
|
/* Special background normalization for adaptation to quickly
|
|
* varying background. Threshold on the very light parts,
|
|
* which tend to be near significant edges, and dilate to
|
|
* form a mask over regions that are typically text. The
|
|
* dilation size is chosen to cover the text completely,
|
|
* except for very thick fonts. */
|
|
pix1 = pixBackgroundNormFlex(pixs, 7, 7, 1, 1, 20);
|
|
pix2 = pixThresholdToBinary(pix1, 240);
|
|
pixInvert(pix2, pix2);
|
|
pixm = pixMorphSequence(pix2, "d21.21", 0);
|
|
pixDestroy(&pix1);
|
|
pixDestroy(&pix2);
|
|
|
|
/* Use Otsu to get a global threshold estimate for the image,
|
|
* which is stored as a single pixel in pix3. */
|
|
pixGetDimensions(pixs, &w, &h, NULL);
|
|
pixOtsuAdaptiveThreshold(pixs, w, h, 0, 0, scorefract, &pix3, NULL);
|
|
pixGetPixel(pix3, 0, 0, &val);
|
|
if (pthresh) *pthresh = val;
|
|
pixDestroy(&pix3);
|
|
|
|
/* Threshold the background normalized images differentially,
|
|
* using a high value correlated with the background normalization
|
|
* for the part of the image under the mask (i.e., near the
|
|
* darker, thicker foreground), and a value that depends on the Otsu
|
|
* threshold for the rest of the image. This gives a solid
|
|
* (high) thresholding for the foreground parts of the image,
|
|
* while allowing the background and light foreground to be
|
|
* reasonably well cleaned using a threshold adapted to the
|
|
* input image. */
|
|
highthresh = L_MIN(256, val + 30);
|
|
pixd = pixThresholdToBinary(pixn, highthresh); /* for bg and light fg */
|
|
pix4 = pixThresholdToBinary(pixn, 190); /* for heavier fg */
|
|
pixCombineMasked(pixd, pix4, pixm);
|
|
pixDestroy(&pix4);
|
|
pixDestroy(&pixm);
|
|
pixDestroy(&pixn);
|
|
|
|
if (!pixd)
|
|
return (PIX *)ERROR_PTR("pixd not made", procName, NULL);
|
|
else
|
|
return pixd;
|
|
}
|
|
|
|
|
|
/*----------------------------------------------------------------------*
|
|
* Sauvola binarization *
|
|
*----------------------------------------------------------------------*/
|
|
/*!
|
|
* \brief pixSauvolaBinarizeTiled()
|
|
*
|
|
* \param[in] pixs 8 bpp grayscale, not colormapped
|
|
* \param[in] whsize window half-width for measuring local statistics
|
|
* \param[in] factor factor for reducing threshold due to variance; >= 0
|
|
* \param[in] nx, ny subdivision into tiles; >= 1
|
|
* \param[out] ppixth [optional] Sauvola threshold values
|
|
* \param[out] ppixd [optional] thresholded image
|
|
* \return 0 if OK, 1 on error
|
|
*
|
|
* <pre>
|
|
* Notes:
|
|
* (1) The window width and height are 2 * %whsize + 1. The minimum
|
|
* value for %whsize is 2; typically it is >= 7..
|
|
* (2) For nx == ny == 1, this defaults to pixSauvolaBinarize().
|
|
* (3) Why a tiled version?
|
|
* (a) Because the mean value accumulator is a uint32, overflow
|
|
* can occur for an image with more than 16M pixels.
|
|
* (b) The mean value accumulator array for 16M pixels is 64 MB.
|
|
* The mean square accumulator array for 16M pixels is 128 MB.
|
|
* Using tiles reduces the size of these arrays.
|
|
* (c) Each tile can be processed independently, in parallel,
|
|
* on a multicore processor.
|
|
* (4) The Sauvola threshold is determined from the formula:
|
|
* t = m * (1 - k * (1 - s / 128))
|
|
* See pixSauvolaBinarize() for details.
|
|
* </pre>
|
|
*/
|
|
l_ok
|
|
pixSauvolaBinarizeTiled(PIX *pixs,
|
|
l_int32 whsize,
|
|
l_float32 factor,
|
|
l_int32 nx,
|
|
l_int32 ny,
|
|
PIX **ppixth,
|
|
PIX **ppixd)
|
|
{
|
|
l_int32 i, j, w, h, xrat, yrat;
|
|
PIX *pixth, *pixd, *tileth, *tiled, *pixt;
|
|
PIX **ptileth, **ptiled;
|
|
PIXTILING *pt;
|
|
|
|
PROCNAME("pixSauvolaBinarizeTiled");
|
|
|
|
if (!ppixth && !ppixd)
|
|
return ERROR_INT("no outputs", procName, 1);
|
|
if (ppixth) *ppixth = NULL;
|
|
if (ppixd) *ppixd = NULL;
|
|
if (!pixs || pixGetDepth(pixs) != 8)
|
|
return ERROR_INT("pixs undefined or not 8 bpp", procName, 1);
|
|
if (pixGetColormap(pixs))
|
|
return ERROR_INT("pixs is cmapped", procName, 1);
|
|
pixGetDimensions(pixs, &w, &h, NULL);
|
|
if (whsize < 2)
|
|
return ERROR_INT("whsize must be >= 2", procName, 1);
|
|
if (w < 2 * whsize + 3 || h < 2 * whsize + 3)
|
|
return ERROR_INT("whsize too large for image", procName, 1);
|
|
if (factor < 0.0)
|
|
return ERROR_INT("factor must be >= 0", procName, 1);
|
|
|
|
if (nx <= 1 && ny <= 1)
|
|
return pixSauvolaBinarize(pixs, whsize, factor, 1, NULL, NULL,
|
|
ppixth, ppixd);
|
|
|
|
/* Test to see if the tiles are too small. The required
|
|
* condition is that the tile dimensions must be at least
|
|
* (whsize + 2) x (whsize + 2). */
|
|
xrat = w / nx;
|
|
yrat = h / ny;
|
|
if (xrat < whsize + 2) {
|
|
nx = w / (whsize + 2);
|
|
L_WARNING("tile width too small; nx reduced to %d\n", procName, nx);
|
|
}
|
|
if (yrat < whsize + 2) {
|
|
ny = h / (whsize + 2);
|
|
L_WARNING("tile height too small; ny reduced to %d\n", procName, ny);
|
|
}
|
|
if (nx <= 1 && ny <= 1)
|
|
return pixSauvolaBinarize(pixs, whsize, factor, 1, NULL, NULL,
|
|
ppixth, ppixd);
|
|
|
|
/* We can use pixtiling for painting both outputs, if requested */
|
|
if (ppixth) {
|
|
pixth = pixCreateNoInit(w, h, 8);
|
|
*ppixth = pixth;
|
|
}
|
|
if (ppixd) {
|
|
pixd = pixCreateNoInit(w, h, 1);
|
|
*ppixd = pixd;
|
|
}
|
|
pt = pixTilingCreate(pixs, nx, ny, 0, 0, whsize + 1, whsize + 1);
|
|
pixTilingNoStripOnPaint(pt); /* pixSauvolaBinarize() does the stripping */
|
|
|
|
for (i = 0; i < ny; i++) {
|
|
for (j = 0; j < nx; j++) {
|
|
pixt = pixTilingGetTile(pt, i, j);
|
|
ptileth = (ppixth) ? &tileth : NULL;
|
|
ptiled = (ppixd) ? &tiled : NULL;
|
|
pixSauvolaBinarize(pixt, whsize, factor, 0, NULL, NULL,
|
|
ptileth, ptiled);
|
|
if (ppixth) { /* do not strip */
|
|
pixTilingPaintTile(pixth, i, j, tileth, pt);
|
|
pixDestroy(&tileth);
|
|
}
|
|
if (ppixd) {
|
|
pixTilingPaintTile(pixd, i, j, tiled, pt);
|
|
pixDestroy(&tiled);
|
|
}
|
|
pixDestroy(&pixt);
|
|
}
|
|
}
|
|
|
|
pixTilingDestroy(&pt);
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*!
|
|
* \brief pixSauvolaBinarize()
|
|
*
|
|
* \param[in] pixs 8 bpp grayscale; not colormapped
|
|
* \param[in] whsize window half-width for measuring local statistics
|
|
* \param[in] factor factor for reducing threshold due to variance; >= 0
|
|
* \param[in] addborder 1 to add border of width (%whsize + 1) on all sides
|
|
* \param[out] ppixm [optional] local mean values
|
|
* \param[out] ppixsd [optional] local standard deviation values
|
|
* \param[out] ppixth [optional] threshold values
|
|
* \param[out] ppixd [optional] thresholded image
|
|
* \return 0 if OK, 1 on error
|
|
*
|
|
* <pre>
|
|
* Notes:
|
|
* (1) The window width and height are 2 * %whsize + 1. The minimum
|
|
* value for %whsize is 2; typically it is >= 7..
|
|
* (2) The local statistics, measured over the window, are the
|
|
* average and standard deviation.
|
|
* (3) The measurements of the mean and standard deviation are
|
|
* performed inside a border of (%whsize + 1) pixels. If pixs does
|
|
* not have these added border pixels, use %addborder = 1 to add
|
|
* it here; otherwise use %addborder = 0.
|
|
* (4) The Sauvola threshold is determined from the formula:
|
|
* t = m * (1 - k * (1 - s / 128))
|
|
* where:
|
|
* t = local threshold
|
|
* m = local mean
|
|
* k = %factor (>= 0) [ typ. 0.35 ]
|
|
* s = local standard deviation, which is maximized at
|
|
* 127.5 when half the samples are 0 and half are 255.
|
|
* (5) The basic idea of Niblack and Sauvola binarization is that
|
|
* the local threshold should be less than the median value,
|
|
* and the larger the variance, the closer to the median
|
|
* it should be chosen. Typical values for k are between
|
|
* 0.2 and 0.5.
|
|
* </pre>
|
|
*/
|
|
l_ok
|
|
pixSauvolaBinarize(PIX *pixs,
|
|
l_int32 whsize,
|
|
l_float32 factor,
|
|
l_int32 addborder,
|
|
PIX **ppixm,
|
|
PIX **ppixsd,
|
|
PIX **ppixth,
|
|
PIX **ppixd)
|
|
{
|
|
l_int32 w, h;
|
|
PIX *pixg, *pixsc, *pixm, *pixms, *pixth, *pixd;
|
|
|
|
PROCNAME("pixSauvolaBinarize");
|
|
|
|
if (ppixm) *ppixm = NULL;
|
|
if (ppixsd) *ppixsd = NULL;
|
|
if (ppixth) *ppixth = NULL;
|
|
if (ppixd) *ppixd = NULL;
|
|
if (!ppixm && !ppixsd && !ppixth && !ppixd)
|
|
return ERROR_INT("no outputs", procName, 1);
|
|
if (!pixs || pixGetDepth(pixs) != 8)
|
|
return ERROR_INT("pixs undefined or not 8 bpp", procName, 1);
|
|
if (pixGetColormap(pixs))
|
|
return ERROR_INT("pixs is cmapped", procName, 1);
|
|
pixGetDimensions(pixs, &w, &h, NULL);
|
|
if (whsize < 2)
|
|
return ERROR_INT("whsize must be >= 2", procName, 1);
|
|
if (w < 2 * whsize + 3 || h < 2 * whsize + 3)
|
|
return ERROR_INT("whsize too large for image", procName, 1);
|
|
if (factor < 0.0)
|
|
return ERROR_INT("factor must be >= 0", procName, 1);
|
|
|
|
if (addborder) {
|
|
pixg = pixAddMirroredBorder(pixs, whsize + 1, whsize + 1,
|
|
whsize + 1, whsize + 1);
|
|
pixsc = pixClone(pixs);
|
|
} else {
|
|
pixg = pixClone(pixs);
|
|
pixsc = pixRemoveBorder(pixs, whsize + 1);
|
|
}
|
|
if (!pixg || !pixsc)
|
|
return ERROR_INT("pixg and pixsc not made", procName, 1);
|
|
|
|
/* All these functions strip off the border pixels. */
|
|
if (ppixm || ppixth || ppixd)
|
|
pixm = pixWindowedMean(pixg, whsize, whsize, 1, 1);
|
|
if (ppixsd || ppixth || ppixd)
|
|
pixms = pixWindowedMeanSquare(pixg, whsize, whsize, 1);
|
|
if (ppixth || ppixd)
|
|
pixth = pixSauvolaGetThreshold(pixm, pixms, factor, ppixsd);
|
|
if (ppixd) {
|
|
pixd = pixApplyLocalThreshold(pixsc, pixth);
|
|
pixCopyResolution(pixd, pixs);
|
|
}
|
|
|
|
if (ppixm)
|
|
*ppixm = pixm;
|
|
else
|
|
pixDestroy(&pixm);
|
|
pixDestroy(&pixms);
|
|
if (ppixth)
|
|
*ppixth = pixth;
|
|
else
|
|
pixDestroy(&pixth);
|
|
if (ppixd)
|
|
*ppixd = pixd;
|
|
pixDestroy(&pixg);
|
|
pixDestroy(&pixsc);
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*!
|
|
* \brief pixSauvolaGetThreshold()
|
|
*
|
|
* \param[in] pixm 8 bpp grayscale; not colormapped
|
|
* \param[in] pixms 32 bpp
|
|
* \param[in] factor factor for reducing threshold due to variance; >= 0
|
|
* \param[out] ppixsd [optional] local standard deviation
|
|
* \return pixd 8 bpp, sauvola threshold values, or NULL on error
|
|
*
|
|
* <pre>
|
|
* Notes:
|
|
* (1) The Sauvola threshold is determined from the formula:
|
|
* t = m * (1 - k * (1 - s / 128))
|
|
* where:
|
|
* t = local threshold
|
|
* m = local mean
|
|
* k = %factor (>= 0) [ typ. 0.35 ]
|
|
* s = local standard deviation, which is maximized at
|
|
* 127.5 when half the samples are 0 and half are 255.
|
|
* (2) See pixSauvolaBinarize() for other details.
|
|
* (3) Important definitions and relations for computing averages:
|
|
* v == pixel value
|
|
* E(p) == expected value of p == average of p over some pixel set
|
|
* S(v) == square of v == v * v
|
|
* mv == E(v) == expected pixel value == mean value
|
|
* ms == E(S(v)) == expected square of pixel values
|
|
* == mean square value
|
|
* var == variance == expected square of deviation from mean
|
|
* == E(S(v - mv)) = E(S(v) - 2 * S(v * mv) + S(mv))
|
|
* = E(S(v)) - S(mv)
|
|
* = ms - mv * mv
|
|
* s == standard deviation = sqrt(var)
|
|
* So for evaluating the standard deviation in the Sauvola
|
|
* threshold, we take
|
|
* s = sqrt(ms - mv * mv)
|
|
* </pre>
|
|
*/
|
|
static PIX *
|
|
pixSauvolaGetThreshold(PIX *pixm,
|
|
PIX *pixms,
|
|
l_float32 factor,
|
|
PIX **ppixsd)
|
|
{
|
|
l_int32 i, j, w, h, tabsize, wplm, wplms, wplsd, wpld, usetab;
|
|
l_int32 mv, ms, var, thresh;
|
|
l_uint32 *datam, *datams, *datasd, *datad;
|
|
l_uint32 *linem, *linems, *linesd, *lined;
|
|
l_float32 sd;
|
|
l_float32 *tab; /* of 2^16 square roots */
|
|
PIX *pixsd, *pixd;
|
|
|
|
PROCNAME("pixSauvolaGetThreshold");
|
|
|
|
if (ppixsd) *ppixsd = NULL;
|
|
if (!pixm || pixGetDepth(pixm) != 8)
|
|
return (PIX *)ERROR_PTR("pixm undefined or not 8 bpp", procName, NULL);
|
|
if (pixGetColormap(pixm))
|
|
return (PIX *)ERROR_PTR("pixm is colormapped", procName, NULL);
|
|
if (!pixms || pixGetDepth(pixms) != 32)
|
|
return (PIX *)ERROR_PTR("pixms undefined or not 32 bpp",
|
|
procName, NULL);
|
|
if (factor < 0.0)
|
|
return (PIX *)ERROR_PTR("factor must be >= 0", procName, NULL);
|
|
|
|
/* Only make a table of 2^16 square roots if there
|
|
* are enough pixels to justify it. */
|
|
pixGetDimensions(pixm, &w, &h, NULL);
|
|
usetab = (w * h > 100000) ? 1 : 0;
|
|
if (usetab) {
|
|
tabsize = 1 << 16;
|
|
tab = (l_float32 *)LEPT_CALLOC(tabsize, sizeof(l_float32));
|
|
for (i = 0; i < tabsize; i++)
|
|
tab[i] = sqrtf((l_float32)i);
|
|
}
|
|
|
|
pixd = pixCreate(w, h, 8);
|
|
if (ppixsd) {
|
|
pixsd = pixCreate(w, h, 8);
|
|
*ppixsd = pixsd;
|
|
}
|
|
datam = pixGetData(pixm);
|
|
datams = pixGetData(pixms);
|
|
if (ppixsd) datasd = pixGetData(pixsd);
|
|
datad = pixGetData(pixd);
|
|
wplm = pixGetWpl(pixm);
|
|
wplms = pixGetWpl(pixms);
|
|
if (ppixsd) wplsd = pixGetWpl(pixsd);
|
|
wpld = pixGetWpl(pixd);
|
|
for (i = 0; i < h; i++) {
|
|
linem = datam + i * wplm;
|
|
linems = datams + i * wplms;
|
|
if (ppixsd) linesd = datasd + i * wplsd;
|
|
lined = datad + i * wpld;
|
|
for (j = 0; j < w; j++) {
|
|
mv = GET_DATA_BYTE(linem, j);
|
|
ms = linems[j];
|
|
var = ms - mv * mv;
|
|
if (usetab)
|
|
sd = tab[var];
|
|
else
|
|
sd = sqrtf((l_float32)var);
|
|
if (ppixsd) SET_DATA_BYTE(linesd, j, (l_int32)sd);
|
|
thresh = (l_int32)(mv * (1.0 - factor * (1.0 - sd / 128.)));
|
|
SET_DATA_BYTE(lined, j, thresh);
|
|
}
|
|
}
|
|
|
|
if (usetab) LEPT_FREE(tab);
|
|
return pixd;
|
|
}
|
|
|
|
|
|
/*!
|
|
* \brief pixApplyLocalThreshold()
|
|
*
|
|
* \param[in] pixs 8 bpp grayscale; not colormapped
|
|
* \param[in] pixth 8 bpp array of local thresholds
|
|
* \return pixd 1 bpp, thresholded image, or NULL on error
|
|
*/
|
|
static PIX *
|
|
pixApplyLocalThreshold(PIX *pixs,
|
|
PIX *pixth)
|
|
{
|
|
l_int32 i, j, w, h, wpls, wplt, wpld, vals, valt;
|
|
l_uint32 *datas, *datat, *datad, *lines, *linet, *lined;
|
|
PIX *pixd;
|
|
|
|
PROCNAME("pixApplyLocalThreshold");
|
|
|
|
if (!pixs || pixGetDepth(pixs) != 8)
|
|
return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL);
|
|
if (pixGetColormap(pixs))
|
|
return (PIX *)ERROR_PTR("pixs is colormapped", procName, NULL);
|
|
if (!pixth || pixGetDepth(pixth) != 8)
|
|
return (PIX *)ERROR_PTR("pixth undefined or not 8 bpp", procName, NULL);
|
|
|
|
pixGetDimensions(pixs, &w, &h, NULL);
|
|
pixd = pixCreate(w, h, 1);
|
|
datas = pixGetData(pixs);
|
|
datat = pixGetData(pixth);
|
|
datad = pixGetData(pixd);
|
|
wpls = pixGetWpl(pixs);
|
|
wplt = pixGetWpl(pixth);
|
|
wpld = pixGetWpl(pixd);
|
|
for (i = 0; i < h; i++) {
|
|
lines = datas + i * wpls;
|
|
linet = datat + i * wplt;
|
|
lined = datad + i * wpld;
|
|
for (j = 0; j < w; j++) {
|
|
vals = GET_DATA_BYTE(lines, j);
|
|
valt = GET_DATA_BYTE(linet, j);
|
|
if (vals < valt)
|
|
SET_DATA_BIT(lined, j);
|
|
}
|
|
}
|
|
|
|
return pixd;
|
|
}
|
|
|
|
|
|
/*----------------------------------------------------------------------*
|
|
* Thresholding using connected components *
|
|
*----------------------------------------------------------------------*/
|
|
/*!
|
|
* \brief pixThresholdByConnComp()
|
|
*
|
|
* \param[in] pixs depth > 1, colormap OK
|
|
* \param[in] pixm [optional] 1 bpp mask giving region to ignore
|
|
* by setting pixels to white; use NULL if no mask
|
|
* \param[in] start, end, incr binarization threshold levels to test
|
|
* \param[in] thresh48 threshold on normalized difference between the
|
|
* numbers of 4 and 8 connected components
|
|
* \param[in] threshdiff threshold on normalized difference between the
|
|
* number of 4 cc at successive iterations
|
|
* \param[out] pglobthresh [optional] best global threshold; 0
|
|
* if no threshold is found
|
|
* \param[out] ppixd [optional] image thresholded to binary, or
|
|
* null if no threshold is found
|
|
* \param[in] debugflag 1 for plotted results
|
|
* \return 0 if OK, 1 on error or if no threshold is found
|
|
*
|
|
* <pre>
|
|
* Notes:
|
|
* (1) This finds a global threshold based on connected components.
|
|
* Although slow, it is reasonable to use it in a situation where
|
|
* (a) the background in the image is relatively uniform, and
|
|
* (b) the result will be fed to an OCR program that accepts 1 bpp
|
|
* images and works best with easily segmented characters.
|
|
* The reason for (b) is that this selects a threshold with a
|
|
* minimum number of both broken characters and merged characters.
|
|
* (2) If the pix has color, it is converted to gray using the
|
|
* max component.
|
|
* (3) Input 0 to use default values for any of these inputs:
|
|
* %start, %end, %incr, %thresh48, %threshdiff.
|
|
* (4) This approach can be understood as follows. When the
|
|
* binarization threshold is varied, the numbers of c.c. identify
|
|
* four regimes:
|
|
* (a) For low thresholds, text is broken into small pieces, and
|
|
* the number of c.c. is large, with the 4 c.c. significantly
|
|
* exceeding the 8 c.c.
|
|
* (b) As the threshold rises toward the optimum value, the text
|
|
* characters coalesce and there is very little difference
|
|
* between the numbers of 4 and 8 c.c, which both go
|
|
* through a minimum.
|
|
* (c) Above this, the image background gets noisy because some
|
|
* pixels are(thresholded to foreground, and the numbers
|
|
* of c.c. quickly increase, with the 4 c.c. significantly
|
|
* larger than the 8 c.c.
|
|
* (d) At even higher thresholds, the image background noise
|
|
* coalesces as it becomes mostly foreground, and the
|
|
* number of c.c. drops quickly.
|
|
* (5) If there is no global threshold that distinguishes foreground
|
|
* text from background (e.g., weak text over a background that
|
|
* has significant variation and/or bleedthrough), this returns 1,
|
|
* which the caller should check.
|
|
* </pre>
|
|
*/
|
|
l_ok
|
|
pixThresholdByConnComp(PIX *pixs,
|
|
PIX *pixm,
|
|
l_int32 start,
|
|
l_int32 end,
|
|
l_int32 incr,
|
|
l_float32 thresh48,
|
|
l_float32 threshdiff,
|
|
l_int32 *pglobthresh,
|
|
PIX **ppixd,
|
|
l_int32 debugflag)
|
|
{
|
|
l_int32 i, thresh, n, n4, n8, mincounts, found, globthresh;
|
|
l_float32 count4, count8, firstcount4, prevcount4, diff48, diff4;
|
|
GPLOT *gplot;
|
|
NUMA *na4, *na8;
|
|
PIX *pix1, *pix2, *pix3;
|
|
|
|
PROCNAME("pixThresholdByConnComp");
|
|
|
|
if (pglobthresh) *pglobthresh = 0;
|
|
if (ppixd) *ppixd = NULL;
|
|
if (!pixs || pixGetDepth(pixs) == 1)
|
|
return ERROR_INT("pixs undefined or 1 bpp", procName, 1);
|
|
if (pixm && pixGetDepth(pixm) != 1)
|
|
return ERROR_INT("pixm must be 1 bpp", procName, 1);
|
|
|
|
/* Assign default values if requested */
|
|
if (start <= 0) start = 80;
|
|
if (end <= 0) end = 200;
|
|
if (incr <= 0) incr = 10;
|
|
if (thresh48 <= 0.0) thresh48 = 0.01;
|
|
if (threshdiff <= 0.0) threshdiff = 0.01;
|
|
if (start > end)
|
|
return ERROR_INT("invalid start,end", procName, 1);
|
|
|
|
/* Make 8 bpp, using the max component if color. */
|
|
if (pixGetColormap(pixs))
|
|
pix1 = pixRemoveColormap(pixs, REMOVE_CMAP_BASED_ON_SRC);
|
|
else
|
|
pix1 = pixClone(pixs);
|
|
if (pixGetDepth(pix1) == 32)
|
|
pix2 = pixConvertRGBToGrayMinMax(pix1, L_CHOOSE_MAX);
|
|
else
|
|
pix2 = pixConvertTo8(pix1, 0);
|
|
pixDestroy(&pix1);
|
|
|
|
/* Mask out any non-text regions. Do this in-place, because pix2
|
|
* can never be the same pix as pixs. */
|
|
if (pixm)
|
|
pixSetMasked(pix2, pixm, 255);
|
|
|
|
/* Make sure there are enough components to get a valid signal */
|
|
pix3 = pixConvertTo1(pix2, start);
|
|
pixCountConnComp(pix3, 4, &n4);
|
|
pixDestroy(&pix3);
|
|
mincounts = 500;
|
|
if (n4 < mincounts) {
|
|
L_INFO("Insufficient component count: %d\n", procName, n4);
|
|
pixDestroy(&pix2);
|
|
return 1;
|
|
}
|
|
|
|
/* Compute the c.c. data */
|
|
na4 = numaCreate(0);
|
|
na8 = numaCreate(0);
|
|
numaSetParameters(na4, start, incr);
|
|
numaSetParameters(na8, start, incr);
|
|
for (thresh = start, i = 0; thresh <= end; thresh += incr, i++) {
|
|
pix3 = pixConvertTo1(pix2, thresh);
|
|
pixCountConnComp(pix3, 4, &n4);
|
|
pixCountConnComp(pix3, 8, &n8);
|
|
numaAddNumber(na4, n4);
|
|
numaAddNumber(na8, n8);
|
|
pixDestroy(&pix3);
|
|
}
|
|
if (debugflag) {
|
|
gplot = gplotCreate("/tmp/threshroot", GPLOT_PNG,
|
|
"number of cc vs. threshold",
|
|
"threshold", "number of cc");
|
|
gplotAddPlot(gplot, NULL, na4, GPLOT_LINES, "plot 4cc");
|
|
gplotAddPlot(gplot, NULL, na8, GPLOT_LINES, "plot 8cc");
|
|
gplotMakeOutput(gplot);
|
|
gplotDestroy(&gplot);
|
|
}
|
|
|
|
n = numaGetCount(na4);
|
|
found = FALSE;
|
|
for (i = 0; i < n; i++) {
|
|
if (i == 0) {
|
|
numaGetFValue(na4, i, &firstcount4);
|
|
prevcount4 = firstcount4;
|
|
} else {
|
|
numaGetFValue(na4, i, &count4);
|
|
numaGetFValue(na8, i, &count8);
|
|
diff48 = (count4 - count8) / firstcount4;
|
|
diff4 = L_ABS(prevcount4 - count4) / firstcount4;
|
|
if (debugflag) {
|
|
fprintf(stderr, "diff48 = %7.3f, diff4 = %7.3f\n",
|
|
diff48, diff4);
|
|
}
|
|
if (diff48 < thresh48 && diff4 < threshdiff) {
|
|
found = TRUE;
|
|
break;
|
|
}
|
|
prevcount4 = count4;
|
|
}
|
|
}
|
|
numaDestroy(&na4);
|
|
numaDestroy(&na8);
|
|
|
|
if (found) {
|
|
globthresh = start + i * incr;
|
|
if (pglobthresh) *pglobthresh = globthresh;
|
|
if (ppixd) {
|
|
*ppixd = pixConvertTo1(pix2, globthresh);
|
|
pixCopyResolution(*ppixd, pixs);
|
|
}
|
|
if (debugflag) fprintf(stderr, "global threshold = %d\n", globthresh);
|
|
pixDestroy(&pix2);
|
|
return 0;
|
|
}
|
|
|
|
if (debugflag) fprintf(stderr, "no global threshold found\n");
|
|
pixDestroy(&pix2);
|
|
return 1;
|
|
}
|