mirror of http://192.168.1.51:8099/lmh188/twain3.0
1966 lines
59 KiB
C
1966 lines
59 KiB
C
|
/*====================================================================*
|
||
|
- Copyright (C) 2001 Leptonica. All rights reserved.
|
||
|
-
|
||
|
- Redistribution and use in source and binary forms, with or without
|
||
|
- modification, are permitted provided that the following conditions
|
||
|
- are met:
|
||
|
- 1. Redistributions of source code must retain the above copyright
|
||
|
- notice, this list of conditions and the following disclaimer.
|
||
|
- 2. Redistributions in binary form must reproduce the above
|
||
|
- copyright notice, this list of conditions and the following
|
||
|
- disclaimer in the documentation and/or other materials
|
||
|
- provided with the distribution.
|
||
|
-
|
||
|
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||
|
- ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||
|
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||
|
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
|
||
|
- CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||
|
- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||
|
- PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||
|
- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||
|
- OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||
|
- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||
|
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||
|
*====================================================================*/
|
||
|
|
||
|
/*!
|
||
|
* \file sarray1.c
|
||
|
* <pre>
|
||
|
*
|
||
|
* Create/Destroy/Copy
|
||
|
* SARRAY *sarrayCreate()
|
||
|
* SARRAY *sarrayCreateInitialized()
|
||
|
* SARRAY *sarrayCreateWordsFromString()
|
||
|
* SARRAY *sarrayCreateLinesFromString()
|
||
|
* void *sarrayDestroy()
|
||
|
* SARRAY *sarrayCopy()
|
||
|
* SARRAY *sarrayClone()
|
||
|
*
|
||
|
* Add/Remove string
|
||
|
* l_int32 sarrayAddString()
|
||
|
* static l_int32 sarrayExtendArray()
|
||
|
* char *sarrayRemoveString()
|
||
|
* l_int32 sarrayReplaceString()
|
||
|
* l_int32 sarrayClear()
|
||
|
*
|
||
|
* Accessors
|
||
|
* l_int32 sarrayGetCount()
|
||
|
* char **sarrayGetArray()
|
||
|
* char *sarrayGetString()
|
||
|
* l_int32 sarrayGetRefcount()
|
||
|
* l_int32 sarrayChangeRefcount()
|
||
|
*
|
||
|
* Conversion back to string
|
||
|
* char *sarrayToString()
|
||
|
* char *sarrayToStringRange()
|
||
|
*
|
||
|
* Join 2 sarrays
|
||
|
* l_int32 sarrayJoin()
|
||
|
* l_int32 sarrayAppendRange()
|
||
|
*
|
||
|
* Pad an sarray to be the same size as another sarray
|
||
|
* l_int32 sarrayPadToSameSize()
|
||
|
*
|
||
|
* Convert word sarray to (formatted) line sarray
|
||
|
* SARRAY *sarrayConvertWordsToLines()
|
||
|
*
|
||
|
* Split string on separator list
|
||
|
* SARRAY *sarraySplitString()
|
||
|
*
|
||
|
* Filter sarray
|
||
|
* SARRAY *sarraySelectBySubstring()
|
||
|
* SARRAY *sarraySelectByRange()
|
||
|
* l_int32 sarrayParseRange()
|
||
|
*
|
||
|
* Serialize for I/O
|
||
|
* SARRAY *sarrayRead()
|
||
|
* SARRAY *sarrayReadStream()
|
||
|
* SARRAY *sarrayReadMem()
|
||
|
* l_int32 sarrayWrite()
|
||
|
* l_int32 sarrayWriteStream()
|
||
|
* l_int32 sarrayWriteMem()
|
||
|
* l_int32 sarrayAppend()
|
||
|
*
|
||
|
* Directory filenames
|
||
|
* SARRAY *getNumberedPathnamesInDirectory()
|
||
|
* SARRAY *getSortedPathnamesInDirectory()
|
||
|
* SARRAY *convertSortedToNumberedPathnames()
|
||
|
* SARRAY *getFilenamesInDirectory()
|
||
|
*
|
||
|
* These functions are important for efficient manipulation
|
||
|
* of string data, and they have found widespread use in
|
||
|
* leptonica. For example:
|
||
|
* (1) to generate text files: e.g., PostScript and PDF
|
||
|
* wrappers around sets of images
|
||
|
* (2) to parse text files: e.g., extracting prototypes
|
||
|
* from the source to generate allheaders.h
|
||
|
* (3) to generate code for compilation: e.g., the fast
|
||
|
* dwa code for arbitrary structuring elements.
|
||
|
*
|
||
|
* Comments on usage:
|
||
|
*
|
||
|
* The user is responsible for correctly disposing of strings
|
||
|
* that have been extracted from sarrays. In the following,
|
||
|
* "str_not_owned" means the returned handle does not own the string,
|
||
|
* and "str_owned" means the returned handle owns the string.
|
||
|
* - To extract a string from an Sarray in order to inspect it
|
||
|
* or to make a copy of it later, get a handle to it:
|
||
|
* copyflag = L_NOCOPY.
|
||
|
* In this case, you must neither free the string nor put it
|
||
|
* directly in another array:
|
||
|
* str-not-owned = sarrayGetString(sa, index, L_NOCOPY);
|
||
|
* - To extract a copy of a string from an Sarray, use:
|
||
|
* str-owned = sarrayGetString(sa, index, L_COPY);
|
||
|
* - To insert a string that is in one array into another
|
||
|
* array (always leaving the first array intact), there are
|
||
|
* two options:
|
||
|
* (1) use copyflag = L_COPY to make an immediate copy,
|
||
|
* which you then add to the second array by insertion:
|
||
|
* str-owned = sarrayGetString(sa, index, L_COPY);
|
||
|
* sarrayAddString(sa, str-owned, L_INSERT);
|
||
|
* (2) use copyflag = L_NOCOPY to get another handle to
|
||
|
* the string; you then add a copy of it to the
|
||
|
* second string array:
|
||
|
* str-not-owned = sarrayGetString(sa, index, L_NOCOPY);
|
||
|
* sarrayAddString(sa, str-not-owned, L_COPY).
|
||
|
* sarrayAddString() transfers ownership to the Sarray, so never
|
||
|
* use L_INSERT if the string is owned by another array.
|
||
|
*
|
||
|
* In all cases, when you use copyflag = L_COPY to extract
|
||
|
* a string from an array, you must either free it
|
||
|
* or insert it in an array that will be freed later.
|
||
|
* </pre>
|
||
|
*/
|
||
|
|
||
|
#include <string.h>
|
||
|
#ifndef _WIN32
|
||
|
#include <dirent.h> /* unix only */
|
||
|
#include <sys/stat.h>
|
||
|
#include <limits.h> /* needed for realpath() */
|
||
|
#include <stdlib.h> /* needed for realpath() */
|
||
|
#endif /* ! _WIN32 */
|
||
|
#include "allheaders.h"
|
||
|
|
||
|
static const l_uint32 MaxPtrArraySize = 100000;
|
||
|
static const l_int32 InitialPtrArraySize = 50; /*!< n'importe quoi */
|
||
|
|
||
|
/* Static functions */
|
||
|
static l_int32 sarrayExtendArray(SARRAY *sa);
|
||
|
|
||
|
|
||
|
/*--------------------------------------------------------------------------*
|
||
|
* String array create/destroy/copy/extend *
|
||
|
*--------------------------------------------------------------------------*/
|
||
|
/*!
|
||
|
* \brief sarrayCreate()
|
||
|
*
|
||
|
* \param[in] n size of string ptr array to be alloc'd; use 0 for default
|
||
|
* \return sarray, or NULL on error
|
||
|
*/
|
||
|
SARRAY *
|
||
|
sarrayCreate(l_int32 n)
|
||
|
{
|
||
|
SARRAY *sa;
|
||
|
|
||
|
PROCNAME("sarrayCreate");
|
||
|
|
||
|
if (n <= 0 || n > MaxPtrArraySize)
|
||
|
n = InitialPtrArraySize;
|
||
|
|
||
|
sa = (SARRAY *)LEPT_CALLOC(1, sizeof(SARRAY));
|
||
|
if ((sa->array = (char **)LEPT_CALLOC(n, sizeof(char *))) == NULL) {
|
||
|
sarrayDestroy(&sa);
|
||
|
return (SARRAY *)ERROR_PTR("ptr array not made", procName, NULL);
|
||
|
}
|
||
|
|
||
|
sa->nalloc = n;
|
||
|
sa->n = 0;
|
||
|
sa->refcount = 1;
|
||
|
return sa;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief sarrayCreateInitialized()
|
||
|
*
|
||
|
* \param[in] n size of string ptr array to be alloc'd
|
||
|
* \param[in] initstr string to be initialized on the full array
|
||
|
* \return sarray, or NULL on error
|
||
|
*/
|
||
|
SARRAY *
|
||
|
sarrayCreateInitialized(l_int32 n,
|
||
|
const char *initstr)
|
||
|
{
|
||
|
l_int32 i;
|
||
|
SARRAY *sa;
|
||
|
|
||
|
PROCNAME("sarrayCreateInitialized");
|
||
|
|
||
|
if (n <= 0)
|
||
|
return (SARRAY *)ERROR_PTR("n must be > 0", procName, NULL);
|
||
|
if (!initstr)
|
||
|
return (SARRAY *)ERROR_PTR("initstr not defined", procName, NULL);
|
||
|
|
||
|
sa = sarrayCreate(n);
|
||
|
for (i = 0; i < n; i++)
|
||
|
sarrayAddString(sa, initstr, L_COPY);
|
||
|
return sa;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief sarrayCreateWordsFromString()
|
||
|
*
|
||
|
* \param[in] string
|
||
|
* \return sarray, or NULL on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) This finds the number of word substrings, creates an sarray
|
||
|
* of this size, and puts copies of each substring into the sarray.
|
||
|
* </pre>
|
||
|
*/
|
||
|
SARRAY *
|
||
|
sarrayCreateWordsFromString(const char *string)
|
||
|
{
|
||
|
char separators[] = " \n\t";
|
||
|
l_int32 i, nsub, size, inword;
|
||
|
SARRAY *sa;
|
||
|
|
||
|
PROCNAME("sarrayCreateWordsFromString");
|
||
|
|
||
|
if (!string)
|
||
|
return (SARRAY *)ERROR_PTR("textstr not defined", procName, NULL);
|
||
|
|
||
|
/* Find the number of words */
|
||
|
size = strlen(string);
|
||
|
nsub = 0;
|
||
|
inword = FALSE;
|
||
|
for (i = 0; i < size; i++) {
|
||
|
if (inword == FALSE &&
|
||
|
(string[i] != ' ' && string[i] != '\t' && string[i] != '\n')) {
|
||
|
inword = TRUE;
|
||
|
nsub++;
|
||
|
} else if (inword == TRUE &&
|
||
|
(string[i] == ' ' || string[i] == '\t' || string[i] == '\n')) {
|
||
|
inword = FALSE;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if ((sa = sarrayCreate(nsub)) == NULL)
|
||
|
return (SARRAY *)ERROR_PTR("sa not made", procName, NULL);
|
||
|
sarraySplitString(sa, string, separators);
|
||
|
|
||
|
return sa;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief sarrayCreateLinesFromString()
|
||
|
*
|
||
|
* \param[in] string
|
||
|
* \param[in] blankflag 0 to exclude blank lines; 1 to include
|
||
|
* \return sarray, or NULL on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) This finds the number of line substrings, each of which
|
||
|
* ends with a newline, and puts a copy of each substring
|
||
|
* in a new sarray.
|
||
|
* (2) The newline characters are removed from each substring.
|
||
|
* </pre>
|
||
|
*/
|
||
|
SARRAY *
|
||
|
sarrayCreateLinesFromString(const char *string,
|
||
|
l_int32 blankflag)
|
||
|
{
|
||
|
l_int32 i, nsub, size, startptr;
|
||
|
char *cstring, *substring;
|
||
|
SARRAY *sa;
|
||
|
|
||
|
PROCNAME("sarrayCreateLinesFromString");
|
||
|
|
||
|
if (!string)
|
||
|
return (SARRAY *)ERROR_PTR("textstr not defined", procName, NULL);
|
||
|
|
||
|
/* Find the number of lines */
|
||
|
size = strlen(string);
|
||
|
nsub = 0;
|
||
|
for (i = 0; i < size; i++) {
|
||
|
if (string[i] == '\n')
|
||
|
nsub++;
|
||
|
}
|
||
|
|
||
|
if ((sa = sarrayCreate(nsub)) == NULL)
|
||
|
return (SARRAY *)ERROR_PTR("sa not made", procName, NULL);
|
||
|
|
||
|
if (blankflag) { /* keep blank lines as null strings */
|
||
|
/* Make a copy for munging */
|
||
|
if ((cstring = stringNew(string)) == NULL) {
|
||
|
sarrayDestroy(&sa);
|
||
|
return (SARRAY *)ERROR_PTR("cstring not made", procName, NULL);
|
||
|
}
|
||
|
/* We'll insert nulls like strtok */
|
||
|
startptr = 0;
|
||
|
for (i = 0; i < size; i++) {
|
||
|
if (cstring[i] == '\n') {
|
||
|
cstring[i] = '\0';
|
||
|
if (i > 0 && cstring[i - 1] == '\r')
|
||
|
cstring[i - 1] = '\0'; /* also remove Windows CR */
|
||
|
if ((substring = stringNew(cstring + startptr)) == NULL) {
|
||
|
sarrayDestroy(&sa);
|
||
|
LEPT_FREE(cstring);
|
||
|
return (SARRAY *)ERROR_PTR("substring not made",
|
||
|
procName, NULL);
|
||
|
}
|
||
|
sarrayAddString(sa, substring, L_INSERT);
|
||
|
/* fprintf(stderr, "substring = %s\n", substring); */
|
||
|
startptr = i + 1;
|
||
|
}
|
||
|
}
|
||
|
if (startptr < size) { /* no newline at end of last line */
|
||
|
if ((substring = stringNew(cstring + startptr)) == NULL) {
|
||
|
sarrayDestroy(&sa);
|
||
|
LEPT_FREE(cstring);
|
||
|
return (SARRAY *)ERROR_PTR("substring not made",
|
||
|
procName, NULL);
|
||
|
}
|
||
|
sarrayAddString(sa, substring, L_INSERT);
|
||
|
/* fprintf(stderr, "substring = %s\n", substring); */
|
||
|
}
|
||
|
LEPT_FREE(cstring);
|
||
|
} else { /* remove blank lines; use strtok */
|
||
|
sarraySplitString(sa, string, "\r\n");
|
||
|
}
|
||
|
|
||
|
return sa;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief sarrayDestroy()
|
||
|
*
|
||
|
* \param[in,out] psa will be set to null before returning
|
||
|
* \return void
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) Decrements the ref count and, if 0, destroys the sarray.
|
||
|
* (2) Always nulls the input ptr.
|
||
|
* </pre>
|
||
|
*/
|
||
|
void
|
||
|
sarrayDestroy(SARRAY **psa)
|
||
|
{
|
||
|
l_int32 i;
|
||
|
SARRAY *sa;
|
||
|
|
||
|
PROCNAME("sarrayDestroy");
|
||
|
|
||
|
if (psa == NULL) {
|
||
|
L_WARNING("ptr address is NULL!\n", procName);
|
||
|
return;
|
||
|
}
|
||
|
if ((sa = *psa) == NULL)
|
||
|
return;
|
||
|
|
||
|
sarrayChangeRefcount(sa, -1);
|
||
|
if (sarrayGetRefcount(sa) <= 0) {
|
||
|
if (sa->array) {
|
||
|
for (i = 0; i < sa->n; i++) {
|
||
|
if (sa->array[i])
|
||
|
LEPT_FREE(sa->array[i]);
|
||
|
}
|
||
|
LEPT_FREE(sa->array);
|
||
|
}
|
||
|
LEPT_FREE(sa);
|
||
|
}
|
||
|
|
||
|
*psa = NULL;
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief sarrayCopy()
|
||
|
*
|
||
|
* \param[in] sa string array
|
||
|
* \return copy of sarray, or NULL on error
|
||
|
*/
|
||
|
SARRAY *
|
||
|
sarrayCopy(SARRAY *sa)
|
||
|
{
|
||
|
l_int32 i;
|
||
|
SARRAY *csa;
|
||
|
|
||
|
PROCNAME("sarrayCopy");
|
||
|
|
||
|
if (!sa)
|
||
|
return (SARRAY *)ERROR_PTR("sa not defined", procName, NULL);
|
||
|
|
||
|
if ((csa = sarrayCreate(sa->nalloc)) == NULL)
|
||
|
return (SARRAY *)ERROR_PTR("csa not made", procName, NULL);
|
||
|
|
||
|
for (i = 0; i < sa->n; i++)
|
||
|
sarrayAddString(csa, sa->array[i], L_COPY);
|
||
|
|
||
|
return csa;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief sarrayClone()
|
||
|
*
|
||
|
* \param[in] sa string array
|
||
|
* \return ptr to same sarray, or NULL on error
|
||
|
*/
|
||
|
SARRAY *
|
||
|
sarrayClone(SARRAY *sa)
|
||
|
{
|
||
|
PROCNAME("sarrayClone");
|
||
|
|
||
|
if (!sa)
|
||
|
return (SARRAY *)ERROR_PTR("sa not defined", procName, NULL);
|
||
|
sarrayChangeRefcount(sa, 1);
|
||
|
return sa;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief sarrayAddString()
|
||
|
*
|
||
|
* \param[in] sa string array
|
||
|
* \param[in] string string to be added
|
||
|
* \param[in] copyflag L_INSERT, L_NOCOPY or L_COPY
|
||
|
* \return 0 if OK, 1 on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) See usage comments at the top of this file. L_INSERT is
|
||
|
* equivalent to L_NOCOPY.
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_ok
|
||
|
sarrayAddString(SARRAY *sa,
|
||
|
const char *string,
|
||
|
l_int32 copyflag)
|
||
|
{
|
||
|
l_int32 n;
|
||
|
|
||
|
PROCNAME("sarrayAddString");
|
||
|
|
||
|
if (!sa)
|
||
|
return ERROR_INT("sa not defined", procName, 1);
|
||
|
if (!string)
|
||
|
return ERROR_INT("string not defined", procName, 1);
|
||
|
if (copyflag != L_INSERT && copyflag != L_NOCOPY && copyflag != L_COPY)
|
||
|
return ERROR_INT("invalid copyflag", procName, 1);
|
||
|
|
||
|
n = sarrayGetCount(sa);
|
||
|
if (n >= sa->nalloc)
|
||
|
sarrayExtendArray(sa);
|
||
|
|
||
|
if (copyflag == L_COPY)
|
||
|
sa->array[n] = stringNew(string);
|
||
|
else /* L_INSERT or L_NOCOPY */
|
||
|
sa->array[n] = (char *)string;
|
||
|
sa->n++;
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief sarrayExtendArray()
|
||
|
*
|
||
|
* \param[in] sa string array
|
||
|
* \return 0 if OK, 1 on error
|
||
|
*/
|
||
|
static l_int32
|
||
|
sarrayExtendArray(SARRAY *sa)
|
||
|
{
|
||
|
PROCNAME("sarrayExtendArray");
|
||
|
|
||
|
if (!sa)
|
||
|
return ERROR_INT("sa not defined", procName, 1);
|
||
|
|
||
|
if ((sa->array = (char **)reallocNew((void **)&sa->array,
|
||
|
sizeof(char *) * sa->nalloc,
|
||
|
2 * sizeof(char *) * sa->nalloc)) == NULL)
|
||
|
return ERROR_INT("new ptr array not returned", procName, 1);
|
||
|
|
||
|
sa->nalloc *= 2;
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief sarrayRemoveString()
|
||
|
*
|
||
|
* \param[in] sa string array
|
||
|
* \param[in] index of string within sarray
|
||
|
* \return removed string, or NULL on error
|
||
|
*/
|
||
|
char *
|
||
|
sarrayRemoveString(SARRAY *sa,
|
||
|
l_int32 index)
|
||
|
{
|
||
|
char *string;
|
||
|
char **array;
|
||
|
l_int32 i, n, nalloc;
|
||
|
|
||
|
PROCNAME("sarrayRemoveString");
|
||
|
|
||
|
if (!sa)
|
||
|
return (char *)ERROR_PTR("sa not defined", procName, NULL);
|
||
|
|
||
|
if ((array = sarrayGetArray(sa, &nalloc, &n)) == NULL)
|
||
|
return (char *)ERROR_PTR("array not returned", procName, NULL);
|
||
|
|
||
|
if (index < 0 || index >= n)
|
||
|
return (char *)ERROR_PTR("array index out of bounds", procName, NULL);
|
||
|
|
||
|
string = array[index];
|
||
|
|
||
|
/* If removed string is not at end of array, shift
|
||
|
* to fill in, maintaining original ordering.
|
||
|
* Note: if we didn't care about the order, we could
|
||
|
* put the last string array[n - 1] directly into the hole. */
|
||
|
for (i = index; i < n - 1; i++)
|
||
|
array[i] = array[i + 1];
|
||
|
|
||
|
sa->n--;
|
||
|
return string;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief sarrayReplaceString()
|
||
|
*
|
||
|
* \param[in] sa string array
|
||
|
* \param[in] index of string within sarray to be replaced
|
||
|
* \param[in] newstr string to replace existing one
|
||
|
* \param[in] copyflag L_INSERT, L_COPY
|
||
|
* \return 0 if OK, 1 on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) This destroys an existing string and replaces it with
|
||
|
* the new string or a copy of it.
|
||
|
* (2) By design, an sarray is always compacted, so there are
|
||
|
* never any holes (null ptrs) in the ptr array up to the
|
||
|
* current count.
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_ok
|
||
|
sarrayReplaceString(SARRAY *sa,
|
||
|
l_int32 index,
|
||
|
char *newstr,
|
||
|
l_int32 copyflag)
|
||
|
{
|
||
|
char *str;
|
||
|
l_int32 n;
|
||
|
|
||
|
PROCNAME("sarrayReplaceString");
|
||
|
|
||
|
if (!sa)
|
||
|
return ERROR_INT("sa not defined", procName, 1);
|
||
|
n = sarrayGetCount(sa);
|
||
|
if (index < 0 || index >= n)
|
||
|
return ERROR_INT("array index out of bounds", procName, 1);
|
||
|
if (!newstr)
|
||
|
return ERROR_INT("newstr not defined", procName, 1);
|
||
|
if (copyflag != L_INSERT && copyflag != L_COPY)
|
||
|
return ERROR_INT("invalid copyflag", procName, 1);
|
||
|
|
||
|
LEPT_FREE(sa->array[index]);
|
||
|
if (copyflag == L_INSERT)
|
||
|
str = newstr;
|
||
|
else /* L_COPY */
|
||
|
str = stringNew(newstr);
|
||
|
sa->array[index] = str;
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief sarrayClear()
|
||
|
*
|
||
|
* \param[in] sa string array
|
||
|
* \return 0 if OK; 1 on error
|
||
|
*/
|
||
|
l_ok
|
||
|
sarrayClear(SARRAY *sa)
|
||
|
{
|
||
|
l_int32 i;
|
||
|
|
||
|
PROCNAME("sarrayClear");
|
||
|
|
||
|
if (!sa)
|
||
|
return ERROR_INT("sa not defined", procName, 1);
|
||
|
for (i = 0; i < sa->n; i++) { /* free strings and null ptrs */
|
||
|
LEPT_FREE(sa->array[i]);
|
||
|
sa->array[i] = NULL;
|
||
|
}
|
||
|
sa->n = 0;
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*----------------------------------------------------------------------*
|
||
|
* Accessors *
|
||
|
*----------------------------------------------------------------------*/
|
||
|
/*!
|
||
|
* \brief sarrayGetCount()
|
||
|
*
|
||
|
* \param[in] sa string array
|
||
|
* \return count, or 0 if no strings or on error
|
||
|
*/
|
||
|
l_int32
|
||
|
sarrayGetCount(SARRAY *sa)
|
||
|
{
|
||
|
PROCNAME("sarrayGetCount");
|
||
|
|
||
|
if (!sa)
|
||
|
return ERROR_INT("sa not defined", procName, 0);
|
||
|
return sa->n;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief sarrayGetArray()
|
||
|
*
|
||
|
* \param[in] sa string array
|
||
|
* \param[out] pnalloc [optional] number allocated string ptrs
|
||
|
* \param[out] pn [optional] number allocated strings
|
||
|
* \return ptr to string array, or NULL on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) Caution: the returned array is not a copy, so caller
|
||
|
* must not destroy it!
|
||
|
* </pre>
|
||
|
*/
|
||
|
char **
|
||
|
sarrayGetArray(SARRAY *sa,
|
||
|
l_int32 *pnalloc,
|
||
|
l_int32 *pn)
|
||
|
{
|
||
|
char **array;
|
||
|
|
||
|
PROCNAME("sarrayGetArray");
|
||
|
|
||
|
if (!sa)
|
||
|
return (char **)ERROR_PTR("sa not defined", procName, NULL);
|
||
|
|
||
|
array = sa->array;
|
||
|
if (pnalloc) *pnalloc = sa->nalloc;
|
||
|
if (pn) *pn = sa->n;
|
||
|
|
||
|
return array;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief sarrayGetString()
|
||
|
*
|
||
|
* \param[in] sa string array
|
||
|
* \param[in] index to the index-th string
|
||
|
* \param[in] copyflag L_NOCOPY or L_COPY
|
||
|
* \return string, or NULL on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) See usage comments at the top of this file.
|
||
|
* (2) To get a pointer to the string itself, use L_NOCOPY.
|
||
|
* To get a copy of the string, use L_COPY.
|
||
|
* </pre>
|
||
|
*/
|
||
|
char *
|
||
|
sarrayGetString(SARRAY *sa,
|
||
|
l_int32 index,
|
||
|
l_int32 copyflag)
|
||
|
{
|
||
|
PROCNAME("sarrayGetString");
|
||
|
|
||
|
if (!sa)
|
||
|
return (char *)ERROR_PTR("sa not defined", procName, NULL);
|
||
|
if (index < 0 || index >= sa->n)
|
||
|
return (char *)ERROR_PTR("index not valid", procName, NULL);
|
||
|
if (copyflag != L_NOCOPY && copyflag != L_COPY)
|
||
|
return (char *)ERROR_PTR("invalid copyflag", procName, NULL);
|
||
|
|
||
|
if (copyflag == L_NOCOPY)
|
||
|
return sa->array[index];
|
||
|
else /* L_COPY */
|
||
|
return stringNew(sa->array[index]);
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief sarrayGetRefCount()
|
||
|
*
|
||
|
* \param[in] sa string array
|
||
|
* \return refcount, or UNDEF on error
|
||
|
*/
|
||
|
l_int32
|
||
|
sarrayGetRefcount(SARRAY *sa)
|
||
|
{
|
||
|
PROCNAME("sarrayGetRefcount");
|
||
|
|
||
|
if (!sa)
|
||
|
return ERROR_INT("sa not defined", procName, UNDEF);
|
||
|
return sa->refcount;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief sarrayChangeRefCount()
|
||
|
*
|
||
|
* \param[in] sa string array
|
||
|
* \param[in] delta change to be applied
|
||
|
* \return 0 if OK, 1 on error
|
||
|
*/
|
||
|
l_ok
|
||
|
sarrayChangeRefcount(SARRAY *sa,
|
||
|
l_int32 delta)
|
||
|
{
|
||
|
PROCNAME("sarrayChangeRefcount");
|
||
|
|
||
|
if (!sa)
|
||
|
return ERROR_INT("sa not defined", procName, UNDEF);
|
||
|
sa->refcount += delta;
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*----------------------------------------------------------------------*
|
||
|
* Conversion to string *
|
||
|
*----------------------------------------------------------------------*/
|
||
|
/*!
|
||
|
* \brief sarrayToString()
|
||
|
*
|
||
|
* \param[in] sa string array
|
||
|
* \param[in] addnlflag flag: 0 adds nothing to each substring
|
||
|
* 1 adds '\n' to each substring
|
||
|
* 2 adds ' ' to each substring
|
||
|
* \return dest string, or NULL on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) Concatenates all the strings in the sarray, preserving
|
||
|
* all white space.
|
||
|
* (2) If addnlflag != 0, adds either a '\n' or a ' ' after
|
||
|
* each substring.
|
||
|
* (3) This function was NOT implemented as:
|
||
|
* for (i = 0; i < n; i++)
|
||
|
* strcat(dest, sarrayGetString(sa, i, L_NOCOPY));
|
||
|
* Do you see why?
|
||
|
* </pre>
|
||
|
*/
|
||
|
char *
|
||
|
sarrayToString(SARRAY *sa,
|
||
|
l_int32 addnlflag)
|
||
|
{
|
||
|
PROCNAME("sarrayToString");
|
||
|
|
||
|
if (!sa)
|
||
|
return (char *)ERROR_PTR("sa not defined", procName, NULL);
|
||
|
|
||
|
return sarrayToStringRange(sa, 0, 0, addnlflag);
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief sarrayToStringRange()
|
||
|
*
|
||
|
* \param[in] sa string array
|
||
|
* \param[in] first index of first string to use; starts with 0
|
||
|
* \param[in] nstrings number of strings to append into the result; use
|
||
|
* 0 to append to the end of the sarray
|
||
|
* \param[in] addnlflag flag: 0 adds nothing to each substring
|
||
|
* 1 adds '\n' to each substring
|
||
|
* 2 adds ' ' to each substring
|
||
|
* \return dest string, or NULL on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) Concatenates the specified strings inthe sarray, preserving
|
||
|
* all white space.
|
||
|
* (2) If addnlflag != 0, adds either a '\n' or a ' ' after
|
||
|
* each substring.
|
||
|
* (3) If the sarray is empty, this returns a string with just
|
||
|
* the character corresponding to %addnlflag.
|
||
|
* </pre>
|
||
|
*/
|
||
|
char *
|
||
|
sarrayToStringRange(SARRAY *sa,
|
||
|
l_int32 first,
|
||
|
l_int32 nstrings,
|
||
|
l_int32 addnlflag)
|
||
|
{
|
||
|
char *dest, *src, *str;
|
||
|
l_int32 n, i, last, size, index, len;
|
||
|
|
||
|
PROCNAME("sarrayToStringRange");
|
||
|
|
||
|
if (!sa)
|
||
|
return (char *)ERROR_PTR("sa not defined", procName, NULL);
|
||
|
if (addnlflag != 0 && addnlflag != 1 && addnlflag != 2)
|
||
|
return (char *)ERROR_PTR("invalid addnlflag", procName, NULL);
|
||
|
|
||
|
n = sarrayGetCount(sa);
|
||
|
|
||
|
/* Empty sa; return char corresponding to addnlflag only */
|
||
|
if (n == 0) {
|
||
|
if (first == 0) {
|
||
|
if (addnlflag == 0)
|
||
|
return stringNew("");
|
||
|
if (addnlflag == 1)
|
||
|
return stringNew("\n");
|
||
|
else /* addnlflag == 2) */
|
||
|
return stringNew(" ");
|
||
|
} else {
|
||
|
return (char *)ERROR_PTR("first not valid", procName, NULL);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (first < 0 || first >= n)
|
||
|
return (char *)ERROR_PTR("first not valid", procName, NULL);
|
||
|
if (nstrings == 0 || (nstrings > n - first))
|
||
|
nstrings = n - first; /* no overflow */
|
||
|
last = first + nstrings - 1;
|
||
|
|
||
|
size = 0;
|
||
|
for (i = first; i <= last; i++) {
|
||
|
if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
|
||
|
return (char *)ERROR_PTR("str not found", procName, NULL);
|
||
|
size += strlen(str) + 2;
|
||
|
}
|
||
|
|
||
|
if ((dest = (char *)LEPT_CALLOC(size + 1, sizeof(char))) == NULL)
|
||
|
return (char *)ERROR_PTR("dest not made", procName, NULL);
|
||
|
|
||
|
index = 0;
|
||
|
for (i = first; i <= last; i++) {
|
||
|
src = sarrayGetString(sa, i, L_NOCOPY);
|
||
|
len = strlen(src);
|
||
|
memcpy(dest + index, src, len);
|
||
|
index += len;
|
||
|
if (addnlflag == 1) {
|
||
|
dest[index] = '\n';
|
||
|
index++;
|
||
|
} else if (addnlflag == 2) {
|
||
|
dest[index] = ' ';
|
||
|
index++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return dest;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*----------------------------------------------------------------------*
|
||
|
* Join 2 sarrays *
|
||
|
*----------------------------------------------------------------------*/
|
||
|
/*!
|
||
|
* \brief sarrayJoin()
|
||
|
*
|
||
|
* \param[in] sa1 to be added to
|
||
|
* \param[in] sa2 append to sa1
|
||
|
* \return 0 if OK, 1 on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) Copies of the strings in sarray2 are added to sarray1.
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_ok
|
||
|
sarrayJoin(SARRAY *sa1,
|
||
|
SARRAY *sa2)
|
||
|
{
|
||
|
char *str;
|
||
|
l_int32 n, i;
|
||
|
|
||
|
PROCNAME("sarrayJoin");
|
||
|
|
||
|
if (!sa1)
|
||
|
return ERROR_INT("sa1 not defined", procName, 1);
|
||
|
if (!sa2)
|
||
|
return ERROR_INT("sa2 not defined", procName, 1);
|
||
|
|
||
|
n = sarrayGetCount(sa2);
|
||
|
for (i = 0; i < n; i++) {
|
||
|
str = sarrayGetString(sa2, i, L_NOCOPY);
|
||
|
sarrayAddString(sa1, str, L_COPY);
|
||
|
}
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief sarrayAppendRange()
|
||
|
*
|
||
|
* \param[in] sa1 to be added to
|
||
|
* \param[in] sa2 append specified range of strings in sa2 to sa1
|
||
|
* \param[in] start index of first string of sa2 to append
|
||
|
* \param[in] end index of last string of sa2 to append;
|
||
|
* -1 to append to end of array
|
||
|
* \return 0 if OK, 1 on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) Copies of the strings in sarray2 are added to sarray1.
|
||
|
* (2) The [start ... end] range is truncated if necessary.
|
||
|
* (3) Use end == -1 to append to the end of sa2.
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_ok
|
||
|
sarrayAppendRange(SARRAY *sa1,
|
||
|
SARRAY *sa2,
|
||
|
l_int32 start,
|
||
|
l_int32 end)
|
||
|
{
|
||
|
char *str;
|
||
|
l_int32 n, i;
|
||
|
|
||
|
PROCNAME("sarrayAppendRange");
|
||
|
|
||
|
if (!sa1)
|
||
|
return ERROR_INT("sa1 not defined", procName, 1);
|
||
|
if (!sa2)
|
||
|
return ERROR_INT("sa2 not defined", procName, 1);
|
||
|
|
||
|
if (start < 0)
|
||
|
start = 0;
|
||
|
n = sarrayGetCount(sa2);
|
||
|
if (end < 0 || end >= n)
|
||
|
end = n - 1;
|
||
|
if (start > end)
|
||
|
return ERROR_INT("start > end", procName, 1);
|
||
|
|
||
|
for (i = start; i <= end; i++) {
|
||
|
str = sarrayGetString(sa2, i, L_NOCOPY);
|
||
|
sarrayAddString(sa1, str, L_COPY);
|
||
|
}
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*----------------------------------------------------------------------*
|
||
|
* Pad an sarray to be the same size as another sarray *
|
||
|
*----------------------------------------------------------------------*/
|
||
|
/*!
|
||
|
* \brief sarrayPadToSameSize()
|
||
|
*
|
||
|
* \param[in] sa1, sa2
|
||
|
* \param[in] padstring
|
||
|
* \return 0 if OK, 1 on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) If two sarrays have different size, this adds enough
|
||
|
* instances of %padstring to the smaller so that they are
|
||
|
* the same size. It is useful when two or more sarrays
|
||
|
* are being sequenced in parallel, and it is necessary to
|
||
|
* find a valid string at each index.
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_ok
|
||
|
sarrayPadToSameSize(SARRAY *sa1,
|
||
|
SARRAY *sa2,
|
||
|
const char *padstring)
|
||
|
{
|
||
|
l_int32 i, n1, n2;
|
||
|
|
||
|
PROCNAME("sarrayPadToSameSize");
|
||
|
|
||
|
if (!sa1 || !sa2)
|
||
|
return ERROR_INT("both sa1 and sa2 not defined", procName, 1);
|
||
|
|
||
|
n1 = sarrayGetCount(sa1);
|
||
|
n2 = sarrayGetCount(sa2);
|
||
|
if (n1 < n2) {
|
||
|
for (i = n1; i < n2; i++)
|
||
|
sarrayAddString(sa1, padstring, L_COPY);
|
||
|
} else if (n1 > n2) {
|
||
|
for (i = n2; i < n1; i++)
|
||
|
sarrayAddString(sa2, padstring, L_COPY);
|
||
|
}
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*----------------------------------------------------------------------*
|
||
|
* Convert word sarray to line sarray *
|
||
|
*----------------------------------------------------------------------*/
|
||
|
/*!
|
||
|
* \brief sarrayConvertWordsToLines()
|
||
|
*
|
||
|
* \param[in] sa sa of individual words
|
||
|
* \param[in] linesize max num of chars in each line
|
||
|
* \return saout sa of formatted lines, or NULL on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) This is useful for re-typesetting text to a specific maximum
|
||
|
* line length. The individual words in the input sarray
|
||
|
* are concatenated into textlines. An input word string of zero
|
||
|
* length is taken to be a paragraph separator. Each time
|
||
|
* such a string is found, the current line is ended and
|
||
|
* a new line is also produced that contains just the
|
||
|
* string of zero length "". When the output sarray
|
||
|
* of lines is eventually converted to a string with newlines
|
||
|
* typically appended to each line string, the empty
|
||
|
* strings are just converted to newlines, producing the visible
|
||
|
* paragraph separation.
|
||
|
* (2) What happens when a word is larger than linesize?
|
||
|
* We write it out as a single line anyway! Words preceding
|
||
|
* or following this long word are placed on lines preceding
|
||
|
* or following the line with the long word. Why this choice?
|
||
|
* Long "words" found in text documents are typically URLs, and
|
||
|
* it's often desirable not to put newlines in the middle of a URL.
|
||
|
* The text display program e.g., text editor will typically
|
||
|
* wrap the long "word" to fit in the window.
|
||
|
* </pre>
|
||
|
*/
|
||
|
SARRAY *
|
||
|
sarrayConvertWordsToLines(SARRAY *sa,
|
||
|
l_int32 linesize)
|
||
|
{
|
||
|
char *wd, *strl;
|
||
|
char emptystring[] = "";
|
||
|
l_int32 n, i, len, totlen;
|
||
|
SARRAY *sal, *saout;
|
||
|
|
||
|
PROCNAME("sarrayConvertWordsToLines");
|
||
|
|
||
|
if (!sa)
|
||
|
return (SARRAY *)ERROR_PTR("sa not defined", procName, NULL);
|
||
|
|
||
|
saout = sarrayCreate(0);
|
||
|
n = sarrayGetCount(sa);
|
||
|
totlen = 0;
|
||
|
sal = NULL;
|
||
|
for (i = 0; i < n; i++) {
|
||
|
if (!sal)
|
||
|
sal = sarrayCreate(0);
|
||
|
wd = sarrayGetString(sa, i, L_NOCOPY);
|
||
|
len = strlen(wd);
|
||
|
if (len == 0) { /* end of paragraph: end line & insert blank line */
|
||
|
if (totlen > 0) {
|
||
|
strl = sarrayToString(sal, 2);
|
||
|
sarrayAddString(saout, strl, L_INSERT);
|
||
|
}
|
||
|
sarrayAddString(saout, emptystring, L_COPY);
|
||
|
sarrayDestroy(&sal);
|
||
|
totlen = 0;
|
||
|
} else if (totlen == 0 && len + 1 > linesize) { /* long word! */
|
||
|
sarrayAddString(saout, wd, L_COPY); /* copy to one line */
|
||
|
} else if (totlen + len + 1 > linesize) { /* end line & start new */
|
||
|
strl = sarrayToString(sal, 2);
|
||
|
sarrayAddString(saout, strl, L_INSERT);
|
||
|
sarrayDestroy(&sal);
|
||
|
sal = sarrayCreate(0);
|
||
|
sarrayAddString(sal, wd, L_COPY);
|
||
|
totlen = len + 1;
|
||
|
} else { /* add to current line */
|
||
|
sarrayAddString(sal, wd, L_COPY);
|
||
|
totlen += len + 1;
|
||
|
}
|
||
|
}
|
||
|
if (totlen > 0) { /* didn't end with blank line; output last line */
|
||
|
strl = sarrayToString(sal, 2);
|
||
|
sarrayAddString(saout, strl, L_INSERT);
|
||
|
sarrayDestroy(&sal);
|
||
|
}
|
||
|
|
||
|
return saout;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*----------------------------------------------------------------------*
|
||
|
* Split string on separator list *
|
||
|
*----------------------------------------------------------------------*/
|
||
|
/*
|
||
|
* \brief sarraySplitString()
|
||
|
*
|
||
|
* \param[in] sa to append to; typically empty initially
|
||
|
* \param[in] str string to split; not changed
|
||
|
* \param[in] separators characters that split input string
|
||
|
* \return 0 if OK, 1 on error.
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) This uses strtokSafe(). See the notes there in utils.c.
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_int32
|
||
|
sarraySplitString(SARRAY *sa,
|
||
|
const char *str,
|
||
|
const char *separators)
|
||
|
{
|
||
|
char *cstr, *substr, *saveptr;
|
||
|
|
||
|
PROCNAME("sarraySplitString");
|
||
|
|
||
|
if (!sa)
|
||
|
return ERROR_INT("sa not defined", procName, 1);
|
||
|
if (!str)
|
||
|
return ERROR_INT("str not defined", procName, 1);
|
||
|
if (!separators)
|
||
|
return ERROR_INT("separators not defined", procName, 1);
|
||
|
|
||
|
cstr = stringNew(str); /* preserves const-ness of input str */
|
||
|
saveptr = NULL;
|
||
|
substr = strtokSafe(cstr, separators, &saveptr);
|
||
|
if (substr)
|
||
|
sarrayAddString(sa, substr, L_INSERT);
|
||
|
while ((substr = strtokSafe(NULL, separators, &saveptr)))
|
||
|
sarrayAddString(sa, substr, L_INSERT);
|
||
|
LEPT_FREE(cstr);
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*----------------------------------------------------------------------*
|
||
|
* Filter sarray *
|
||
|
*----------------------------------------------------------------------*/
|
||
|
/*!
|
||
|
* \brief sarraySelectBySubstring()
|
||
|
*
|
||
|
* \param[in] sain input sarray
|
||
|
* \param[in] substr [optional] substring for matching; can be NULL
|
||
|
* \return saout output sarray, filtered with substring or NULL on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) This selects all strings in sain that have substr as a substring.
|
||
|
* Note that we can't use strncmp() because we're looking for
|
||
|
* a match to the substring anywhere within each filename.
|
||
|
* (2) If substr == NULL, returns a copy of the sarray.
|
||
|
* </pre>
|
||
|
*/
|
||
|
SARRAY *
|
||
|
sarraySelectBySubstring(SARRAY *sain,
|
||
|
const char *substr)
|
||
|
{
|
||
|
char *str;
|
||
|
l_int32 n, i, offset, found;
|
||
|
SARRAY *saout;
|
||
|
|
||
|
PROCNAME("sarraySelectBySubstring");
|
||
|
|
||
|
if (!sain)
|
||
|
return (SARRAY *)ERROR_PTR("sain not defined", procName, NULL);
|
||
|
|
||
|
n = sarrayGetCount(sain);
|
||
|
if (!substr || n == 0)
|
||
|
return sarrayCopy(sain);
|
||
|
|
||
|
saout = sarrayCreate(n);
|
||
|
for (i = 0; i < n; i++) {
|
||
|
str = sarrayGetString(sain, i, L_NOCOPY);
|
||
|
arrayFindSequence((l_uint8 *)str, strlen(str), (l_uint8 *)substr,
|
||
|
strlen(substr), &offset, &found);
|
||
|
if (found)
|
||
|
sarrayAddString(saout, str, L_COPY);
|
||
|
}
|
||
|
|
||
|
return saout;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief sarraySelectByRange()
|
||
|
*
|
||
|
* \param[in] sain input sarray
|
||
|
* \param[in] first index of first string to be selected
|
||
|
* \param[in] last index of last string to be selected;
|
||
|
* use 0 to go to the end of the sarray
|
||
|
* \return saout output sarray, or NULL on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) This makes %saout consisting of copies of all strings in %sain
|
||
|
* in the index set [first ... last]. Use %last == 0 to get all
|
||
|
* strings from %first to the last string in the sarray.
|
||
|
* </pre>
|
||
|
*/
|
||
|
SARRAY *
|
||
|
sarraySelectByRange(SARRAY *sain,
|
||
|
l_int32 first,
|
||
|
l_int32 last)
|
||
|
{
|
||
|
char *str;
|
||
|
l_int32 n, i;
|
||
|
SARRAY *saout;
|
||
|
|
||
|
PROCNAME("sarraySelectByRange");
|
||
|
|
||
|
if (!sain)
|
||
|
return (SARRAY *)ERROR_PTR("sain not defined", procName, NULL);
|
||
|
if (first < 0) first = 0;
|
||
|
n = sarrayGetCount(sain);
|
||
|
if (last <= 0) last = n - 1;
|
||
|
if (last >= n) {
|
||
|
L_WARNING("last > n - 1; setting to n - 1\n", procName);
|
||
|
last = n - 1;
|
||
|
}
|
||
|
if (first > last)
|
||
|
return (SARRAY *)ERROR_PTR("first must be >= last", procName, NULL);
|
||
|
|
||
|
saout = sarrayCreate(0);
|
||
|
for (i = first; i <= last; i++) {
|
||
|
str = sarrayGetString(sain, i, L_COPY);
|
||
|
sarrayAddString(saout, str, L_INSERT);
|
||
|
}
|
||
|
|
||
|
return saout;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief sarrayParseRange()
|
||
|
*
|
||
|
* \param[in] sa input sarray
|
||
|
* \param[in] start index to start range search
|
||
|
* \param[out] pactualstart index of actual start; may be > 'start'
|
||
|
* \param[out] pend index of end
|
||
|
* \param[out] pnewstart index of start of next range
|
||
|
* \param[in] substr substring for matching at beginning of string
|
||
|
* \param[in] loc byte offset within the string for the pattern;
|
||
|
* use -1 if the location does not matter.
|
||
|
* \return 0 if valid range found; 1 otherwise
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) This finds the range of the next set of strings in SA,
|
||
|
* beginning the search at 'start', that does NOT have
|
||
|
* the substring 'substr' either at the indicated location
|
||
|
* in the string or anywhere in the string. The input
|
||
|
* variable 'loc' is the specified offset within the string;
|
||
|
* use -1 to indicate 'anywhere in the string'.
|
||
|
* (2) Always check the return value to verify that a valid range
|
||
|
* was found.
|
||
|
* (3) If a valid range is not found, the values of actstart,
|
||
|
* end and newstart are all set to the size of sa.
|
||
|
* (4) If this is the last valid range, newstart returns the value n.
|
||
|
* In use, this should be tested before calling the function.
|
||
|
* (5) Usage example. To find all the valid ranges in a file
|
||
|
* where the invalid lines begin with two dashes, copy each
|
||
|
* line in the file to a string in an sarray, and do:
|
||
|
* start = 0;
|
||
|
* while (!sarrayParseRange(sa, start, &actstart, &end, &start,
|
||
|
* "--", 0))
|
||
|
* fprintf(stderr, "start = %d, end = %d\n", actstart, end);
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_int32
|
||
|
sarrayParseRange(SARRAY *sa,
|
||
|
l_int32 start,
|
||
|
l_int32 *pactualstart,
|
||
|
l_int32 *pend,
|
||
|
l_int32 *pnewstart,
|
||
|
const char *substr,
|
||
|
l_int32 loc)
|
||
|
{
|
||
|
char *str;
|
||
|
l_int32 n, i, offset, found;
|
||
|
|
||
|
PROCNAME("sarrayParseRange");
|
||
|
|
||
|
if (!sa)
|
||
|
return ERROR_INT("sa not defined", procName, 1);
|
||
|
if (!pactualstart || !pend || !pnewstart)
|
||
|
return ERROR_INT("not all range addresses defined", procName, 1);
|
||
|
n = sarrayGetCount(sa);
|
||
|
*pactualstart = *pend = *pnewstart = n;
|
||
|
if (!substr)
|
||
|
return ERROR_INT("substr not defined", procName, 1);
|
||
|
|
||
|
/* Look for the first string without the marker */
|
||
|
if (start < 0 || start >= n)
|
||
|
return 1;
|
||
|
for (i = start; i < n; i++) {
|
||
|
str = sarrayGetString(sa, i, L_NOCOPY);
|
||
|
arrayFindSequence((l_uint8 *)str, strlen(str), (l_uint8 *)substr,
|
||
|
strlen(substr), &offset, &found);
|
||
|
if (loc < 0) {
|
||
|
if (!found) break;
|
||
|
} else {
|
||
|
if (!found || offset != loc) break;
|
||
|
}
|
||
|
}
|
||
|
start = i;
|
||
|
if (i == n) /* couldn't get started */
|
||
|
return 1;
|
||
|
|
||
|
/* Look for the last string without the marker */
|
||
|
*pactualstart = start;
|
||
|
for (i = start + 1; i < n; i++) {
|
||
|
str = sarrayGetString(sa, i, L_NOCOPY);
|
||
|
arrayFindSequence((l_uint8 *)str, strlen(str), (l_uint8 *)substr,
|
||
|
strlen(substr), &offset, &found);
|
||
|
if (loc < 0) {
|
||
|
if (found) break;
|
||
|
} else {
|
||
|
if (found && offset == loc) break;
|
||
|
}
|
||
|
}
|
||
|
*pend = i - 1;
|
||
|
start = i;
|
||
|
if (i == n) /* no further range */
|
||
|
return 0;
|
||
|
|
||
|
/* Look for the first string after *pend without the marker.
|
||
|
* This will start the next run of strings, if it exists. */
|
||
|
for (i = start; i < n; i++) {
|
||
|
str = sarrayGetString(sa, i, L_NOCOPY);
|
||
|
arrayFindSequence((l_uint8 *)str, strlen(str), (l_uint8 *)substr,
|
||
|
strlen(substr), &offset, &found);
|
||
|
if (loc < 0) {
|
||
|
if (!found) break;
|
||
|
} else {
|
||
|
if (!found || offset != loc) break;
|
||
|
}
|
||
|
}
|
||
|
if (i < n)
|
||
|
*pnewstart = i;
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*----------------------------------------------------------------------*
|
||
|
* Serialize for I/O *
|
||
|
*----------------------------------------------------------------------*/
|
||
|
/*!
|
||
|
* \brief sarrayRead()
|
||
|
*
|
||
|
* \param[in] filename
|
||
|
* \return sarray, or NULL on error
|
||
|
*/
|
||
|
SARRAY *
|
||
|
sarrayRead(const char *filename)
|
||
|
{
|
||
|
FILE *fp;
|
||
|
SARRAY *sa;
|
||
|
|
||
|
PROCNAME("sarrayRead");
|
||
|
|
||
|
if (!filename)
|
||
|
return (SARRAY *)ERROR_PTR("filename not defined", procName, NULL);
|
||
|
|
||
|
if ((fp = fopenReadStream(filename)) == NULL)
|
||
|
return (SARRAY *)ERROR_PTR("stream not opened", procName, NULL);
|
||
|
sa = sarrayReadStream(fp);
|
||
|
fclose(fp);
|
||
|
if (!sa)
|
||
|
return (SARRAY *)ERROR_PTR("sa not read", procName, NULL);
|
||
|
return sa;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief sarrayReadStream()
|
||
|
*
|
||
|
* \param[in] fp file stream
|
||
|
* \return sarray, or NULL on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) We store the size of each string along with the string.
|
||
|
* The limit on the number of strings is 2^24.
|
||
|
* The limit on the size of any string is 2^30 bytes.
|
||
|
* (2) This allows a string to have embedded newlines. By reading
|
||
|
* the entire string, as determined by its size, we are
|
||
|
* not affected by any number of embedded newlines.
|
||
|
* </pre>
|
||
|
*/
|
||
|
SARRAY *
|
||
|
sarrayReadStream(FILE *fp)
|
||
|
{
|
||
|
char *stringbuf;
|
||
|
l_int32 i, n, size, index, bufsize, version, ignore, success;
|
||
|
SARRAY *sa;
|
||
|
|
||
|
PROCNAME("sarrayReadStream");
|
||
|
|
||
|
if (!fp)
|
||
|
return (SARRAY *)ERROR_PTR("stream not defined", procName, NULL);
|
||
|
|
||
|
if (fscanf(fp, "\nSarray Version %d\n", &version) != 1)
|
||
|
return (SARRAY *)ERROR_PTR("not an sarray file", procName, NULL);
|
||
|
if (version != SARRAY_VERSION_NUMBER)
|
||
|
return (SARRAY *)ERROR_PTR("invalid sarray version", procName, NULL);
|
||
|
if (fscanf(fp, "Number of strings = %d\n", &n) != 1)
|
||
|
return (SARRAY *)ERROR_PTR("error on # strings", procName, NULL);
|
||
|
if (n > (1 << 24))
|
||
|
return (SARRAY *)ERROR_PTR("more than 2^24 strings!", procName, NULL);
|
||
|
|
||
|
success = TRUE;
|
||
|
if ((sa = sarrayCreate(n)) == NULL)
|
||
|
return (SARRAY *)ERROR_PTR("sa not made", procName, NULL);
|
||
|
bufsize = 512 + 1;
|
||
|
stringbuf = (char *)LEPT_CALLOC(bufsize, sizeof(char));
|
||
|
|
||
|
for (i = 0; i < n; i++) {
|
||
|
/* Get the size of the stored string */
|
||
|
if ((fscanf(fp, "%d[%d]:", &index, &size) != 2) || (size > (1 << 30))) {
|
||
|
success = FALSE;
|
||
|
L_ERROR("error on string size\n", procName);
|
||
|
goto cleanup;
|
||
|
}
|
||
|
/* Expand the string buffer if necessary */
|
||
|
if (size > bufsize - 5) {
|
||
|
LEPT_FREE(stringbuf);
|
||
|
bufsize = (l_int32)(1.5 * size);
|
||
|
stringbuf = (char *)LEPT_CALLOC(bufsize, sizeof(char));
|
||
|
}
|
||
|
/* Read the stored string, plus leading spaces and trailing \n */
|
||
|
if (fread(stringbuf, 1, size + 3, fp) != size + 3) {
|
||
|
success = FALSE;
|
||
|
L_ERROR("error reading string\n", procName);
|
||
|
goto cleanup;
|
||
|
}
|
||
|
/* Remove the \n that was added by sarrayWriteStream() */
|
||
|
stringbuf[size + 2] = '\0';
|
||
|
/* Copy it in, skipping the 2 leading spaces */
|
||
|
sarrayAddString(sa, stringbuf + 2, L_COPY);
|
||
|
}
|
||
|
ignore = fscanf(fp, "\n");
|
||
|
|
||
|
cleanup:
|
||
|
LEPT_FREE(stringbuf);
|
||
|
if (!success) sarrayDestroy(&sa);
|
||
|
return sa;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief sarrayReadMem()
|
||
|
*
|
||
|
* \param[in] data serialization in ascii
|
||
|
* \param[in] size of data; can use strlen to get it
|
||
|
* \return sarray, or NULL on error
|
||
|
*/
|
||
|
SARRAY *
|
||
|
sarrayReadMem(const l_uint8 *data,
|
||
|
size_t size)
|
||
|
{
|
||
|
FILE *fp;
|
||
|
SARRAY *sa;
|
||
|
|
||
|
PROCNAME("sarrayReadMem");
|
||
|
|
||
|
if (!data)
|
||
|
return (SARRAY *)ERROR_PTR("data not defined", procName, NULL);
|
||
|
if ((fp = fopenReadFromMemory(data, size)) == NULL)
|
||
|
return (SARRAY *)ERROR_PTR("stream not opened", procName, NULL);
|
||
|
|
||
|
sa = sarrayReadStream(fp);
|
||
|
fclose(fp);
|
||
|
if (!sa) L_ERROR("sarray not read\n", procName);
|
||
|
return sa;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief sarrayWrite()
|
||
|
*
|
||
|
* \param[in] filename
|
||
|
* \param[in] sa string array
|
||
|
* \return 0 if OK; 1 on error
|
||
|
*/
|
||
|
l_ok
|
||
|
sarrayWrite(const char *filename,
|
||
|
SARRAY *sa)
|
||
|
{
|
||
|
l_int32 ret;
|
||
|
FILE *fp;
|
||
|
|
||
|
PROCNAME("sarrayWrite");
|
||
|
|
||
|
if (!filename)
|
||
|
return ERROR_INT("filename not defined", procName, 1);
|
||
|
if (!sa)
|
||
|
return ERROR_INT("sa not defined", procName, 1);
|
||
|
|
||
|
if ((fp = fopenWriteStream(filename, "w")) == NULL)
|
||
|
return ERROR_INT("stream not opened", procName, 1);
|
||
|
ret = sarrayWriteStream(fp, sa);
|
||
|
fclose(fp);
|
||
|
if (ret)
|
||
|
return ERROR_INT("sa not written to stream", procName, 1);
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief sarrayWriteStream()
|
||
|
*
|
||
|
* \param[in] fp file stream
|
||
|
* \param[in] sa string array
|
||
|
* \return 0 if OK; 1 on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) This appends a '\n' to each string, which is stripped
|
||
|
* off by sarrayReadStream().
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_ok
|
||
|
sarrayWriteStream(FILE *fp,
|
||
|
SARRAY *sa)
|
||
|
{
|
||
|
l_int32 i, n, len;
|
||
|
|
||
|
PROCNAME("sarrayWriteStream");
|
||
|
|
||
|
if (!fp)
|
||
|
return ERROR_INT("stream not defined", procName, 1);
|
||
|
if (!sa)
|
||
|
return ERROR_INT("sa not defined", procName, 1);
|
||
|
|
||
|
n = sarrayGetCount(sa);
|
||
|
fprintf(fp, "\nSarray Version %d\n", SARRAY_VERSION_NUMBER);
|
||
|
fprintf(fp, "Number of strings = %d\n", n);
|
||
|
for (i = 0; i < n; i++) {
|
||
|
len = strlen(sa->array[i]);
|
||
|
fprintf(fp, " %d[%d]: %s\n", i, len, sa->array[i]);
|
||
|
}
|
||
|
fprintf(fp, "\n");
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief sarrayWriteMem()
|
||
|
*
|
||
|
* \param[out] pdata data of serialized sarray; ascii
|
||
|
* \param[out] psize size of returned data
|
||
|
* \param[in] sa
|
||
|
* \return 0 if OK, 1 on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) Serializes a sarray in memory and puts the result in a buffer.
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_ok
|
||
|
sarrayWriteMem(l_uint8 **pdata,
|
||
|
size_t *psize,
|
||
|
SARRAY *sa)
|
||
|
{
|
||
|
l_int32 ret;
|
||
|
FILE *fp;
|
||
|
|
||
|
PROCNAME("sarrayWriteMem");
|
||
|
|
||
|
if (pdata) *pdata = NULL;
|
||
|
if (psize) *psize = 0;
|
||
|
if (!pdata)
|
||
|
return ERROR_INT("&data not defined", procName, 1);
|
||
|
if (!psize)
|
||
|
return ERROR_INT("&size not defined", procName, 1);
|
||
|
if (!sa)
|
||
|
return ERROR_INT("sa not defined", procName, 1);
|
||
|
|
||
|
#if HAVE_FMEMOPEN
|
||
|
if ((fp = open_memstream((char **)pdata, psize)) == NULL)
|
||
|
return ERROR_INT("stream not opened", procName, 1);
|
||
|
ret = sarrayWriteStream(fp, sa);
|
||
|
#else
|
||
|
L_INFO("work-around: writing to a temp file\n", procName);
|
||
|
#ifdef _WIN32
|
||
|
if ((fp = fopenWriteWinTempfile()) == NULL)
|
||
|
return ERROR_INT("tmpfile stream not opened", procName, 1);
|
||
|
#else
|
||
|
if ((fp = tmpfile()) == NULL)
|
||
|
return ERROR_INT("tmpfile stream not opened", procName, 1);
|
||
|
#endif /* _WIN32 */
|
||
|
ret = sarrayWriteStream(fp, sa);
|
||
|
rewind(fp);
|
||
|
*pdata = l_binaryReadStream(fp, psize);
|
||
|
#endif /* HAVE_FMEMOPEN */
|
||
|
fclose(fp);
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief sarrayAppend()
|
||
|
*
|
||
|
* \param[in] filename
|
||
|
* \param[in] sa
|
||
|
* \return 0 if OK; 1 on error
|
||
|
*/
|
||
|
l_ok
|
||
|
sarrayAppend(const char *filename,
|
||
|
SARRAY *sa)
|
||
|
{
|
||
|
FILE *fp;
|
||
|
|
||
|
PROCNAME("sarrayAppend");
|
||
|
|
||
|
if (!filename)
|
||
|
return ERROR_INT("filename not defined", procName, 1);
|
||
|
if (!sa)
|
||
|
return ERROR_INT("sa not defined", procName, 1);
|
||
|
|
||
|
if ((fp = fopenWriteStream(filename, "a")) == NULL)
|
||
|
return ERROR_INT("stream not opened", procName, 1);
|
||
|
if (sarrayWriteStream(fp, sa)) {
|
||
|
fclose(fp);
|
||
|
return ERROR_INT("sa not appended to stream", procName, 1);
|
||
|
}
|
||
|
|
||
|
fclose(fp);
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*---------------------------------------------------------------------*
|
||
|
* Directory filenames *
|
||
|
*---------------------------------------------------------------------*/
|
||
|
/*!
|
||
|
* \brief getNumberedPathnamesInDirectory()
|
||
|
*
|
||
|
* \param[in] dirname directory name
|
||
|
* \param[in] substr [optional] substring filter on filenames; can be NULL
|
||
|
* \param[in] numpre number of characters in name before number
|
||
|
* \param[in] numpost number of characters in name after the number,
|
||
|
* up to a dot before an extension
|
||
|
* \param[in] maxnum only consider page numbers up to this value
|
||
|
* \return sarray of numbered pathnames, or NULL on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) Returns the full pathnames of the numbered filenames in
|
||
|
* the directory. The number in the filename is the index
|
||
|
* into the sarray. For indices for which there are no filenames,
|
||
|
* an empty string ("") is placed into the sarray.
|
||
|
* This makes reading numbered files very simple. For example,
|
||
|
* the image whose filename includes number N can be retrieved using
|
||
|
* pixReadIndexed(sa, N);
|
||
|
* (2) If %substr is not NULL, only filenames that contain
|
||
|
* the substring can be included. If %substr is NULL,
|
||
|
* all matching filenames are used.
|
||
|
* (3) If no numbered files are found, it returns an empty sarray,
|
||
|
* with no initialized strings.
|
||
|
* (4) It is assumed that the page number is contained within
|
||
|
* the basename (the filename without directory or extension).
|
||
|
* %numpre is the number of characters in the basename
|
||
|
* preceding the actual page number; %numpost is the number
|
||
|
* following the page number, up to either the end of the
|
||
|
* basename or a ".", whichever comes first.
|
||
|
* (5) This is useful when all filenames contain numbers that are
|
||
|
* not necessarily consecutive. 0-padding is not required.
|
||
|
* (6) To use a O(n) matching algorithm, the largest page number
|
||
|
* is found and two internal arrays of this size are created.
|
||
|
* This maximum is constrained not to exceed %maxsum,
|
||
|
* to make sure that an unrealistically large number is not
|
||
|
* accidentally used to determine the array sizes.
|
||
|
* </pre>
|
||
|
*/
|
||
|
SARRAY *
|
||
|
getNumberedPathnamesInDirectory(const char *dirname,
|
||
|
const char *substr,
|
||
|
l_int32 numpre,
|
||
|
l_int32 numpost,
|
||
|
l_int32 maxnum)
|
||
|
{
|
||
|
l_int32 nfiles;
|
||
|
SARRAY *sa, *saout;
|
||
|
|
||
|
PROCNAME("getNumberedPathnamesInDirectory");
|
||
|
|
||
|
if (!dirname)
|
||
|
return (SARRAY *)ERROR_PTR("dirname not defined", procName, NULL);
|
||
|
|
||
|
if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL)
|
||
|
return (SARRAY *)ERROR_PTR("sa not made", procName, NULL);
|
||
|
if ((nfiles = sarrayGetCount(sa)) == 0) {
|
||
|
sarrayDestroy(&sa);
|
||
|
return sarrayCreate(1);
|
||
|
}
|
||
|
|
||
|
saout = convertSortedToNumberedPathnames(sa, numpre, numpost, maxnum);
|
||
|
sarrayDestroy(&sa);
|
||
|
return saout;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief getSortedPathnamesInDirectory()
|
||
|
*
|
||
|
* \param[in] dirname directory name
|
||
|
* \param[in] substr [optional] substring filter on filenames; can be NULL
|
||
|
* \param[in] first 0-based
|
||
|
* \param[in] nfiles use 0 for all to the end
|
||
|
* \return sarray of sorted pathnames, or NULL on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) Use %substr to filter filenames in the directory. If
|
||
|
* %substr == NULL, this takes all files.
|
||
|
* (2) The files in the directory, after optional filtering by
|
||
|
* the substring, are lexically sorted in increasing order.
|
||
|
* Use %first and %nfiles to select a contiguous set of files.
|
||
|
* (3) The full pathnames are returned for the requested sequence.
|
||
|
* If no files are found after filtering, returns an empty sarray.
|
||
|
* </pre>
|
||
|
*/
|
||
|
SARRAY *
|
||
|
getSortedPathnamesInDirectory(const char *dirname,
|
||
|
const char *substr,
|
||
|
l_int32 first,
|
||
|
l_int32 nfiles)
|
||
|
{
|
||
|
char *fname, *fullname;
|
||
|
l_int32 i, n, last;
|
||
|
SARRAY *sa, *safiles, *saout;
|
||
|
|
||
|
PROCNAME("getSortedPathnamesInDirectory");
|
||
|
|
||
|
if (!dirname)
|
||
|
return (SARRAY *)ERROR_PTR("dirname not defined", procName, NULL);
|
||
|
|
||
|
if ((sa = getFilenamesInDirectory(dirname)) == NULL)
|
||
|
return (SARRAY *)ERROR_PTR("sa not made", procName, NULL);
|
||
|
safiles = sarraySelectBySubstring(sa, substr);
|
||
|
sarrayDestroy(&sa);
|
||
|
n = sarrayGetCount(safiles);
|
||
|
if (n == 0) {
|
||
|
L_WARNING("no files found\n", procName);
|
||
|
return safiles;
|
||
|
}
|
||
|
|
||
|
sarraySort(safiles, safiles, L_SORT_INCREASING);
|
||
|
|
||
|
first = L_MIN(L_MAX(first, 0), n - 1);
|
||
|
if (nfiles == 0)
|
||
|
nfiles = n - first;
|
||
|
last = L_MIN(first + nfiles - 1, n - 1);
|
||
|
|
||
|
saout = sarrayCreate(last - first + 1);
|
||
|
for (i = first; i <= last; i++) {
|
||
|
fname = sarrayGetString(safiles, i, L_NOCOPY);
|
||
|
fullname = pathJoin(dirname, fname);
|
||
|
sarrayAddString(saout, fullname, L_INSERT);
|
||
|
}
|
||
|
|
||
|
sarrayDestroy(&safiles);
|
||
|
return saout;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief convertSortedToNumberedPathnames()
|
||
|
*
|
||
|
* \param[in] sa sorted pathnames including zero-padded integers
|
||
|
* \param[in] numpre number of characters in name before number
|
||
|
* \param[in] numpost number of characters in name after the number,
|
||
|
* up to a dot before an extension
|
||
|
* \param[in] maxnum only consider page numbers up to this value
|
||
|
* \return sarray of numbered pathnames, or NULL on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) Typically, numpre = numpost = 0; e.g., when the filename
|
||
|
* just has a number followed by an optional extension.
|
||
|
* </pre>
|
||
|
*/
|
||
|
SARRAY *
|
||
|
convertSortedToNumberedPathnames(SARRAY *sa,
|
||
|
l_int32 numpre,
|
||
|
l_int32 numpost,
|
||
|
l_int32 maxnum)
|
||
|
{
|
||
|
char *fname, *str;
|
||
|
l_int32 i, nfiles, num, index;
|
||
|
SARRAY *saout;
|
||
|
|
||
|
PROCNAME("convertSortedToNumberedPathnames");
|
||
|
|
||
|
if (!sa)
|
||
|
return (SARRAY *)ERROR_PTR("sa not defined", procName, NULL);
|
||
|
if ((nfiles = sarrayGetCount(sa)) == 0)
|
||
|
return sarrayCreate(1);
|
||
|
|
||
|
/* Find the last file in the sorted array that has a number
|
||
|
* that (a) matches the count pattern and (b) does not
|
||
|
* exceed %maxnum. %maxnum sets an upper limit on the size
|
||
|
* of the sarray. */
|
||
|
num = 0;
|
||
|
for (i = nfiles - 1; i >= 0; i--) {
|
||
|
fname = sarrayGetString(sa, i, L_NOCOPY);
|
||
|
num = extractNumberFromFilename(fname, numpre, numpost);
|
||
|
if (num < 0) continue;
|
||
|
num = L_MIN(num + 1, maxnum);
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
if (num <= 0) /* none found */
|
||
|
return sarrayCreate(1);
|
||
|
|
||
|
/* Insert pathnames into the output sarray.
|
||
|
* Ignore numbers that are out of the range of sarray. */
|
||
|
saout = sarrayCreateInitialized(num, "");
|
||
|
for (i = 0; i < nfiles; i++) {
|
||
|
fname = sarrayGetString(sa, i, L_NOCOPY);
|
||
|
index = extractNumberFromFilename(fname, numpre, numpost);
|
||
|
if (index < 0 || index >= num) continue;
|
||
|
str = sarrayGetString(saout, index, L_NOCOPY);
|
||
|
if (str[0] != '\0') {
|
||
|
L_WARNING("\n Multiple files with same number: %d\n",
|
||
|
procName, index);
|
||
|
}
|
||
|
sarrayReplaceString(saout, index, fname, L_COPY);
|
||
|
}
|
||
|
|
||
|
return saout;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief getFilenamesInDirectory()
|
||
|
*
|
||
|
* \param[in] dirname directory name
|
||
|
* \return sarray of file names, or NULL on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) The versions compiled under unix and cygwin use the POSIX C
|
||
|
* library commands for handling directories. For windows,
|
||
|
* there is a separate implementation.
|
||
|
* (2) It returns an array of filename tails; i.e., only the part of
|
||
|
* the path after the last slash.
|
||
|
* (3) Use of the d_type field of dirent is not portable:
|
||
|
* "According to POSIX, the dirent structure contains a field
|
||
|
* char d_name[] of unspecified size, with at most NAME_MAX
|
||
|
* characters preceding the terminating null character. Use
|
||
|
* of other fields will harm the portability of your programs."
|
||
|
* (4) As a consequence of (3), we note several things:
|
||
|
* ~ MINGW doesn't have a d_type member.
|
||
|
* ~ Older versions of gcc (e.g., 2.95.3) return DT_UNKNOWN
|
||
|
* for d_type from all files.
|
||
|
* On these systems, this function will return directories
|
||
|
* (except for '.' and '..', which are eliminated using
|
||
|
* the d_name field).
|
||
|
* </pre>
|
||
|
*/
|
||
|
|
||
|
#ifndef _WIN32
|
||
|
|
||
|
SARRAY *
|
||
|
getFilenamesInDirectory(const char *dirname)
|
||
|
{
|
||
|
char dir[PATH_MAX + 1];
|
||
|
char *realdir, *stat_path, *ignore;
|
||
|
size_t size;
|
||
|
SARRAY *safiles;
|
||
|
DIR *pdir;
|
||
|
struct dirent *pdirentry;
|
||
|
int dfd, stat_ret;
|
||
|
struct stat st;
|
||
|
|
||
|
PROCNAME("getFilenamesInDirectory");
|
||
|
|
||
|
if (!dirname)
|
||
|
return (SARRAY *)ERROR_PTR("dirname not defined", procName, NULL);
|
||
|
|
||
|
/* It's nice to ignore directories. fstatat() works with relative
|
||
|
directory paths, but stat() requires using the absolute path.
|
||
|
Also, do not pass NULL as the second parameter to realpath();
|
||
|
use a buffer of sufficient size. */
|
||
|
ignore = realpath(dirname, dir); /* see note above */
|
||
|
realdir = genPathname(dir, NULL);
|
||
|
if ((pdir = opendir(realdir)) == NULL) {
|
||
|
LEPT_FREE(realdir);
|
||
|
return (SARRAY *)ERROR_PTR("pdir not opened", procName, NULL);
|
||
|
}
|
||
|
safiles = sarrayCreate(0);
|
||
|
dfd = dirfd(pdir);
|
||
|
while ((pdirentry = readdir(pdir))) {
|
||
|
#if HAVE_FSTATAT
|
||
|
stat_ret = fstatat(dfd, pdirentry->d_name, &st, 0);
|
||
|
#else
|
||
|
size = strlen(realdir) + strlen(pdirentry->d_name) + 2;
|
||
|
if (size > PATH_MAX) {
|
||
|
L_ERROR("size = %zu too large; skipping\n", procName, size);
|
||
|
continue;
|
||
|
}
|
||
|
stat_path = (char *)LEPT_CALLOC(size, 1);
|
||
|
snprintf(stat_path, size, "%s/%s", realdir, pdirentry->d_name);
|
||
|
stat_ret = stat(stat_path, &st);
|
||
|
LEPT_FREE(stat_path);
|
||
|
#endif
|
||
|
if (stat_ret == 0 && S_ISDIR(st.st_mode))
|
||
|
continue;
|
||
|
sarrayAddString(safiles, pdirentry->d_name, L_COPY);
|
||
|
}
|
||
|
closedir(pdir);
|
||
|
LEPT_FREE(realdir);
|
||
|
return safiles;
|
||
|
}
|
||
|
|
||
|
#else /* _WIN32 */
|
||
|
|
||
|
/* http://msdn2.microsoft.com/en-us/library/aa365200(VS.85).aspx */
|
||
|
#include <windows.h>
|
||
|
|
||
|
SARRAY *
|
||
|
getFilenamesInDirectory(const char *dirname)
|
||
|
{
|
||
|
char *pszDir;
|
||
|
char *realdir;
|
||
|
HANDLE hFind = INVALID_HANDLE_VALUE;
|
||
|
SARRAY *safiles;
|
||
|
WIN32_FIND_DATAA ffd;
|
||
|
|
||
|
PROCNAME("getFilenamesInDirectory");
|
||
|
|
||
|
if (!dirname)
|
||
|
return (SARRAY *)ERROR_PTR("dirname not defined", procName, NULL);
|
||
|
|
||
|
realdir = genPathname(dirname, NULL);
|
||
|
pszDir = stringJoin(realdir, "\\*");
|
||
|
LEPT_FREE(realdir);
|
||
|
|
||
|
if (strlen(pszDir) + 1 > MAX_PATH) {
|
||
|
LEPT_FREE(pszDir);
|
||
|
return (SARRAY *)ERROR_PTR("dirname is too long", procName, NULL);
|
||
|
}
|
||
|
|
||
|
if ((safiles = sarrayCreate(0)) == NULL) {
|
||
|
LEPT_FREE(pszDir);
|
||
|
return (SARRAY *)ERROR_PTR("safiles not made", procName, NULL);
|
||
|
}
|
||
|
|
||
|
hFind = FindFirstFileA(pszDir, &ffd);
|
||
|
if (INVALID_HANDLE_VALUE == hFind) {
|
||
|
sarrayDestroy(&safiles);
|
||
|
LEPT_FREE(pszDir);
|
||
|
return (SARRAY *)ERROR_PTR("hFind not opened", procName, NULL);
|
||
|
}
|
||
|
|
||
|
while (FindNextFileA(hFind, &ffd) != 0) {
|
||
|
if (ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) /* skip dirs */
|
||
|
continue;
|
||
|
convertSepCharsInPath(ffd.cFileName, UNIX_PATH_SEPCHAR);
|
||
|
sarrayAddString(safiles, ffd.cFileName, L_COPY);
|
||
|
}
|
||
|
|
||
|
FindClose(hFind);
|
||
|
LEPT_FREE(pszDir);
|
||
|
return safiles;
|
||
|
}
|
||
|
#endif /* _WIN32 */
|