mirror of http://192.168.1.51:8099/lmh188/twain3.0
3365 lines
101 KiB
C
3365 lines
101 KiB
C
|
/*====================================================================*
|
||
|
- Copyright (C) 2001 Leptonica. All rights reserved.
|
||
|
-
|
||
|
- Redistribution and use in source and binary forms, with or without
|
||
|
- modification, are permitted provided that the following conditions
|
||
|
- are met:
|
||
|
- 1. Redistributions of source code must retain the above copyright
|
||
|
- notice, this list of conditions and the following disclaimer.
|
||
|
- 2. Redistributions in binary form must reproduce the above
|
||
|
- copyright notice, this list of conditions and the following
|
||
|
- disclaimer in the documentation and/or other materials
|
||
|
- provided with the distribution.
|
||
|
-
|
||
|
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||
|
- ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||
|
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||
|
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
|
||
|
- CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||
|
- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||
|
- PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||
|
- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||
|
- OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||
|
- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||
|
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||
|
*====================================================================*/
|
||
|
|
||
|
/*!
|
||
|
* \file utils2.c
|
||
|
* <pre>
|
||
|
*
|
||
|
* ------------------------------------------
|
||
|
* This file has these utilities:
|
||
|
* - safe string operations
|
||
|
* - find/replace operations on strings
|
||
|
* - read/write between file and memory
|
||
|
* - multi-platform file and directory operations
|
||
|
* - file name operations
|
||
|
* ------------------------------------------
|
||
|
*
|
||
|
* Safe string procs
|
||
|
* char *stringNew()
|
||
|
* l_int32 stringCopy()
|
||
|
* l_int32 stringCopySegment()
|
||
|
* l_int32 stringReplace()
|
||
|
* l_int32 stringLength()
|
||
|
* l_int32 stringCat()
|
||
|
* char *stringConcatNew()
|
||
|
* char *stringJoin()
|
||
|
* l_int32 stringJoinIP()
|
||
|
* char *stringReverse()
|
||
|
* char *strtokSafe()
|
||
|
* l_int32 stringSplitOnToken()
|
||
|
*
|
||
|
* Find and replace string and array procs
|
||
|
* l_int32 stringCheckForChars()
|
||
|
* char *stringRemoveChars()
|
||
|
* char *stringReplaceEachSubstr()
|
||
|
* char *stringReplaceSubstr()
|
||
|
* L_DNA *stringFindEachSubstr()
|
||
|
* l_int32 stringFindSubstr()
|
||
|
* l_uint8 *arrayReplaceEachSequence()
|
||
|
* L_DNA *arrayFindEachSequence()
|
||
|
* l_int32 arrayFindSequence()
|
||
|
*
|
||
|
* Safe realloc
|
||
|
* void *reallocNew()
|
||
|
*
|
||
|
* Read and write between file and memory
|
||
|
* l_uint8 *l_binaryRead()
|
||
|
* l_uint8 *l_binaryReadStream()
|
||
|
* l_uint8 *l_binaryReadSelect()
|
||
|
* l_uint8 *l_binaryReadSelectStream()
|
||
|
* l_int32 l_binaryWrite()
|
||
|
* l_int32 nbytesInFile()
|
||
|
* l_int32 fnbytesInFile()
|
||
|
*
|
||
|
* Copy and compare in memory
|
||
|
* l_uint8 *l_binaryCopy()
|
||
|
* l_uint8 *l_binaryCompare()
|
||
|
*
|
||
|
* File copy operations
|
||
|
* l_int32 fileCopy()
|
||
|
* l_int32 fileConcatenate()
|
||
|
* l_int32 fileAppendString()
|
||
|
*
|
||
|
* Multi-platform functions for opening file streams
|
||
|
* FILE *fopenReadStream()
|
||
|
* FILE *fopenWriteStream()
|
||
|
* FILE *fopenReadFromMemory()
|
||
|
*
|
||
|
* Opening a windows tmpfile for writing
|
||
|
* FILE *fopenWriteWinTempfile()
|
||
|
*
|
||
|
* Multi-platform functions that avoid C-runtime boundary crossing
|
||
|
* with Windows DLLs
|
||
|
* FILE *lept_fopen()
|
||
|
* l_int32 lept_fclose()
|
||
|
* void lept_calloc()
|
||
|
* void lept_free()
|
||
|
*
|
||
|
* Multi-platform file system operations in temp directories
|
||
|
* l_int32 lept_mkdir()
|
||
|
* l_int32 lept_rmdir()
|
||
|
* l_int32 lept_direxists()
|
||
|
* l_int32 lept_mv()
|
||
|
* l_int32 lept_rm_match()
|
||
|
* l_int32 lept_rm()
|
||
|
* l_int32 lept_rmfile()
|
||
|
* l_int32 lept_cp()
|
||
|
*
|
||
|
* Special debug/test function for calling 'system'
|
||
|
* void callSystemDebug()
|
||
|
*
|
||
|
* General file name operations
|
||
|
* l_int32 splitPathAtDirectory()
|
||
|
* l_int32 splitPathAtExtension()
|
||
|
* char *pathJoin()
|
||
|
* char *appendSubdirs()
|
||
|
*
|
||
|
* Special file name operations
|
||
|
* l_int32 convertSepCharsInPath()
|
||
|
* char *genPathname()
|
||
|
* l_int32 makeTempDirname()
|
||
|
* l_int32 modifyTrailingSlash()
|
||
|
* char *l_makeTempFilename()
|
||
|
* l_int32 extractNumberFromFilename()
|
||
|
*
|
||
|
*
|
||
|
* Notes on multi-platform development
|
||
|
* -----------------------------------
|
||
|
* This is important:
|
||
|
* (1) With the exception of splitPathAtDirectory(), splitPathAtExtension()
|
||
|
* and genPathname(), all input pathnames must have unix separators.
|
||
|
* (2) On Windows, when you specify a read or write to "/tmp/...",
|
||
|
* the filename is rewritten to use the Windows temp directory:
|
||
|
* /tmp ==> [Temp]... (windows)
|
||
|
* (3) This filename rewrite, along with the conversion from unix
|
||
|
* to windows pathnames, happens in genPathname().
|
||
|
* (4) Use fopenReadStream() and fopenWriteStream() to open files,
|
||
|
* because these use genPathname() to find the platform-dependent
|
||
|
* filenames. Likewise for l_binaryRead() and l_binaryWrite().
|
||
|
* (5) For moving, copying and removing files and directories that are in
|
||
|
* subdirectories of /tmp, use the lept_*() file system shell wrappers:
|
||
|
* lept_mkdir(), lept_rmdir(), lept_mv(), lept_rm() and lept_cp().
|
||
|
* (6) Use the lept_*() C library wrappers. These work properly on
|
||
|
* Windows, where the same DLL must perform complementary operations
|
||
|
* on file streams (open/close) and heap memory (malloc/free):
|
||
|
* lept_fopen(), lept_fclose(), lept_calloc() and lept_free().
|
||
|
* (7) Why read and write files to temp directories?
|
||
|
* The library needs the ability to read and write ephemeral
|
||
|
* files to default places, both for generating debugging output
|
||
|
* and for supporting regression tests. Applications also need
|
||
|
* this ability for debugging.
|
||
|
* (8) Why do the pathname rewrite on Windows?
|
||
|
* The goal is to have the library, and programs using the library,
|
||
|
* run on multiple platforms without changes. The location of
|
||
|
* temporary files depends on the platform as well as the user's
|
||
|
* configuration. Temp files on Windows are in some directory
|
||
|
* not known a priori. To make everything work seamlessly on
|
||
|
* Windows, every time you open a file for reading or writing,
|
||
|
* use a special function such as fopenReadStream() or
|
||
|
* fopenWriteStream(); these call genPathname() to ensure that
|
||
|
* if it is a temp file, the correct path is used. To indicate
|
||
|
* that this is a temp file, the application is written with the
|
||
|
* root directory of the path in a canonical form: "/tmp".
|
||
|
* (9) Why is it that multi-platform directory functions like lept_mkdir()
|
||
|
* and lept_rmdir(), as well as associated file functions like
|
||
|
* lept_rm(), lept_mv() and lept_cp(), only work in the temp dir?
|
||
|
* These functions were designed to provide easy manipulation of
|
||
|
* temp files. The restriction to temp files is for safety -- to
|
||
|
* prevent an accidental deletion of important files. For example,
|
||
|
* lept_rmdir() first deletes all files in a specified subdirectory
|
||
|
* of temp, and then removes the directory.
|
||
|
*
|
||
|
* </pre>
|
||
|
*/
|
||
|
|
||
|
#ifdef HAVE_CONFIG_H
|
||
|
#include "config_auto.h"
|
||
|
#endif /* HAVE_CONFIG_H */
|
||
|
|
||
|
#ifdef _MSC_VER
|
||
|
#include <process.h>
|
||
|
#include <direct.h>
|
||
|
#define getcwd _getcwd /* fix MSVC warning */
|
||
|
#else
|
||
|
#include <unistd.h>
|
||
|
#endif /* _MSC_VER */
|
||
|
|
||
|
#ifdef _WIN32
|
||
|
#include <windows.h>
|
||
|
#include <fcntl.h> /* _O_CREAT, ... */
|
||
|
#include <io.h> /* _open */
|
||
|
#include <sys/stat.h> /* _S_IREAD, _S_IWRITE */
|
||
|
#else
|
||
|
#include <sys/stat.h> /* for stat, mkdir(2) */
|
||
|
#include <sys/types.h>
|
||
|
#endif
|
||
|
|
||
|
#ifdef OS_IOS
|
||
|
#include <unistd.h>
|
||
|
#include <errno.h>
|
||
|
#endif
|
||
|
|
||
|
#include <string.h>
|
||
|
#include <stddef.h>
|
||
|
#include "allheaders.h"
|
||
|
|
||
|
|
||
|
/*--------------------------------------------------------------------*
|
||
|
* Safe string operations *
|
||
|
*--------------------------------------------------------------------*/
|
||
|
/*!
|
||
|
* \brief stringNew()
|
||
|
*
|
||
|
* \param[in] src
|
||
|
* \return dest copy of %src string, or NULL on error
|
||
|
*/
|
||
|
char *
|
||
|
stringNew(const char *src)
|
||
|
{
|
||
|
l_int32 len;
|
||
|
char *dest;
|
||
|
|
||
|
PROCNAME("stringNew");
|
||
|
|
||
|
if (!src) {
|
||
|
L_WARNING("src not defined\n", procName);
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
len = strlen(src);
|
||
|
if ((dest = (char *)LEPT_CALLOC(len + 1, sizeof(char))) == NULL)
|
||
|
return (char *)ERROR_PTR("dest not made", procName, NULL);
|
||
|
|
||
|
stringCopy(dest, src, len);
|
||
|
return dest;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief stringCopy()
|
||
|
*
|
||
|
* \param[in] dest existing byte buffer
|
||
|
* \param[in] src string [optional] can be null
|
||
|
* \param[in] n max number of characters to copy
|
||
|
* \return 0 if OK, 1 on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) Relatively safe wrapper for strncpy, that checks the input,
|
||
|
* and does not complain if %src is null or %n < 1.
|
||
|
* If %n < 1, this is a no-op.
|
||
|
* (2) %dest needs to be at least %n bytes in size.
|
||
|
* (3) We don't call strncpy() because valgrind complains about
|
||
|
* use of uninitialized values.
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_ok
|
||
|
stringCopy(char *dest,
|
||
|
const char *src,
|
||
|
l_int32 n)
|
||
|
{
|
||
|
l_int32 i;
|
||
|
|
||
|
PROCNAME("stringCopy");
|
||
|
|
||
|
if (!dest)
|
||
|
return ERROR_INT("dest not defined", procName, 1);
|
||
|
if (!src || n < 1)
|
||
|
return 0;
|
||
|
|
||
|
/* Implementation of strncpy that valgrind doesn't complain about */
|
||
|
for (i = 0; i < n && src[i] != '\0'; i++)
|
||
|
dest[i] = src[i];
|
||
|
for (; i < n; i++)
|
||
|
dest[i] = '\0';
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief stringCopySegment()
|
||
|
*
|
||
|
*
|
||
|
* \param[in] src string
|
||
|
* \param[in] start byte position at start of segment
|
||
|
* \param[in] nbytes number of bytes in the segment; use 0 to go to end
|
||
|
* \return copy of segment, or NULL on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) This is a variant of stringNew() that makes a new string
|
||
|
* from a segment of the input string. The segment is specified
|
||
|
* by the starting position and the number of bytes.
|
||
|
* (2) The start location %start must be within the string %src.
|
||
|
* (3) The copy is truncated to the end of the source string.
|
||
|
* Use %nbytes = 0 to copy to the end of %src.
|
||
|
* </pre>
|
||
|
*/
|
||
|
char *
|
||
|
stringCopySegment(const char *src,
|
||
|
l_int32 start,
|
||
|
l_int32 nbytes)
|
||
|
{
|
||
|
char *dest;
|
||
|
l_int32 len;
|
||
|
|
||
|
PROCNAME("stringCopySegment");
|
||
|
|
||
|
if (!src)
|
||
|
return (char *)ERROR_PTR("src not defined", procName, NULL);
|
||
|
len = strlen(src);
|
||
|
if (start < 0 || start > len - 1)
|
||
|
return (char *)ERROR_PTR("invalid start", procName, NULL);
|
||
|
if (nbytes <= 0) /* copy to the end */
|
||
|
nbytes = len - start;
|
||
|
if (start + nbytes > len) /* truncate to the end */
|
||
|
nbytes = len - start;
|
||
|
if ((dest = (char *)LEPT_CALLOC(nbytes + 1, sizeof(char))) == NULL)
|
||
|
return (char *)ERROR_PTR("dest not made", procName, NULL);
|
||
|
stringCopy(dest, src + start, nbytes);
|
||
|
return dest;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief stringReplace()
|
||
|
*
|
||
|
* \param[out] pdest string copy
|
||
|
* \param[in] src [optional] string; can be null
|
||
|
* \return 0 if OK; 1 on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) Frees any existing dest string
|
||
|
* (2) Puts a copy of src string in the dest
|
||
|
* (3) If either or both strings are null, does something reasonable.
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_ok
|
||
|
stringReplace(char **pdest,
|
||
|
const char *src)
|
||
|
{
|
||
|
PROCNAME("stringReplace");
|
||
|
|
||
|
if (!pdest)
|
||
|
return ERROR_INT("pdest not defined", procName, 1);
|
||
|
|
||
|
if (*pdest)
|
||
|
LEPT_FREE(*pdest);
|
||
|
|
||
|
if (src)
|
||
|
*pdest = stringNew(src);
|
||
|
else
|
||
|
*pdest = NULL;
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief stringLength()
|
||
|
*
|
||
|
* \param[in] src string can be null or NULL-terminated string
|
||
|
* \param[in] size size of src buffer
|
||
|
* \return length of src in bytes.
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) Safe implementation of strlen that only checks size bytes
|
||
|
* for trailing NUL.
|
||
|
* (2) Valid returned string lengths are between 0 and size - 1.
|
||
|
* If size bytes are checked without finding a NUL byte, then
|
||
|
* an error is indicated by returning size.
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_int32
|
||
|
stringLength(const char *src,
|
||
|
size_t size)
|
||
|
{
|
||
|
l_int32 i;
|
||
|
|
||
|
PROCNAME("stringLength");
|
||
|
|
||
|
if (!src)
|
||
|
return ERROR_INT("src not defined", procName, 0);
|
||
|
if (size < 1)
|
||
|
return 0;
|
||
|
|
||
|
for (i = 0; i < size; i++) {
|
||
|
if (src[i] == '\0')
|
||
|
return i;
|
||
|
}
|
||
|
return size; /* didn't find a NUL byte */
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief stringCat()
|
||
|
*
|
||
|
* \param[in] dest null-terminated byte buffer
|
||
|
* \param[in] size size of dest
|
||
|
* \param[in] src string can be null or NULL-terminated string
|
||
|
* \return number of bytes added to dest; -1 on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) Alternative implementation of strncat, that checks the input,
|
||
|
* is easier to use (since the size of the dest buffer is specified
|
||
|
* rather than the number of bytes to copy), and does not complain
|
||
|
* if %src is null.
|
||
|
* (2) Never writes past end of dest.
|
||
|
* (3) If it can't append src (an error), it does nothing.
|
||
|
* (4) N.B. The order of 2nd and 3rd args is reversed from that in
|
||
|
* strncat, as in the Windows function strcat_s().
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_int32
|
||
|
stringCat(char *dest,
|
||
|
size_t size,
|
||
|
const char *src)
|
||
|
{
|
||
|
l_int32 i, n;
|
||
|
l_int32 lendest, lensrc;
|
||
|
|
||
|
PROCNAME("stringCat");
|
||
|
|
||
|
if (!dest)
|
||
|
return ERROR_INT("dest not defined", procName, -1);
|
||
|
if (size < 1)
|
||
|
return ERROR_INT("size < 1; too small", procName, -1);
|
||
|
if (!src)
|
||
|
return 0;
|
||
|
|
||
|
lendest = stringLength(dest, size);
|
||
|
if (lendest == size)
|
||
|
return ERROR_INT("no terminating nul byte", procName, -1);
|
||
|
lensrc = stringLength(src, size);
|
||
|
if (lensrc == 0)
|
||
|
return 0;
|
||
|
n = (lendest + lensrc > size - 1 ? size - lendest - 1 : lensrc);
|
||
|
if (n < 1)
|
||
|
return ERROR_INT("dest too small for append", procName, -1);
|
||
|
|
||
|
for (i = 0; i < n; i++)
|
||
|
dest[lendest + i] = src[i];
|
||
|
dest[lendest + n] = '\0';
|
||
|
return n;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
 * \brief   stringConcatNew()
 *
 * \param[in]    first         first string in list
 * \param[in]    ...           NULL-terminated list of strings
 * \return  result new string concatenating the input strings, or
 *                 NULL if first == NULL or if the result cannot
 *                 be allocated
 *
 * <pre>
 * Notes:
 *      (1) The last arg in the list of strings must be NULL.
 *      (2) Caller must free the returned string.
 *      (3) The argument list is traversed twice: once to size the
 *          result and once to copy the characters.
 * </pre>
 */
char *
stringConcatNew(const char  *first, ...)
{
size_t       len;
char        *result, *ptr;
const char  *arg;
va_list      args;

    if (!first) return NULL;

        /* Find the length of the output string */
    va_start(args, first);
    len = strlen(first);
    while ((arg = va_arg(args, const char *)) != NULL)
        len += strlen(arg);
    va_end(args);

        /* Do not write through a failed allocation */
    if ((result = (char *)LEPT_CALLOC(len + 1, sizeof(char))) == NULL)
        return NULL;

        /* Concatenate the args; the zero-filled buffer supplies
         * the terminator */
    va_start(args, first);
    ptr = result;
    arg = first;
    while (*arg)
        *ptr++ = *arg++;
    while ((arg = va_arg(args, const char *)) != NULL) {
        while (*arg)
            *ptr++ = *arg++;
    }
    va_end(args);
    return result;
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief stringJoin()
|
||
|
*
|
||
|
* \param[in] src1 [optional] string; can be null
|
||
|
* \param[in] src2 [optional] string; can be null
|
||
|
* \return concatenated string, or NULL on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) This is a safe version of strcat; it makes a new string.
|
||
|
* (2) It is not an error if either or both of the strings
|
||
|
* are empty, or if either or both of the pointers are null.
|
||
|
* </pre>
|
||
|
*/
|
||
|
char *
|
||
|
stringJoin(const char *src1,
|
||
|
const char *src2)
|
||
|
{
|
||
|
char *dest;
|
||
|
l_int32 srclen1, srclen2, destlen;
|
||
|
|
||
|
PROCNAME("stringJoin");
|
||
|
|
||
|
srclen1 = (src1) ? strlen(src1) : 0;
|
||
|
srclen2 = (src2) ? strlen(src2) : 0;
|
||
|
destlen = srclen1 + srclen2 + 3;
|
||
|
|
||
|
if ((dest = (char *)LEPT_CALLOC(destlen, sizeof(char))) == NULL)
|
||
|
return (char *)ERROR_PTR("calloc fail for dest", procName, NULL);
|
||
|
|
||
|
if (src1)
|
||
|
stringCopy(dest, src1, srclen1);
|
||
|
if (src2)
|
||
|
strncat(dest, src2, srclen2);
|
||
|
return dest;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief stringJoinIP()
|
||
|
*
|
||
|
* \param[in,out] psrc1 address of string src1; cannot be on the stack
|
||
|
* \param[in] src2 [optional] string; can be null
|
||
|
* \return 0 if OK, 1 on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) This is a safe in-place version of strcat. The contents of
|
||
|
* src1 is replaced by the concatenation of src1 and src2.
|
||
|
* (2) It is not an error if either or both of the strings
|
||
|
* are empty (""), or if the pointers to the strings (*psrc1, src2)
|
||
|
* are null.
|
||
|
* (3) src1 should be initialized to null or an empty string
|
||
|
* before the first call. Use one of these:
|
||
|
* char *src1 = NULL;
|
||
|
* char *src1 = stringNew("");
|
||
|
* Then call with:
|
||
|
* stringJoinIP(&src1, src2);
|
||
|
* (4) This can also be implemented as a macro:
|
||
|
* \code
|
||
|
* #define stringJoinIP(src1, src2) \
|
||
|
* {tmpstr = stringJoin((src1),(src2)); \
|
||
|
* LEPT_FREE(src1); \
|
||
|
* (src1) = tmpstr;}
|
||
|
* \endcode
|
||
|
* (5) Another function to consider for joining many strings is
|
||
|
* stringConcatNew().
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_ok
|
||
|
stringJoinIP(char **psrc1,
|
||
|
const char *src2)
|
||
|
{
|
||
|
char *tmpstr;
|
||
|
|
||
|
PROCNAME("stringJoinIP");
|
||
|
|
||
|
if (!psrc1)
|
||
|
return ERROR_INT("&src1 not defined", procName, 1);
|
||
|
|
||
|
tmpstr = stringJoin(*psrc1, src2);
|
||
|
LEPT_FREE(*psrc1);
|
||
|
*psrc1 = tmpstr;
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief stringReverse()
|
||
|
*
|
||
|
* \param[in] src string
|
||
|
* \return dest newly-allocated reversed string
|
||
|
*/
|
||
|
char *
|
||
|
stringReverse(const char *src)
|
||
|
{
|
||
|
char *dest;
|
||
|
l_int32 i, len;
|
||
|
|
||
|
PROCNAME("stringReverse");
|
||
|
|
||
|
if (!src)
|
||
|
return (char *)ERROR_PTR("src not defined", procName, NULL);
|
||
|
len = strlen(src);
|
||
|
if ((dest = (char *)LEPT_CALLOC(len + 1, sizeof(char))) == NULL)
|
||
|
return (char *)ERROR_PTR("calloc fail for dest", procName, NULL);
|
||
|
for (i = 0; i < len; i++)
|
||
|
dest[i] = src[len - 1 - i];
|
||
|
|
||
|
return dest;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief strtokSafe()
|
||
|
*
|
||
|
* \param[in] cstr input string to be sequentially parsed;
|
||
|
* use NULL after the first call
|
||
|
* \param[in] seps a string of character separators
|
||
|
* \param[out] psaveptr ptr to the next char after
|
||
|
* the last encountered separator
|
||
|
* \return substr a new string that is copied from the previous
|
||
|
* saveptr up to but not including the next
|
||
|
* separator character, or NULL if end of cstr.
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) This is a thread-safe implementation of strtok.
|
||
|
* (2) It has the same interface as strtok_r.
|
||
|
* (3) It differs from strtok_r in usage in two respects:
|
||
|
* (a) the input string is not altered
|
||
|
* (b) each returned substring is newly allocated and must
|
||
|
* be freed after use.
|
||
|
* (4) Let me repeat that. This is "safe" because the input
|
||
|
* string is not altered and because each returned string
|
||
|
* is newly allocated on the heap.
|
||
|
* (5) It is here because, surprisingly, some C libraries don't
|
||
|
* include strtok_r.
|
||
|
* (6) Important usage points:
|
||
|
* ~ Input the string to be parsed on the first invocation.
|
||
|
* ~ Then input NULL after that; the value returned in saveptr
|
||
|
* is used in all subsequent calls.
|
||
|
* (7) This is only slightly slower than strtok_r.
|
||
|
* </pre>
|
||
|
*/
|
||
|
char *
|
||
|
strtokSafe(char *cstr,
|
||
|
const char *seps,
|
||
|
char **psaveptr)
|
||
|
{
|
||
|
char nextc;
|
||
|
char *start, *substr;
|
||
|
l_int32 istart, i, j, nchars;
|
||
|
|
||
|
PROCNAME("strtokSafe");
|
||
|
|
||
|
if (!seps)
|
||
|
return (char *)ERROR_PTR("seps not defined", procName, NULL);
|
||
|
if (!psaveptr)
|
||
|
return (char *)ERROR_PTR("&saveptr not defined", procName, NULL);
|
||
|
|
||
|
if (!cstr) {
|
||
|
start = *psaveptr;
|
||
|
} else {
|
||
|
start = cstr;
|
||
|
*psaveptr = NULL;
|
||
|
}
|
||
|
if (!start) /* nothing to do */
|
||
|
return NULL;
|
||
|
|
||
|
/* First time, scan for the first non-sep character */
|
||
|
istart = 0;
|
||
|
if (cstr) {
|
||
|
for (istart = 0;; istart++) {
|
||
|
if ((nextc = start[istart]) == '\0') {
|
||
|
*psaveptr = NULL; /* in case caller doesn't check ret value */
|
||
|
return NULL;
|
||
|
}
|
||
|
if (!strchr(seps, nextc))
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Scan through, looking for a sep character; if none is
|
||
|
* found, 'i' will be at the end of the string. */
|
||
|
for (i = istart;; i++) {
|
||
|
if ((nextc = start[i]) == '\0')
|
||
|
break;
|
||
|
if (strchr(seps, nextc))
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
/* Save the substring */
|
||
|
nchars = i - istart;
|
||
|
substr = (char *)LEPT_CALLOC(nchars + 1, sizeof(char));
|
||
|
stringCopy(substr, start + istart, nchars);
|
||
|
|
||
|
/* Look for the next non-sep character.
|
||
|
* If this is the last substring, return a null saveptr. */
|
||
|
for (j = i;; j++) {
|
||
|
if ((nextc = start[j]) == '\0') {
|
||
|
*psaveptr = NULL; /* no more non-sep characters */
|
||
|
break;
|
||
|
}
|
||
|
if (!strchr(seps, nextc)) {
|
||
|
*psaveptr = start + j; /* start here on next call */
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return substr;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief stringSplitOnToken()
|
||
|
*
|
||
|
* \param[in] cstr input string to be split; not altered
|
||
|
* \param[in] seps a string of character separators
|
||
|
* \param[out] phead ptr to copy of the input string, up to
|
||
|
* the first separator token encountered
|
||
|
* \param[out] ptail ptr to copy of the part of the input string
|
||
|
* starting with the first non-separator character
|
||
|
* that occurs after the first separator is found
|
||
|
* \return 0 if OK, 1 on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) The input string is not altered; all split parts are new strings.
|
||
|
* (2) The split occurs around the first consecutive sequence of
|
||
|
* tokens encountered.
|
||
|
* (3) The head goes from the beginning of the string up to
|
||
|
* but not including the first token found.
|
||
|
* (4) The tail contains the second part of the string, starting
|
||
|
* with the first char in that part that is NOT a token.
|
||
|
* (5) If no separator token is found, 'head' contains a copy
|
||
|
* of the input string and 'tail' is null.
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_ok
|
||
|
stringSplitOnToken(char *cstr,
|
||
|
const char *seps,
|
||
|
char **phead,
|
||
|
char **ptail)
|
||
|
{
|
||
|
char *saveptr;
|
||
|
|
||
|
PROCNAME("stringSplitOnToken");
|
||
|
|
||
|
if (!phead)
|
||
|
return ERROR_INT("&head not defined", procName, 1);
|
||
|
if (!ptail)
|
||
|
return ERROR_INT("&tail not defined", procName, 1);
|
||
|
*phead = *ptail = NULL;
|
||
|
if (!cstr)
|
||
|
return ERROR_INT("cstr not defined", procName, 1);
|
||
|
if (!seps)
|
||
|
return ERROR_INT("seps not defined", procName, 1);
|
||
|
|
||
|
*phead = strtokSafe(cstr, seps, &saveptr);
|
||
|
if (saveptr)
|
||
|
*ptail = stringNew(saveptr);
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*--------------------------------------------------------------------*
|
||
|
* Find and replace procs *
|
||
|
*--------------------------------------------------------------------*/
|
||
|
/*!
|
||
|
* \brief stringCheckForChars()
|
||
|
*
|
||
|
* \param[in] src input string; can be of zero length
|
||
|
* \param[in] chars string of chars to be searched for in %src
|
||
|
* \param[out] pfound 1 if any characters are found; 0 otherwise
|
||
|
* \return 0 if OK, 1 on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) This can be used to sanitize an operation by checking for
|
||
|
* special characters that don't belong in a string.
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_ok
|
||
|
stringCheckForChars(const char *src,
|
||
|
const char *chars,
|
||
|
l_int32 *pfound)
|
||
|
{
|
||
|
char ch;
|
||
|
l_int32 i, n;
|
||
|
|
||
|
PROCNAME("stringCheckForChars");
|
||
|
|
||
|
if (!pfound)
|
||
|
return ERROR_INT("&found not defined", procName, 1);
|
||
|
*pfound = FALSE;
|
||
|
if (!src || !chars)
|
||
|
return ERROR_INT("src and chars not both defined", procName, 1);
|
||
|
|
||
|
n = strlen(src);
|
||
|
for (i = 0; i < n; i++) {
|
||
|
ch = src[i];
|
||
|
if (strchr(chars, ch)) {
|
||
|
*pfound = TRUE;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief stringRemoveChars()
|
||
|
*
|
||
|
* \param[in] src input string; can be of zero length
|
||
|
* \param[in] remchars string of chars to be removed from src
|
||
|
* \return dest string with specified chars removed, or NULL on error
|
||
|
*/
|
||
|
char *
|
||
|
stringRemoveChars(const char *src,
|
||
|
const char *remchars)
|
||
|
{
|
||
|
char ch;
|
||
|
char *dest;
|
||
|
l_int32 nsrc, i, k;
|
||
|
|
||
|
PROCNAME("stringRemoveChars");
|
||
|
|
||
|
if (!src)
|
||
|
return (char *)ERROR_PTR("src not defined", procName, NULL);
|
||
|
if (!remchars)
|
||
|
return stringNew(src);
|
||
|
|
||
|
if ((dest = (char *)LEPT_CALLOC(strlen(src) + 1, sizeof(char))) == NULL)
|
||
|
return (char *)ERROR_PTR("dest not made", procName, NULL);
|
||
|
nsrc = strlen(src);
|
||
|
for (i = 0, k = 0; i < nsrc; i++) {
|
||
|
ch = src[i];
|
||
|
if (!strchr(remchars, ch))
|
||
|
dest[k++] = ch;
|
||
|
}
|
||
|
|
||
|
return dest;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief stringReplaceEachSubstr()
|
||
|
*
|
||
|
* \param[in] src input string; can be of zero length
|
||
|
* \param[in] sub1 substring to be replaced
|
||
|
* \param[in] sub2 substring to put in; can be ""
|
||
|
* \param[out] pcount [optional] the number of times that sub1
|
||
|
* is found in src; 0 if not found
|
||
|
* \return dest string with substring replaced, or NULL if the
|
||
|
* substring not found or on error.
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) This is a wrapper for simple string substitution that uses
|
||
|
* the more general function arrayReplaceEachSequence().
|
||
|
* (2) This finds every non-overlapping occurrence of %sub1 in
|
||
|
* %src, and replaces it with %sub2. By "non-overlapping"
|
||
|
* we mean that after it finds each match, it removes the
|
||
|
* matching characters, replaces with the substitution string
|
||
|
* (if not empty), and continues. For example, if you replace
|
||
|
* 'aa' by 'X' in 'baaabbb', you find one match at position 1
|
||
|
* and return 'bXabbb'.
|
||
|
* (3) To only remove each instance of sub1, use "" for sub2
|
||
|
* (4) Returns a copy of %src if sub1 and sub2 are the same.
|
||
|
* (5) If the input %src is binary data that can have null characters,
|
||
|
* use arrayReplaceEachSequence() directly.
|
||
|
* </pre>
|
||
|
*/
|
||
|
char *
|
||
|
stringReplaceEachSubstr(const char *src,
|
||
|
const char *sub1,
|
||
|
const char *sub2,
|
||
|
l_int32 *pcount)
|
||
|
{
|
||
|
size_t datalen;
|
||
|
|
||
|
PROCNAME("stringReplaceEachSubstr");
|
||
|
|
||
|
if (pcount) *pcount = 0;
|
||
|
if (!src || !sub1 || !sub2)
|
||
|
return (char *)ERROR_PTR("src, sub1, sub2 not all defined",
|
||
|
procName, NULL);
|
||
|
|
||
|
if (strlen(sub2) > 0) {
|
||
|
return (char *)arrayReplaceEachSequence(
|
||
|
(const l_uint8 *)src, strlen(src),
|
||
|
(const l_uint8 *)sub1, strlen(sub1),
|
||
|
(const l_uint8 *)sub2, strlen(sub2),
|
||
|
&datalen, pcount);
|
||
|
} else { /* empty replacement string; removal only */
|
||
|
return (char *)arrayReplaceEachSequence(
|
||
|
(const l_uint8 *)src, strlen(src),
|
||
|
(const l_uint8 *)sub1, strlen(sub1),
|
||
|
NULL, 0, &datalen, pcount);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief stringReplaceSubstr()
|
||
|
*
|
||
|
* \param[in] src input string; can be of zero length
|
||
|
* \param[in] sub1 substring to be replaced
|
||
|
* \param[in] sub2 substring to put in; can be ""
|
||
|
* \param[in,out] ploc [optional] input start location for search;
|
||
|
* returns the loc after replacement
|
||
|
* \param[out] pfound [optional] 1 if sub1 is found; 0 otherwise
|
||
|
* \return dest string with substring replaced, or NULL on error.
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) Replaces the first instance.
|
||
|
* (2) To remove sub1 without replacement, use "" for sub2.
|
||
|
* (3) Returns a copy of %src if either no instance of %sub1 is found,
|
||
|
* or if %sub1 and %sub2 are the same.
|
||
|
* (4) If %ploc == NULL, the search will start at the beginning of %src.
|
||
|
* If %ploc != NULL, *ploc must be initialized to the byte offset
|
||
|
* within %src from which the search starts. To search the
|
||
|
* string from the beginning, set %loc = 0 and input &loc.
|
||
|
* After finding %sub1 and replacing it with %sub2, %loc will be
|
||
|
* returned as the next position after %sub2 in the output string.
|
||
|
* (5) Note that the output string also includes all the characters
|
||
|
* from the input string that occur after the single substitution.
|
||
|
* </pre>
|
||
|
*/
|
||
|
char *
|
||
|
stringReplaceSubstr(const char *src,
|
||
|
const char *sub1,
|
||
|
const char *sub2,
|
||
|
l_int32 *ploc,
|
||
|
l_int32 *pfound)
|
||
|
{
|
||
|
const char *ptr;
|
||
|
char *dest;
|
||
|
l_int32 nsrc, nsub1, nsub2, len, npre, loc;
|
||
|
|
||
|
PROCNAME("stringReplaceSubstr");
|
||
|
|
||
|
if (pfound) *pfound = 0;
|
||
|
if (!src || !sub1 || !sub2)
|
||
|
return (char *)ERROR_PTR("src, sub1, sub2 not all defined",
|
||
|
procName, NULL);
|
||
|
|
||
|
if (ploc)
|
||
|
loc = *ploc;
|
||
|
else
|
||
|
loc = 0;
|
||
|
if (!strcmp(sub1, sub2))
|
||
|
return stringNew(src);
|
||
|
if ((ptr = strstr(src + loc, sub1)) == NULL)
|
||
|
return stringNew(src);
|
||
|
if (pfound) *pfound = 1;
|
||
|
|
||
|
nsrc = strlen(src);
|
||
|
nsub1 = strlen(sub1);
|
||
|
nsub2 = strlen(sub2);
|
||
|
len = nsrc + nsub2 - nsub1;
|
||
|
if ((dest = (char *)LEPT_CALLOC(len + 1, sizeof(char))) == NULL)
|
||
|
return (char *)ERROR_PTR("dest not made", procName, NULL);
|
||
|
npre = ptr - src;
|
||
|
memcpy(dest, src, npre);
|
||
|
strcpy(dest + npre, sub2);
|
||
|
strcpy(dest + npre + nsub2, ptr + nsub1);
|
||
|
if (ploc) *ploc = npre + nsub2;
|
||
|
return dest;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief stringFindEachSubstr()
|
||
|
*
|
||
|
* \param[in] src input string; can be of zero length
|
||
|
* \param[in] sub substring to be searched for
|
||
|
* \return dna of offsets where the sequence is found, or NULL if
|
||
|
* none are found or on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) This finds every non-overlapping occurrence in %src of %sub.
|
||
|
* After it finds each match, it moves forward in %src by the length
|
||
|
* of %sub before continuing the search. So for example,
|
||
|
* if you search for the sequence 'aa' in the data 'baaabbb',
|
||
|
* you find one match at position 1.
|
||
|
|
||
|
* </pre>
|
||
|
*/
|
||
|
L_DNA *
|
||
|
stringFindEachSubstr(const char *src,
|
||
|
const char *sub)
|
||
|
{
|
||
|
PROCNAME("stringFindEachSubstr");
|
||
|
|
||
|
if (!src || !sub)
|
||
|
return (L_DNA *)ERROR_PTR("src, sub not both defined", procName, NULL);
|
||
|
|
||
|
return arrayFindEachSequence((const l_uint8 *)src, strlen(src),
|
||
|
(const l_uint8 *)sub, strlen(sub));
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief stringFindSubstr()
|
||
|
*
|
||
|
* \param[in] src input string; can be of zero length
|
||
|
* \param[in] sub substring to be searched for; must not be empty
|
||
|
* \param[out] ploc [optional] location of substring in src
|
||
|
* \return 1 if found; 0 if not found or on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) This is a wrapper around strstr(). It finds the first
|
||
|
* instance of %sub in %src. If the substring is not found
|
||
|
* and the location is returned, it has the value -1.
|
||
|
* (2) Both %src and %sub must be defined, and %sub must have
|
||
|
* length of at least 1.
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_int32
|
||
|
stringFindSubstr(const char *src,
|
||
|
const char *sub,
|
||
|
l_int32 *ploc)
|
||
|
{
|
||
|
const char *ptr;
|
||
|
|
||
|
PROCNAME("stringFindSubstr");
|
||
|
|
||
|
if (ploc) *ploc = -1;
|
||
|
if (!src || !sub)
|
||
|
return ERROR_INT("src and sub not both defined", procName, 0);
|
||
|
if (strlen(sub) == 0)
|
||
|
return ERROR_INT("substring length 0", procName, 0);
|
||
|
if (strlen(src) == 0)
|
||
|
return 0;
|
||
|
|
||
|
if ((ptr = strstr(src, sub)) == NULL) /* not found */
|
||
|
return 0;
|
||
|
|
||
|
if (ploc)
|
||
|
*ploc = ptr - src;
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief arrayReplaceEachSequence()
|
||
|
*
|
||
|
* \param[in] datas source byte array
|
||
|
* \param[in] dataslen length of source data, in bytes
|
||
|
* \param[in] seq subarray of bytes to find in source data
|
||
|
* \param[in] seqlen length of subarray, in bytes
|
||
|
* \param[in] newseq replacement subarray; can be null
|
||
|
* \param[in] newseqlen length of replacement subarray, in bytes
|
||
|
* \param[out] pdatadlen length of dest byte array, in bytes
|
||
|
* \param[out] pcount [optional] the number of times that sub1
|
||
|
* is found in src; 0 if not found
|
||
|
* \return datad with all all subarrays replaced (or removed)
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) The byte arrays %datas, %seq and %newseq are not C strings,
|
||
|
* because they can contain null bytes. Therefore, for each
|
||
|
* we must give the length of the array.
|
||
|
* (2) If %newseq == NULL, this just removes all instances of %seq.
|
||
|
* Otherwise, it replaces every non-overlapping occurrence of
|
||
|
* %seq in %datas with %newseq. A new array %datad and its
|
||
|
* size are returned. See arrayFindEachSequence() for more
|
||
|
* details on finding non-overlapping occurrences.
|
||
|
* (3) If no instances of %seq are found, this returns a copy of %datas.
|
||
|
* (4) The returned %datad is null terminated.
|
||
|
* (5) Can use stringReplaceEachSubstr() if using C strings.
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_uint8 *
|
||
|
arrayReplaceEachSequence(const l_uint8 *datas,
|
||
|
size_t dataslen,
|
||
|
const l_uint8 *seq,
|
||
|
size_t seqlen,
|
||
|
const l_uint8 *newseq,
|
||
|
size_t newseqlen,
|
||
|
size_t *pdatadlen,
|
||
|
l_int32 *pcount)
|
||
|
{
|
||
|
l_uint8 *datad;
|
||
|
size_t newsize;
|
||
|
l_int32 n, i, j, di, si, index, incr;
|
||
|
L_DNA *da;
|
||
|
|
||
|
PROCNAME("arrayReplaceEachSequence");
|
||
|
|
||
|
if (pcount) *pcount = 0;
|
||
|
if (!datas || !seq)
|
||
|
return (l_uint8 *)ERROR_PTR("datas & seq not both defined",
|
||
|
procName, NULL);
|
||
|
if (!pdatadlen)
|
||
|
return (l_uint8 *)ERROR_PTR("&datadlen not defined", procName, NULL);
|
||
|
*pdatadlen = 0;
|
||
|
|
||
|
/* Identify the locations of the sequence. If there are none,
|
||
|
* return a copy of %datas. */
|
||
|
if ((da = arrayFindEachSequence(datas, dataslen, seq, seqlen)) == NULL) {
|
||
|
*pdatadlen = dataslen;
|
||
|
return l_binaryCopy(datas, dataslen);
|
||
|
}
|
||
|
|
||
|
/* Allocate the output data; insure null termination */
|
||
|
n = l_dnaGetCount(da);
|
||
|
if (pcount) *pcount = n;
|
||
|
if (!newseq) newseqlen = 0;
|
||
|
newsize = dataslen + n * (newseqlen - seqlen) + 4;
|
||
|
if ((datad = (l_uint8 *)LEPT_CALLOC(newsize, sizeof(l_uint8))) == NULL) {
|
||
|
l_dnaDestroy(&da);
|
||
|
return (l_uint8 *)ERROR_PTR("datad not made", procName, NULL);
|
||
|
}
|
||
|
|
||
|
/* Replace each sequence instance with a new sequence */
|
||
|
l_dnaGetIValue(da, 0, &si);
|
||
|
for (i = 0, di = 0, index = 0; i < dataslen; i++) {
|
||
|
if (i == si) {
|
||
|
index++;
|
||
|
if (index < n) {
|
||
|
l_dnaGetIValue(da, index, &si);
|
||
|
incr = L_MIN(seqlen, si - i); /* amount to remove from datas */
|
||
|
} else {
|
||
|
incr = seqlen;
|
||
|
}
|
||
|
i += incr - 1; /* jump over the matched sequence in datas */
|
||
|
if (newseq) { /* add new sequence to datad */
|
||
|
for (j = 0; j < newseqlen; j++)
|
||
|
datad[di++] = newseq[j];
|
||
|
}
|
||
|
} else {
|
||
|
datad[di++] = datas[i];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
*pdatadlen = di;
|
||
|
l_dnaDestroy(&da);
|
||
|
return datad;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief arrayFindEachSequence()
|
||
|
*
|
||
|
* \param[in] data byte array
|
||
|
* \param[in] datalen length of data, in bytes
|
||
|
* \param[in] sequence subarray of bytes to find in data
|
||
|
* \param[in] seqlen length of sequence, in bytes
|
||
|
* \return dna of offsets where the sequence is found, or NULL if
|
||
|
* none are found or on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) The byte arrays %data and %sequence are not C strings,
|
||
|
* because they can contain null bytes. Therefore, for each
|
||
|
* we must give the length of the array.
|
||
|
* (2) This finds every non-overlapping occurrence in %data of %sequence.
|
||
|
* After it finds each match, it moves forward by the length
|
||
|
* of the sequence before continuing the search. So for example,
|
||
|
* if you search for the sequence 'aa' in the data 'baaabbb',
|
||
|
* you find one match at position 1.
|
||
|
* </pre>
|
||
|
*/
|
||
|
L_DNA *
|
||
|
arrayFindEachSequence(const l_uint8 *data,
|
||
|
size_t datalen,
|
||
|
const l_uint8 *sequence,
|
||
|
size_t seqlen)
|
||
|
{
|
||
|
l_int32 start, offset, realoffset, found;
|
||
|
L_DNA *da;
|
||
|
|
||
|
PROCNAME("arrayFindEachSequence");
|
||
|
|
||
|
if (!data || !sequence)
|
||
|
return (L_DNA *)ERROR_PTR("data & sequence not both defined",
|
||
|
procName, NULL);
|
||
|
|
||
|
da = l_dnaCreate(0);
|
||
|
start = 0;
|
||
|
while (1) {
|
||
|
arrayFindSequence(data + start, datalen - start, sequence, seqlen,
|
||
|
&offset, &found);
|
||
|
if (found == FALSE)
|
||
|
break;
|
||
|
|
||
|
realoffset = start + offset;
|
||
|
l_dnaAddNumber(da, realoffset);
|
||
|
start = realoffset + seqlen;
|
||
|
if (start >= datalen)
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
if (l_dnaGetCount(da) == 0)
|
||
|
l_dnaDestroy(&da);
|
||
|
return da;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief arrayFindSequence()
|
||
|
*
|
||
|
* \param[in] data byte array
|
||
|
* \param[in] datalen length of data, in bytes
|
||
|
* \param[in] sequence subarray of bytes to find in data
|
||
|
* \param[in] seqlen length of sequence, in bytes
|
||
|
* \param[out] poffset offset from beginning of
|
||
|
* data where the sequence begins
|
||
|
* \param[out] pfound 1 if sequence is found; 0 otherwise
|
||
|
* \return 0 if OK, 1 on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) The byte arrays 'data' and 'sequence' are not C strings,
|
||
|
* because they can contain null bytes. Therefore, for each
|
||
|
* we must give the length of the array.
|
||
|
* (2) This searches for the first occurrence in %data of %sequence,
|
||
|
* which consists of %seqlen bytes. The parameter %seqlen
|
||
|
* must not exceed the actual length of the %sequence byte array.
|
||
|
* (3) If the sequence is not found, the offset will be 0, so you
|
||
|
* must check %found.
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_ok
|
||
|
arrayFindSequence(const l_uint8 *data,
|
||
|
size_t datalen,
|
||
|
const l_uint8 *sequence,
|
||
|
size_t seqlen,
|
||
|
l_int32 *poffset,
|
||
|
l_int32 *pfound)
|
||
|
{
|
||
|
l_int32 i, j, found, lastpos;
|
||
|
|
||
|
PROCNAME("arrayFindSequence");
|
||
|
|
||
|
if (poffset) *poffset = 0;
|
||
|
if (pfound) *pfound = FALSE;
|
||
|
if (!data || !sequence)
|
||
|
return ERROR_INT("data & sequence not both defined", procName, 1);
|
||
|
if (!poffset || !pfound)
|
||
|
return ERROR_INT("&offset and &found not defined", procName, 1);
|
||
|
|
||
|
lastpos = datalen - seqlen + 1;
|
||
|
found = FALSE;
|
||
|
for (i = 0; i < lastpos; i++) {
|
||
|
for (j = 0; j < seqlen; j++) {
|
||
|
if (data[i + j] != sequence[j])
|
||
|
break;
|
||
|
if (j == seqlen - 1)
|
||
|
found = TRUE;
|
||
|
}
|
||
|
if (found == TRUE)
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
if (found == TRUE) {
|
||
|
*poffset = i;
|
||
|
*pfound = TRUE;
|
||
|
}
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*--------------------------------------------------------------------*
|
||
|
* Safe realloc *
|
||
|
*--------------------------------------------------------------------*/
|
||
|
/*!
|
||
|
* \brief reallocNew()
|
||
|
*
|
||
|
* \param[in,out] pindata nulls indata before reallocing
|
||
|
* \param[in] oldsize size of input data to be copied, in bytes
|
||
|
* \param[in] newsize size of buffer to be reallocated in bytes
|
||
|
* \return ptr to new data, or NULL on error
|
||
|
*
|
||
|
* Action: !N.B. 3) and (4!
|
||
|
* 1 Allocates memory, initialized to 0
|
||
|
* 2 Copies as much of the input data as possible
|
||
|
* to the new block, truncating the copy if necessary
|
||
|
* 3 Frees the input data
|
||
|
* 4 Zeroes the input data ptr
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) If newsize <=0, just frees input data and nulls ptr
|
||
|
* (2) If input data is null, just callocs new memory
|
||
|
* (3) This differs from realloc in that it always allocates
|
||
|
* new memory (if newsize > 0) and initializes it to 0,
|
||
|
* it requires the amount of old data to be copied,
|
||
|
* and it takes the address of the input ptr and
|
||
|
* nulls the handle.
|
||
|
* </pre>
|
||
|
*/
|
||
|
void *
|
||
|
reallocNew(void **pindata,
|
||
|
l_int32 oldsize,
|
||
|
l_int32 newsize)
|
||
|
{
|
||
|
l_int32 minsize;
|
||
|
void *indata;
|
||
|
void *newdata;
|
||
|
|
||
|
PROCNAME("reallocNew");
|
||
|
|
||
|
if (!pindata)
|
||
|
return ERROR_PTR("input data not defined", procName, NULL);
|
||
|
indata = *pindata;
|
||
|
|
||
|
if (newsize <= 0) { /* nonstandard usage */
|
||
|
if (indata) {
|
||
|
LEPT_FREE(indata);
|
||
|
*pindata = NULL;
|
||
|
}
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
if (!indata) { /* nonstandard usage */
|
||
|
if ((newdata = (void *)LEPT_CALLOC(1, newsize)) == NULL)
|
||
|
return ERROR_PTR("newdata not made", procName, NULL);
|
||
|
return newdata;
|
||
|
}
|
||
|
|
||
|
/* Standard usage */
|
||
|
if ((newdata = (void *)LEPT_CALLOC(1, newsize)) == NULL)
|
||
|
return ERROR_PTR("newdata not made", procName, NULL);
|
||
|
minsize = L_MIN(oldsize, newsize);
|
||
|
memcpy(newdata, indata, minsize);
|
||
|
LEPT_FREE(indata);
|
||
|
*pindata = NULL;
|
||
|
|
||
|
return newdata;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*--------------------------------------------------------------------*
|
||
|
* Read and write between file and memory *
|
||
|
*--------------------------------------------------------------------*/
|
||
|
/*!
|
||
|
* \brief l_binaryRead()
|
||
|
*
|
||
|
* \param[in] filename
|
||
|
* \param[out] pnbytes number of bytes read
|
||
|
* \return data, or NULL on error
|
||
|
*/
|
||
|
l_uint8 *
|
||
|
l_binaryRead(const char *filename,
|
||
|
size_t *pnbytes)
|
||
|
{
|
||
|
l_uint8 *data;
|
||
|
FILE *fp;
|
||
|
|
||
|
PROCNAME("l_binaryRead");
|
||
|
|
||
|
if (!pnbytes)
|
||
|
return (l_uint8 *)ERROR_PTR("pnbytes not defined", procName, NULL);
|
||
|
*pnbytes = 0;
|
||
|
if (!filename)
|
||
|
return (l_uint8 *)ERROR_PTR("filename not defined", procName, NULL);
|
||
|
|
||
|
if ((fp = fopenReadStream(filename)) == NULL)
|
||
|
return (l_uint8 *)ERROR_PTR("file stream not opened", procName, NULL);
|
||
|
data = l_binaryReadStream(fp, pnbytes);
|
||
|
fclose(fp);
|
||
|
return data;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief l_binaryReadStream()
|
||
|
*
|
||
|
* \param[in] fp file stream opened to read; can be stdin
|
||
|
* \param[out] pnbytes number of bytes read
|
||
|
* \return null-terminated array, or NULL on error; reading 0 bytes
|
||
|
* is not an error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) The returned array is terminated with a null byte so that it can
|
||
|
* be used to read ascii data from a file into a proper C string.
|
||
|
* (2) This can be used to capture data that is piped in via stdin,
|
||
|
* because it does not require seeking within the file.
|
||
|
* (3) For example, you can read an image from stdin into memory
|
||
|
* using shell redirection, with one of these shell commands:
|
||
|
* \code
|
||
|
* cat <imagefile> | readprog
|
||
|
* readprog < <imagefile>
|
||
|
* \endcode
|
||
|
* where readprog is:
|
||
|
* \code
|
||
|
* l_uint8 *data = l_binaryReadStream(stdin, &nbytes);
|
||
|
* Pix *pix = pixReadMem(data, nbytes);
|
||
|
* \endcode
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_uint8 *
|
||
|
l_binaryReadStream(FILE *fp,
|
||
|
size_t *pnbytes)
|
||
|
{
|
||
|
l_uint8 *data;
|
||
|
l_int32 seekable, navail, nadd, nread;
|
||
|
L_BBUFFER *bb;
|
||
|
|
||
|
PROCNAME("l_binaryReadStream");
|
||
|
|
||
|
if (!pnbytes)
|
||
|
return (l_uint8 *)ERROR_PTR("&nbytes not defined", procName, NULL);
|
||
|
*pnbytes = 0;
|
||
|
if (!fp)
|
||
|
return (l_uint8 *)ERROR_PTR("fp not defined", procName, NULL);
|
||
|
|
||
|
/* Test if the stream is seekable, by attempting to seek to
|
||
|
* the start of data. This is a no-op. If it is seekable, use
|
||
|
* l_binaryReadSelectStream() to determine the size of the
|
||
|
* data to be read in advance. */
|
||
|
seekable = (ftell(fp) == 0) ? 1 : 0;
|
||
|
if (seekable)
|
||
|
return l_binaryReadSelectStream(fp, 0, 0, pnbytes);
|
||
|
|
||
|
/* If it is not seekable, use the bbuffer to realloc memory
|
||
|
* as needed during reading. */
|
||
|
bb = bbufferCreate(NULL, 4096);
|
||
|
while (1) {
|
||
|
navail = bb->nalloc - bb->n;
|
||
|
if (navail < 4096) {
|
||
|
nadd = L_MAX(bb->nalloc, 4096);
|
||
|
bbufferExtendArray(bb, nadd);
|
||
|
}
|
||
|
nread = fread((void *)(bb->array + bb->n), 1, 4096, fp);
|
||
|
bb->n += nread;
|
||
|
if (nread != 4096) break;
|
||
|
}
|
||
|
|
||
|
/* Copy the data to a new array sized for the data, because
|
||
|
* the bbuffer array can be nearly twice the size we need. */
|
||
|
if ((data = (l_uint8 *)LEPT_CALLOC(bb->n + 1, sizeof(l_uint8))) != NULL) {
|
||
|
memcpy(data, bb->array, bb->n);
|
||
|
*pnbytes = bb->n;
|
||
|
} else {
|
||
|
L_ERROR("calloc fail for data\n", procName);
|
||
|
}
|
||
|
|
||
|
bbufferDestroy(&bb);
|
||
|
return data;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief l_binaryReadSelect()
|
||
|
*
|
||
|
* \param[in] filename
|
||
|
* \param[in] start first byte to read
|
||
|
* \param[in] nbytes number of bytes to read; use 0 to read to end of file
|
||
|
* \param[out] pnread number of bytes actually read
|
||
|
* \return data, or NULL on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) The returned array is terminated with a null byte so that it can
|
||
|
* be used to read ascii data from a file into a proper C string.
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_uint8 *
|
||
|
l_binaryReadSelect(const char *filename,
|
||
|
size_t start,
|
||
|
size_t nbytes,
|
||
|
size_t *pnread)
|
||
|
{
|
||
|
l_uint8 *data;
|
||
|
FILE *fp;
|
||
|
|
||
|
PROCNAME("l_binaryReadSelect");
|
||
|
|
||
|
if (!pnread)
|
||
|
return (l_uint8 *)ERROR_PTR("pnread not defined", procName, NULL);
|
||
|
*pnread = 0;
|
||
|
if (!filename)
|
||
|
return (l_uint8 *)ERROR_PTR("filename not defined", procName, NULL);
|
||
|
|
||
|
if ((fp = fopenReadStream(filename)) == NULL)
|
||
|
return (l_uint8 *)ERROR_PTR("file stream not opened", procName, NULL);
|
||
|
data = l_binaryReadSelectStream(fp, start, nbytes, pnread);
|
||
|
fclose(fp);
|
||
|
return data;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief l_binaryReadSelectStream()
|
||
|
*
|
||
|
* \param[in] fp file stream
|
||
|
* \param[in] start first byte to read
|
||
|
* \param[in] nbytes number of bytes to read; use 0 to read to end of file
|
||
|
* \param[out] pnread number of bytes actually read
|
||
|
* \return null-terminated array, or NULL on error; reading 0 bytes
|
||
|
* is not an error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) The returned array is terminated with a null byte so that it can
|
||
|
* be used to read ascii data from a file into a proper C string.
|
||
|
* If the file to be read is empty and %start == 0, an array
|
||
|
* with a single null byte is returned.
|
||
|
* (2) Side effect: the stream pointer is re-positioned to the
|
||
|
* beginning of the file.
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_uint8 *
|
||
|
l_binaryReadSelectStream(FILE *fp,
|
||
|
size_t start,
|
||
|
size_t nbytes,
|
||
|
size_t *pnread)
|
||
|
{
|
||
|
l_uint8 *data;
|
||
|
size_t bytesleft, bytestoread, nread, filebytes;
|
||
|
|
||
|
PROCNAME("l_binaryReadSelectStream");
|
||
|
|
||
|
if (!pnread)
|
||
|
return (l_uint8 *)ERROR_PTR("&nread not defined", procName, NULL);
|
||
|
*pnread = 0;
|
||
|
if (!fp)
|
||
|
return (l_uint8 *)ERROR_PTR("stream not defined", procName, NULL);
|
||
|
|
||
|
/* Verify and adjust the parameters if necessary */
|
||
|
fseek(fp, 0, SEEK_END); /* EOF */
|
||
|
filebytes = ftell(fp);
|
||
|
fseek(fp, 0, SEEK_SET);
|
||
|
if (start > filebytes) {
|
||
|
L_ERROR("start = %zu but filebytes = %zu\n", procName,
|
||
|
start, filebytes);
|
||
|
return NULL;
|
||
|
}
|
||
|
if (filebytes == 0) /* start == 0; nothing to read; return null byte */
|
||
|
return (l_uint8 *)LEPT_CALLOC(1, 1);
|
||
|
bytesleft = filebytes - start; /* greater than 0 */
|
||
|
if (nbytes == 0) nbytes = bytesleft;
|
||
|
bytestoread = (bytesleft >= nbytes) ? nbytes : bytesleft;
|
||
|
|
||
|
/* Read the data */
|
||
|
if ((data = (l_uint8 *)LEPT_CALLOC(1, bytestoread + 1)) == NULL)
|
||
|
return (l_uint8 *)ERROR_PTR("calloc fail for data", procName, NULL);
|
||
|
fseek(fp, start, SEEK_SET);
|
||
|
nread = fread(data, 1, bytestoread, fp);
|
||
|
if (nbytes != nread)
|
||
|
L_INFO("%zu bytes requested; %zu bytes read\n", procName,
|
||
|
nbytes, nread);
|
||
|
*pnread = nread;
|
||
|
fseek(fp, 0, SEEK_SET);
|
||
|
return data;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief l_binaryWrite()
|
||
|
*
|
||
|
* \param[in] filename output file
|
||
|
* \param[in] operation "w" for write; "a" for append
|
||
|
* \param[in] data binary data to be written
|
||
|
* \param[in] nbytes size of data array
|
||
|
* \return 0 if OK; 1 on error
|
||
|
*/
|
||
|
l_ok
|
||
|
l_binaryWrite(const char *filename,
|
||
|
const char *operation,
|
||
|
const void *data,
|
||
|
size_t nbytes)
|
||
|
{
|
||
|
char actualOperation[20];
|
||
|
FILE *fp;
|
||
|
|
||
|
PROCNAME("l_binaryWrite");
|
||
|
|
||
|
if (!filename)
|
||
|
return ERROR_INT("filename not defined", procName, 1);
|
||
|
if (!operation)
|
||
|
return ERROR_INT("operation not defined", procName, 1);
|
||
|
if (!data)
|
||
|
return ERROR_INT("data not defined", procName, 1);
|
||
|
if (nbytes <= 0)
|
||
|
return ERROR_INT("nbytes must be > 0", procName, 1);
|
||
|
|
||
|
if (strcmp(operation, "w") && strcmp(operation, "a"))
|
||
|
return ERROR_INT("operation not one of {'w','a'}", procName, 1);
|
||
|
|
||
|
/* The 'b' flag to fopen() is ignored for all POSIX
|
||
|
* conforming systems. However, Windows needs the 'b' flag. */
|
||
|
stringCopy(actualOperation, operation, 2);
|
||
|
strncat(actualOperation, "b", 2);
|
||
|
|
||
|
if ((fp = fopenWriteStream(filename, actualOperation)) == NULL)
|
||
|
return ERROR_INT("stream not opened", procName, 1);
|
||
|
fwrite(data, 1, nbytes, fp);
|
||
|
fclose(fp);
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief nbytesInFile()
|
||
|
*
|
||
|
* \param[in] filename
|
||
|
* \return nbytes in file; 0 on error
|
||
|
*/
|
||
|
size_t
|
||
|
nbytesInFile(const char *filename)
|
||
|
{
|
||
|
size_t nbytes;
|
||
|
FILE *fp;
|
||
|
|
||
|
PROCNAME("nbytesInFile");
|
||
|
|
||
|
if (!filename)
|
||
|
return ERROR_INT("filename not defined", procName, 0);
|
||
|
if ((fp = fopenReadStream(filename)) == NULL)
|
||
|
return ERROR_INT("stream not opened", procName, 0);
|
||
|
nbytes = fnbytesInFile(fp);
|
||
|
fclose(fp);
|
||
|
return nbytes;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief fnbytesInFile()
|
||
|
*
|
||
|
* \param[in] fp file stream
|
||
|
* \return nbytes in file; 0 on error
|
||
|
*/
|
||
|
size_t
|
||
|
fnbytesInFile(FILE *fp)
|
||
|
{
|
||
|
l_int64 pos, nbytes;
|
||
|
|
||
|
PROCNAME("fnbytesInFile");
|
||
|
|
||
|
if (!fp)
|
||
|
return ERROR_INT("stream not open", procName, 0);
|
||
|
|
||
|
pos = ftell(fp); /* initial position */
|
||
|
if (pos < 0)
|
||
|
return ERROR_INT("seek position must be > 0", procName, 0);
|
||
|
fseek(fp, 0, SEEK_END); /* EOF */
|
||
|
nbytes = ftell(fp);
|
||
|
fseek(fp, pos, SEEK_SET); /* back to initial position */
|
||
|
return nbytes;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*--------------------------------------------------------------------*
|
||
|
* Copy and compare in memory *
|
||
|
*--------------------------------------------------------------------*/
|
||
|
/*!
|
||
|
* \brief l_binaryCopy()
|
||
|
*
|
||
|
* \param[in] datas
|
||
|
* \param[in] size of data array
|
||
|
* \return datad on heap, or NULL on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) We add 4 bytes to the zeroed output because in some cases
|
||
|
* (e.g., string handling) it is important to have the data
|
||
|
* be null terminated. This guarantees that after the memcpy,
|
||
|
* the result is automatically null terminated.
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_uint8 *
|
||
|
l_binaryCopy(const l_uint8 *datas,
|
||
|
size_t size)
|
||
|
{
|
||
|
l_uint8 *datad;
|
||
|
|
||
|
PROCNAME("l_binaryCopy");
|
||
|
|
||
|
if (!datas)
|
||
|
return (l_uint8 *)ERROR_PTR("datas not defined", procName, NULL);
|
||
|
|
||
|
if ((datad = (l_uint8 *)LEPT_CALLOC(size + 4, sizeof(l_uint8))) == NULL)
|
||
|
return (l_uint8 *)ERROR_PTR("datad not made", procName, NULL);
|
||
|
memcpy(datad, datas, size);
|
||
|
return datad;
|
||
|
}
|
||
|
|
||
|
|
||
|
l_ok
|
||
|
l_binaryCompare(const l_uint8 *data1,
|
||
|
size_t size1,
|
||
|
const l_uint8 *data2,
|
||
|
size_t size2,
|
||
|
l_int32 *psame)
|
||
|
{
|
||
|
l_int32 i;
|
||
|
|
||
|
PROCNAME("l_binaryCompare");
|
||
|
|
||
|
if (!psame)
|
||
|
return ERROR_INT("&same not defined", procName, 1);
|
||
|
*psame = FALSE;
|
||
|
if (!data1 || !data2)
|
||
|
return ERROR_INT("data1 and data2 not both defined", procName, 1);
|
||
|
if (size1 != size2) return 0;
|
||
|
for (i = 0; i < size1; i++) {
|
||
|
if (data1[i] != data2[i])
|
||
|
return 0;
|
||
|
}
|
||
|
*psame = TRUE;
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
/*--------------------------------------------------------------------*
|
||
|
* File copy operations *
|
||
|
*--------------------------------------------------------------------*/
|
||
|
/*!
|
||
|
* \brief fileCopy()
|
||
|
*
|
||
|
* \param[in] srcfile copy from this file
|
||
|
* \param[in] newfile copy to this file
|
||
|
* \return 0 if OK, 1 on error
|
||
|
*/
|
||
|
l_ok
|
||
|
fileCopy(const char *srcfile,
|
||
|
const char *newfile)
|
||
|
{
|
||
|
l_int32 ret;
|
||
|
size_t nbytes;
|
||
|
l_uint8 *data;
|
||
|
|
||
|
PROCNAME("fileCopy");
|
||
|
|
||
|
if (!srcfile)
|
||
|
return ERROR_INT("srcfile not defined", procName, 1);
|
||
|
if (!newfile)
|
||
|
return ERROR_INT("newfile not defined", procName, 1);
|
||
|
|
||
|
if ((data = l_binaryRead(srcfile, &nbytes)) == NULL)
|
||
|
return ERROR_INT("data not returned", procName, 1);
|
||
|
ret = l_binaryWrite(newfile, "w", data, nbytes);
|
||
|
LEPT_FREE(data);
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief fileConcatenate()
|
||
|
*
|
||
|
* \param[in] srcfile append data from this file
|
||
|
* \param[in] destfile add data to this file
|
||
|
* \return 0 if OK, 1 on error
|
||
|
*/
|
||
|
l_ok
|
||
|
fileConcatenate(const char *srcfile,
|
||
|
const char *destfile)
|
||
|
{
|
||
|
size_t nbytes;
|
||
|
l_uint8 *data;
|
||
|
|
||
|
PROCNAME("fileConcatenate");
|
||
|
|
||
|
if (!srcfile)
|
||
|
return ERROR_INT("srcfile not defined", procName, 1);
|
||
|
if (!destfile)
|
||
|
return ERROR_INT("destfile not defined", procName, 1);
|
||
|
|
||
|
data = l_binaryRead(srcfile, &nbytes);
|
||
|
l_binaryWrite(destfile, "a", data, nbytes);
|
||
|
LEPT_FREE(data);
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief fileAppendString()
|
||
|
*
|
||
|
* \param[in] filename
|
||
|
* \param[in] str string to append to file
|
||
|
* \return 0 if OK, 1 on error
|
||
|
*/
|
||
|
l_ok
|
||
|
fileAppendString(const char *filename,
|
||
|
const char *str)
|
||
|
{
|
||
|
FILE *fp;
|
||
|
|
||
|
PROCNAME("fileAppendString");
|
||
|
|
||
|
if (!filename)
|
||
|
return ERROR_INT("filename not defined", procName, 1);
|
||
|
if (!str)
|
||
|
return ERROR_INT("str not defined", procName, 1);
|
||
|
|
||
|
if ((fp = fopenWriteStream(filename, "a")) == NULL)
|
||
|
return ERROR_INT("stream not opened", procName, 1);
|
||
|
fprintf(fp, "%s", str);
|
||
|
fclose(fp);
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*--------------------------------------------------------------------*
|
||
|
* Multi-platform functions for opening file streams *
|
||
|
*--------------------------------------------------------------------*/
|
||
|
/*!
|
||
|
* \brief fopenReadStream()
|
||
|
*
|
||
|
* \param[in] filename
|
||
|
* \return stream, or NULL on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) This should be used whenever you want to run fopen() to
|
||
|
* read from a stream. Never call fopen() directory.
|
||
|
* (2) This handles the temp directory pathname conversion on windows:
|
||
|
* /tmp ==> [Windows Temp directory]
|
||
|
* </pre>
|
||
|
*/
|
||
|
FILE *
|
||
|
fopenReadStream(const char *filename)
|
||
|
{
|
||
|
char *fname, *tail;
|
||
|
FILE *fp;
|
||
|
|
||
|
PROCNAME("fopenReadStream");
|
||
|
|
||
|
if (!filename)
|
||
|
return (FILE *)ERROR_PTR("filename not defined", procName, NULL);
|
||
|
|
||
|
/* Try input filename */
|
||
|
fname = genPathname(filename, NULL);
|
||
|
fp = fopen(fname, "rb");
|
||
|
LEPT_FREE(fname);
|
||
|
if (fp) return fp;
|
||
|
|
||
|
/* Else, strip directory and try locally */
|
||
|
splitPathAtDirectory(filename, NULL, &tail);
|
||
|
fp = fopen(tail, "rb");
|
||
|
LEPT_FREE(tail);
|
||
|
|
||
|
if (!fp)
|
||
|
return (FILE *)ERROR_PTR("file not found", procName, NULL);
|
||
|
return fp;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief fopenWriteStream()
|
||
|
*
|
||
|
* \param[in] filename
|
||
|
* \param[in] modestring
|
||
|
* \return stream, or NULL on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) This should be used whenever you want to run fopen() to
|
||
|
* write or append to a stream. Never call fopen() directory.
|
||
|
* (2) This handles the temp directory pathname conversion on windows:
|
||
|
* /tmp ==> [Windows Temp directory]
|
||
|
* </pre>
|
||
|
*/
|
||
|
FILE *
|
||
|
fopenWriteStream(const char *filename,
|
||
|
const char *modestring)
|
||
|
{
|
||
|
char *fname;
|
||
|
FILE *fp;
|
||
|
|
||
|
PROCNAME("fopenWriteStream");
|
||
|
|
||
|
if (!filename)
|
||
|
return (FILE *)ERROR_PTR("filename not defined", procName, NULL);
|
||
|
|
||
|
fname = genPathname(filename, NULL);
|
||
|
fp = fopen(fname, modestring);
|
||
|
LEPT_FREE(fname);
|
||
|
if (!fp)
|
||
|
return (FILE *)ERROR_PTR("stream not opened", procName, NULL);
|
||
|
return fp;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief fopenReadFromMemory()
|
||
|
*
|
||
|
* \param[in] data, size
|
||
|
* \return file stream, or NULL on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) Work-around if fmemopen() not available.
|
||
|
* (2) Windows tmpfile() writes into the root C:\ directory, which
|
||
|
* requires admin privileges. This also works around that.
|
||
|
* </pre>
|
||
|
*/
|
||
|
FILE *
|
||
|
fopenReadFromMemory(const l_uint8 *data,
|
||
|
size_t size)
|
||
|
{
|
||
|
FILE *fp;
|
||
|
|
||
|
PROCNAME("fopenReadFromMemory");
|
||
|
|
||
|
if (!data)
|
||
|
return (FILE *)ERROR_PTR("data not defined", procName, NULL);
|
||
|
|
||
|
#if HAVE_FMEMOPEN
|
||
|
if ((fp = fmemopen((void *)data, size, "rb")) == NULL)
|
||
|
return (FILE *)ERROR_PTR("stream not opened", procName, NULL);
|
||
|
#else /* write to tmp file */
|
||
|
L_INFO("work-around: writing to a temp file\n", procName);
|
||
|
#ifdef _WIN32
|
||
|
if ((fp = fopenWriteWinTempfile()) == NULL)
|
||
|
return (FILE *)ERROR_PTR("tmpfile stream not opened", procName, NULL);
|
||
|
#else
|
||
|
if ((fp = tmpfile()) == NULL)
|
||
|
return (FILE *)ERROR_PTR("tmpfile stream not opened", procName, NULL);
|
||
|
#endif /* _WIN32 */
|
||
|
fwrite(data, 1, size, fp);
|
||
|
rewind(fp);
|
||
|
#endif /* HAVE_FMEMOPEN */
|
||
|
|
||
|
return fp;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*--------------------------------------------------------------------*
|
||
|
* Opening a windows tmpfile for writing *
|
||
|
*--------------------------------------------------------------------*/
|
||
|
/*!
 * \brief   fopenWriteWinTempfile()
 *
 * \return  file stream, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) The Windows version of tmpfile() writes into the root
 *          C:\ directory, which requires admin privileges.  This
 *          function provides an alternative implementation.
 *      (2) The stream is opened for both reading and writing, in
 *          binary mode, on a file that is created with the
 *          _O_TEMPORARY flag.
 *      (3) On platforms other than Windows, this always returns NULL.
 * </pre>
 */
FILE *
fopenWriteWinTempfile(void)
{
#ifdef _WIN32
l_int32  handle;
FILE    *fp;
char    *filename;

    PROCNAME("fopenWriteWinTempfile");

    if ((filename = l_makeTempFilename()) == NULL) {
        L_ERROR("l_makeTempFilename failed, %s\n", procName, strerror(errno));
        return NULL;
    }

    handle = _open(filename, _O_CREAT | _O_RDWR | _O_SHORT_LIVED |
                   _O_TEMPORARY | _O_BINARY, _S_IREAD | _S_IWRITE);
    lept_free(filename);
    if (handle == -1) {
        L_ERROR("_open failed, %s\n", procName, strerror(errno));
        return NULL;
    }

    if ((fp = _fdopen(handle, "r+b")) == NULL) {
        L_ERROR("_fdopen failed, %s\n", procName, strerror(errno));
        _close(handle);  /* no stream owns the descriptor; don't leak it */
        return NULL;
    }

    return fp;
#else
    return NULL;
#endif  /*  _WIN32 */
}
|
||
|
|
||
|
|
||
|
/*--------------------------------------------------------------------*
|
||
|
* Multi-platform functions that avoid C-runtime boundary *
|
||
|
* crossing for applications with Windows DLLs *
|
||
|
*--------------------------------------------------------------------*/
|
||
|
/*
|
||
|
* Problems arise when pointers to streams and data are passed
|
||
|
* between two Windows DLLs that have been generated with different
|
||
|
* C runtimes. To avoid this, leptonica provides wrappers for
|
||
|
* several C library calls.
|
||
|
*/
|
||
|
/*!
|
||
|
* \brief lept_fopen()
|
||
|
*
|
||
|
* \param[in] filename
|
||
|
* \param[in] mode same as for fopen(); e.g., "rb"
|
||
|
* \return stream or NULL on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) This must be used by any application that passes
|
||
|
* a file handle to a leptonica Windows DLL.
|
||
|
* </pre>
|
||
|
*/
|
||
|
FILE *
|
||
|
lept_fopen(const char *filename,
|
||
|
const char *mode)
|
||
|
{
|
||
|
PROCNAME("lept_fopen");
|
||
|
|
||
|
if (!filename)
|
||
|
return (FILE *)ERROR_PTR("filename not defined", procName, NULL);
|
||
|
if (!mode)
|
||
|
return (FILE *)ERROR_PTR("mode not defined", procName, NULL);
|
||
|
|
||
|
if (stringFindSubstr(mode, "r", NULL))
|
||
|
return fopenReadStream(filename);
|
||
|
else
|
||
|
return fopenWriteStream(filename, mode);
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief lept_fclose()
|
||
|
*
|
||
|
* \param[in] fp file stream
|
||
|
* \return 0 if OK; 1 if %fp is NULL; otherwise the nonzero result of fclose() (EOF) on failure
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) This should be used by any application that accepts
|
||
|
* a file handle generated by a leptonica Windows DLL.
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_ok
|
||
|
lept_fclose(FILE *fp)
|
||
|
{
|
||
|
PROCNAME("lept_fclose");
|
||
|
|
||
|
if (!fp)
|
||
|
return ERROR_INT("stream not defined", procName, 1);
|
||
|
|
||
|
return fclose(fp);
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief lept_calloc()
|
||
|
*
|
||
|
* \param[in] nmemb number of members
|
||
|
* \param[in] size of each member
|
||
|
* \return void ptr, or NULL on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) For safety with windows DLLs, this can be used in conjunction
|
||
|
* with lept_free() to avoid C-runtime boundary problems.
|
||
|
* Just use these two functions throughout your application.
|
||
|
* </pre>
|
||
|
*/
|
||
|
void *
lept_calloc(size_t  nmemb,
            size_t  size)
{
        /* Both counts are unsigned, so the only rejected value is zero */
    if (nmemb == 0 || size == 0)
        return NULL;

    return LEPT_CALLOC(nmemb, size);
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief lept_free()
|
||
|
*
|
||
|
* \param[in] ptr
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) This should be used by any application that accepts
|
||
|
* heap data allocated by a leptonica Windows DLL.
|
||
|
* </pre>
|
||
|
*/
|
||
|
void
lept_free(void *ptr)
{
        /* A null pointer is silently ignored */
    if (ptr)
        LEPT_FREE(ptr);
}
|
||
|
|
||
|
|
||
|
/*--------------------------------------------------------------------*
|
||
|
* Multi-platform file system operations *
|
||
|
* [ These only write to /tmp or its subdirectories ] *
|
||
|
*--------------------------------------------------------------------*/
|
||
|
/*!
|
||
|
* \brief lept_mkdir()
|
||
|
*
|
||
|
* \param[in] subdir of /tmp or its equivalent on Windows
|
||
|
* \return 0 on success, non-zero on failure
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) %subdir is a partial path that can consist of one or more
|
||
|
* directories.
|
||
|
* (2) This makes any subdirectories of /tmp that are required.
|
||
|
* (3) The root temp directory is:
|
||
|
* /tmp (unix) [default]
|
||
|
* [Temp] (windows)
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_int32
|
||
|
lept_mkdir(const char *subdir)
|
||
|
{
|
||
|
char *dir, *tmpdir;
|
||
|
l_int32 i, n;
|
||
|
l_int32 ret = 0;
|
||
|
SARRAY *sa;
|
||
|
#ifdef _WIN32
|
||
|
l_uint32 attributes;
|
||
|
#endif /* _WIN32 */
|
||
|
|
||
|
PROCNAME("lept_mkdir");
|
||
|
|
||
|
if (!LeptDebugOK) {
|
||
|
L_INFO("making named temp subdirectory %s is disabled\n",
|
||
|
procName, subdir);
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
if (!subdir)
|
||
|
return ERROR_INT("subdir not defined", procName, 1);
|
||
|
if ((strlen(subdir) == 0) || (subdir[0] == '.') || (subdir[0] == '/'))
|
||
|
return ERROR_INT("subdir not an actual subdirectory", procName, 1);
|
||
|
|
||
|
sa = sarrayCreate(0);
|
||
|
sarraySplitString(sa, subdir, "/");
|
||
|
n = sarrayGetCount(sa);
|
||
|
dir = genPathname("/tmp", NULL);
|
||
|
/* Make sure the tmp directory exists */
|
||
|
#ifndef _WIN32
|
||
|
ret = mkdir(dir, 0777);
|
||
|
#else
|
||
|
attributes = GetFileAttributes(dir);
|
||
|
if (attributes == INVALID_FILE_ATTRIBUTES)
|
||
|
ret = (CreateDirectory(dir, NULL) ? 0 : 1);
|
||
|
#endif
|
||
|
/* Make all the subdirectories */
|
||
|
for (i = 0; i < n; i++) {
|
||
|
tmpdir = pathJoin(dir, sarrayGetString(sa, i, L_NOCOPY));
|
||
|
#ifndef _WIN32
|
||
|
ret += mkdir(tmpdir, 0777);
|
||
|
#else
|
||
|
if (CreateDirectory(tmpdir, NULL) == 0)
|
||
|
ret += (GetLastError () != ERROR_ALREADY_EXISTS);
|
||
|
#endif
|
||
|
LEPT_FREE(dir);
|
||
|
dir = tmpdir;
|
||
|
}
|
||
|
LEPT_FREE(dir);
|
||
|
sarrayDestroy(&sa);
|
||
|
if (ret > 0)
|
||
|
L_ERROR("failure to create %d directories\n", procName, ret);
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief lept_rmdir()
|
||
|
*
|
||
|
* \param[in] subdir of /tmp or its equivalent on Windows
|
||
|
* \return 0 on success, non-zero on failure
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) %subdir is a partial path that can consist of one or more
|
||
|
* directories.
|
||
|
* (2) This removes all files from the specified subdirectory of
|
||
|
* the root temp directory:
|
||
|
* /tmp (unix)
|
||
|
* [Temp] (windows)
|
||
|
* and then removes the subdirectory.
|
||
|
* (3) The combination
|
||
|
* lept_rmdir(subdir);
|
||
|
* lept_mkdir(subdir);
|
||
|
* is guaranteed to give you an empty subdirectory.
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_int32
|
||
|
lept_rmdir(const char *subdir)
|
||
|
{
|
||
|
char *dir, *realdir, *fname, *fullname;
|
||
|
l_int32 exists, ret, i, nfiles;
|
||
|
SARRAY *sa;
|
||
|
#ifdef _WIN32
|
||
|
char *newpath;
|
||
|
#endif /* _WIN32 */
|
||
|
|
||
|
PROCNAME("lept_rmdir");
|
||
|
|
||
|
if (!subdir)
|
||
|
return ERROR_INT("subdir not defined", procName, 1);
|
||
|
if ((strlen(subdir) == 0) || (subdir[0] == '.') || (subdir[0] == '/'))
|
||
|
return ERROR_INT("subdir not an actual subdirectory", procName, 1);
|
||
|
|
||
|
/* Find the temp subdirectory */
|
||
|
dir = pathJoin("/tmp", subdir);
|
||
|
if (!dir)
|
||
|
return ERROR_INT("directory name not made", procName, 1);
|
||
|
lept_direxists(dir, &exists);
|
||
|
if (!exists) { /* fail silently */
|
||
|
LEPT_FREE(dir);
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
/* List all the files in that directory */
|
||
|
if ((sa = getFilenamesInDirectory(dir)) == NULL) {
|
||
|
L_ERROR("directory %s does not exist!\n", procName, dir);
|
||
|
LEPT_FREE(dir);
|
||
|
return 1;
|
||
|
}
|
||
|
nfiles = sarrayGetCount(sa);
|
||
|
|
||
|
for (i = 0; i < nfiles; i++) {
|
||
|
fname = sarrayGetString(sa, i, L_NOCOPY);
|
||
|
fullname = genPathname(dir, fname);
|
||
|
remove(fullname);
|
||
|
LEPT_FREE(fullname);
|
||
|
}
|
||
|
|
||
|
#ifndef _WIN32
|
||
|
realdir = genPathname("/tmp", subdir);
|
||
|
ret = rmdir(realdir);
|
||
|
LEPT_FREE(realdir);
|
||
|
#else
|
||
|
newpath = genPathname(dir, NULL);
|
||
|
ret = (RemoveDirectory(newpath) ? 0 : 1);
|
||
|
LEPT_FREE(newpath);
|
||
|
#endif /* !_WIN32 */
|
||
|
|
||
|
sarrayDestroy(&sa);
|
||
|
LEPT_FREE(dir);
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief lept_direxists()
|
||
|
*
|
||
|
* \param[in] dir
|
||
|
* \param[out] pexists 1 if it exists; 0 otherwise
|
||
|
* \return void
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) Always use unix pathname separators.
|
||
|
* (2) By calling genPathname(), if the pathname begins with "/tmp"
|
||
|
* this does an automatic directory translation on windows
|
||
|
* to a path in the windows [Temp] directory:
|
||
|
* "/tmp" ==> [Temp] (windows)
|
||
|
* </pre>
|
||
|
*/
|
||
|
void
|
||
|
lept_direxists(const char *dir,
|
||
|
l_int32 *pexists)
|
||
|
{
|
||
|
char *realdir;
|
||
|
|
||
|
if (!pexists) return;
|
||
|
*pexists = 0;
|
||
|
if (!dir) return;
|
||
|
if ((realdir = genPathname(dir, NULL)) == NULL)
|
||
|
return;
|
||
|
|
||
|
#ifndef _WIN32
|
||
|
{
|
||
|
struct stat s;
|
||
|
l_int32 err = stat(realdir, &s);
|
||
|
if (err != -1 && S_ISDIR(s.st_mode))
|
||
|
*pexists = 1;
|
||
|
}
|
||
|
#else /* _WIN32 */
|
||
|
l_uint32 attributes;
|
||
|
attributes = GetFileAttributes(realdir);
|
||
|
if (attributes != INVALID_FILE_ATTRIBUTES &&
|
||
|
(attributes & FILE_ATTRIBUTE_DIRECTORY)) {
|
||
|
*pexists = 1;
|
||
|
}
|
||
|
#endif /* _WIN32 */
|
||
|
|
||
|
LEPT_FREE(realdir);
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief lept_rm_match()
|
||
|
*
|
||
|
* \param[in] subdir [optional] if NULL, the removed files are in /tmp
|
||
|
* \param[in] substr [optional] pattern to match in filename
|
||
|
* \return 0 on success, non-zero on failure
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) This removes the matched files in /tmp or a subdirectory of /tmp.
|
||
|
* Use NULL for %subdir if the files are in /tmp.
|
||
|
* (2) If %substr == NULL, this removes all files in the directory.
|
||
|
* If %substr == "" (empty), this removes no files.
|
||
|
* If both %subdir == NULL and %substr == NULL, this removes
|
||
|
* all files in /tmp.
|
||
|
* (3) Use unix pathname separators.
|
||
|
* (4) By calling genPathname(), if the pathname begins with "/tmp"
|
||
|
* this does an automatic directory translation on windows
|
||
|
* to a path in the windows [Temp] directory:
|
||
|
* "/tmp" ==> [Temp] (windows)
|
||
|
* (5) Error conditions:
|
||
|
* * returns -1 if the directory is not found
|
||
|
* * returns the number of files (> 0) that it was unable to remove.
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_int32
|
||
|
lept_rm_match(const char *subdir,
|
||
|
const char *substr)
|
||
|
{
|
||
|
char *path, *fname;
|
||
|
char tempdir[256];
|
||
|
l_int32 i, n, ret;
|
||
|
SARRAY *sa;
|
||
|
|
||
|
PROCNAME("lept_rm_match");
|
||
|
|
||
|
makeTempDirname(tempdir, sizeof(tempdir), subdir);
|
||
|
if ((sa = getSortedPathnamesInDirectory(tempdir, substr, 0, 0)) == NULL)
|
||
|
return ERROR_INT("sa not made", procName, -1);
|
||
|
n = sarrayGetCount(sa);
|
||
|
if (n == 0) {
|
||
|
L_WARNING("no matching files found\n", procName);
|
||
|
sarrayDestroy(&sa);
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
ret = 0;
|
||
|
for (i = 0; i < n; i++) {
|
||
|
fname = sarrayGetString(sa, i, L_NOCOPY);
|
||
|
path = genPathname(fname, NULL);
|
||
|
if (lept_rmfile(path) != 0) {
|
||
|
L_ERROR("failed to remove %s\n", procName, path);
|
||
|
ret++;
|
||
|
}
|
||
|
LEPT_FREE(path);
|
||
|
}
|
||
|
sarrayDestroy(&sa);
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief lept_rm()
|
||
|
*
|
||
|
* \param[in] subdir [optional] subdir of '/tmp'; can be NULL
|
||
|
* \param[in] tail filename without the directory
|
||
|
* \return 0 on success, non-zero on failure
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) By calling genPathname(), this does an automatic directory
|
||
|
* translation on windows to a path in the windows [Temp] directory:
|
||
|
* "/tmp/..." ==> [Temp]/... (windows)
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_int32
|
||
|
lept_rm(const char *subdir,
|
||
|
const char *tail)
|
||
|
{
|
||
|
char *path;
|
||
|
char newtemp[256];
|
||
|
l_int32 ret;
|
||
|
|
||
|
PROCNAME("lept_rm");
|
||
|
|
||
|
if (!tail || strlen(tail) == 0)
|
||
|
return ERROR_INT("tail undefined or empty", procName, 1);
|
||
|
|
||
|
if (makeTempDirname(newtemp, sizeof(newtemp), subdir))
|
||
|
return ERROR_INT("temp dirname not made", procName, 1);
|
||
|
path = genPathname(newtemp, tail);
|
||
|
ret = lept_rmfile(path);
|
||
|
LEPT_FREE(path);
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief lept_rmfile()
|
||
|
*
|
||
|
* \param[in] filepath full path to file including the directory
|
||
|
* \return 0 on success, non-zero on failure
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) This removes the named file.
|
||
|
* (2) Use unix pathname separators.
|
||
|
* (3) There is no name translation.
|
||
|
* (4) Unlike the other lept_* functions in this section, this can remove
|
||
|
* any file -- it is not restricted to files that are in /tmp or a
|
||
|
* subdirectory of it.
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_int32
|
||
|
lept_rmfile(const char *filepath)
|
||
|
{
|
||
|
l_int32 ret;
|
||
|
|
||
|
PROCNAME("lept_rmfile");
|
||
|
|
||
|
if (!filepath || strlen(filepath) == 0)
|
||
|
return ERROR_INT("filepath undefined or empty", procName, 1);
|
||
|
|
||
|
#ifndef _WIN32
|
||
|
ret = remove(filepath);
|
||
|
#else
|
||
|
/* Set attributes to allow deletion of read-only files */
|
||
|
SetFileAttributes(filepath, FILE_ATTRIBUTE_NORMAL);
|
||
|
ret = DeleteFile(filepath) ? 0 : 1;
|
||
|
#endif /* !_WIN32 */
|
||
|
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief lept_mv()
|
||
|
*
|
||
|
* \param[in] srcfile
|
||
|
* \param[in] newdir [optional]; can be NULL
|
||
|
* \param[in] newtail [optional]; can be NULL
|
||
|
* \param[out] pnewpath [optional] of actual path; can be NULL
|
||
|
* \return 0 on success, non-zero on failure
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) This moves %srcfile to /tmp or to a subdirectory of /tmp.
|
||
|
* (2) %srcfile can either be a full path or relative to the
|
||
|
* current directory.
|
||
|
* (3) %newdir can either specify an existing subdirectory of /tmp
|
||
|
* or can be NULL. In the latter case, the file will be written
|
||
|
* into /tmp.
|
||
|
* (4) %newtail can either specify a filename tail or, if NULL,
|
||
|
* the filename is taken from src-tail, the tail of %srcfile.
|
||
|
* (5) For debugging, the computed newpath can be returned. It must
|
||
|
* be freed by the caller.
|
||
|
* (6) Reminders:
|
||
|
* (a) specify files using unix pathnames
|
||
|
* (b) for windows, translates
|
||
|
* /tmp ==> [Temp]
|
||
|
* where [Temp] is the windows temp directory
|
||
|
* (7) Examples:
|
||
|
* * newdir = NULL, newtail = NULL ==> /tmp/src-tail
|
||
|
* * newdir = NULL, newtail = abc ==> /tmp/abc
|
||
|
* * newdir = def/ghi, newtail = NULL ==> /tmp/def/ghi/src-tail
|
||
|
* * newdir = def/ghi, newtail = abc ==> /tmp/def/ghi/abc
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_int32
|
||
|
lept_mv(const char *srcfile,
|
||
|
const char *newdir,
|
||
|
const char *newtail,
|
||
|
char **pnewpath)
|
||
|
{
|
||
|
char *srcpath, *newpath, *dir, *srctail;
|
||
|
char newtemp[256];
|
||
|
l_int32 ret;
|
||
|
|
||
|
PROCNAME("lept_mv");
|
||
|
|
||
|
if (!srcfile)
|
||
|
return ERROR_INT("srcfile not defined", procName, 1);
|
||
|
|
||
|
/* Require output pathname to be in /tmp/ or a subdirectory */
|
||
|
if (makeTempDirname(newtemp, sizeof(newtemp), newdir) == 1)
|
||
|
return ERROR_INT("newdir not NULL or a subdir of /tmp", procName, 1);
|
||
|
|
||
|
/* Get canonical src pathname */
|
||
|
splitPathAtDirectory(srcfile, &dir, &srctail);
|
||
|
|
||
|
#ifndef _WIN32
|
||
|
srcpath = pathJoin(dir, srctail);
|
||
|
LEPT_FREE(dir);
|
||
|
|
||
|
/* Generate output pathname */
|
||
|
if (!newtail || newtail[0] == '\0')
|
||
|
newpath = pathJoin(newtemp, srctail);
|
||
|
else
|
||
|
newpath = pathJoin(newtemp, newtail);
|
||
|
LEPT_FREE(srctail);
|
||
|
|
||
|
/* Overwrite any existing file at 'newpath' */
|
||
|
ret = fileCopy(srcpath, newpath);
|
||
|
if (!ret) { /* and remove srcfile */
|
||
|
char *realpath = genPathname(srcpath, NULL);
|
||
|
remove(realpath);
|
||
|
LEPT_FREE(realpath);
|
||
|
}
|
||
|
#else
|
||
|
srcpath = genPathname(dir, srctail);
|
||
|
LEPT_FREE(dir);
|
||
|
|
||
|
/* Generate output pathname */
|
||
|
if (!newtail || newtail[0] == '\0')
|
||
|
newpath = genPathname(newtemp, srctail);
|
||
|
else
|
||
|
newpath = genPathname(newtemp, newtail);
|
||
|
LEPT_FREE(srctail);
|
||
|
|
||
|
/* Overwrite any existing file at 'newpath' */
|
||
|
ret = MoveFileEx(srcpath, newpath,
|
||
|
MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING) ? 0 : 1;
|
||
|
#endif /* ! _WIN32 */
|
||
|
|
||
|
LEPT_FREE(srcpath);
|
||
|
if (pnewpath)
|
||
|
*pnewpath = newpath;
|
||
|
else
|
||
|
LEPT_FREE(newpath);
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief lept_cp()
|
||
|
*
|
||
|
* \param[in] srcfile
|
||
|
* \param[in] newdir [optional]; can be NULL
|
||
|
* \param[in] newtail [optional]; can be NULL
|
||
|
* \param[out] pnewpath [optional] of actual path; can be NULL
|
||
|
* \return 0 on success, non-zero on failure
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) This copies %srcfile to /tmp or to a subdirectory of /tmp.
|
||
|
* (2) %srcfile can either be a full path or relative to the
|
||
|
* current directory.
|
||
|
* (3) %newdir can either specify an existing subdirectory of /tmp,
|
||
|
* or can be NULL. In the latter case, the file will be written
|
||
|
* into /tmp.
|
||
|
* (4) %newtail can either specify a filename tail or, if NULL,
|
||
|
* the filename is taken from src-tail, the tail of %srcfile.
|
||
|
* (5) For debugging, the computed newpath can be returned. It must
|
||
|
* be freed by the caller.
|
||
|
* (6) Reminders:
|
||
|
* (a) specify files using unix pathnames
|
||
|
* (b) for windows, translates
|
||
|
* /tmp ==> [Temp]
|
||
|
* where [Temp] is the windows temp directory
|
||
|
* (7) Examples:
|
||
|
* * newdir = NULL, newtail = NULL ==> /tmp/src-tail
|
||
|
* * newdir = NULL, newtail = abc ==> /tmp/abc
|
||
|
* * newdir = def/ghi, newtail = NULL ==> /tmp/def/ghi/src-tail
|
||
|
* * newdir = def/ghi, newtail = abc ==> /tmp/def/ghi/abc
|
||
|
*
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_int32
|
||
|
lept_cp(const char *srcfile,
|
||
|
const char *newdir,
|
||
|
const char *newtail,
|
||
|
char **pnewpath)
|
||
|
{
|
||
|
char *srcpath, *newpath, *dir, *srctail;
|
||
|
char newtemp[256];
|
||
|
l_int32 ret;
|
||
|
|
||
|
PROCNAME("lept_cp");
|
||
|
|
||
|
if (!srcfile)
|
||
|
return ERROR_INT("srcfile not defined", procName, 1);
|
||
|
|
||
|
/* Require output pathname to be in /tmp or a subdirectory */
|
||
|
if (makeTempDirname(newtemp, sizeof(newtemp), newdir) == 1)
|
||
|
return ERROR_INT("newdir not NULL or a subdir of /tmp", procName, 1);
|
||
|
|
||
|
/* Get canonical src pathname */
|
||
|
splitPathAtDirectory(srcfile, &dir, &srctail);
|
||
|
|
||
|
#ifndef _WIN32
|
||
|
srcpath = pathJoin(dir, srctail);
|
||
|
LEPT_FREE(dir);
|
||
|
|
||
|
/* Generate output pathname */
|
||
|
if (!newtail || newtail[0] == '\0')
|
||
|
newpath = pathJoin(newtemp, srctail);
|
||
|
else
|
||
|
newpath = pathJoin(newtemp, newtail);
|
||
|
LEPT_FREE(srctail);
|
||
|
|
||
|
/* Overwrite any existing file at 'newpath' */
|
||
|
ret = fileCopy(srcpath, newpath);
|
||
|
#else
|
||
|
srcpath = genPathname(dir, srctail);
|
||
|
LEPT_FREE(dir);
|
||
|
|
||
|
/* Generate output pathname */
|
||
|
if (!newtail || newtail[0] == '\0')
|
||
|
newpath = genPathname(newtemp, srctail);
|
||
|
else
|
||
|
newpath = genPathname(newtemp, newtail);
|
||
|
LEPT_FREE(srctail);
|
||
|
|
||
|
/* Overwrite any existing file at 'newpath' */
|
||
|
ret = CopyFile(srcpath, newpath, FALSE) ? 0 : 1;
|
||
|
#endif /* !_WIN32 */
|
||
|
|
||
|
LEPT_FREE(srcpath);
|
||
|
if (pnewpath)
|
||
|
*pnewpath = newpath;
|
||
|
else
|
||
|
LEPT_FREE(newpath);
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*--------------------------------------------------------------------*
|
||
|
* Special debug/test function for calling 'system' *
|
||
|
*--------------------------------------------------------------------*/
|
||
|
#if defined(__APPLE__)
|
||
|
#include "TargetConditionals.h"
|
||
|
#endif /* __APPLE__ */
|
||
|
|
||
|
/*!
|
||
|
* \brief callSystemDebug()
|
||
|
*
|
||
|
* \param[in] cmd command to be exec'd
|
||
|
* \return void
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) The C library 'system' call is only made through this function.
|
||
|
* It only works in debug/test mode, where the global variable
|
||
|
* LeptDebugOK == TRUE. This variable is set to FALSE in the
|
||
|
* library as distributed, and calling this function will
|
||
|
* generate an error message.
|
||
|
* </pre>
|
||
|
*/
|
||
|
void
|
||
|
callSystemDebug(const char *cmd)
|
||
|
{
|
||
|
l_int32 ret;
|
||
|
|
||
|
PROCNAME("callSystemDebug");
|
||
|
|
||
|
if (!cmd) {
|
||
|
L_ERROR("cmd not defined\n", procName);
|
||
|
return;
|
||
|
}
|
||
|
if (LeptDebugOK == FALSE) {
|
||
|
L_INFO("'system' calls are disabled\n", procName);
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
#if defined(__APPLE__) /* iOS 11 does not support system() */
|
||
|
|
||
|
#if TARGET_OS_OSX /* Mac OS X */
|
||
|
ret = system(cmd);
|
||
|
#elif TARGET_OS_IPHONE || defined(OS_IOS) /* iOS */
|
||
|
L_ERROR("iOS 11 does not support system()\n", procName);
|
||
|
#endif /* TARGET_OS_OSX */
|
||
|
|
||
|
#else /* ! __APPLE__ */
|
||
|
|
||
|
ret = system(cmd);
|
||
|
|
||
|
#endif /* __APPLE__ */
|
||
|
}
|
||
|
|
||
|
|
||
|
/*--------------------------------------------------------------------*
|
||
|
* General file name operations *
|
||
|
*--------------------------------------------------------------------*/
|
||
|
/*!
|
||
|
* \brief splitPathAtDirectory()
|
||
|
*
|
||
|
* \param[in] pathname full path; can be a directory
|
||
|
* \param[out] pdir [optional] root directory name of
|
||
|
* input path, including trailing '/'
|
||
|
* \param[out] ptail [optional] path tail, which is either
|
||
|
* the file name within the root directory or
|
||
|
* the last sub-directory in the path
|
||
|
* \return 0 if OK, 1 on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) If you only want the tail, input null for the root directory ptr.
|
||
|
* (2) If you only want the root directory name, input null for the
|
||
|
* tail ptr.
|
||
|
* (3) This function makes decisions based only on the lexical
|
||
|
* structure of the input. Examples:
|
||
|
* /usr/tmp/abc.d --> dir: /usr/tmp/ tail: abc.d
|
||
|
* /usr/tmp/ --> dir: /usr/tmp/ tail: [empty string]
|
||
|
* /usr/tmp --> dir: /usr/ tail: tmp
|
||
|
* abc.d --> dir: [empty string] tail: abc.d
|
||
|
* (4) Consider the first example above: /usr/tmp/abc.d.
|
||
|
* Suppose you want the stem of the file, abc, without either
|
||
|
* the directory or the extension. This can be extracted in two steps:
|
||
|
* splitPathAtDirectory("/usr/tmp/abc.d", NULL, &tail);
|
||
|
* [sets tail: "abc.d"]
|
||
|
* splitPathAtExtension(tail, &basename, NULL);
|
||
|
* [sets basename: "abc"]
|
||
|
* (5) The input can have either forward (unix) or backward (win)
|
||
|
* slash separators. The output has unix separators.
|
||
|
* Note that Win32 pathname functions generally accept both
|
||
|
* slash forms, but the windows command line interpreter
|
||
|
* only accepts backward slashes, because forward slashes are
|
||
|
* used to demarcate switches (vs. dashes in unix).
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_ok
|
||
|
splitPathAtDirectory(const char *pathname,
|
||
|
char **pdir,
|
||
|
char **ptail)
|
||
|
{
|
||
|
char *cpathname, *lastslash;
|
||
|
|
||
|
PROCNAME("splitPathAtDirectory");
|
||
|
|
||
|
if (!pdir && !ptail)
|
||
|
return ERROR_INT("null input for both strings", procName, 1);
|
||
|
if (pdir) *pdir = NULL;
|
||
|
if (ptail) *ptail = NULL;
|
||
|
if (!pathname)
|
||
|
return ERROR_INT("pathname not defined", procName, 1);
|
||
|
|
||
|
cpathname = stringNew(pathname);
|
||
|
convertSepCharsInPath(cpathname, UNIX_PATH_SEPCHAR);
|
||
|
lastslash = strrchr(cpathname, '/');
|
||
|
if (lastslash) {
|
||
|
if (ptail)
|
||
|
*ptail = stringNew(lastslash + 1);
|
||
|
if (pdir) {
|
||
|
*(lastslash + 1) = '\0';
|
||
|
*pdir = cpathname;
|
||
|
} else {
|
||
|
LEPT_FREE(cpathname);
|
||
|
}
|
||
|
} else { /* no directory */
|
||
|
if (pdir)
|
||
|
*pdir = stringNew("");
|
||
|
if (ptail)
|
||
|
*ptail = cpathname;
|
||
|
else
|
||
|
LEPT_FREE(cpathname);
|
||
|
}
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief splitPathAtExtension()
|
||
|
*
|
||
|
* \param[in] pathname full path; can be a directory
|
||
|
* \param[out] pbasename [optional] pathname not including the
|
||
|
* last dot and characters after that
|
||
|
* \param[out] pextension [optional] path extension, which is
|
||
|
* the last dot and the characters after it. If
|
||
|
* there is no extension, it returns the empty string
|
||
|
* \return 0 if OK, 1 on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) If you only want the extension, input null for the basename ptr.
|
||
|
* (2) If you only want the basename without extension, input null
|
||
|
* for the extension ptr.
|
||
|
* (3) This function makes decisions based only on the lexical
|
||
|
* structure of the input. Examples:
|
||
|
* /usr/tmp/abc.jpg --> basename: /usr/tmp/abc ext: .jpg
|
||
|
* /usr/tmp/.jpg --> basename: /usr/tmp/ ext: .jpg
|
||
|
* /usr/tmp.jpg/ --> basename: /usr/tmp.jpg/ ext: [empty str]
|
||
|
* ./.jpg --> basename: ./ ext: .jpg
|
||
|
* (4) The input can have either forward (unix) or backward (win)
|
||
|
* slash separators. The output has unix separators.
|
||
|
* (5) Note that basename, as used here, is different from the result
|
||
|
* of the unix program 'basename'. Here, basename is the entire
|
||
|
* pathname up to a final extension and its preceding dot.
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_ok
|
||
|
splitPathAtExtension(const char *pathname,
|
||
|
char **pbasename,
|
||
|
char **pextension)
|
||
|
{
|
||
|
char *tail, *dir, *lastdot;
|
||
|
char empty[4] = "";
|
||
|
|
||
|
PROCNAME("splitPathExtension");
|
||
|
|
||
|
if (!pbasename && !pextension)
|
||
|
return ERROR_INT("null input for both strings", procName, 1);
|
||
|
if (pbasename) *pbasename = NULL;
|
||
|
if (pextension) *pextension = NULL;
|
||
|
if (!pathname)
|
||
|
return ERROR_INT("pathname not defined", procName, 1);
|
||
|
|
||
|
/* Split out the directory first */
|
||
|
splitPathAtDirectory(pathname, &dir, &tail);
|
||
|
|
||
|
/* Then look for a "." in the tail part.
|
||
|
* This way we ignore all "." in the directory. */
|
||
|
if ((lastdot = strrchr(tail, '.'))) {
|
||
|
if (pextension)
|
||
|
*pextension = stringNew(lastdot);
|
||
|
if (pbasename) {
|
||
|
*lastdot = '\0';
|
||
|
*pbasename = stringJoin(dir, tail);
|
||
|
}
|
||
|
} else {
|
||
|
if (pextension)
|
||
|
*pextension = stringNew(empty);
|
||
|
if (pbasename)
|
||
|
*pbasename = stringNew(pathname);
|
||
|
}
|
||
|
LEPT_FREE(dir);
|
||
|
LEPT_FREE(tail);
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief pathJoin()
|
||
|
*
|
||
|
* \param[in] dir [optional] can be null
|
||
|
* \param[in] fname [optional] can be null
|
||
|
* \return specially concatenated path, or NULL on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) Use unix-style pathname separators ('/').
|
||
|
* (2) %fname can be the entire path, or part of the path containing
|
||
|
* at least one directory, or a tail without a directory, or NULL.
|
||
|
* (3) It produces a path that strips multiple slashes to a single
|
||
|
* slash, joins %dir and %fname by a slash, and has no trailing
|
||
|
* slashes (except in the cases where %dir == "/" and
|
||
|
* %fname == NULL, or v.v.).
|
||
|
* (4) If both %dir and %fname are null, produces an empty string.
|
||
|
* (5) Neither %dir nor %fname can begin with '..'.
|
||
|
* (6) The result is not canonicalized or tested for correctness:
|
||
|
* garbage in (e.g., /&%), garbage out.
|
||
|
* (7) Examples:
|
||
|
* //tmp// + //abc/ --> /tmp/abc
|
||
|
* tmp/ + /abc/ --> tmp/abc
|
||
|
* tmp/ + abc/ --> tmp/abc
|
||
|
* /tmp/ + /// --> /tmp
|
||
|
* /tmp/ + NULL --> /tmp
|
||
|
* // + /abc// --> /abc
|
||
|
* // + NULL --> /
|
||
|
* NULL + /abc/def/ --> /abc/def
|
||
|
* NULL + abc// --> abc
|
||
|
* NULL + // --> /
|
||
|
* NULL + NULL --> (empty string)
|
||
|
* "" + "" --> (empty string)
|
||
|
* "" + / --> /
|
||
|
* ".." + /etc/foo --> NULL
|
||
|
* /tmp + ".." --> NULL
|
||
|
* </pre>
|
||
|
*/
|
||
|
char *
|
||
|
pathJoin(const char *dir,
|
||
|
const char *fname)
|
||
|
{
|
||
|
const char *slash = "/";
|
||
|
char *str, *dest;
|
||
|
l_int32 i, n1, n2, emptydir;
|
||
|
size_t size;
|
||
|
SARRAY *sa1, *sa2;
|
||
|
L_BYTEA *ba;
|
||
|
|
||
|
PROCNAME("pathJoin");
|
||
|
|
||
|
if (!dir && !fname)
|
||
|
return stringNew("");
|
||
|
if (dir && strlen(dir) >= 2 && dir[0] == '.' && dir[1] == '.')
|
||
|
return (char *)ERROR_PTR("dir starts with '..'", procName, NULL);
|
||
|
if (fname && strlen(fname) >= 2 && fname[0] == '.' && fname[1] == '.')
|
||
|
return (char *)ERROR_PTR("fname starts with '..'", procName, NULL);
|
||
|
|
||
|
sa1 = sarrayCreate(0);
|
||
|
sa2 = sarrayCreate(0);
|
||
|
ba = l_byteaCreate(4);
|
||
|
|
||
|
/* Process %dir */
|
||
|
if (dir && strlen(dir) > 0) {
|
||
|
if (dir[0] == '/')
|
||
|
l_byteaAppendString(ba, slash);
|
||
|
sarraySplitString(sa1, dir, "/"); /* removes all slashes */
|
||
|
n1 = sarrayGetCount(sa1);
|
||
|
for (i = 0; i < n1; i++) {
|
||
|
str = sarrayGetString(sa1, i, L_NOCOPY);
|
||
|
l_byteaAppendString(ba, str);
|
||
|
l_byteaAppendString(ba, slash);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Special case to add leading slash: dir NULL or empty string */
|
||
|
emptydir = dir && strlen(dir) == 0;
|
||
|
if ((!dir || emptydir) && fname && strlen(fname) > 0 && fname[0] == '/')
|
||
|
l_byteaAppendString(ba, slash);
|
||
|
|
||
|
/* Process %fname */
|
||
|
if (fname && strlen(fname) > 0) {
|
||
|
sarraySplitString(sa2, fname, "/");
|
||
|
n2 = sarrayGetCount(sa2);
|
||
|
for (i = 0; i < n2; i++) {
|
||
|
str = sarrayGetString(sa2, i, L_NOCOPY);
|
||
|
l_byteaAppendString(ba, str);
|
||
|
l_byteaAppendString(ba, slash);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Remove trailing slash */
|
||
|
dest = (char *)l_byteaCopyData(ba, &size);
|
||
|
if (size > 1 && dest[size - 1] == '/')
|
||
|
dest[size - 1] = '\0';
|
||
|
|
||
|
sarrayDestroy(&sa1);
|
||
|
sarrayDestroy(&sa2);
|
||
|
l_byteaDestroy(&ba);
|
||
|
return dest;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief appendSubdirs()
|
||
|
*
|
||
|
* \param[in] basedir
|
||
|
* \param[in] subdirs
|
||
|
* \return concatenated full directory path without trailing slash,
|
||
|
* or NULL on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) Use unix pathname separators
|
||
|
* (2) Allocates a new string: [basedir]/[subdirs]
|
||
|
* </pre>
|
||
|
*/
|
||
|
char *
|
||
|
appendSubdirs(const char *basedir,
|
||
|
const char *subdirs)
|
||
|
{
|
||
|
char *newdir;
|
||
|
size_t len1, len2, len3, len4;
|
||
|
|
||
|
PROCNAME("appendSubdirs");
|
||
|
|
||
|
if (!basedir || !subdirs)
|
||
|
return (char *)ERROR_PTR("basedir and subdirs not both defined",
|
||
|
procName, NULL);
|
||
|
|
||
|
len1 = strlen(basedir);
|
||
|
len2 = strlen(subdirs);
|
||
|
len3 = len1 + len2 + 6;
|
||
|
if ((newdir = (char *)LEPT_CALLOC(len3 + 1, 1)) == NULL)
|
||
|
return (char *)ERROR_PTR("newdir not made", procName, NULL);
|
||
|
strncat(newdir, basedir, len3); /* add basedir */
|
||
|
if (newdir[len1 - 1] != '/') /* add '/' if necessary */
|
||
|
newdir[len1] = '/';
|
||
|
if (subdirs[0] == '/') /* add subdirs, stripping leading '/' */
|
||
|
strncat(newdir, subdirs + 1, len3);
|
||
|
else
|
||
|
strncat(newdir, subdirs, len3);
|
||
|
len4 = strlen(newdir);
|
||
|
if (newdir[len4 - 1] == '/') /* strip trailing '/' */
|
||
|
newdir[len4 - 1] = '\0';
|
||
|
|
||
|
return newdir;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*--------------------------------------------------------------------*
|
||
|
* Special file name operations *
|
||
|
*--------------------------------------------------------------------*/
|
||
|
/*!
|
||
|
* \brief convertSepCharsInPath()
|
||
|
*
|
||
|
* \param[in] path
|
||
|
* \param[in] type UNIX_PATH_SEPCHAR, WIN_PATH_SEPCHAR
|
||
|
* \return 0 if OK, 1 on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) In-place conversion.
|
||
|
* (2) Type is the resulting type:
|
||
|
* * UNIX_PATH_SEPCHAR: '\\' ==> '/'
|
||
|
* * WIN_PATH_SEPCHAR: '/' ==> '\\'
|
||
|
* (3) Virtually all path operations in leptonica use unix separators.
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_ok
|
||
|
convertSepCharsInPath(char *path,
|
||
|
l_int32 type)
|
||
|
{
|
||
|
l_int32 i;
|
||
|
size_t len;
|
||
|
|
||
|
PROCNAME("convertSepCharsInPath");
|
||
|
if (!path)
|
||
|
return ERROR_INT("path not defined", procName, 1);
|
||
|
if (type != UNIX_PATH_SEPCHAR && type != WIN_PATH_SEPCHAR)
|
||
|
return ERROR_INT("invalid type", procName, 1);
|
||
|
|
||
|
len = strlen(path);
|
||
|
if (type == UNIX_PATH_SEPCHAR) {
|
||
|
for (i = 0; i < len; i++) {
|
||
|
if (path[i] == '\\')
|
||
|
path[i] = '/';
|
||
|
}
|
||
|
} else { /* WIN_PATH_SEPCHAR */
|
||
|
for (i = 0; i < len; i++) {
|
||
|
if (path[i] == '/')
|
||
|
path[i] = '\\';
|
||
|
}
|
||
|
}
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief genPathname()
|
||
|
*
|
||
|
* \param[in] dir [optional] directory or full path name,
|
||
|
* with or without the trailing '/'
|
||
|
* \param[in] fname [optional] file name within a directory
|
||
|
* \return pathname either a directory or full path, or NULL on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) This function generates actual paths in the following ways:
|
||
|
* * from two sub-parts (e.g., a directory and a file name).
|
||
|
* * from a single path full path, placed in %dir, with
|
||
|
* %fname == NULL.
|
||
|
* * from the name of a file in the local directory placed in
|
||
|
* %fname, with %dir == NULL.
|
||
|
* * if in a "/tmp" directory and on windows, the windows
|
||
|
* temp directory is used.
|
||
|
* (2) On windows, if the root of %dir is '/tmp', this does a name
|
||
|
* translation:
|
||
|
* "/tmp" ==> [Temp] (windows)
|
||
|
* where [Temp] is the windows temp directory.
|
||
|
* (3) On unix, the TMPDIR variable is ignored. No rewriting
|
||
|
* of temp directories is permitted.
|
||
|
* (4) There are four cases for the input:
|
||
|
* (a) %dir is a directory and %fname is defined: result is a full path
|
||
|
* (b) %dir is a directory and %fname is null: result is a directory
|
||
|
* (c) %dir is a full path and %fname is null: result is a full path
|
||
|
* (d) %dir is null or an empty string: start in the current dir;
|
||
|
* result is a full path
|
||
|
* (5) In all cases, the resulting pathname is not terminated with a slash
|
||
|
* (6) The caller is responsible for freeing the returned pathname.
|
||
|
* </pre>
|
||
|
*/
|
||
|
char *
|
||
|
genPathname(const char *dir,
|
||
|
const char *fname)
|
||
|
{
|
||
|
l_int32 is_win32 = FALSE;
|
||
|
char *cdir, *pathout;
|
||
|
l_int32 dirlen, namelen, size;
|
||
|
|
||
|
PROCNAME("genPathname");
|
||
|
|
||
|
if (!dir && !fname)
|
||
|
return (char *)ERROR_PTR("no input", procName, NULL);
|
||
|
|
||
|
/* Handle the case where we start from the current directory */
|
||
|
if (!dir || dir[0] == '\0') {
|
||
|
if ((cdir = getcwd(NULL, 0)) == NULL)
|
||
|
return (char *)ERROR_PTR("no current dir found", procName, NULL);
|
||
|
} else {
|
||
|
cdir = stringNew(dir);
|
||
|
}
|
||
|
|
||
|
/* Convert to unix path separators, and remove the trailing
|
||
|
* slash in the directory, except when dir == "/" */
|
||
|
convertSepCharsInPath(cdir, UNIX_PATH_SEPCHAR);
|
||
|
dirlen = strlen(cdir);
|
||
|
if (cdir[dirlen - 1] == '/' && dirlen != 1) {
|
||
|
cdir[dirlen - 1] = '\0';
|
||
|
dirlen--;
|
||
|
}
|
||
|
|
||
|
namelen = (fname) ? strlen(fname) : 0;
|
||
|
size = dirlen + namelen + 256;
|
||
|
if ((pathout = (char *)LEPT_CALLOC(size, sizeof(char))) == NULL) {
|
||
|
LEPT_FREE(cdir);
|
||
|
return (char *)ERROR_PTR("pathout not made", procName, NULL);
|
||
|
}
|
||
|
|
||
|
#ifdef _WIN32
|
||
|
is_win32 = TRUE;
|
||
|
#endif /* _WIN32 */
|
||
|
|
||
|
/* First handle %dir (which may be a full pathname).
|
||
|
* There is no path rewriting on unix, and on win32, we do not
|
||
|
* rewrite unless the specified directory is /tmp or
|
||
|
* a subdirectory of /tmp */
|
||
|
if (!is_win32 || dirlen < 4 ||
|
||
|
(dirlen == 4 && strncmp(cdir, "/tmp", 4) != 0) || /* not in "/tmp" */
|
||
|
(dirlen > 4 && strncmp(cdir, "/tmp/", 5) != 0)) { /* not in "/tmp/" */
|
||
|
stringCopy(pathout, cdir, dirlen);
|
||
|
} else { /* Rewrite for win32 with "/tmp" specified for the directory. */
|
||
|
#ifdef _WIN32
|
||
|
l_int32 tmpdirlen;
|
||
|
char tmpdir[MAX_PATH];
|
||
|
GetTempPath(sizeof(tmpdir), tmpdir); /* get the windows temp dir */
|
||
|
tmpdirlen = strlen(tmpdir);
|
||
|
if (tmpdirlen > 0 && tmpdir[tmpdirlen - 1] == '\\') {
|
||
|
tmpdir[tmpdirlen - 1] = '\0'; /* trim the trailing '\' */
|
||
|
}
|
||
|
tmpdirlen = strlen(tmpdir);
|
||
|
stringCopy(pathout, tmpdir, tmpdirlen);
|
||
|
|
||
|
/* Add the rest of cdir */
|
||
|
if (dirlen > 4)
|
||
|
stringCat(pathout, size, cdir + 4);
|
||
|
#endif /* _WIN32 */
|
||
|
}
|
||
|
|
||
|
/* Now handle %fname */
|
||
|
if (fname && strlen(fname) > 0) {
|
||
|
dirlen = strlen(pathout);
|
||
|
pathout[dirlen] = '/';
|
||
|
strncat(pathout, fname, namelen);
|
||
|
}
|
||
|
|
||
|
LEPT_FREE(cdir);
|
||
|
return pathout;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief makeTempDirname()
|
||
|
*
|
||
|
* \param[in] result preallocated on stack or heap and passed in
|
||
|
* \param[in] nbytes size of %result array, in bytes
|
||
|
* \param[in] subdir [optional]; can be NULL or an empty string
|
||
|
* \return 0 if OK, 1 on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) This generates the directory path for output temp files,
|
||
|
* written into %result with unix separators.
|
||
|
* (2) Caller allocates %result, large enough to hold the path,
|
||
|
* which is:
|
||
|
* /tmp/%subdir (unix)
|
||
|
* [Temp]/%subdir (windows, mac, ios)
|
||
|
* where [Temp] is a path determined
|
||
|
* - on windows, mac: by GetTempPath()
|
||
|
* - on ios: by confstr() (see man page)
|
||
|
* and %subdir is in general a set of nested subdirectories:
|
||
|
* dir1/dir2/.../dirN
|
||
|
* which in use would not typically exceed 2 levels.
|
||
|
* (3) Usage example:
|
||
|
* \code
|
||
|
* char result[256];
|
||
|
* makeTempDirname(result, sizeof(result), "lept/golden");
|
||
|
* \endcode
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_ok
|
||
|
makeTempDirname(char *result,
|
||
|
size_t nbytes,
|
||
|
const char *subdir)
|
||
|
{
|
||
|
char *dir, *path;
|
||
|
l_int32 ret = 0;
|
||
|
size_t pathlen;
|
||
|
|
||
|
PROCNAME("makeTempDirname");
|
||
|
|
||
|
if (!result)
|
||
|
return ERROR_INT("result not defined", procName, 1);
|
||
|
if (subdir && ((subdir[0] == '.') || (subdir[0] == '/')))
|
||
|
return ERROR_INT("subdir not an actual subdirectory", procName, 1);
|
||
|
|
||
|
memset(result, 0, nbytes);
|
||
|
|
||
|
#ifdef OS_IOS
|
||
|
{
|
||
|
size_t n = confstr(_CS_DARWIN_USER_TEMP_DIR, result, nbytes);
|
||
|
if (n == 0) {
|
||
|
L_ERROR("failed to find tmp dir, %s\n", procName, strerror(errno));
|
||
|
return 1;
|
||
|
} else if (n > nbytes) {
|
||
|
return ERROR_INT("result array too small for path\n", procName, 1);
|
||
|
}
|
||
|
dir = pathJoin(result, subdir);
|
||
|
}
|
||
|
#else
|
||
|
dir = pathJoin("/tmp", subdir);
|
||
|
#endif /* ~ OS_IOS */
|
||
|
|
||
|
#ifndef _WIN32
|
||
|
path = stringNew(dir);
|
||
|
#else
|
||
|
path = genPathname(dir, NULL);
|
||
|
#endif /* ~ _WIN32 */
|
||
|
pathlen = strlen(path);
|
||
|
if (pathlen < nbytes - 1) {
|
||
|
strncpy(result, path, pathlen);
|
||
|
} else {
|
||
|
L_ERROR("result array too small for path\n", procName);
|
||
|
ret = 1;
|
||
|
}
|
||
|
|
||
|
LEPT_FREE(dir);
|
||
|
LEPT_FREE(path);
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief modifyTrailingSlash()
|
||
|
*
|
||
|
* \param[in] path preallocated on stack or heap and passed in
|
||
|
* \param[in] nbytes size of %path array, in bytes
|
||
|
* \param[in] flag L_ADD_TRAIL_SLASH or L_REMOVE_TRAIL_SLASH
|
||
|
* \return 0 if OK, 1 on error
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) This carries out the requested action if necessary.
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_ok
|
||
|
modifyTrailingSlash(char *path,
|
||
|
size_t nbytes,
|
||
|
l_int32 flag)
|
||
|
{
|
||
|
char lastchar;
|
||
|
size_t len;
|
||
|
|
||
|
PROCNAME("modifyTrailingSlash");
|
||
|
|
||
|
if (!path)
|
||
|
return ERROR_INT("path not defined", procName, 1);
|
||
|
if (flag != L_ADD_TRAIL_SLASH && flag != L_REMOVE_TRAIL_SLASH)
|
||
|
return ERROR_INT("invalid flag", procName, 1);
|
||
|
|
||
|
len = strlen(path);
|
||
|
lastchar = path[len - 1];
|
||
|
if (flag == L_ADD_TRAIL_SLASH && lastchar != '/' && len < nbytes - 2) {
|
||
|
path[len] = '/';
|
||
|
path[len + 1] = '\0';
|
||
|
} else if (flag == L_REMOVE_TRAIL_SLASH && lastchar == '/') {
|
||
|
path[len - 1] = '\0';
|
||
|
}
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief l_makeTempFilename()
|
||
|
*
|
||
|
* \return fname : heap allocated filename; returns NULL on failure.
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) On unix, this makes a filename of the form
|
||
|
* "/tmp/lept.XXXXXX",
|
||
|
* where each X is a random character.
|
||
|
* (2) On windows, this makes a filename of the form
|
||
|
* "/[Temp]/lp.XXXXXX".
|
||
|
* (3) On all systems, this fails if the file is not writable.
|
||
|
* (4) Safest usage is to write to a subdirectory in debug code.
|
||
|
* (5) The returned filename must be freed by the caller, using lept_free.
|
||
|
* (6) The tail of the filename has a '.', so that cygwin interprets
|
||
|
* the file as having an extension. Otherwise, cygwin assumes it
|
||
|
* is an executable and appends ".exe" to the filename.
|
||
|
* (7) On unix, whenever possible use tmpfile() instead. tmpfile()
|
||
|
* hides the file name, returns a stream opened for write,
|
||
|
* and deletes the temp file when the stream is closed.
|
||
|
* </pre>
|
||
|
*/
|
||
|
char *
|
||
|
l_makeTempFilename()
|
||
|
{
|
||
|
char dirname[240];
|
||
|
|
||
|
PROCNAME("l_makeTempFilename");
|
||
|
|
||
|
if (makeTempDirname(dirname, sizeof(dirname), NULL) == 1)
|
||
|
return (char *)ERROR_PTR("failed to make dirname", procName, NULL);
|
||
|
|
||
|
#ifndef _WIN32
|
||
|
{
|
||
|
char *pattern;
|
||
|
l_int32 fd;
|
||
|
pattern = stringConcatNew(dirname, "/lept.XXXXXX", NULL);
|
||
|
fd = mkstemp(pattern);
|
||
|
if (fd == -1) {
|
||
|
LEPT_FREE(pattern);
|
||
|
return (char *)ERROR_PTR("mkstemp failed", procName, NULL);
|
||
|
}
|
||
|
close(fd);
|
||
|
return pattern;
|
||
|
}
|
||
|
#else
|
||
|
{
|
||
|
char fname[MAX_PATH];
|
||
|
FILE *fp;
|
||
|
if (GetTempFileName(dirname, "lp.", 0, fname) == 0)
|
||
|
return (char *)ERROR_PTR("GetTempFileName failed", procName, NULL);
|
||
|
if ((fp = fopen(fname, "wb")) == NULL)
|
||
|
return (char *)ERROR_PTR("file cannot be written to", procName, NULL);
|
||
|
fclose(fp);
|
||
|
return stringNew(fname);
|
||
|
}
|
||
|
#endif /* ~ _WIN32 */
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
* \brief extractNumberFromFilename()
|
||
|
*
|
||
|
* \param[in] fname
|
||
|
* \param[in] numpre number of characters before the digits to be found
|
||
|
* \param[in] numpost number of characters after the digits to be found
|
||
|
* \return num number embedded in the filename; -1 on error or if
|
||
|
* not found
|
||
|
*
|
||
|
* <pre>
|
||
|
* Notes:
|
||
|
* (1) The number is to be found in the basename, which is the
|
||
|
* filename without either the directory or the last extension.
|
||
|
* (2) When a number is found, it is non-negative. If no number
|
||
|
* is found, this returns -1, without an error message. The
|
||
|
* caller needs to check.
|
||
|
* </pre>
|
||
|
*/
|
||
|
l_int32
|
||
|
extractNumberFromFilename(const char *fname,
|
||
|
l_int32 numpre,
|
||
|
l_int32 numpost)
|
||
|
{
|
||
|
char *tail, *basename;
|
||
|
l_int32 len, nret, num;
|
||
|
|
||
|
PROCNAME("extractNumberFromFilename");
|
||
|
|
||
|
if (!fname)
|
||
|
return ERROR_INT("fname not defined", procName, -1);
|
||
|
|
||
|
splitPathAtDirectory(fname, NULL, &tail);
|
||
|
splitPathAtExtension(tail, &basename, NULL);
|
||
|
LEPT_FREE(tail);
|
||
|
|
||
|
len = strlen(basename);
|
||
|
if (numpre + numpost > len - 1) {
|
||
|
LEPT_FREE(basename);
|
||
|
return ERROR_INT("numpre + numpost too big", procName, -1);
|
||
|
}
|
||
|
|
||
|
basename[len - numpost] = '\0';
|
||
|
nret = sscanf(basename + numpre, "%d", &num);
|
||
|
LEPT_FREE(basename);
|
||
|
|
||
|
if (nret == 1)
|
||
|
return num;
|
||
|
else
|
||
|
return -1; /* not found */
|
||
|
}
|