/*====================================================================* - Copyright (C) 2001 Leptonica. All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials - provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *====================================================================*/ /*! * \file utils2.c *
* * ------------------------------------------ * This file has these utilities: * - safe string operations * - find/replace operations on strings * - read/write between file and memory * - multi-platform file and directory operations * - file name operations * ------------------------------------------ * * Safe string procs * char *stringNew() * l_int32 stringCopy() * l_int32 stringCopySegment() * l_int32 stringReplace() * l_int32 stringLength() * l_int32 stringCat() * char *stringConcatNew() * char *stringJoin() * l_int32 stringJoinIP() * char *stringReverse() * char *strtokSafe() * l_int32 stringSplitOnToken() * * Find and replace string and array procs * l_int32 stringCheckForChars() * char *stringRemoveChars() * char *stringReplaceEachSubstr() * char *stringReplaceSubstr() * L_DNA *stringFindEachSubstr() * l_int32 stringFindSubstr() * l_uint8 *arrayReplaceEachSequence() * L_DNA *arrayFindEachSequence() * l_int32 arrayFindSequence() * * Safe realloc * void *reallocNew() * * Read and write between file and memory * l_uint8 *l_binaryRead() * l_uint8 *l_binaryReadStream() * l_uint8 *l_binaryReadSelect() * l_uint8 *l_binaryReadSelectStream() * l_int32 l_binaryWrite() * l_int32 nbytesInFile() * l_int32 fnbytesInFile() * * Copy and compare in memory * l_uint8 *l_binaryCopy() * l_uint8 *l_binaryCompare() * * File copy operations * l_int32 fileCopy() * l_int32 fileConcatenate() * l_int32 fileAppendString() * * Multi-platform functions for opening file streams * FILE *fopenReadStream() * FILE *fopenWriteStream() * FILE *fopenReadFromMemory() * * Opening a windows tmpfile for writing * FILE *fopenWriteWinTempfile() * * Multi-platform functions that avoid C-runtime boundary crossing * with Windows DLLs * FILE *lept_fopen() * l_int32 lept_fclose() * void lept_calloc() * void lept_free() * * Multi-platform file system operations in temp directories * l_int32 lept_mkdir() * l_int32 lept_rmdir() * l_int32 lept_direxists() * l_int32 lept_mv() * l_int32 lept_rm_match() * l_int32 
lept_rm() * l_int32 lept_rmfile() * l_int32 lept_cp() * * Special debug/test function for calling 'system' * void callSystemDebug() * * General file name operations * l_int32 splitPathAtDirectory() * l_int32 splitPathAtExtension() * char *pathJoin() * char *appendSubdirs() * * Special file name operations * l_int32 convertSepCharsInPath() * char *genPathname() * l_int32 makeTempDirname() * l_int32 modifyTrailingSlash() * char *l_makeTempFilename() * l_int32 extractNumberFromFilename() * * * Notes on multi-platform development * ----------------------------------- * This is important: * (1) With the exception of splitPathAtDirectory(), splitPathAtExtension() * and genPathname(), all input pathnames must have unix separators. * (2) On Windows, when you specify a read or write to "/tmp/...", * the filename is rewritten to use the Windows temp directory: * /tmp ==> [Temp]... (windows) * (3) This filename rewrite, along with the conversion from unix * to windows pathnames, happens in genPathname(). * (4) Use fopenReadStream() and fopenWriteStream() to open files, * because these use genPathname() to find the platform-dependent * filenames. Likewise for l_binaryRead() and l_binaryWrite(). * (5) For moving, copying and removing files and directories that are in * subdirectories of /tmp, use the lept_*() file system shell wrappers: * lept_mkdir(), lept_rmdir(), lept_mv(), lept_rm() and lept_cp(). * (6) Use the lept_*() C library wrappers. These work properly on * Windows, where the same DLL must perform complementary operations * on file streams (open/close) and heap memory (malloc/free): * lept_fopen(), lept_fclose(), lept_calloc() and lept_free(). * (7) Why read and write files to temp directories? * The library needs the ability to read and write ephemeral * files to default places, both for generating debugging output * and for supporting regression tests. Applications also need * this ability for debugging. * (8) Why do the pathname rewrite on Windows? 
* The goal is to have the library, and programs using the library, * run on multiple platforms without changes. The location of * temporary files depends on the platform as well as the user's * configuration. Temp files on Windows are in some directory * not known a priori. To make everything work seamlessly on * Windows, every time you open a file for reading or writing, * use a special function such as fopenReadStream() or * fopenWriteStream(); these call genPathname() to ensure that * if it is a temp file, the correct path is used. To indicate * that this is a temp file, the application is written with the * root directory of the path in a canonical form: "/tmp". * (9) Why is it that multi-platform directory functions like lept_mkdir() * and lept_rmdir(), as well as associated file functions like * lept_rm(), lept_mv() and lept_cp(), only work in the temp dir? * These functions were designed to provide easy manipulation of * temp files. The restriction to temp files is for safety -- to * prevent an accidental deletion of important files. For example, * lept_rmdir() first deletes all files in a specified subdirectory * of temp, and then removes the directory. * **/ #ifdef HAVE_CONFIG_H #include "config_auto.h" #endif /* HAVE_CONFIG_H */ #ifdef _MSC_VER #include
* Notes: * (1) Relatively safe wrapper for strncpy, that checks the input, * and does not complain if %src is null or %n < 1. * If %n < 1, this is a no-op. * (2) %dest needs to be at least %n bytes in size. * (3) We don't call strncpy() because valgrind complains about * use of uninitialized values. **/ l_ok stringCopy(char *dest, const char *src, l_int32 n) { l_int32 i; PROCNAME("stringCopy"); if (!dest) return ERROR_INT("dest not defined", procName, 1); if (!src || n < 1) return 0; /* Implementation of strncpy that valgrind doesn't complain about */ for (i = 0; i < n && src[i] != '\0'; i++) dest[i] = src[i]; for (; i < n; i++) dest[i] = '\0'; return 0; } /*! * \brief stringCopySegment() * * * \param[in] src string * \param[in] start byte position at start of segment * \param[in] nbytes number of bytes in the segment; use 0 to go to end * \return copy of segment, or NULL on error * *
* Notes: * (1) This is a variant of stringNew() that makes a new string * from a segment of the input string. The segment is specified * by the starting position and the number of bytes. * (2) The start location %start must be within the string %src. * (3) The copy is truncated to the end of the source string. * Use %nbytes = 0 to copy to the end of %src. **/ char * stringCopySegment(const char *src, l_int32 start, l_int32 nbytes) { char *dest; l_int32 len; PROCNAME("stringCopySegment"); if (!src) return (char *)ERROR_PTR("src not defined", procName, NULL); len = strlen(src); if (start < 0 || start > len - 1) return (char *)ERROR_PTR("invalid start", procName, NULL); if (nbytes <= 0) /* copy to the end */ nbytes = len - start; if (start + nbytes > len) /* truncate to the end */ nbytes = len - start; if ((dest = (char *)LEPT_CALLOC(nbytes + 1, sizeof(char))) == NULL) return (char *)ERROR_PTR("dest not made", procName, NULL); stringCopy(dest, src + start, nbytes); return dest; } /*! * \brief stringReplace() * * \param[out] pdest string copy * \param[in] src [optional] string; can be null * \return 0 if OK; 1 on error * *
* Notes: * (1) Frees any existing dest string * (2) Puts a copy of src string in the dest * (3) If either or both strings are null, does something reasonable. **/ l_ok stringReplace(char **pdest, const char *src) { PROCNAME("stringReplace"); if (!pdest) return ERROR_INT("pdest not defined", procName, 1); if (*pdest) LEPT_FREE(*pdest); if (src) *pdest = stringNew(src); else *pdest = NULL; return 0; } /*! * \brief stringLength() * * \param[in] src string can be null or NULL-terminated string * \param[in] size size of src buffer * \return length of src in bytes. * *
* Notes: * (1) Safe implementation of strlen that only checks size bytes * for trailing NUL. * (2) Valid returned string lengths are between 0 and size - 1. * If size bytes are checked without finding a NUL byte, then * an error is indicated by returning size. **/ l_int32 stringLength(const char *src, size_t size) { l_int32 i; PROCNAME("stringLength"); if (!src) return ERROR_INT("src not defined", procName, 0); if (size < 1) return 0; for (i = 0; i < size; i++) { if (src[i] == '\0') return i; } return size; /* didn't find a NUL byte */ } /*! * \brief stringCat() * * \param[in] dest null-terminated byte buffer * \param[in] size size of dest * \param[in] src string can be null or NULL-terminated string * \return number of bytes added to dest; -1 on error * *
* Notes: * (1) Alternative implementation of strncat, that checks the input, * is easier to use (since the size of the dest buffer is specified * rather than the number of bytes to copy), and does not complain * if %src is null. * (2) Never writes past end of dest. * (3) If it can't append src (an error), it does nothing. * (4) N.B. The order of 2nd and 3rd args is reversed from that in * strncat, as in the Windows function strcat_s(). **/ l_int32 stringCat(char *dest, size_t size, const char *src) { l_int32 i, n; l_int32 lendest, lensrc; PROCNAME("stringCat"); if (!dest) return ERROR_INT("dest not defined", procName, -1); if (size < 1) return ERROR_INT("size < 1; too small", procName, -1); if (!src) return 0; lendest = stringLength(dest, size); if (lendest == size) return ERROR_INT("no terminating nul byte", procName, -1); lensrc = stringLength(src, size); if (lensrc == 0) return 0; n = (lendest + lensrc > size - 1 ? size - lendest - 1 : lensrc); if (n < 1) return ERROR_INT("dest too small for append", procName, -1); for (i = 0; i < n; i++) dest[lendest + i] = src[i]; dest[lendest + n] = '\0'; return n; } /*! * \brief stringConcatNew() * * \param[in] first first string in list * \param[in] ... NULL-terminated list of strings * \return result new string concatenating the input strings, or * NULL if first == NULL * *
* Notes: * (1) The last arg in the list of strings must be NULL. * (2) Caller must free the returned string. **/ char * stringConcatNew(const char *first, ...) { size_t len; char *result, *ptr; const char *arg; va_list args; if (!first) return NULL; /* Find the length of the output string */ va_start(args, first); len = strlen(first); while ((arg = va_arg(args, const char *)) != NULL) len += strlen(arg); va_end(args); result = (char *)LEPT_CALLOC(len + 1, sizeof(char)); /* Concatenate the args */ va_start(args, first); ptr = result; arg = first; while (*arg) *ptr++ = *arg++; while ((arg = va_arg(args, const char *)) != NULL) { while (*arg) *ptr++ = *arg++; } va_end(args); return result; } /*! * \brief stringJoin() * * \param[in] src1 [optional] string; can be null * \param[in] src2 [optional] string; can be null * \return concatenated string, or NULL on error * *
* Notes: * (1) This is a safe version of strcat; it makes a new string. * (2) It is not an error if either or both of the strings * are empty, or if either or both of the pointers are null. **/ char * stringJoin(const char *src1, const char *src2) { char *dest; l_int32 srclen1, srclen2, destlen; PROCNAME("stringJoin"); srclen1 = (src1) ? strlen(src1) : 0; srclen2 = (src2) ? strlen(src2) : 0; destlen = srclen1 + srclen2 + 3; if ((dest = (char *)LEPT_CALLOC(destlen, sizeof(char))) == NULL) return (char *)ERROR_PTR("calloc fail for dest", procName, NULL); if (src1) stringCopy(dest, src1, srclen1); if (src2) strncat(dest, src2, srclen2); return dest; } /*! * \brief stringJoinIP() * * \param[in,out] psrc1 address of string src1; cannot be on the stack * \param[in] src2 [optional] string; can be null * \return 0 if OK, 1 on error * *
* Notes: * (1) This is a safe in-place version of strcat. The contents of * src1 is replaced by the concatenation of src1 and src2. * (2) It is not an error if either or both of the strings * are empty (""), or if the pointers to the strings (*psrc1, src2) * are null. * (3) src1 should be initialized to null or an empty string * before the first call. Use one of these: * char *src1 = NULL; * char *src1 = stringNew(""); * Then call with: * stringJoinIP(&src1, src2); * (4) This can also be implemented as a macro: * \code * #define stringJoinIP(src1, src2) \ * {tmpstr = stringJoin((src1),(src2)); \ * LEPT_FREE(src1); \ * (src1) = tmpstr;} * \endcode * (5) Another function to consider for joining many strings is * stringConcatNew(). **/ l_ok stringJoinIP(char **psrc1, const char *src2) { char *tmpstr; PROCNAME("stringJoinIP"); if (!psrc1) return ERROR_INT("&src1 not defined", procName, 1); tmpstr = stringJoin(*psrc1, src2); LEPT_FREE(*psrc1); *psrc1 = tmpstr; return 0; } /*! * \brief stringReverse() * * \param[in] src string * \return dest newly-allocated reversed string */ char * stringReverse(const char *src) { char *dest; l_int32 i, len; PROCNAME("stringReverse"); if (!src) return (char *)ERROR_PTR("src not defined", procName, NULL); len = strlen(src); if ((dest = (char *)LEPT_CALLOC(len + 1, sizeof(char))) == NULL) return (char *)ERROR_PTR("calloc fail for dest", procName, NULL); for (i = 0; i < len; i++) dest[i] = src[len - 1 - i]; return dest; } /*! * \brief strtokSafe() * * \param[in] cstr input string to be sequentially parsed; * use NULL after the first call * \param[in] seps a string of character separators * \param[out] psaveptr ptr to the next char after * the last encountered separator * \return substr a new string that is copied from the previous * saveptr up to but not including the next * separator character, or NULL if end of cstr. * *
* Notes: * (1) This is a thread-safe implementation of strtok. * (2) It has the same interface as strtok_r. * (3) It differs from strtok_r in usage in two respects: * (a) the input string is not altered * (b) each returned substring is newly allocated and must * be freed after use. * (4) Let me repeat that. This is "safe" because the input * string is not altered and because each returned string * is newly allocated on the heap. * (5) It is here because, surprisingly, some C libraries don't * include strtok_r. * (6) Important usage points: * ~ Input the string to be parsed on the first invocation. * ~ Then input NULL after that; the value returned in saveptr * is used in all subsequent calls. * (7) This is only slightly slower than strtok_r. **/ char * strtokSafe(char *cstr, const char *seps, char **psaveptr) { char nextc; char *start, *substr; l_int32 istart, i, j, nchars; PROCNAME("strtokSafe"); if (!seps) return (char *)ERROR_PTR("seps not defined", procName, NULL); if (!psaveptr) return (char *)ERROR_PTR("&saveptr not defined", procName, NULL); if (!cstr) { start = *psaveptr; } else { start = cstr; *psaveptr = NULL; } if (!start) /* nothing to do */ return NULL; /* First time, scan for the first non-sep character */ istart = 0; if (cstr) { for (istart = 0;; istart++) { if ((nextc = start[istart]) == '\0') { *psaveptr = NULL; /* in case caller doesn't check ret value */ return NULL; } if (!strchr(seps, nextc)) break; } } /* Scan through, looking for a sep character; if none is * found, 'i' will be at the end of the string. */ for (i = istart;; i++) { if ((nextc = start[i]) == '\0') break; if (strchr(seps, nextc)) break; } /* Save the substring */ nchars = i - istart; substr = (char *)LEPT_CALLOC(nchars + 1, sizeof(char)); stringCopy(substr, start + istart, nchars); /* Look for the next non-sep character. * If this is the last substring, return a null saveptr. 
*/ for (j = i;; j++) { if ((nextc = start[j]) == '\0') { *psaveptr = NULL; /* no more non-sep characters */ break; } if (!strchr(seps, nextc)) { *psaveptr = start + j; /* start here on next call */ break; } } return substr; } /*! * \brief stringSplitOnToken() * * \param[in] cstr input string to be split; not altered * \param[in] seps a string of character separators * \param[out] phead ptr to copy of the input string, up to * the first separator token encountered * \param[out] ptail ptr to copy of the part of the input string * starting with the first non-separator character * that occurs after the first separator is found * \return 0 if OK, 1 on error * *
* Notes: * (1) The input string is not altered; all split parts are new strings. * (2) The split occurs around the first consecutive sequence of * tokens encountered. * (3) The head goes from the beginning of the string up to * but not including the first token found. * (4) The tail contains the second part of the string, starting * with the first char in that part that is NOT a token. * (5) If no separator token is found, 'head' contains a copy * of the input string and 'tail' is null. **/ l_ok stringSplitOnToken(char *cstr, const char *seps, char **phead, char **ptail) { char *saveptr; PROCNAME("stringSplitOnToken"); if (!phead) return ERROR_INT("&head not defined", procName, 1); if (!ptail) return ERROR_INT("&tail not defined", procName, 1); *phead = *ptail = NULL; if (!cstr) return ERROR_INT("cstr not defined", procName, 1); if (!seps) return ERROR_INT("seps not defined", procName, 1); *phead = strtokSafe(cstr, seps, &saveptr); if (saveptr) *ptail = stringNew(saveptr); return 0; } /*--------------------------------------------------------------------* * Find and replace procs * *--------------------------------------------------------------------*/ /*! * \brief stringCheckForChars() * * \param[in] src input string; can be of zero length * \param[in] chars string of chars to be searched for in %src * \param[out] pfound 1 if any characters are found; 0 otherwise * \return 0 if OK, 1 on error * *
* Notes: * (1) This can be used to sanitize an operation by checking for * special characters that don't belong in a string. **/ l_ok stringCheckForChars(const char *src, const char *chars, l_int32 *pfound) { char ch; l_int32 i, n; PROCNAME("stringCheckForChars"); if (!pfound) return ERROR_INT("&found not defined", procName, 1); *pfound = FALSE; if (!src || !chars) return ERROR_INT("src and chars not both defined", procName, 1); n = strlen(src); for (i = 0; i < n; i++) { ch = src[i]; if (strchr(chars, ch)) { *pfound = TRUE; break; } } return 0; } /*! * \brief stringRemoveChars() * * \param[in] src input string; can be of zero length * \param[in] remchars string of chars to be removed from src * \return dest string with specified chars removed, or NULL on error */ char * stringRemoveChars(const char *src, const char *remchars) { char ch; char *dest; l_int32 nsrc, i, k; PROCNAME("stringRemoveChars"); if (!src) return (char *)ERROR_PTR("src not defined", procName, NULL); if (!remchars) return stringNew(src); if ((dest = (char *)LEPT_CALLOC(strlen(src) + 1, sizeof(char))) == NULL) return (char *)ERROR_PTR("dest not made", procName, NULL); nsrc = strlen(src); for (i = 0, k = 0; i < nsrc; i++) { ch = src[i]; if (!strchr(remchars, ch)) dest[k++] = ch; } return dest; } /*! * \brief stringReplaceEachSubstr() * * \param[in] src input string; can be of zero length * \param[in] sub1 substring to be replaced * \param[in] sub2 substring to put in; can be "" * \param[out] pcount [optional] the number of times that sub1 * is found in src; 0 if not found * \return dest string with substring replaced, or NULL if the * substring not found or on error. * *
* Notes: * (1) This is a wrapper for simple string substitution that uses * the more general function arrayReplaceEachSequence(). * (2) This finds every non-overlapping occurrence of %sub1 in * %src, and replaces it with %sub2. By "non-overlapping" * we mean that after it finds each match, it removes the * matching characters, replaces with the substitution string * (if not empty), and continues. For example, if you replace * 'aa' by 'X' in 'baaabbb', you find one match at position 1 * and return 'bXabbb'. * (3) To only remove each instance of sub1, use "" for sub2 * (4) Returns a copy of %src if sub1 and sub2 are the same. * (5) If the input %src is binary data that can have null characters, * use arrayReplaceEachSequence() directly. **/ char * stringReplaceEachSubstr(const char *src, const char *sub1, const char *sub2, l_int32 *pcount) { size_t datalen; PROCNAME("stringReplaceEachSubstr"); if (pcount) *pcount = 0; if (!src || !sub1 || !sub2) return (char *)ERROR_PTR("src, sub1, sub2 not all defined", procName, NULL); if (strlen(sub2) > 0) { return (char *)arrayReplaceEachSequence( (const l_uint8 *)src, strlen(src), (const l_uint8 *)sub1, strlen(sub1), (const l_uint8 *)sub2, strlen(sub2), &datalen, pcount); } else { /* empty replacement string; removal only */ return (char *)arrayReplaceEachSequence( (const l_uint8 *)src, strlen(src), (const l_uint8 *)sub1, strlen(sub1), NULL, 0, &datalen, pcount); } } /*! * \brief stringReplaceSubstr() * * \param[in] src input string; can be of zero length * \param[in] sub1 substring to be replaced * \param[in] sub2 substring to put in; can be "" * \param[in,out] ploc [optional] input start location for search; * returns the loc after replacement * \param[out] pfound [optional] 1 if sub1 is found; 0 otherwise * \return dest string with substring replaced, or NULL on error. * *
* Notes: * (1) Replaces the first instance. * (2) To remove sub1 without replacement, use "" for sub2. * (3) Returns a copy of %src if either no instance of %sub1 is found, * or if %sub1 and %sub2 are the same. * (4) If %ploc == NULL, the search will start at the beginning of %src. * If %ploc != NULL, *ploc must be initialized to the byte offset * within %src from which the search starts. To search the * string from the beginning, set %loc = 0 and input &loc. * After finding %sub1 and replacing it with %sub2, %loc will be * returned as the next position after %sub2 in the output string. * (5) Note that the output string also includes all the characters * from the input string that occur after the single substitution. **/ char * stringReplaceSubstr(const char *src, const char *sub1, const char *sub2, l_int32 *ploc, l_int32 *pfound) { const char *ptr; char *dest; l_int32 nsrc, nsub1, nsub2, len, npre, loc; PROCNAME("stringReplaceSubstr"); if (pfound) *pfound = 0; if (!src || !sub1 || !sub2) return (char *)ERROR_PTR("src, sub1, sub2 not all defined", procName, NULL); if (ploc) loc = *ploc; else loc = 0; if (!strcmp(sub1, sub2)) return stringNew(src); if ((ptr = strstr(src + loc, sub1)) == NULL) return stringNew(src); if (pfound) *pfound = 1; nsrc = strlen(src); nsub1 = strlen(sub1); nsub2 = strlen(sub2); len = nsrc + nsub2 - nsub1; if ((dest = (char *)LEPT_CALLOC(len + 1, sizeof(char))) == NULL) return (char *)ERROR_PTR("dest not made", procName, NULL); npre = ptr - src; memcpy(dest, src, npre); strcpy(dest + npre, sub2); strcpy(dest + npre + nsub2, ptr + nsub1); if (ploc) *ploc = npre + nsub2; return dest; } /*! * \brief stringFindEachSubstr() * * \param[in] src input string; can be of zero length * \param[in] sub substring to be searched for * \return dna of offsets where the sequence is found, or NULL if * none are found or on error * *
* Notes: * (1) This finds every non-overlapping occurrence in %src of %sub. * After it finds each match, it moves forward in %src by the length * of %sub before continuing the search. So for example, * if you search for the sequence 'aa' in the data 'baaabbb', * you find one match at position 1. **/ L_DNA * stringFindEachSubstr(const char *src, const char *sub) { PROCNAME("stringFindEachSubstr"); if (!src || !sub) return (L_DNA *)ERROR_PTR("src, sub not both defined", procName, NULL); return arrayFindEachSequence((const l_uint8 *)src, strlen(src), (const l_uint8 *)sub, strlen(sub)); } /*! * \brief stringFindSubstr() * * \param[in] src input string; can be of zero length * \param[in] sub substring to be searched for; must not be empty * \param[out] ploc [optional] location of substring in src * \return 1 if found; 0 if not found or on error * *
* Notes: * (1) This is a wrapper around strstr(). It finds the first * instance of %sub in %src. If the substring is not found * and the location is returned, it has the value -1. * (2) Both %src and %sub must be defined, and %sub must have * length of at least 1. **/ l_int32 stringFindSubstr(const char *src, const char *sub, l_int32 *ploc) { const char *ptr; PROCNAME("stringFindSubstr"); if (ploc) *ploc = -1; if (!src || !sub) return ERROR_INT("src and sub not both defined", procName, 0); if (strlen(sub) == 0) return ERROR_INT("substring length 0", procName, 0); if (strlen(src) == 0) return 0; if ((ptr = strstr(src, sub)) == NULL) /* not found */ return 0; if (ploc) *ploc = ptr - src; return 1; } /*! * \brief arrayReplaceEachSequence() * * \param[in] datas source byte array * \param[in] dataslen length of source data, in bytes * \param[in] seq subarray of bytes to find in source data * \param[in] seqlen length of subarray, in bytes * \param[in] newseq replacement subarray; can be null * \param[in] newseqlen length of replacement subarray, in bytes * \param[out] pdatadlen length of dest byte array, in bytes * \param[out] pcount [optional] the number of times that sub1 * is found in src; 0 if not found * \return datad with all all subarrays replaced (or removed) * *
* Notes: * (1) The byte arrays %datas, %seq and %newseq are not C strings, * because they can contain null bytes. Therefore, for each * we must give the length of the array. * (2) If %newseq == NULL, this just removes all instances of %seq. * Otherwise, it replaces every non-overlapping occurrence of * %seq in %datas with %newseq. A new array %datad and its * size are returned. See arrayFindEachSequence() for more * details on finding non-overlapping occurrences. * (3) If no instances of %seq are found, this returns a copy of %datas. * (4) The returned %datad is null terminated. * (5) Can use stringReplaceEachSubstr() if using C strings. **/ l_uint8 * arrayReplaceEachSequence(const l_uint8 *datas, size_t dataslen, const l_uint8 *seq, size_t seqlen, const l_uint8 *newseq, size_t newseqlen, size_t *pdatadlen, l_int32 *pcount) { l_uint8 *datad; size_t newsize; l_int32 n, i, j, di, si, index, incr; L_DNA *da; PROCNAME("arrayReplaceEachSequence"); if (pcount) *pcount = 0; if (!datas || !seq) return (l_uint8 *)ERROR_PTR("datas & seq not both defined", procName, NULL); if (!pdatadlen) return (l_uint8 *)ERROR_PTR("&datadlen not defined", procName, NULL); *pdatadlen = 0; /* Identify the locations of the sequence. If there are none, * return a copy of %datas. 
*/ if ((da = arrayFindEachSequence(datas, dataslen, seq, seqlen)) == NULL) { *pdatadlen = dataslen; return l_binaryCopy(datas, dataslen); } /* Allocate the output data; insure null termination */ n = l_dnaGetCount(da); if (pcount) *pcount = n; if (!newseq) newseqlen = 0; newsize = dataslen + n * (newseqlen - seqlen) + 4; if ((datad = (l_uint8 *)LEPT_CALLOC(newsize, sizeof(l_uint8))) == NULL) { l_dnaDestroy(&da); return (l_uint8 *)ERROR_PTR("datad not made", procName, NULL); } /* Replace each sequence instance with a new sequence */ l_dnaGetIValue(da, 0, &si); for (i = 0, di = 0, index = 0; i < dataslen; i++) { if (i == si) { index++; if (index < n) { l_dnaGetIValue(da, index, &si); incr = L_MIN(seqlen, si - i); /* amount to remove from datas */ } else { incr = seqlen; } i += incr - 1; /* jump over the matched sequence in datas */ if (newseq) { /* add new sequence to datad */ for (j = 0; j < newseqlen; j++) datad[di++] = newseq[j]; } } else { datad[di++] = datas[i]; } } *pdatadlen = di; l_dnaDestroy(&da); return datad; } /*! * \brief arrayFindEachSequence() * * \param[in] data byte array * \param[in] datalen length of data, in bytes * \param[in] sequence subarray of bytes to find in data * \param[in] seqlen length of sequence, in bytes * \return dna of offsets where the sequence is found, or NULL if * none are found or on error * *
* Notes: * (1) The byte arrays %data and %sequence are not C strings, * because they can contain null bytes. Therefore, for each * we must give the length of the array. * (2) This finds every non-overlapping occurrence in %data of %sequence. * After it finds each match, it moves forward by the length * of the sequence before continuing the search. So for example, * if you search for the sequence 'aa' in the data 'baaabbb', * you find one match at position 1. **/ L_DNA * arrayFindEachSequence(const l_uint8 *data, size_t datalen, const l_uint8 *sequence, size_t seqlen) { l_int32 start, offset, realoffset, found; L_DNA *da; PROCNAME("arrayFindEachSequence"); if (!data || !sequence) return (L_DNA *)ERROR_PTR("data & sequence not both defined", procName, NULL); da = l_dnaCreate(0); start = 0; while (1) { arrayFindSequence(data + start, datalen - start, sequence, seqlen, &offset, &found); if (found == FALSE) break; realoffset = start + offset; l_dnaAddNumber(da, realoffset); start = realoffset + seqlen; if (start >= datalen) break; } if (l_dnaGetCount(da) == 0) l_dnaDestroy(&da); return da; } /*! * \brief arrayFindSequence() * * \param[in] data byte array * \param[in] datalen length of data, in bytes * \param[in] sequence subarray of bytes to find in data * \param[in] seqlen length of sequence, in bytes * \param[out] poffset offset from beginning of * data where the sequence begins * \param[out] pfound 1 if sequence is found; 0 otherwise * \return 0 if OK, 1 on error * *
* Notes: * (1) The byte arrays 'data' and 'sequence' are not C strings, * because they can contain null bytes. Therefore, for each * we must give the length of the array. * (2) This searches for the first occurrence in %data of %sequence, * which consists of %seqlen bytes. The parameter %seqlen * must not exceed the actual length of the %sequence byte array. * (3) If the sequence is not found, the offset will be 0, so you * must check %found. **/ l_ok arrayFindSequence(const l_uint8 *data, size_t datalen, const l_uint8 *sequence, size_t seqlen, l_int32 *poffset, l_int32 *pfound) { l_int32 i, j, found, lastpos; PROCNAME("arrayFindSequence"); if (poffset) *poffset = 0; if (pfound) *pfound = FALSE; if (!data || !sequence) return ERROR_INT("data & sequence not both defined", procName, 1); if (!poffset || !pfound) return ERROR_INT("&offset and &found not defined", procName, 1); lastpos = datalen - seqlen + 1; found = FALSE; for (i = 0; i < lastpos; i++) { for (j = 0; j < seqlen; j++) { if (data[i + j] != sequence[j]) break; if (j == seqlen - 1) found = TRUE; } if (found == TRUE) break; } if (found == TRUE) { *poffset = i; *pfound = TRUE; } return 0; } /*--------------------------------------------------------------------* * Safe realloc * *--------------------------------------------------------------------*/ /*! * \brief reallocNew() * * \param[in,out] pindata nulls indata before reallocing * \param[in] oldsize size of input data to be copied, in bytes * \param[in] newsize size of buffer to be reallocated in bytes * \return ptr to new data, or NULL on error * * Action: !N.B. 3) and (4! * 1 Allocates memory, initialized to 0 * 2 Copies as much of the input data as possible * to the new block, truncating the copy if necessary * 3 Frees the input data * 4 Zeroes the input data ptr * *
* Notes: * (1) If newsize <=0, just frees input data and nulls ptr * (2) If input data is null, just callocs new memory * (3) This differs from realloc in that it always allocates * new memory (if newsize > 0) and initializes it to 0, * it requires the amount of old data to be copied, * and it takes the address of the input ptr and * nulls the handle. **/ void * reallocNew(void **pindata, l_int32 oldsize, l_int32 newsize) { l_int32 minsize; void *indata; void *newdata; PROCNAME("reallocNew"); if (!pindata) return ERROR_PTR("input data not defined", procName, NULL); indata = *pindata; if (newsize <= 0) { /* nonstandard usage */ if (indata) { LEPT_FREE(indata); *pindata = NULL; } return NULL; } if (!indata) { /* nonstandard usage */ if ((newdata = (void *)LEPT_CALLOC(1, newsize)) == NULL) return ERROR_PTR("newdata not made", procName, NULL); return newdata; } /* Standard usage */ if ((newdata = (void *)LEPT_CALLOC(1, newsize)) == NULL) return ERROR_PTR("newdata not made", procName, NULL); minsize = L_MIN(oldsize, newsize); memcpy(newdata, indata, minsize); LEPT_FREE(indata); *pindata = NULL; return newdata; } /*--------------------------------------------------------------------* * Read and write between file and memory * *--------------------------------------------------------------------*/ /*! * \brief l_binaryRead() * * \param[in] filename * \param[out] pnbytes number of bytes read * \return data, or NULL on error */ l_uint8 * l_binaryRead(const char *filename, size_t *pnbytes) { l_uint8 *data; FILE *fp; PROCNAME("l_binaryRead"); if (!pnbytes) return (l_uint8 *)ERROR_PTR("pnbytes not defined", procName, NULL); *pnbytes = 0; if (!filename) return (l_uint8 *)ERROR_PTR("filename not defined", procName, NULL); if ((fp = fopenReadStream(filename)) == NULL) return (l_uint8 *)ERROR_PTR("file stream not opened", procName, NULL); data = l_binaryReadStream(fp, pnbytes); fclose(fp); return data; } /*! 
* \brief l_binaryReadStream() * * \param[in] fp file stream opened to read; can be stdin * \param[out] pnbytes number of bytes read * \return null-terminated array, or NULL on error; reading 0 bytes * is not an error * *
* Notes: * (1) The returned array is terminated with a null byte so that it can * be used to read ascii data from a file into a proper C string. * (2) This can be used to capture data that is piped in via stdin, * because it does not require seeking within the file. * (3) For example, you can read an image from stdin into memory * using shell redirection, with one of these shell commands: * \code * cat*/ l_uint8 * l_binaryReadStream(FILE *fp, size_t *pnbytes) { l_uint8 *data; l_int32 seekable, navail, nadd, nread; L_BBUFFER *bb; PROCNAME("l_binaryReadStream"); if (!pnbytes) return (l_uint8 *)ERROR_PTR("&nbytes not defined", procName, NULL); *pnbytes = 0; if (!fp) return (l_uint8 *)ERROR_PTR("fp not defined", procName, NULL); /* Test if the stream is seekable, by attempting to seek to * the start of data. This is a no-op. If it is seekable, use * l_binaryReadSelectStream() to determine the size of the * data to be read in advance. */ seekable = (ftell(fp) == 0) ? 1 : 0; if (seekable) return l_binaryReadSelectStream(fp, 0, 0, pnbytes); /* If it is not seekable, use the bbuffer to realloc memory * as needed during reading. */ bb = bbufferCreate(NULL, 4096); while (1) { navail = bb->nalloc - bb->n; if (navail < 4096) { nadd = L_MAX(bb->nalloc, 4096); bbufferExtendArray(bb, nadd); } nread = fread((void *)(bb->array + bb->n), 1, 4096, fp); bb->n += nread; if (nread != 4096) break; } /* Copy the data to a new array sized for the data, because * the bbuffer array can be nearly twice the size we need. */ if ((data = (l_uint8 *)LEPT_CALLOC(bb->n + 1, sizeof(l_uint8))) != NULL) { memcpy(data, bb->array, bb->n); *pnbytes = bb->n; } else { L_ERROR("calloc fail for data\n", procName); } bbufferDestroy(&bb); return data; } /*! 
* \brief l_binaryReadSelect() * * \param[in] filename * \param[in] start first byte to read * \param[in] nbytes number of bytes to read; use 0 to read to end of file * \param[out] pnread number of bytes actually read * \return data, or NULL on error * *| readprog * readprog < * \endcode * where readprog is: * \code * l_uint8 *data = l_binaryReadStream(stdin, &nbytes); * Pix *pix = pixReadMem(data, nbytes); * \endcode *
* Notes: * (1) The returned array is terminated with a null byte so that it can * be used to read ascii data from a file into a proper C string. **/ l_uint8 * l_binaryReadSelect(const char *filename, size_t start, size_t nbytes, size_t *pnread) { l_uint8 *data; FILE *fp; PROCNAME("l_binaryReadSelect"); if (!pnread) return (l_uint8 *)ERROR_PTR("pnread not defined", procName, NULL); *pnread = 0; if (!filename) return (l_uint8 *)ERROR_PTR("filename not defined", procName, NULL); if ((fp = fopenReadStream(filename)) == NULL) return (l_uint8 *)ERROR_PTR("file stream not opened", procName, NULL); data = l_binaryReadSelectStream(fp, start, nbytes, pnread); fclose(fp); return data; } /*! * \brief l_binaryReadSelectStream() * * \param[in] fp file stream * \param[in] start first byte to read * \param[in] nbytes number of bytes to read; use 0 to read to end of file * \param[out] pnread number of bytes actually read * \return null-terminated array, or NULL on error; reading 0 bytes * is not an error * *
* Notes:
 *      (1) The returned array is terminated with a null byte so that it can
 *          be used to read ascii data from a file into a proper C string.
 *          If the file to be read is empty and %start == 0, an array
 *          with a single null byte is returned.
 *      (2) Side effect: the stream pointer is re-positioned to the
 *          beginning of the file.
 *      (3) The size of the data is found with ftell() after seeking to
 *          the end of the stream.  If ftell() fails there, this returns
 *          NULL.
 */
l_uint8 *
l_binaryReadSelectStream(FILE    *fp,
                         size_t   start,
                         size_t   nbytes,
                         size_t  *pnread)
{
l_uint8  *data;
long      endpos;
size_t    bytesleft, bytestoread, nread, filebytes;

    PROCNAME("l_binaryReadSelectStream");

    if (!pnread)
        return (l_uint8 *)ERROR_PTR("&nread not defined", procName, NULL);
    *pnread = 0;
    if (!fp)
        return (l_uint8 *)ERROR_PTR("stream not defined", procName, NULL);

        /* Verify and adjust the parameters if necessary.  The result of
         * ftell() is tested while still signed: storing -1 directly in
         * a size_t would look like an enormous file. */
    fseek(fp, 0, SEEK_END);  /* EOF */
    endpos = ftell(fp);
    fseek(fp, 0, SEEK_SET);
    if (endpos < 0)
        return (l_uint8 *)ERROR_PTR("stream size not determined",
                                    procName, NULL);
    filebytes = (size_t)endpos;
    if (start > filebytes) {
        L_ERROR("start = %zu but filebytes = %zu\n", procName,
                start, filebytes);
        return NULL;
    }
    if (filebytes == 0)  /* start == 0; nothing to read; return null byte */
        return (l_uint8 *)LEPT_CALLOC(1, 1);
    bytesleft = filebytes - start;  /* cannot underflow: start <= filebytes */
    if (nbytes == 0) nbytes = bytesleft;
    bytestoread = (bytesleft >= nbytes) ? nbytes : bytesleft;

        /* Read the data; the extra byte is the null terminator */
    if ((data = (l_uint8 *)LEPT_CALLOC(1, bytestoread + 1)) == NULL)
        return (l_uint8 *)ERROR_PTR("calloc fail for data", procName, NULL);
    fseek(fp, start, SEEK_SET);
    nread = fread(data, 1, bytestoread, fp);
    if (nbytes != nread)
        L_INFO("%zu bytes requested; %zu bytes read\n", procName,
               nbytes, nread);
    *pnread = nread;
    fseek(fp, 0, SEEK_SET);
    return data;
}


/*!
* \brief l_binaryWrite() * * \param[in] filename output file * \param[in] operation "w" for write; "a" for append * \param[in] data binary data to be written * \param[in] nbytes size of data array * \return 0 if OK; 1 on error */ l_ok l_binaryWrite(const char *filename, const char *operation, const void *data, size_t nbytes) { char actualOperation[20]; FILE *fp; PROCNAME("l_binaryWrite"); if (!filename) return ERROR_INT("filename not defined", procName, 1); if (!operation) return ERROR_INT("operation not defined", procName, 1); if (!data) return ERROR_INT("data not defined", procName, 1); if (nbytes <= 0) return ERROR_INT("nbytes must be > 0", procName, 1); if (strcmp(operation, "w") && strcmp(operation, "a")) return ERROR_INT("operation not one of {'w','a'}", procName, 1); /* The 'b' flag to fopen() is ignored for all POSIX * conforming systems. However, Windows needs the 'b' flag. */ stringCopy(actualOperation, operation, 2); strncat(actualOperation, "b", 2); if ((fp = fopenWriteStream(filename, actualOperation)) == NULL) return ERROR_INT("stream not opened", procName, 1); fwrite(data, 1, nbytes, fp); fclose(fp); return 0; } /*! * \brief nbytesInFile() * * \param[in] filename * \return nbytes in file; 0 on error */ size_t nbytesInFile(const char *filename) { size_t nbytes; FILE *fp; PROCNAME("nbytesInFile"); if (!filename) return ERROR_INT("filename not defined", procName, 0); if ((fp = fopenReadStream(filename)) == NULL) return ERROR_INT("stream not opened", procName, 0); nbytes = fnbytesInFile(fp); fclose(fp); return nbytes; } /*! 
* \brief fnbytesInFile() * * \param[in] fp file stream * \return nbytes in file; 0 on error */ size_t fnbytesInFile(FILE *fp) { l_int64 pos, nbytes; PROCNAME("fnbytesInFile"); if (!fp) return ERROR_INT("stream not open", procName, 0); pos = ftell(fp); /* initial position */ if (pos < 0) return ERROR_INT("seek position must be > 0", procName, 0); fseek(fp, 0, SEEK_END); /* EOF */ nbytes = ftell(fp); fseek(fp, pos, SEEK_SET); /* back to initial position */ return nbytes; } /*--------------------------------------------------------------------* * Copy and compare in memory * *--------------------------------------------------------------------*/ /*! * \brief l_binaryCopy() * * \param[in] datas * \param[in] size of data array * \return datad on heap, or NULL on error * *
* Notes: * (1) We add 4 bytes to the zeroed output because in some cases * (e.g., string handling) it is important to have the data * be null terminated. This guarantees that after the memcpy, * the result is automatically null terminated. **/ l_uint8 * l_binaryCopy(const l_uint8 *datas, size_t size) { l_uint8 *datad; PROCNAME("l_binaryCopy"); if (!datas) return (l_uint8 *)ERROR_PTR("datas not defined", procName, NULL); if ((datad = (l_uint8 *)LEPT_CALLOC(size + 4, sizeof(l_uint8))) == NULL) return (l_uint8 *)ERROR_PTR("datad not made", procName, NULL); memcpy(datad, datas, size); return datad; } l_ok l_binaryCompare(const l_uint8 *data1, size_t size1, const l_uint8 *data2, size_t size2, l_int32 *psame) { l_int32 i; PROCNAME("l_binaryCompare"); if (!psame) return ERROR_INT("&same not defined", procName, 1); *psame = FALSE; if (!data1 || !data2) return ERROR_INT("data1 and data2 not both defined", procName, 1); if (size1 != size2) return 0; for (i = 0; i < size1; i++) { if (data1[i] != data2[i]) return 0; } *psame = TRUE; return 0; } /*--------------------------------------------------------------------* * File copy operations * *--------------------------------------------------------------------*/ /*! * \brief fileCopy() * * \param[in] srcfile copy from this file * \param[in] newfile copy to this file * \return 0 if OK, 1 on error */ l_ok fileCopy(const char *srcfile, const char *newfile) { l_int32 ret; size_t nbytes; l_uint8 *data; PROCNAME("fileCopy"); if (!srcfile) return ERROR_INT("srcfile not defined", procName, 1); if (!newfile) return ERROR_INT("newfile not defined", procName, 1); if ((data = l_binaryRead(srcfile, &nbytes)) == NULL) return ERROR_INT("data not returned", procName, 1); ret = l_binaryWrite(newfile, "w", data, nbytes); LEPT_FREE(data); return ret; } /*! 
* \brief fileConcatenate() * * \param[in] srcfile append data from this file * \param[in] destfile add data to this file * \return 0 if OK, 1 on error */ l_ok fileConcatenate(const char *srcfile, const char *destfile) { size_t nbytes; l_uint8 *data; PROCNAME("fileConcatenate"); if (!srcfile) return ERROR_INT("srcfile not defined", procName, 1); if (!destfile) return ERROR_INT("destfile not defined", procName, 1); data = l_binaryRead(srcfile, &nbytes); l_binaryWrite(destfile, "a", data, nbytes); LEPT_FREE(data); return 0; } /*! * \brief fileAppendString() * * \param[in] filename * \param[in] str string to append to file * \return 0 if OK, 1 on error */ l_ok fileAppendString(const char *filename, const char *str) { FILE *fp; PROCNAME("fileAppendString"); if (!filename) return ERROR_INT("filename not defined", procName, 1); if (!str) return ERROR_INT("str not defined", procName, 1); if ((fp = fopenWriteStream(filename, "a")) == NULL) return ERROR_INT("stream not opened", procName, 1); fprintf(fp, "%s", str); fclose(fp); return 0; } /*--------------------------------------------------------------------* * Multi-platform functions for opening file streams * *--------------------------------------------------------------------*/ /*! * \brief fopenReadStream() * * \param[in] filename * \return stream, or NULL on error * *
* Notes:
 *      (1) Use this whenever you would otherwise run fopen() to read
 *          from a stream.  Never call fopen() directly.
 *      (2) The pathname is first passed through genPathname(), which
 *          handles the temp directory conversion on windows:
 *              /tmp  ==>  [Windows Temp directory]
 *      (3) If that open fails, the directory is stripped and a second
 *          attempt is made with the bare file name.
 */
FILE *
fopenReadStream(const char  *filename)
{
char  *path, *basename;
FILE  *stream;

    PROCNAME("fopenReadStream");

    if (filename == NULL)
        return (FILE *)ERROR_PTR("filename not defined", procName, NULL);

        /* First attempt: the input filename, after path translation */
    path = genPathname(filename, NULL);
    stream = fopen(path, "rb");
    LEPT_FREE(path);

    if (stream == NULL) {
            /* Second attempt: the tail of the path only */
        splitPathAtDirectory(filename, NULL, &basename);
        stream = fopen(basename, "rb");
        LEPT_FREE(basename);
        if (stream == NULL)
            return (FILE *)ERROR_PTR("file not found", procName, NULL);
    }

    return stream;
}


/*!
 * \brief   fopenWriteStream()
 *
 * \param[in]    filename
 * \param[in]    modestring
 * \return  stream, or NULL on error
 *
 *
* Notes: * (1) This should be used whenever you want to run fopen() to * write or append to a stream. Never call fopen() directory. * (2) This handles the temp directory pathname conversion on windows: * /tmp ==> [Windows Temp directory] **/ FILE * fopenWriteStream(const char *filename, const char *modestring) { char *fname; FILE *fp; PROCNAME("fopenWriteStream"); if (!filename) return (FILE *)ERROR_PTR("filename not defined", procName, NULL); fname = genPathname(filename, NULL); fp = fopen(fname, modestring); LEPT_FREE(fname); if (!fp) return (FILE *)ERROR_PTR("stream not opened", procName, NULL); return fp; } /*! * \brief fopenReadFromMemory() * * \param[in] data, size * \return file stream, or NULL on error * *
* Notes: * (1) Work-around if fmemopen() not available. * (2) Windows tmpfile() writes into the root C:\ directory, which * requires admin privileges. This also works around that. **/ FILE * fopenReadFromMemory(const l_uint8 *data, size_t size) { FILE *fp; PROCNAME("fopenReadFromMemory"); if (!data) return (FILE *)ERROR_PTR("data not defined", procName, NULL); #if HAVE_FMEMOPEN if ((fp = fmemopen((void *)data, size, "rb")) == NULL) return (FILE *)ERROR_PTR("stream not opened", procName, NULL); #else /* write to tmp file */ L_INFO("work-around: writing to a temp file\n", procName); #ifdef _WIN32 if ((fp = fopenWriteWinTempfile()) == NULL) return (FILE *)ERROR_PTR("tmpfile stream not opened", procName, NULL); #else if ((fp = tmpfile()) == NULL) return (FILE *)ERROR_PTR("tmpfile stream not opened", procName, NULL); #endif /* _WIN32 */ fwrite(data, 1, size, fp); rewind(fp); #endif /* HAVE_FMEMOPEN */ return fp; } /*--------------------------------------------------------------------* * Opening a windows tmpfile for writing * *--------------------------------------------------------------------*/ /*! * \brief fopenWriteWinTempfile() * * \return file stream, or NULL on error * *
* Notes: * (1) The Windows version of tmpfile() writes into the root * C:\ directory, which requires admin privileges. This * function provides an alternative implementation. **/ FILE * fopenWriteWinTempfile() { #ifdef _WIN32 l_int32 handle; FILE *fp; char *filename; PROCNAME("fopenWriteWinTempfile"); if ((filename = l_makeTempFilename()) == NULL) { L_ERROR("l_makeTempFilename failed, %s\n", procName, strerror(errno)); return NULL; } handle = _open(filename, _O_CREAT | _O_RDWR | _O_SHORT_LIVED | _O_TEMPORARY | _O_BINARY, _S_IREAD | _S_IWRITE); lept_free(filename); if (handle == -1) { L_ERROR("_open failed, %s\n", procName, strerror(errno)); return NULL; } if ((fp = _fdopen(handle, "r+b")) == NULL) { L_ERROR("_fdopen failed, %s\n", procName, strerror(errno)); return NULL; } return fp; #else return NULL; #endif /* _WIN32 */ } /*--------------------------------------------------------------------* * Multi-platform functions that avoid C-runtime boundary * * crossing for applications with Windows DLLs * *--------------------------------------------------------------------*/ /* * Problems arise when pointers to streams and data are passed * between two Windows DLLs that have been generated with different * C runtimes. To avoid this, leptonica provides wrappers for * several C library calls. */ /*! * \brief lept_fopen() * * \param[in] filename * \param[in] mode same as for fopen(); e.g., "rb" * \return stream or NULL on error * *
* Notes: * (1) This must be used by any application that passes * a file handle to a leptonica Windows DLL. **/ FILE * lept_fopen(const char *filename, const char *mode) { PROCNAME("lept_fopen"); if (!filename) return (FILE *)ERROR_PTR("filename not defined", procName, NULL); if (!mode) return (FILE *)ERROR_PTR("mode not defined", procName, NULL); if (stringFindSubstr(mode, "r", NULL)) return fopenReadStream(filename); else return fopenWriteStream(filename, mode); } /*! * \brief lept_fclose() * * \param[in] fp file stream * \return 0 if OK, 1 on error * *
* Notes:
 *      (1) Any application that accepts a file handle generated by
 *          a leptonica Windows DLL should close it with this function.
 *      (2) For a defined stream, the value returned is that of fclose().
 */
l_ok
lept_fclose(FILE *fp)
{
l_int32  status;

    PROCNAME("lept_fclose");

    if (fp == NULL)
        return ERROR_INT("stream not defined", procName, 1);

    status = fclose(fp);
    return status;
}


/*!
 * \brief   lept_calloc()
 *
 * \param[in]    nmemb    number of members
 * \param[in]    size     of each member
 * \return  void ptr, or NULL on error
 *
 *
* Notes:
 *      (1) For safety with windows DLLs, pair this with lept_free()
 *          to avoid C-runtime boundary problems.  Just use these two
 *          functions throughout your application.
 *      (2) A request for 0 members, or for members of size 0,
 *          returns NULL.
 */
void *
lept_calloc(size_t  nmemb,
            size_t  size)
{
        /* Both arguments are unsigned, so only 0 can fail this test */
    if (nmemb == 0 || size == 0)
        return NULL;

    return LEPT_CALLOC(nmemb, size);
}


/*!
 * \brief   lept_free()
 *
 * \param[in]    ptr
 *
 *
* Notes:
 *      (1) Any application that accepts heap data allocated by
 *          a leptonica Windows DLL should release it with this function.
 *      (2) A null %ptr is ignored.
 */
void
lept_free(void *ptr)
{
    if (ptr != NULL) {
        LEPT_FREE(ptr);
    }
}


/*--------------------------------------------------------------------*
 *                Multi-platform file system operations               *
 *         [ These only write to /tmp or its subdirectories ]         *
 *--------------------------------------------------------------------*/
/*!
 * \brief   lept_mkdir()
 *
 * \param[in]    subdir    of /tmp or its equivalent on Windows
 * \return  0 on success, non-zero on failure
 *
 *
* Notes: * (1) %subdir is a partial path that can consist of one or more * directories. * (2) This makes any subdirectories of /tmp that are required. * (3) The root temp directory is: * /tmp (unix) [default] * [Temp] (windows) **/ l_int32 lept_mkdir(const char *subdir) { char *dir, *tmpdir; l_int32 i, n; l_int32 ret = 0; SARRAY *sa; #ifdef _WIN32 l_uint32 attributes; #endif /* _WIN32 */ PROCNAME("lept_mkdir"); if (!LeptDebugOK) { L_INFO("making named temp subdirectory %s is disabled\n", procName, subdir); return 0; } if (!subdir) return ERROR_INT("subdir not defined", procName, 1); if ((strlen(subdir) == 0) || (subdir[0] == '.') || (subdir[0] == '/')) return ERROR_INT("subdir not an actual subdirectory", procName, 1); sa = sarrayCreate(0); sarraySplitString(sa, subdir, "/"); n = sarrayGetCount(sa); dir = genPathname("/tmp", NULL); /* Make sure the tmp directory exists */ #ifndef _WIN32 ret = mkdir(dir, 0777); #else attributes = GetFileAttributes(dir); if (attributes == INVALID_FILE_ATTRIBUTES) ret = (CreateDirectory(dir, NULL) ? 0 : 1); #endif /* Make all the subdirectories */ for (i = 0; i < n; i++) { tmpdir = pathJoin(dir, sarrayGetString(sa, i, L_NOCOPY)); #ifndef _WIN32 ret += mkdir(tmpdir, 0777); #else if (CreateDirectory(tmpdir, NULL) == 0) ret += (GetLastError () != ERROR_ALREADY_EXISTS); #endif LEPT_FREE(dir); dir = tmpdir; } LEPT_FREE(dir); sarrayDestroy(&sa); if (ret > 0) L_ERROR("failure to create %d directories\n", procName, ret); return ret; } /*! * \brief lept_rmdir() * * \param[in] subdir of /tmp or its equivalent on Windows * \return 0 on success, non-zero on failure * *
* Notes: * (1) %subdir is a partial path that can consist of one or more * directories. * (2) This removes all files from the specified subdirectory of * the root temp directory: * /tmp (unix) * [Temp] (windows) * and then removes the subdirectory. * (3) The combination * lept_rmdir(subdir); * lept_mkdir(subdir); * is guaranteed to give you an empty subdirectory. **/ l_int32 lept_rmdir(const char *subdir) { char *dir, *realdir, *fname, *fullname; l_int32 exists, ret, i, nfiles; SARRAY *sa; #ifdef _WIN32 char *newpath; #endif /* _WIN32 */ PROCNAME("lept_rmdir"); if (!subdir) return ERROR_INT("subdir not defined", procName, 1); if ((strlen(subdir) == 0) || (subdir[0] == '.') || (subdir[0] == '/')) return ERROR_INT("subdir not an actual subdirectory", procName, 1); /* Find the temp subdirectory */ dir = pathJoin("/tmp", subdir); if (!dir) return ERROR_INT("directory name not made", procName, 1); lept_direxists(dir, &exists); if (!exists) { /* fail silently */ LEPT_FREE(dir); return 0; } /* List all the files in that directory */ if ((sa = getFilenamesInDirectory(dir)) == NULL) { L_ERROR("directory %s does not exist!\n", procName, dir); LEPT_FREE(dir); return 1; } nfiles = sarrayGetCount(sa); for (i = 0; i < nfiles; i++) { fname = sarrayGetString(sa, i, L_NOCOPY); fullname = genPathname(dir, fname); remove(fullname); LEPT_FREE(fullname); } #ifndef _WIN32 realdir = genPathname("/tmp", subdir); ret = rmdir(realdir); LEPT_FREE(realdir); #else newpath = genPathname(dir, NULL); ret = (RemoveDirectory(newpath) ? 0 : 1); LEPT_FREE(newpath); #endif /* !_WIN32 */ sarrayDestroy(&sa); LEPT_FREE(dir); return ret; } /*! * \brief lept_direxists() * * \param[in] dir * \param[out] pexists 1 if it exists; 0 otherwise * \return void * *
* Notes:
 *      (1) Always use unix pathname separators.
 *      (2) The pathname is passed through genPathname(), so that if it
 *          begins with "/tmp" there is an automatic directory translation
 *          on windows to a path in the windows [Temp] directory:
 *             "/tmp"  ==>  [Temp] (windows)
 *      (3) %pexists is set to 1 only if the path exists and is
 *          a directory.
 */
void
lept_direxists(const char  *dir,
               l_int32     *pexists)
{
char  *path;

    if (pexists == NULL) return;
    *pexists = 0;
    if (dir == NULL) return;
    path = genPathname(dir, NULL);
    if (path == NULL) return;

#ifndef _WIN32
    {
    struct stat  info;

        if (stat(path, &info) != -1 && S_ISDIR(info.st_mode))
            *pexists = 1;
    }
#else  /* _WIN32 */
    {
    l_uint32  attr;

        attr = GetFileAttributes(path);
        if (attr != INVALID_FILE_ATTRIBUTES &&
            (attr & FILE_ATTRIBUTE_DIRECTORY))
            *pexists = 1;
    }
#endif  /* _WIN32 */

    LEPT_FREE(path);
}


/*!
 * \brief   lept_rm_match()
 *
 * \param[in]    subdir    [optional]  if NULL, the removed files are in /tmp
 * \param[in]    substr    [optional] pattern to match in filename
 * \return  0 on success, non-zero on failure
 *
 *
* Notes:
 *      (1) This removes the matched files in /tmp or a subdirectory of /tmp.
 *          Use NULL for %subdir if the files are in /tmp.
 *      (2) If %substr == NULL, this removes all files in the directory.
 *          If %substr == "" (empty), this removes no files.
 *          If both %subdir == NULL and %substr == NULL, this removes
 *          all files in /tmp.
 *      (3) Use unix pathname separators.
 *      (4) By calling genPathname(), if the pathname begins with "/tmp"
 *          this does an automatic directory translation on windows
 *          to a path in the windows [Temp] directory:
 *             "/tmp"  ==>  [Temp] (windows)
 *      (5) Error conditions:
 *            * returns -1 if the temp directory name cannot be made
 *              or the directory is not found
 *            * returns the number of files (> 0) that it was unable to remove.
 */
l_int32
lept_rm_match(const char  *subdir,
              const char  *substr)
{
char    *path, *fname;
char     tempdir[256];
l_int32  i, n, ret;
SARRAY  *sa;

    PROCNAME("lept_rm_match");

        /* If the name cannot be made, %tempdir is not safe to use */
    if (makeTempDirname(tempdir, sizeof(tempdir), subdir))
        return ERROR_INT("temp dirname not made", procName, -1);
    if ((sa = getSortedPathnamesInDirectory(tempdir, substr, 0, 0)) == NULL)
        return ERROR_INT("sa not made", procName, -1);
    n = sarrayGetCount(sa);
    if (n == 0) {
        L_WARNING("no matching files found\n", procName);
        sarrayDestroy(&sa);
        return 0;
    }

    ret = 0;
    for (i = 0; i < n; i++) {
        fname = sarrayGetString(sa, i, L_NOCOPY);
        path = genPathname(fname, NULL);
        if (lept_rmfile(path) != 0) {
            L_ERROR("failed to remove %s\n", procName, path);
            ret++;
        }
        LEPT_FREE(path);
    }
    sarrayDestroy(&sa);
    return ret;
}


/*!
 * \brief   lept_rm()
 *
 * \param[in]    subdir    [optional] subdir of '/tmp'; can be NULL
 * \param[in]    tail      filename without the directory
 * \return  0 on success, non-zero on failure
 *
 *
* Notes:
 *      (1) The path to the file is made with genPathname(), which does
 *          an automatic directory translation on windows to a path in
 *          the windows [Temp] directory:
 *             "/tmp/..."  ==>  [Temp]/... (windows)
 */
l_int32
lept_rm(const char  *subdir,
        const char  *tail)
{
char     tempdir[256];
char    *fullpath;
l_int32  status;

    PROCNAME("lept_rm");

    if (tail == NULL || tail[0] == '\0')
        return ERROR_INT("tail undefined or empty", procName, 1);

    if (makeTempDirname(tempdir, sizeof(tempdir), subdir) != 0)
        return ERROR_INT("temp dirname not made", procName, 1);

    fullpath = genPathname(tempdir, tail);
    status = lept_rmfile(fullpath);
    LEPT_FREE(fullpath);
    return status;
}


/*!
 * \brief   lept_rmfile()
 *
 * \param[in]    filepath     full path to file including the directory
 * \return  0 on success, non-zero on failure
 *
 *
* Notes:
 *      (1) This removes the named file.
 *      (2) Use unix pathname separators.
 *      (3) There is no name translation.
 *      (4) Unlike the other lept_* functions in this section, this is
 *          not restricted to files in /tmp or a subdirectory of it:
 *          it can remove any file.
 */
l_int32
lept_rmfile(const char  *filepath)
{
    PROCNAME("lept_rmfile");

    if (filepath == NULL || filepath[0] == '\0')
        return ERROR_INT("filepath undefined or empty", procName, 1);

#ifndef _WIN32
    return remove(filepath);
#else
        /* Set attributes to allow deletion of read-only files */
    SetFileAttributes(filepath, FILE_ATTRIBUTE_NORMAL);
    return DeleteFile(filepath) ? 0 : 1;
#endif  /* !_WIN32 */
}


/*!
 * \brief   lept_mv()
 *
 * \param[in]    srcfile
 * \param[in]    newdir     [optional]; can be NULL
 * \param[in]    newtail    [optional]; can be NULL
 * \param[out]   pnewpath   [optional] of actual path; can be NULL
 * \return  0 on success, non-zero on failure
 *
 *
* Notes: * (1) This moves %srcfile to /tmp or to a subdirectory of /tmp. * (2) %srcfile can either be a full path or relative to the * current directory. * (3) %newdir can either specify an existing subdirectory of /tmp * or can be NULL. In the latter case, the file will be written * into /tmp. * (4) %newtail can either specify a filename tail or, if NULL, * the filename is taken from src-tail, the tail of %srcfile. * (5) For debugging, the computed newpath can be returned. It must * be freed by the caller. * (6) Reminders: * (a) specify files using unix pathnames * (b) for windows, translates * /tmp ==> [Temp] * where [Temp] is the windows temp directory * (7) Examples: * * newdir = NULL, newtail = NULL ==> /tmp/src-tail * * newdir = NULL, newtail = abc ==> /tmp/abc * * newdir = def/ghi, newtail = NULL ==> /tmp/def/ghi/src-tail * * newdir = def/ghi, newtail = abc ==> /tmp/def/ghi/abc **/ l_int32 lept_mv(const char *srcfile, const char *newdir, const char *newtail, char **pnewpath) { char *srcpath, *newpath, *dir, *srctail; char newtemp[256]; l_int32 ret; PROCNAME("lept_mv"); if (!srcfile) return ERROR_INT("srcfile not defined", procName, 1); /* Require output pathname to be in /tmp/ or a subdirectory */ if (makeTempDirname(newtemp, sizeof(newtemp), newdir) == 1) return ERROR_INT("newdir not NULL or a subdir of /tmp", procName, 1); /* Get canonical src pathname */ splitPathAtDirectory(srcfile, &dir, &srctail); #ifndef _WIN32 srcpath = pathJoin(dir, srctail); LEPT_FREE(dir); /* Generate output pathname */ if (!newtail || newtail[0] == '\0') newpath = pathJoin(newtemp, srctail); else newpath = pathJoin(newtemp, newtail); LEPT_FREE(srctail); /* Overwrite any existing file at 'newpath' */ ret = fileCopy(srcpath, newpath); if (!ret) { /* and remove srcfile */ char *realpath = genPathname(srcpath, NULL); remove(realpath); LEPT_FREE(realpath); } #else srcpath = genPathname(dir, srctail); LEPT_FREE(dir); /* Generate output pathname */ if (!newtail || newtail[0] == '\0') 
newpath = genPathname(newtemp, srctail); else newpath = genPathname(newtemp, newtail); LEPT_FREE(srctail); /* Overwrite any existing file at 'newpath' */ ret = MoveFileEx(srcpath, newpath, MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING) ? 0 : 1; #endif /* ! _WIN32 */ LEPT_FREE(srcpath); if (pnewpath) *pnewpath = newpath; else LEPT_FREE(newpath); return ret; } /*! * \brief lept_cp() * * \param[in] srcfile * \param[in] newdir [optional]; can be NULL * \param[in] newtail [optional]; can be NULL * \param[out] pnewpath [optional] of actual path; can be NULL * \return 0 on success, non-zero on failure * *
* Notes: * (1) This copies %srcfile to /tmp or to a subdirectory of /tmp. * (2) %srcfile can either be a full path or relative to the * current directory. * (3) %newdir can either specify an existing subdirectory of /tmp, * or can be NULL. In the latter case, the file will be written * into /tmp. * (4) %newtail can either specify a filename tail or, if NULL, * the filename is taken from src-tail, the tail of %srcfile. * (5) For debugging, the computed newpath can be returned. It must * be freed by the caller. * (6) Reminders: * (a) specify files using unix pathnames * (b) for windows, translates * /tmp ==> [Temp] * where [Temp] is the windows temp directory * (7) Examples: * * newdir = NULL, newtail = NULL ==> /tmp/src-tail * * newdir = NULL, newtail = abc ==> /tmp/abc * * newdir = def/ghi, newtail = NULL ==> /tmp/def/ghi/src-tail * * newdir = def/ghi, newtail = abc ==> /tmp/def/ghi/abc * **/ l_int32 lept_cp(const char *srcfile, const char *newdir, const char *newtail, char **pnewpath) { char *srcpath, *newpath, *dir, *srctail; char newtemp[256]; l_int32 ret; PROCNAME("lept_cp"); if (!srcfile) return ERROR_INT("srcfile not defined", procName, 1); /* Require output pathname to be in /tmp or a subdirectory */ if (makeTempDirname(newtemp, sizeof(newtemp), newdir) == 1) return ERROR_INT("newdir not NULL or a subdir of /tmp", procName, 1); /* Get canonical src pathname */ splitPathAtDirectory(srcfile, &dir, &srctail); #ifndef _WIN32 srcpath = pathJoin(dir, srctail); LEPT_FREE(dir); /* Generate output pathname */ if (!newtail || newtail[0] == '\0') newpath = pathJoin(newtemp, srctail); else newpath = pathJoin(newtemp, newtail); LEPT_FREE(srctail); /* Overwrite any existing file at 'newpath' */ ret = fileCopy(srcpath, newpath); #else srcpath = genPathname(dir, srctail); LEPT_FREE(dir); /* Generate output pathname */ if (!newtail || newtail[0] == '\0') newpath = genPathname(newtemp, srctail); else newpath = genPathname(newtemp, newtail); LEPT_FREE(srctail); /* Overwrite 
any existing file at 'newpath' */ ret = CopyFile(srcpath, newpath, FALSE) ? 0 : 1; #endif /* !_WIN32 */ LEPT_FREE(srcpath); if (pnewpath) *pnewpath = newpath; else LEPT_FREE(newpath); return ret; } /*--------------------------------------------------------------------* * Special debug/test function for calling 'system' * *--------------------------------------------------------------------*/ #if defined(__APPLE__) #include "TargetConditionals.h" #endif /* __APPLE__ */ /*! * \brief callSystemDebug() * * \param[in] cmd command to be exec'd * \return void * *
* Notes: * (1) The C library 'system' call is only made through this function. * It only works in debug/test mode, where the global variable * LeptDebugOK == TRUE. This variable is set to FALSE in the * library as distributed, and calling this function will * generate an error message. **/ void callSystemDebug(const char *cmd) { l_int32 ret; PROCNAME("callSystemDebug"); if (!cmd) { L_ERROR("cmd not defined\n", procName); return; } if (LeptDebugOK == FALSE) { L_INFO("'system' calls are disabled\n", procName); return; } #if defined(__APPLE__) /* iOS 11 does not support system() */ #if TARGET_OS_OSX /* Mac OS X */ ret = system(cmd); #elif TARGET_OS_IPHONE || defined(OS_IOS) /* iOS */ L_ERROR("iOS 11 does not support system()\n", procName); #endif /* TARGET_OS_OSX */ #else /* ! __APPLE__ */ ret = system(cmd); #endif /* __APPLE__ */ } /*--------------------------------------------------------------------* * General file name operations * *--------------------------------------------------------------------*/ /*! * \brief splitPathAtDirectory() * * \param[in] pathname full path; can be a directory * \param[out] pdir [optional] root directory name of * input path, including trailing '/' * \param[out] ptail [optional] path tail, which is either * the file name within the root directory or * the last sub-directory in the path * \return 0 if OK, 1 on error * *
* Notes:
 *      (1) If you only want the tail, input null for the root directory ptr.
 *      (2) If you only want the root directory name, input null for the
 *          tail ptr.
 *      (3) This function makes decisions based only on the lexical
 *          structure of the input.  Examples:
 *            /usr/tmp/abc.d  -->  dir: /usr/tmp/       tail: abc.d
 *            /usr/tmp/       -->  dir: /usr/tmp/       tail: [empty string]
 *            /usr/tmp        -->  dir: /usr/           tail: tmp
 *            abc.d           -->  dir: [empty string]  tail: abc.d
 *      (4) Consider the first example above: /usr/tmp/abc.d.
 *          Suppose you want the stem of the file, abc, without either
 *          the directory or the extension.  This can be extracted in
 *          two steps:
 *              splitPathAtDirectory("/usr/tmp/abc.d", NULL, &tail);
 *                   [sets tail: "abc.d"]
 *              splitPathAtExtension(tail, &basename, NULL);
 *                   [sets basename: "abc"]
 *      (5) The input can have either forward (unix) or backward (win)
 *          slash separators.  The output has unix separators.
 *          Note that Win32 pathname functions generally accept both
 *          slash forms, but the windows command line interpreter
 *          only accepts backward slashes, because forward slashes are
 *          used to demarcate switches (vs. dashes in unix).
 *      (6) The returned strings are newly allocated and owned by the
 *          caller.  On error, every requested output is set to NULL.
 */
l_ok
splitPathAtDirectory(const char  *pathname,
                     char       **pdir,
                     char       **ptail)
{
    char  *cpathname, *lastslash;

    PROCNAME("splitPathAtDirectory");

    if (!pdir && !ptail)
        return ERROR_INT("null input for both strings", procName, 1);
    if (pdir) *pdir = NULL;
    if (ptail) *ptail = NULL;
    if (!pathname)
        return ERROR_INT("pathname not defined", procName, 1);

        /* Work on a private copy; it must exist before it is searched */
    if ((cpathname = stringNew(pathname)) == NULL)
        return ERROR_INT("cpathname not made", procName, 1);
    convertSepCharsInPath(cpathname, UNIX_PATH_SEPCHAR);
    lastslash = strrchr(cpathname, '/');
    if (lastslash) {
        if (ptail)
            *ptail = stringNew(lastslash + 1);
        if (pdir) {
                /* Truncate just after the last slash; the copy becomes
                 * the directory string, including the trailing '/' */
            *(lastslash + 1) = '\0';
            *pdir = cpathname;
        } else {
            LEPT_FREE(cpathname);
        }
    } else {  /* no directory */
        if (pdir)
            *pdir = stringNew("");
        if (ptail)
            *ptail = cpathname;
        else
            LEPT_FREE(cpathname);
    }

        /* Do not report success with a missing requested output */
    if ((pdir && !*pdir) || (ptail && !*ptail)) {
        if (pdir) {
            LEPT_FREE(*pdir);
            *pdir = NULL;
        }
        if (ptail) {
            LEPT_FREE(*ptail);
            *ptail = NULL;
        }
        return ERROR_INT("output strings not made", procName, 1);
    }

    return 0;
}


/*!
* \brief splitPathAtExtension() * * \param[in] pathname full path; can be a directory * \param[out] pbasename [optional] pathname not including the * last dot and characters after that * \param[out] pextension [optional] path extension, which is * the last dot and the characters after it. If * there is no extension, it returns the empty string * \return 0 if OK, 1 on error * *
* Notes: * (1) If you only want the extension, input null for the basename ptr. * (2) If you only want the basename without extension, input null * for the extension ptr. * (3) This function makes decisions based only on the lexical * structure of the input. Examples: * /usr/tmp/abc.jpg --> basename: /usr/tmp/abc ext: .jpg * /usr/tmp/.jpg --> basename: /usr/tmp/ ext: .jpg * /usr/tmp.jpg/ --> basename: /usr/tmp.jpg/ ext: [empty str] * ./.jpg --> basename: ./ ext: .jpg * (4) The input can have either forward (unix) or backward (win) * slash separators. The output has unix separators. * (5) Note that basename, as used here, is different from the result * of the unix program 'basename'. Here, basename is the entire * pathname up to a final extension and its preceding dot. **/ l_ok splitPathAtExtension(const char *pathname, char **pbasename, char **pextension) { char *tail, *dir, *lastdot; char empty[4] = ""; PROCNAME("splitPathExtension"); if (!pbasename && !pextension) return ERROR_INT("null input for both strings", procName, 1); if (pbasename) *pbasename = NULL; if (pextension) *pextension = NULL; if (!pathname) return ERROR_INT("pathname not defined", procName, 1); /* Split out the directory first */ splitPathAtDirectory(pathname, &dir, &tail); /* Then look for a "." in the tail part. * This way we ignore all "." in the directory. */ if ((lastdot = strrchr(tail, '.'))) { if (pextension) *pextension = stringNew(lastdot); if (pbasename) { *lastdot = '\0'; *pbasename = stringJoin(dir, tail); } } else { if (pextension) *pextension = stringNew(empty); if (pbasename) *pbasename = stringNew(pathname); } LEPT_FREE(dir); LEPT_FREE(tail); return 0; } /*! * \brief pathJoin() * * \param[in] dir [optional] can be null * \param[in] fname [optional] can be null * \return specially concatenated path, or NULL on error * *
* Notes: * (1) Use unix-style pathname separators ('/'). * (2) %fname can be the entire path, or part of the path containing * at least one directory, or a tail without a directory, or NULL. * (3) It produces a path that strips multiple slashes to a single * slash, joins %dir and %fname by a slash, and has no trailing * slashes (except in the cases where %dir == "/" and * %fname == NULL, or v.v.). * (4) If both %dir and %fname are null, produces an empty string. * (5) Neither %dir nor %fname can begin with '..'. * (6) The result is not canonicalized or tested for correctness: * garbage in (e.g., /&%), garbage out. * (7) Examples: * //tmp// + //abc/ --> /tmp/abc * tmp/ + /abc/ --> tmp/abc * tmp/ + abc/ --> tmp/abc * /tmp/ + /// --> /tmp * /tmp/ + NULL --> /tmp * // + /abc// --> /abc * // + NULL --> / * NULL + /abc/def/ --> /abc/def * NULL + abc// --> abc * NULL + // --> / * NULL + NULL --> (empty string) * "" + "" --> (empty string) * "" + / --> / * ".." + /etc/foo --> NULL * /tmp + ".." --> NULL **/ char * pathJoin(const char *dir, const char *fname) { const char *slash = "/"; char *str, *dest; l_int32 i, n1, n2, emptydir; size_t size; SARRAY *sa1, *sa2; L_BYTEA *ba; PROCNAME("pathJoin"); if (!dir && !fname) return stringNew(""); if (dir && strlen(dir) >= 2 && dir[0] == '.' && dir[1] == '.') return (char *)ERROR_PTR("dir starts with '..'", procName, NULL); if (fname && strlen(fname) >= 2 && fname[0] == '.' 
&& fname[1] == '.') return (char *)ERROR_PTR("fname starts with '..'", procName, NULL); sa1 = sarrayCreate(0); sa2 = sarrayCreate(0); ba = l_byteaCreate(4); /* Process %dir */ if (dir && strlen(dir) > 0) { if (dir[0] == '/') l_byteaAppendString(ba, slash); sarraySplitString(sa1, dir, "/"); /* removes all slashes */ n1 = sarrayGetCount(sa1); for (i = 0; i < n1; i++) { str = sarrayGetString(sa1, i, L_NOCOPY); l_byteaAppendString(ba, str); l_byteaAppendString(ba, slash); } } /* Special case to add leading slash: dir NULL or empty string */ emptydir = dir && strlen(dir) == 0; if ((!dir || emptydir) && fname && strlen(fname) > 0 && fname[0] == '/') l_byteaAppendString(ba, slash); /* Process %fname */ if (fname && strlen(fname) > 0) { sarraySplitString(sa2, fname, "/"); n2 = sarrayGetCount(sa2); for (i = 0; i < n2; i++) { str = sarrayGetString(sa2, i, L_NOCOPY); l_byteaAppendString(ba, str); l_byteaAppendString(ba, slash); } } /* Remove trailing slash */ dest = (char *)l_byteaCopyData(ba, &size); if (size > 1 && dest[size - 1] == '/') dest[size - 1] = '\0'; sarrayDestroy(&sa1); sarrayDestroy(&sa2); l_byteaDestroy(&ba); return dest; } /*! * \brief appendSubdirs() * * \param[in] basedir * \param[in] subdirs * \return concatenated full directory path without trailing slash, * or NULL on error * *
* Notes:
 *      (1) Use unix pathname separators
 *      (2) Allocates a new string:  [basedir]/[subdirs]
 *          The caller owns the returned string.
 *      (3) A single leading '/' on %subdirs and a single trailing '/'
 *          on the result are stripped.
 *      (4) If %basedir is the empty string, no separator is inserted
 *          and the result is %subdirs with those slashes stripped.
 */
char *
appendSubdirs(const char  *basedir,
              const char  *subdirs)
{
    char    *newdir;
    size_t   len1, len2, len3, len4;

    PROCNAME("appendSubdirs");

    if (!basedir || !subdirs)
        return (char *)ERROR_PTR("basedir and subdirs not both defined",
                                 procName, NULL);

    len1 = strlen(basedir);
    len2 = strlen(subdirs);
    len3 = len1 + len2 + 6;  /* room for the separator and terminator */
    if ((newdir = (char *)LEPT_CALLOC(len3 + 1, 1)) == NULL)
        return (char *)ERROR_PTR("newdir not made", procName, NULL);

    strncat(newdir, basedir, len3);  /* add basedir */

        /* Add '/' if necessary.  The length test keeps the index
         * in bounds when basedir is empty. */
    if (len1 > 0 && newdir[len1 - 1] != '/')
        newdir[len1] = '/';

        /* Add subdirs, stripping leading '/' */
    if (subdirs[0] == '/')
        strncat(newdir, subdirs + 1, len3);
    else
        strncat(newdir, subdirs, len3);

        /* Strip trailing '/'; again guard against an empty result */
    len4 = strlen(newdir);
    if (len4 > 0 && newdir[len4 - 1] == '/')
        newdir[len4 - 1] = '\0';

    return newdir;
}


/*--------------------------------------------------------------------*
 *                     Special file name operations                   *
 *--------------------------------------------------------------------*/
/*!
 * \brief   convertSepCharsInPath()
 *
 * \param[in]    path
 * \param[in]    type     UNIX_PATH_SEPCHAR, WIN_PATH_SEPCHAR
 * \return  0 if OK, 1 on error
 *
* Notes: * (1) In-place conversion. * (2) Type is the resulting type: * * UNIX_PATH_SEPCHAR: '\\' ==> '/' * * WIN_PATH_SEPCHAR: '/' ==> '\\' * (3) Virtually all path operations in leptonica use unix separators. **/ l_ok convertSepCharsInPath(char *path, l_int32 type) { l_int32 i; size_t len; PROCNAME("convertSepCharsInPath"); if (!path) return ERROR_INT("path not defined", procName, 1); if (type != UNIX_PATH_SEPCHAR && type != WIN_PATH_SEPCHAR) return ERROR_INT("invalid type", procName, 1); len = strlen(path); if (type == UNIX_PATH_SEPCHAR) { for (i = 0; i < len; i++) { if (path[i] == '\\') path[i] = '/'; } } else { /* WIN_PATH_SEPCHAR */ for (i = 0; i < len; i++) { if (path[i] == '/') path[i] = '\\'; } } return 0; } /*! * \brief genPathname() * * \param[in] dir [optional] directory or full path name, * with or without the trailing '/' * \param[in] fname [optional] file name within a directory * \return pathname either a directory or full path, or NULL on error * *
* Notes: * (1) This function generates actual paths in the following ways: * * from two sub-parts (e.g., a directory and a file name). * * from a single path full path, placed in %dir, with * %fname == NULL. * * from the name of a file in the local directory placed in * %fname, with %dir == NULL. * * if in a "/tmp" directory and on windows, the windows * temp directory is used. * (2) On windows, if the root of %dir is '/tmp', this does a name * translation: * "/tmp" ==> [Temp] (windows) * where [Temp] is the windows temp directory. * (3) On unix, the TMPDIR variable is ignored. No rewriting * of temp directories is permitted. * (4) There are four cases for the input: * (a) %dir is a directory and %fname is defined: result is a full path * (b) %dir is a directory and %fname is null: result is a directory * (c) %dir is a full path and %fname is null: result is a full path * (d) %dir is null or an empty string: start in the current dir; * result is a full path * (5) In all cases, the resulting pathname is not terminated with a slash * (6) The caller is responsible for freeing the returned pathname. **/ char * genPathname(const char *dir, const char *fname) { l_int32 is_win32 = FALSE; char *cdir, *pathout; l_int32 dirlen, namelen, size; PROCNAME("genPathname"); if (!dir && !fname) return (char *)ERROR_PTR("no input", procName, NULL); /* Handle the case where we start from the current directory */ if (!dir || dir[0] == '\0') { if ((cdir = getcwd(NULL, 0)) == NULL) return (char *)ERROR_PTR("no current dir found", procName, NULL); } else { cdir = stringNew(dir); } /* Convert to unix path separators, and remove the trailing * slash in the directory, except when dir == "/" */ convertSepCharsInPath(cdir, UNIX_PATH_SEPCHAR); dirlen = strlen(cdir); if (cdir[dirlen - 1] == '/' && dirlen != 1) { cdir[dirlen - 1] = '\0'; dirlen--; } namelen = (fname) ? 
strlen(fname) : 0; size = dirlen + namelen + 256; if ((pathout = (char *)LEPT_CALLOC(size, sizeof(char))) == NULL) { LEPT_FREE(cdir); return (char *)ERROR_PTR("pathout not made", procName, NULL); } #ifdef _WIN32 is_win32 = TRUE; #endif /* _WIN32 */ /* First handle %dir (which may be a full pathname). * There is no path rewriting on unix, and on win32, we do not * rewrite unless the specified directory is /tmp or * a subdirectory of /tmp */ if (!is_win32 || dirlen < 4 || (dirlen == 4 && strncmp(cdir, "/tmp", 4) != 0) || /* not in "/tmp" */ (dirlen > 4 && strncmp(cdir, "/tmp/", 5) != 0)) { /* not in "/tmp/" */ stringCopy(pathout, cdir, dirlen); } else { /* Rewrite for win32 with "/tmp" specified for the directory. */ #ifdef _WIN32 l_int32 tmpdirlen; char tmpdir[MAX_PATH]; GetTempPath(sizeof(tmpdir), tmpdir); /* get the windows temp dir */ tmpdirlen = strlen(tmpdir); if (tmpdirlen > 0 && tmpdir[tmpdirlen - 1] == '\\') { tmpdir[tmpdirlen - 1] = '\0'; /* trim the trailing '\' */ } tmpdirlen = strlen(tmpdir); stringCopy(pathout, tmpdir, tmpdirlen); /* Add the rest of cdir */ if (dirlen > 4) stringCat(pathout, size, cdir + 4); #endif /* _WIN32 */ } /* Now handle %fname */ if (fname && strlen(fname) > 0) { dirlen = strlen(pathout); pathout[dirlen] = '/'; strncat(pathout, fname, namelen); } LEPT_FREE(cdir); return pathout; } /*! * \brief makeTempDirname() * * \param[in] result preallocated on stack or heap and passed in * \param[in] nbytes size of %result array, in bytes * \param[in] subdir [optional]; can be NULL or an empty string * \return 0 if OK, 1 on error * *
* Notes: * (1) This generates the directory path for output temp files, * written into %result with unix separators. * (2) Caller allocates %result, large enough to hold the path, * which is: * /tmp/%subdir (unix) * [Temp]/%subdir (windows, mac, ios) * where [Temp] is a path determined * - on windows, mac: by GetTempPath() * - on ios: by confstr() (see man page) * and %subdir is in general a set of nested subdirectories: * dir1/dir2/.../dirN * which in use would not typically exceed 2 levels. * (3) Usage example: * \code * char result[256]; * makeTempDirname(result, sizeof(result), "lept/golden"); * \endcode **/ l_ok makeTempDirname(char *result, size_t nbytes, const char *subdir) { char *dir, *path; l_int32 ret = 0; size_t pathlen; PROCNAME("makeTempDirname"); if (!result) return ERROR_INT("result not defined", procName, 1); if (subdir && ((subdir[0] == '.') || (subdir[0] == '/'))) return ERROR_INT("subdir not an actual subdirectory", procName, 1); memset(result, 0, nbytes); #ifdef OS_IOS { size_t n = confstr(_CS_DARWIN_USER_TEMP_DIR, result, nbytes); if (n == 0) { L_ERROR("failed to find tmp dir, %s\n", procName, strerror(errno)); return 1; } else if (n > nbytes) { return ERROR_INT("result array too small for path\n", procName, 1); } dir = pathJoin(result, subdir); } #else dir = pathJoin("/tmp", subdir); #endif /* ~ OS_IOS */ #ifndef _WIN32 path = stringNew(dir); #else path = genPathname(dir, NULL); #endif /* ~ _WIN32 */ pathlen = strlen(path); if (pathlen < nbytes - 1) { strncpy(result, path, pathlen); } else { L_ERROR("result array too small for path\n", procName); ret = 1; } LEPT_FREE(dir); LEPT_FREE(path); return ret; } /*! * \brief modifyTrailingSlash() * * \param[in] path preallocated on stack or heap and passed in * \param[in] nbytes size of %path array, in bytes * \param[in] flag L_ADD_TRAIL_SLASH or L_REMOVE_TRAIL_SLASH * \return 0 if OK, 1 on error * *
* Notes:
 *      (1) This carries out the requested action if necessary.
 *      (2) A slash is added only if the path does not already end in
 *          one and there is room for it in the %nbytes array.
 *      (3) An empty %path is left unchanged for either flag: there is
 *          no last character to test, and nothing is added.
 */
l_ok
modifyTrailingSlash(char    *path,
                    size_t   nbytes,
                    l_int32  flag)
{
    size_t  len;

    PROCNAME("modifyTrailingSlash");

    if (!path)
        return ERROR_INT("path not defined", procName, 1);
    if (flag != L_ADD_TRAIL_SLASH && flag != L_REMOVE_TRAIL_SLASH)
        return ERROR_INT("invalid flag", procName, 1);

        /* With an empty string, path[len - 1] would index before the
         * start of the array, so return before looking at it */
    len = strlen(path);
    if (len == 0)
        return 0;

    if (flag == L_ADD_TRAIL_SLASH) {
            /* Written as an addition so that a tiny %nbytes cannot
             * wrap around, as it would in 'nbytes - 2' */
        if (path[len - 1] != '/' && len + 2 < nbytes) {
            path[len] = '/';
            path[len + 1] = '\0';
        }
    } else if (path[len - 1] == '/') {  /* L_REMOVE_TRAIL_SLASH */
        path[len - 1] = '\0';
    }
    return 0;
}


/*!
 * \brief   l_makeTempFilename()
 *
 * \return  fname : heap allocated filename; returns NULL on failure.
 *
* Notes: * (1) On unix, this makes a filename of the form * "/tmp/lept.XXXXXX", * where each X is a random character. * (2) On windows, this makes a filename of the form * "/[Temp]/lp.XXXXXX". * (3) On all systems, this fails if the file is not writable. * (4) Safest usage is to write to a subdirectory in debug code. * (5) The returned filename must be freed by the caller, using lept_free. * (6) The tail of the filename has a '.', so that cygwin interprets * the file as having an extension. Otherwise, cygwin assumes it * is an executable and appends ".exe" to the filename. * (7) On unix, whenever possible use tmpfile() instead. tmpfile() * hides the file name, returns a stream opened for write, * and deletes the temp file when the stream is closed. **/ char * l_makeTempFilename() { char dirname[240]; PROCNAME("l_makeTempFilename"); if (makeTempDirname(dirname, sizeof(dirname), NULL) == 1) return (char *)ERROR_PTR("failed to make dirname", procName, NULL); #ifndef _WIN32 { char *pattern; l_int32 fd; pattern = stringConcatNew(dirname, "/lept.XXXXXX", NULL); fd = mkstemp(pattern); if (fd == -1) { LEPT_FREE(pattern); return (char *)ERROR_PTR("mkstemp failed", procName, NULL); } close(fd); return pattern; } #else { char fname[MAX_PATH]; FILE *fp; if (GetTempFileName(dirname, "lp.", 0, fname) == 0) return (char *)ERROR_PTR("GetTempFileName failed", procName, NULL); if ((fp = fopen(fname, "wb")) == NULL) return (char *)ERROR_PTR("file cannot be written to", procName, NULL); fclose(fp); return stringNew(fname); } #endif /* ~ _WIN32 */ } /*! * \brief extractNumberFromFilename() * * \param[in] fname * \param[in] numpre number of characters before the digits to be found * \param[in] numpost number of characters after the digits to be found * \return num number embedded in the filename; -1 on error or if * not found * *
* Notes: * (1) The number is to be found in the basename, which is the * filename without either the directory or the last extension. * (2) When a number is found, it is non-negative. If no number * is found, this returns -1, without an error message. The * caller needs to check. **/ l_int32 extractNumberFromFilename(const char *fname, l_int32 numpre, l_int32 numpost) { char *tail, *basename; l_int32 len, nret, num; PROCNAME("extractNumberFromFilename"); if (!fname) return ERROR_INT("fname not defined", procName, -1); splitPathAtDirectory(fname, NULL, &tail); splitPathAtExtension(tail, &basename, NULL); LEPT_FREE(tail); len = strlen(basename); if (numpre + numpost > len - 1) { LEPT_FREE(basename); return ERROR_INT("numpre + numpost too big", procName, -1); } basename[len - numpost] = '\0'; nret = sscanf(basename + numpre, "%d", &num); LEPT_FREE(basename); if (nret == 1) return num; else return -1; /* not found */ }