Fixed potential infinite loop (until memory is exhausted) and added several utility functions for processing strings.

2019-12-06 19:39:08 -05:00 · 2019-12-06 19:39:08 -05:00 · 7496060d80
parent e6c14b3eb0
commit 7496060d80
3 changed files with 794 additions and 157 deletions
--- a/src/include/ngspice/ngspice.h
+++ b/src/include/ngspice/ngspice.h
@ -241,6 +241,7 @@ extern double x_atanh(double);
 #define HUGE HUGE_VAL
 #endif

+void findtok_noparen(char **p_str, char **p_token, char **p_token_end);
 extern char *gettok_noparens(char **s);
 extern char *gettok_node(char **s);
 extern char *gettok_iv(char **s);
--- a/src/include/ngspice/stringutil.h
+++ b/src/include/ngspice/stringutil.h
@ -6,37 +6,117 @@
 #ifndef ngspice_STRINGUTIL_H
 #define ngspice_STRINGUTIL_H

+#include <stdarg.h>
+#include <string.h>
+
 #include "ngspice/config.h"
 #include "ngspice/bool.h"

-#include <stdarg.h>
-
-
-int prefix(const char *p, const char *s);
-char * copy(const char *str);
-char * copy_substring(const char *str, const char *end);
-int substring(const char *sub, const char *str);
-void appendc(char *s, char c);
-int scannum(char *str);
-int cieq(const char *p, const char *s);
-int ciprefix(const char *p, const char *s);
-void strtolower(char *str);
-void strtoupper(char *str);
-char * stripWhiteSpacesInsideParens(const char *str);
-char * gettok(char **s);
-char * gettok_instance(char **);
-char * gettok_char(char **s, char p, bool inc_p, bool nested);
-int model_name_match(const char *token, const char *model_name);
-
-extern char *tvprintf(const char *fmt, va_list args);

 #ifdef __GNUC__
-extern char *tprintf(const char *fmt, ...) __attribute__ ((format (__printf__, 1, 2)));
+#define ATTR_TPRINTF    __attribute__ ((format (__printf__, 1, 2)))
 #else
-extern char *tprintf(const char *fmt, ...);
+#define ATTR_TPRINTF
 #endif


+/* State carried between calls while scanning a string for occurrences of
+ * a pattern. substring_match_init() fills it in and
+ * substring_match_next() advances it. The pattern and the string are
+ * referenced, not copied, so both must remain allocated and unaltered
+ * while the search is in progress. */
+struct substring_match_info {
+    /* Input data */
+    size_t n_char_pattern; /* length of pattern being located */
+    const char *p_pattern; /* pattern to find */
+    size_t n_char_string; /* length of string to search */
+    const char *p_string; /* String to search. Final null not required.
+                           * Advanced by substring_match_next() as
+                           * matches are found */
+    bool f_overlap; /* flag that substring matches can overlap */
+
+    /* Intermediate results. Apart from f_done, these are only initialized
+     * when substring_match_init() finds that a scan is possible */
+    size_t n_char_pattern_1; /* length of pattern being located - 1 */
+    size_t msb_factor; /* constant related to updating hash */
+    size_t h_pattern; /* hash value of pattern */
+    size_t h_string; /* current hash value of string */
+    const char *p_last; /* last possible substring match location */
+    bool f_done; /* flag that last match was found */
+};
+
+void appendc(char *s, char c);
+int cieq(const char *p, const char *s);
+int cieqn(const char *p, const char *s, size_t n);
+int ciprefix(const char *p, const char *s);
+char *dup_string(const char *str, size_t n_char);
+char *find_first_of(const char *haystack,
+        unsigned int n_needle, const char *p_needle);
+int get_comma_separated_values(char *values[], char *str);
+int get_int_n(const char *str, size_t n, int *p_value);
+#ifdef COMPILE_UNUSED_FUNCTIONS
+size_t get_substring_matches(size_t n_char_pattern, const char *p_pattern,
+        size_t n_char_string, const char *p_string,
+        size_t n_elem_buf, char *p_match_buf, bool f_overlap);
+#endif
+char *gettok(char **s);
+char *gettok_char(char **s, char p, bool inc_p, bool nested);
+char *gettok_instance(char **);
+bool has_escape_or_quote(size_t n, const char *str);
+bool is_arith_char(char c);
+bool isquote(char ch);
+int model_name_match(const char *token, const char *model_name);
+int prefix(const char *p, const char *s);
+int prefix_n(size_t n_char_prefix, const char *prefix,
+        size_t n_char_string, const char *string);
+int scannum_adv(char **p_str);
+bool str_has_arith_char(char *s);
+char *stripWhiteSpacesInsideParens(const char *str);
+void strtolower(char *str);
+void strtoupper(char *str);
+void substring_match_init(size_t n_char_pattern, const char *p_pattern,
+        size_t n_char_string, const char *p_string, bool f_overlap,
+        struct substring_match_info *p_scan_state);
+char *substring_match_next(struct substring_match_info *p_scan_state);
+int substring_n(size_t n_char_pattern, const char *p_pattern,
+        size_t n_char_str, const char *p_str);
+char *tprintf(const char *fmt, ...) ATTR_TPRINTF;
+char *tvprintf(const char *fmt, va_list args);
+
+
+
+/* Duplicate a null-terminated string into a newly allocated buffer.
+ * A NULL argument is passed straight through: NULL in, NULL out.
+ * Otherwise the result of dup_string() is returned unchanged. */
+inline char *copy(const char *str)
+{
+    if (str == (char *) NULL) {
+        return (char *) NULL;
+    }
+
+    const size_t n_char = strlen(str);
+    return dup_string(str, n_char);
+} /* end of function copy */
+
+
+
+/* Return a newly allocated, null-terminated copy of the characters in
+ * the half-open range [str, end): *str is the first character copied
+ * and *end is the first one that is not. */
+inline char *copy_substring(const char *str, const char *end)
+{
+    const size_t n_char = (size_t) (end - str);
+
+    return dup_string(str, n_char);
+} /* end of function copy_substring */
+
+
+
+/* Read the unsigned decimal integer that begins a string, stopping at
+ * the first character that is not a digit. A string that does not start
+ * with a digit yields 0, so the caller cannot tell "no number" from the
+ * value 0. scannum_adv() does the same scan but also reports where the
+ * number ended. */
+inline int scannum(const char *str)
+{
+    char *p_scan = (char *) str; /* scannum_adv() moves this copy */
+
+    return scannum_adv(&p_scan);
+} /* end of function scannum */
+
+
+
+/* Determine whether sub is a substring of str.
+ *
+ * Parameters
+ * sub: Null-terminated text to look for
+ * str: Null-terminated text to search
+ *
+ * Return values
+ * 1: sub occurs somewhere in str
+ * 0: sub does not occur in str, or sub is empty
+ */
+inline int substring(const char *sub, const char *str)
+{
+    /* An empty pattern never matches. This agrees with substring_n() and
+     * keeps strstr() from reporting a match for every str. */
+    if (*sub == '\0') {
+        return 0;
+    }
+
+    /* strstr() takes the text to search first and the pattern second */
+    return strstr(str, sub) != (char *) NULL;
+} /* end of function substring */
+
 #ifdef CIDER
 /* cider integration */ 

@ -44,9 +124,5 @@ int cinprefix(register char *p, register char *s, register int n);
 int cimatch(register char *p, register char *s); 
 #endif

-bool isquote(char ch);
-bool is_arith_char(char c);
-bool str_has_arith_char(char *s);
-int get_comma_separated_values( char *values[], char *str );

-#endif
+#endif /* include guard */
--- a/src/misc/string.c
+++ b/src/misc/string.c
@ -5,97 +5,132 @@ Copyright 1990 Regents of the University of California.  All rights reserved.
 /*
 * String functions
 */
+#include <ctype.h>
+#include <stdarg.h>

 #include "ngspice/ngspice.h"
 #include "ngspice/stringutil.h"
 #include "ngspice/stringskip.h"
 #include "ngspice/dstring.h"

-#include <stdarg.h>
+
+/* Instantiations of string functions in case inlining is not performed */
+char *copy(const char *str);
+char *copy_substring(const char *str, const char *end);
+int scannum(const char *str);
+int substring(const char *sub, const char *str);


-int
-prefix(const char *p, const char *s)
+
+static size_t get_kr_msb_factor(size_t n);
+static size_t kr_hash(size_t n, const char *p);
+static inline const char *next_substr(
+        size_t n_char_pattern, const char *p_pattern,
+        const char **pp_string, const char * const p_last,
+        const size_t msb_factor, const size_t h_pattern, size_t *p_h_string);
+static bool can_overlap(size_t n_char_pattern, const char * const p_pattern);
+
+
+/* This function returns true if the string s begins with the
+ * string p and false otherwise. */
+int prefix(const char *p, const char *s)
 {
    while (*p && (*p == *s))
        p++, s++;

    return *p == '\0';
-}
+} /* end of function prefix */


-/* Create a copy of a string. */

-char *
-copy(const char *str)
+/* This function returns 1 if string begins with prefix and 0 otherwise.
+ * Neither the prefix nor string needs a null termination. */
+int prefix_n(size_t n_char_prefix, const char *prefix,
+        size_t n_char_string, const char *string)
 {
-    char *p;
+    /*Test that string is long enough */
+    if (n_char_prefix > n_char_string) {
+        return 0;
+    }

-    if (!str)
-        return NULL;
-
-    if ((p = TMALLOC(char, strlen(str) + 1)) != NULL)
-        (void) strcpy(p, str);
-    return p;
-}
+    return memcmp(prefix, string, n_char_prefix) == 0;
+} /* end of function prefix_n */


-/* copy a substring, from 'str' to 'end'
- *   including *str, excluding *end
+
+
+/* This function allocates a buffer and copies the specified number of
+ * characters from the input string into the buffer followed by a
+ * terminating null.
+ *
+ * Parameters
+ * str: String to copy
+ * n_char: Number of characters to copy
+ *
+ * Return values
+ * NULL: Allocation failure
+ * otherwise: The initialized string.
 */
-char *
-copy_substring(const char *str, const char *end)
+char *dup_string(const char *str, size_t n_char)
 {
-    size_t n = (size_t) (end - str);
    char *p;

-    if ((p = TMALLOC(char, n + 1)) != NULL) {
-        (void) strncpy(p, str, n);
-        p[n] = '\0';
+    if ((p = TMALLOC(char, n_char + 1)) != NULL) {
+        (void) strncpy(p, str, n_char);
+        p[n_char] = '\0';
    }
    return p;
-}
+} /* end of function dup_string */


-char *
-tvprintf(const char *fmt, va_list args)
+
+char *tvprintf(const char *fmt, va_list args)
 {
    char buf[1024];
    char *p = buf;
    int size = sizeof(buf);
+    int nchars;

    for (;;) {

-        int nchars;
        va_list ap;

        va_copy(ap, args);
        nchars = vsnprintf(p, (size_t) size, fmt, ap);
        va_end(ap);

-        if (nchars == -1) {     // compatibility to old implementations
-            size *= 2;
+        /* This case was previously handled by doubling the size of
+         * the buffer for "compatibility to old implementations."
+         * However, vsnprintf is defined in both C99 and SUSv2 from 1997.
+         * There is a slight difference which does not affect this
+         * usage, but both return negative values (possibly -1) on an
+         * encoding error, which would lead to an infinite loop (until
+         * memory was exhausted) with the old behavior */
+        if (nchars < 0) {
+            controlled_exit(-1);
        }
-        else if (nchars >= size) {
-            /* Output was truncated. Returned value is the number of chars
-             * that would have been written if the buffer were large enough
-             * excluding the terminiating null. */
-            size = nchars + 1; /* min required allocation size */
-        }
-        else { /* String formatted OK */
+
+        if (nchars < size) { /* String formatted OK */
            break;
        }

+        /* Output was truncated. Returned value is the number of chars
+         * that would have been written if the buffer were large enough
+         * excluding the terminating null. */
+        size = nchars + 1; /* min required allocation size */
+
        /* Allocate a larger buffer */
-        if (p == buf)
+        if (p == buf) {
            p = TMALLOC(char, size);
-        else
+        }
+        else {
            p = TREALLOC(char, p, size);
+        }
    }

    /* Return the formatted string, making a copy on the heap if the
     * stack's buffer (buf) contains the string */
-    return (p == buf) ? copy(p) : p;
+    return (p == buf) ? dup_string(p, (size_t) nchars) : p;
 } /* end of function tvprintf */


@ -103,8 +138,7 @@ tvprintf(const char *fmt, va_list args)
 /* This function returns an allocation containing the string formatted
 * according to fmt and the variadic argument list provided. It is a wrapper
 * around tvprintf() which processes the argumens as a va_list. */
-char *
-tprintf(const char *fmt, ...)
+char *tprintf(const char *fmt, ...)
 {
    char *rv;
    va_list ap;
@ -117,103 +151,173 @@ tprintf(const char *fmt, ...)
 } /* end of function tprintf */


-/* Determine whether sub is a substring of str. */
-/* Like strstr( ) XXX */
-
-int
-substring(const char *sub, const char *str)
-{
-    for (; *str; str++)
-        if (*str == *sub) {
-            const char *s = sub, *t = str;
-            for (; *s; s++, t++)
-                if (!*t || (*s != *t))
-                    break;
-            if (*s == '\0')
-                return TRUE;
-        }
-
-    return FALSE;
-}
-
-
 /* Append one character to a string. Don't check for overflow. */
 /* Almost like strcat( ) XXX */
-
-void
-appendc(char *s, char c)
+void appendc(char *s, char c)
 {
-    while (*s)
+    while (*s) {
        s++;
+    }
    *s++ = c;
    *s = '\0';
-}
+} /* end of function appendc */


-/* Try to identify an integer that begins a string. Stop when a non-
- * numeric character is reached.
- */
-/* Like atoi( ) XXX */

-int
-scannum(char *str)
+/* Returns the unsigned number at *p_str or 0 if there is none. *p_str
+ * points to the first character after the number that was read, so
+ * it is possible to distinguish between the value 0 and a missing number
+ * by testing if the string has been advanced. */
+int scannum_adv(char **p_str)
 {
+    const char *str = *p_str;
    int i = 0;

-    while (isdigit_c(*str))
+    while (isdigit_c(*str)) {
        i = i * 10 + *(str++) - '0';
+    }

+    *p_str = (char *) str; /* locate end of number */
    return i;
-}
+} /* end of function scannum_adv */
+
+
+
+/* This function returns the integer at the current string location.
+ * The string does not need to be null-terminated.
+ *
+ * Parameters
+ * str: String containing the integer to return at the beginning. An
+ *      optional leading '-' is accepted; a leading '+' is not.
+ * n: Number of characters in the string
+ * p_value: Address where the integer is returned. It is written only
+ *      when a value is successfully read.
+ *
+ * Return values
+ * -1: No integer present
+ * -2: Overflow (value does not fit in an int)
+ * >0: Number of characters in the integer, including any '-' sign
+ */
+int get_int_n(const char *str, size_t n, int *p_value)
+{
+    if (n == 0) { /* no string */
+        return -1;
+    }
+
+    const char *p_cur = str;
+    const char * const p_end = str + n;
+    bool f_neg = 0;
+    if (*p_cur == '-') { /* Check for leading negative sign */
+        f_neg = 1;
+        ++p_cur;
+    }
+    const char * const p_digits = p_cur; /* where the digits must begin */
+
+    /* Largest magnitude that can be represented: INT_MAX for a positive
+     * value and one more than that (|INT_MIN|) for a negative one */
+    const unsigned int max_mag = (unsigned int) INT_MAX + (f_neg ? 1u : 0u);
+
+    /* Iterate over chars until end or char that is not numeric */
+    unsigned int value = 0;
+    for ( ; p_cur != p_end; ++p_cur) {
+        /* <ctype.h> functions require an unsigned char value */
+        const unsigned char ch_cur = (unsigned char) *p_cur;
+        if (!isdigit(ch_cur)) { /* Test for exit due to non-numeric char */
+            break;
+        }
+
+        /* Test that 10 * value + digit <= max_mag without evaluating an
+         * expression that could itself wrap around */
+        const unsigned int digit = (unsigned int) (ch_cur - '0');
+        if (value > (max_mag - digit) / 10) {
+            return -2;
+        }
+        value = 10 * value + digit;
+    } /* end of loop over digits */
+
+    /* Test for at least one digit */
+    if (p_cur == p_digits) {
+        return -1; /* no digit */
+    }
+
+    /* Apply the sign. The negative case is computed as -(value - 1) - 1
+     * so that a magnitude of INT_MAX + 1 yields INT_MIN without ever
+     * converting an out-of-range unsigned value to int. */
+    if (!f_neg) {
+        *p_value = (int) value;
+    }
+    else if (value == 0) { /* "-0" */
+        *p_value = 0;
+    }
+    else {
+        *p_value = -(int) (value - 1) - 1;
+    }
+
+    return (int) (p_cur - str); /* number of chars in the number */
+} /* end of function get_int_n */
+


 /* Case insensitive str eq. */
 /* Like strcasecmp( ) XXX */
-
-int
-cieq(const char *p, const char *s)
+int cieq(const char *p, const char *s)
 {
-    for (; *p; p++, s++)
-        if (tolower_c(*p) != tolower_c(*s))
+    for (; *p; p++, s++) {
+        if (tolower_c(*p) != tolower_c(*s)) {
            return FALSE;
+        }
+    }

    return *s == '\0';
-}
+} /* end of function cieq */
+
+
+
+/* Case-insensitive comparison of the first n characters of two character
+ * arrays for equality. The length is explicit, so neither array has to be
+ * null terminated, and leaving the trailing null out of the count turns
+ * the comparison into a prefix test. Working on the arrays in place avoids
+ * copying to temporary buffers and the overruns that unchecked temporary
+ * buffers invite.
+ *
+ * Returns TRUE if all n characters match (always the case for n == 0)
+ * and FALSE at the first difference. */
+int cieqn(const char *p, const char *s, size_t n)
+{
+    const char * const p_end = p + n;
+
+    while (p != p_end) {
+        if (tolower_c(*p) != tolower_c(*s)) {
+            return FALSE; /* characters differ */
+        }
+        ++p;
+        ++s;
+    }
+
+    return TRUE; /* all chars matched */
+} /* end of function cieqn */


 /* Case insensitive prefix. */
-
-int
-ciprefix(const char *p, const char *s)
+int ciprefix(const char *p, const char *s)
 {
    for (; *p; p++, s++)
-        if (tolower_c(*p) != tolower_c(*s))
+        if (tolower_c(*p) != tolower_c(*s)) {
            return FALSE;
+        }

    return TRUE;
-}
+} /* end of function ciprefix */


-void
-strtolower(char *str)
+
+void strtolower(char *str)
 {
-    if (!str)
+    if (!str) {
        return;
+    }

-    for (; *str; str++)
+    for (; *str; str++) {
        *str = tolower_c(*str);
-}
+    }
+} /* end of function strtolower */


-void
-strtoupper(char *str)
+
+void strtoupper(char *str)
 {
-    if (!str)
+    if (!str) {
        return;
+    }

-    for (; *str; str++)
+    for (; *str; str++) {
        *str = toupper_c(*str);
-}
+    }
+} /* end of function strtoupper */


 #ifdef CIDER
@ -230,18 +334,21 @@ strtoupper(char *str)
 * first n characters are the same
 */

-int
-cinprefix(char *p, char *s, int n)
+int cinprefix(char *p, char *s, int n)
 {
-    if (!p || !s)
+    if (!p || !s) {
        return 0;
+    }

-    for (; *p; p++, s++, n--)
-        if (tolower_c(*p) != tolower_c(*s))
+    for (; *p; p++, s++, n--) {
+        if (tolower_c(*p) != tolower_c(*s)) {
            return 0;
+        }
+    }

    return n <= 0;
-}
+} /* end of function cinprefix */
+


 /*
@ -410,6 +517,41 @@ gettok_iv(char **s)



+/* findtok_noparen() performs the scanning step of gettok_noparens()
+ * without allocating anything, for callers that only need to know where
+ * the token lies.
+ *
+ * Parameters
+ * p_str: Address of the current position in the string. On return it
+ *      points past the token and any whitespace that follows it.
+ * p_token: Receives the address of the first character of the token, or
+ *      NULL if only whitespace remained
+ * p_token_end: Receives the address of the first character after the
+ *      token. It is not written when *p_token is NULL.
+ *
+ * The token ends at whitespace, '(', ')', ',' or the end of the string.
+ */
+void findtok_noparen(char **p_str, char **p_token, char **p_token_end)
+{
+    char * const p_start = skip_ws(*p_str);
+
+    if (*p_start == '\0') { /* nothing left but whitespace */
+        *p_str = p_start;
+        *p_token = (char *) NULL;
+        return;
+    }
+
+    /* Walk forward to the first character that terminates the token */
+    char *p_stop = p_start;
+    for ( ; ; ++p_stop) {
+        const char c = *p_stop;
+        if (c == '\0' || isspace_c(c)) {
+            break;
+        }
+        if (c == '(' || c == ')' || c == ',') {
+            break;
+        }
+    }
+
+    *p_token = p_start; /* Token starts after whitespace */
+    *p_token_end = p_stop;
+    *p_str = skip_ws(p_stop);
+} /* end of function findtok_noparen */
+
+
+
 /*-------------------------------------------------------------------------*
 * gettok_noparens was added by SDB on 4.21.2003.
 * It acts like gettok, except that it treats parens and commas like
@ -417,33 +559,17 @@ gettok_iv(char **s)
 * parsing and returns when it finds one of those chars.  It is called from
 * 'translate' (subckt.c).
 *-------------------------------------------------------------------------*/
-
-char *
-gettok_noparens(char **s)
+char *gettok_noparens(char **s)
 {
-    char c;
-    const char *token, *token_e;
-
-    *s = skip_ws(*s);
-
-    if (!**s)
-        return NULL;  /* return NULL if we come to end of line */
-
-    token = *s;
-    while ((c = **s) != '\0' &&
-           !isspace_c(c) &&
-           (**s != '(') &&
-           (**s != ')') &&
-           (**s != ',')
-        ) {
-        (*s)++;
+    char *token, *token_e;
+    findtok_noparen(s, &token, &token_e);
+    if (token == (char *) NULL) {
+        return (char *) NULL; /* return NULL if we come to end of line */
    }
-    token_e = *s;
-
-    *s = skip_ws(*s);

    return copy_substring(token, token_e);
-}
+} /* end of function gettok_noparens */
+

 /*-------------------------------------------------------------------------*
 * gettok_model acts like gettok_noparens, however when it encounters a '{', 
@ -746,8 +872,8 @@ str_has_arith_char(char *s)
 }


-int
-get_comma_separated_values(char *values[], char *str) {
+int get_comma_separated_values(char *values[], char *str)
+{
    int count = 0;
    char *comma_ptr;

@ -769,9 +895,7 @@ get_comma_separated_values(char *values[], char *str) {
  modulo a trailing model binning extension '\.[0-9]+'
  then return 2
 */
-
-int
-model_name_match(const char *token, const char *model_name)
+int model_name_match(const char *token, const char *model_name)
 {
    const char *p;
    size_t token_len = strlen(token);
@ -799,7 +923,443 @@ model_name_match(const char *token, const char *model_name)
            return 0;

    return 2;
-}
+} /* end of function model_name_match */



+/* This function returns 1 if pattern is a substring anywhere in str and
+ * 0 otherwise. A null pattern is considered a mismatch.
+ *
+ * Parameters
+ * n_char_pattern: Length of the pattern
+ * p_pattern: Pattern to find. Need not be null-terminated
+ * n_char_string: Length of the string to search
+ * p_string: String to search. Need not be null-terminated
+ *
+ * Uses Karp-Rabin substring matching with base=256 and modulus=1009
+ */
+int substring_n(size_t n_char_pattern, const char *p_pattern,
+        size_t n_char_string, const char *p_string)
+{
+    /* Test for a pattern to match */
+    if (n_char_pattern == 0) {
+        return 0;
+    }
+
+    /* Test for a string of sufficient length */
+    if (n_char_pattern > n_char_string) {
+        return 0;
+    }
+
+    /* Factor for rolling hash computation */
+    const size_t msb_factor = get_kr_msb_factor(n_char_pattern);
+
+    const size_t h_pattern = kr_hash(n_char_pattern, p_pattern);
+    size_t h_string = kr_hash(n_char_pattern, p_string);
+
+    /* Compare at beginning. If hashes match, do full compare */
+    if (h_pattern == h_string &&
+            memcmp(p_pattern, p_string, n_char_pattern) == 0) {
+        return 1; /* match at start */
+    }
+
+    /* If the lengths are equal, offset 0 was the only candidate. Stopping
+     * here also keeps next_substr() from reading beyond the string. */
+    if (n_char_pattern == n_char_string) {
+        return 0;
+    }
+
+    /* Compare at each remaining starting point in the string. The last
+     * position where the pattern still fits is at offset
+     * n_char_string - n_char_pattern, and it must be included. */
+    const char * const p_last = p_string + (n_char_string - n_char_pattern);
+
+    return next_substr(n_char_pattern, p_pattern, &p_string, p_last,
+            msb_factor, h_pattern, &h_string) != (char *) NULL;
+} /* end of function substring_n */
+
+
+
+/* This function initializes a scan for substring matches
+ *
+ * Parameters
+ * n_char_pattern: Length of the pattern to find
+ * p_pattern: Pattern to find. Need not be null-terminated
+ * n_char_string: Length of the string to search
+ * p_string: String to search. Need not be null-terminated
+ * f_overlap: TRUE to report matches that overlap one another, FALSE to
+ *      resume the search after the end of each match
+ * p_scan_state: Address of the structure receiving the scan state that is
+ *      later passed to substring_match_next()
+ *
+ * Remarks
+ * NOTE(review): the scan never tests offset 0 of the string, because
+ * next_substr() always advances one position before it compares. A match
+ * at the very start of the string is therefore not reported, and a
+ * string of the same length as the pattern has nothing left to scan.
+ */
+void substring_match_init(size_t n_char_pattern, const char *p_pattern,
+        size_t n_char_string, const char *p_string, bool f_overlap,
+        struct substring_match_info *p_scan_state)
+{
+    /* Save input info into structure. Note that the strings are not
+     * copied, so they must remain allocated and unaltered while the
+     * search is in progress. */
+    p_scan_state->n_char_pattern = n_char_pattern;
+    p_scan_state->p_pattern = p_pattern;
+    p_scan_state->n_char_string = n_char_string;
+    p_scan_state->p_string = p_string;
+
+    /* There is nothing to scan without a pattern or when the string is
+     * not longer than the pattern (see Remarks for the equal-length
+     * case). Finishing here also keeps the pointer arithmetic below from
+     * wrapping around. */
+    if (n_char_pattern == 0 || n_char_pattern >= n_char_string) {
+        p_scan_state->f_overlap = FALSE;
+        p_scan_state->f_done = TRUE;
+        return;
+    }
+
+    /*** Calculate intermediate data ***/
+    p_scan_state->f_done = FALSE;
+
+    /* Look for overlaps only if requested and if the pattern is able to
+     * overlap itself. Otherwise the faster skip past each match is used. */
+    p_scan_state->f_overlap =
+            f_overlap && can_overlap(n_char_pattern, p_pattern);
+    p_scan_state->n_char_pattern_1 = n_char_pattern - 1;
+    p_scan_state->msb_factor = get_kr_msb_factor(n_char_pattern);
+    p_scan_state->h_pattern = kr_hash(n_char_pattern, p_pattern);
+    p_scan_state->h_string = kr_hash(n_char_pattern, p_string);
+
+    /* Last position where the pattern still fits in the string */
+    p_scan_state->p_last = p_string + (n_char_string - n_char_pattern);
+} /* end of function substring_match_init */
+
+
+
+/* This function finds the next substring match
+ *
+ * Parameter
+ * p_scan_state: Address of struct substring_match_info initialized by
+ *      substring_match_init()
+ *
+ * Return value
+ * NULL if there is no match or the address of the next match otherwise
+ *
+ * Remarks
+ * NOTE(review): next_substr() advances before comparing, so the position
+ * the scan starts from (offset 0 of the string on the first call) is
+ * never itself reported as a match.
+ */
+char *substring_match_next(struct substring_match_info *p_scan_state)
+{
+    /* First test if there are no more possible matches */
+    if (p_scan_state->f_done) {
+        return (char *) NULL;
+    }
+
+    /* Once the scan position has reached the last candidate there is no
+     * position left to step to, e.g. after a match at the very end */
+    if (p_scan_state->p_string >= p_scan_state->p_last) {
+        p_scan_state->f_done = TRUE;
+        return (char *) NULL;
+    }
+
+    /* Find next match, if any */
+    const char * const p_match = next_substr(
+            p_scan_state->n_char_pattern, p_scan_state->p_pattern,
+            &p_scan_state->p_string, p_scan_state->p_last,
+            p_scan_state->msb_factor, p_scan_state->h_pattern,
+            &p_scan_state->h_string);
+
+    /* Update done status if changed */
+    if (p_match == (char *) NULL) {
+        p_scan_state->f_done = TRUE;
+    }
+    else if (!p_scan_state->f_overlap) {
+        /* Resume from the last character of the match so that the next
+         * position examined is the first one after the match */
+        const char * const p_resume =
+                p_match + p_scan_state->n_char_pattern_1;
+        if (p_resume >= p_scan_state->p_last) {
+            /* No position after the match can still hold the pattern */
+            p_scan_state->f_done = TRUE;
+        }
+        else {
+            /* The rolling hash must describe the window that starts at
+             * the new position. The pattern hash describes the window at
+             * the match, so it cannot be reused after the skip. */
+            p_scan_state->p_string = p_resume;
+            p_scan_state->h_string =
+                    kr_hash(p_scan_state->n_char_pattern, p_resume);
+        }
+    }
+
+    return (char *) p_match; /* Return result */
+} /* end of function substring_match_next */
+
+
+
+#ifdef COMPILE_UNUSED_FUNCTIONS
+/* This function returns the locations of optionally non-overlapping
+ * substring matches. For example, in the string aaaaa, aa is found in
+ * non-overlapping locations at 0-based offsets 0 and 2 and with overlapping
+ * allowed at offsets 0, 1, 2, and 3.
+ *
+ * Parameters
+ * n_char_pattern: Length of the pattern to find
+ * p_pattern: Pattern to find
+ * n_char_string: Length of the string to search
+ * p_string: String to search
+ * n_elem_buf: Number of matches that the match buffer can hold
+ * p_match_buf: Buffer for the match locations (but see the notes below)
+ * f_overlap: Flag that overlapping matches are wanted
+ *
+ * Return value
+ * Number of matches found; 0 if the pattern is empty, the string is
+ * shorter than the pattern, or the buffer has no elements.
+ *
+ * NOTE(review): this function is compiled only when
+ * COMPILE_UNUSED_FUNCTIONS is defined. It has several problems, flagged
+ * at the lines concerned, that should be resolved before it is enabled. */
+size_t get_substring_matches(size_t n_char_pattern, const char *p_pattern,
+        size_t n_char_string, const char *p_string,
+        size_t n_elem_buf, char *p_match_buf, bool f_overlap)
+{
+    /* Test for a pattern to match */
+    if (n_char_pattern == 0) {
+        return 0;
+    }
+
+    /* Test for a string of sufficient length */
+    if (n_char_pattern > n_char_string) {
+        return 0;
+    }
+
+    /* Handle 0-sized buffer */
+    if (n_elem_buf == 0) {
+        return 0;
+    }
+
+    /* Factor for rolling hash computation */
+    const size_t msb_factor = get_kr_msb_factor(n_char_pattern);
+
+    const size_t h_pattern = kr_hash(n_char_pattern, p_pattern);
+    size_t h_string = kr_hash(n_char_pattern, p_string);
+
+    /* Compare at beginning. If hashes match, do full compare.
+     * NOTE(review): this returns a count of 1 without storing the match
+     * in the buffer and without looking for any further matches. */
+    if (h_pattern == h_string &&
+            memcmp(p_pattern, p_string, n_char_pattern) == 0) {
+        return 1; /* match at start */
+    }
+
+    /* Compare at each possible starting point in the string.
+     * NOTE(review): the last offset where the pattern fits is
+     * n_char_string - n_char_pattern, so p_last looks one short, and the
+     * subtraction wraps around when the two lengths are equal.
+     * NOTE(review): &p_match_buf is the address of this function's own
+     * parameter, not of the caller's buffer, so the stores below do not
+     * reach the caller and go past the parameter for n_elem_buf > 1. The
+     * parameter was presumably meant to be a char ** -- confirm. */
+    const char *p_last = p_string + (n_char_string - n_char_pattern - 1);
+    const size_t n_char_pattern_1 = n_char_pattern - 1;
+    char **pp_match_buf_cur = &p_match_buf;
+    char * const * const pp_match_buf_end = pp_match_buf_cur + n_elem_buf;
+
+    /* Look for overlaps only if possible.
+     * NOTE(review): the test appears inverted. Overlap handling is kept
+     * exactly when can_overlap() reports that the pattern cannot overlap. */
+    f_overlap = f_overlap ? !can_overlap(n_char_pattern, p_pattern) : FALSE;
+
+    for ( ; pp_match_buf_cur < pp_match_buf_end; pp_match_buf_cur++) {
+        const char *p_match = next_substr(n_char_pattern, p_pattern,
+                &p_string, p_last, msb_factor, h_pattern, &h_string);
+        if (p_match == (char *) NULL) { /* if no match, done */
+            /* NOTE(review): cast to int although a size_t is returned */
+            return (int) (pp_match_buf_cur - &p_match_buf);
+        }
+
+        /* Save result */
+        *pp_match_buf_cur = (char *) p_match;
+
+        /* If overlapping is not allowed, continue search after the match.
+         * NOTE(review): h_pattern is the hash of the window at the match,
+         * but p_string has just been moved n_char_pattern - 1 characters
+         * beyond it, so the saved hash no longer describes the window at
+         * p_string. p_string may also have moved past p_last. */
+        if (!f_overlap) {
+            p_string += n_char_pattern_1; /* end of match */
+            h_string = h_pattern;
+        }
+    } /* end of loop over string */
+
+    return n_elem_buf; /* full buffer */
+} /* end of function get_substring_matches */
+#endif /* COMPILE_UNUSED_FUNCTIONS */
+
+
+
+/* This function determines if a pattern can allow overlapping matches.
+ * For example, the pattern "starts" would have overlapped matches in the
+ * string "startstarts".
+ *
+ * Parameters
+ * n_char_pattern: Length of the pattern
+ * p_pattern: Pattern to test. Need not be null-terminated
+ *
+ * Return values
+ * TRUE: Some proper suffix of the pattern is also a prefix of it, so two
+ *      matches can share characters. Also returned for patterns shorter
+ *      than 2 characters, where the answer does not matter.
+ * FALSE: No two matches can overlap
+ *
+ * Remarks
+ * While not directly related to this function, there is only a binary yes/no
+ * interest regarding overlap rather than an offset into the string where
+ * such overlap may occur. That is because the hash value is being computed
+ * incrementally, so the only time when there is substantial computational
+ * savings in this approach is when the hash value is known, as it would be
+ * at the end of a match (since the hash of the pattern is known.)
+ */
+static bool can_overlap(size_t n_char_pattern, const char * const p_pattern)
+{
+    if (n_char_pattern < 2) { /* does not matter */
+        return TRUE;
+    }
+
+    /* A second match can begin 'shift' characters into a first one exactly
+     * when the pattern's tail from that offset equals its head of the same
+     * length. Every shift has to be tried: testing only the last
+     * occurrence of the first character misses cases such as "aabaaab",
+     * which overlaps itself at a shift of 4. */
+    size_t shift;
+    for (shift = 1; shift < n_char_pattern; ++shift) {
+        if (memcmp(p_pattern + shift, p_pattern,
+                n_char_pattern - shift) == 0) {
+            return TRUE; /* tail at this shift matches the head */
+        }
+    } /* end of loop over shifts */
+
+    return FALSE; /* no tail matches the head, so cannot overlap */
+} /* end of function can_overlap */
+
+
+
+/* Prime number of Karp-Rabin hashing. Tradeoff between number of hash
+ * collisions and number of times modulus must be taken. */
+#define KR_MODULUS 1009
+
+
+
+/* Compute (256^(n-1))%KR_MODULUS, the weight of the leading character of
+ * an n-character window in the hash computed by kr_hash(). For n < 2 the
+ * result is 1.
+ *
+ * The modulus is taken after every multiplication by 256. The value then
+ * always stays below KR_MODULUS * 256, so no overflow can occur and none
+ * needs to be detected. */
+static size_t get_kr_msb_factor(size_t n)
+{
+    size_t i;
+    size_t factor = 1;
+    for (i = 1; i < n; ++i) {
+        factor = (factor << 8) % KR_MODULUS;
+    } /* end of loop building factor */
+
+    return factor;
+} /* end of function get_kr_msb_factor */
+
+
+
+/* Compute the KR hash of the n characters at p, that is, the characters
+ * read as the digits of a base-256 number, most significant first,
+ * reduced modulo KR_MODULUS. The result is 0 for n == 0.
+ *
+ * Characters are read as unsigned char so that values above 127 hash the
+ * same way here and in the rolling update of next_substr(). */
+static size_t kr_hash(size_t n, const char *p)
+{
+    const unsigned char *p_cur = (const unsigned char *) p;
+    const unsigned char * const p_end = p_cur + n;
+    size_t hash = 0;
+    for ( ; p_cur != p_end; ++p_cur) {
+        /* Reducing at every step keeps hash < KR_MODULUS, so the shift
+         * cannot overflow */
+        hash = ((hash << 8) + *p_cur) % KR_MODULUS;
+    } /* end of loop hashing chars */
+
+    return hash;
+} /* end of function kr_hash */
+
+
+
+/* This function locates the next substring match. It is intended to be called
+ * as part of the scanning of a string for a substring
+ *
+ * Parameters
+ * n_char_pattern: Length of pattern to find
+ * p_pattern: Pattern to find. Need not be null-terminated
+ * pp_string: Address containing the current location in the string. Updated
+ *      if a match is found. The search begins at the position after this
+ *      one; the current location itself is not tested.
+ * p_last: Address of last possible location of a match. Nothing is read
+ *      and NULL is returned if the current location is not before it.
+ * msb_factor: Constant related to hash update, from get_kr_msb_factor()
+ * h_pattern: Computed hash of pattern, from kr_hash()
+ * p_h_string: Address containing the current hash value of the location
+ *      in the string being considered. Updated if a match is found.
+ *
+ * Return value
+ * NULL if no substring, or the address of the substring if one exists.
+ */
+static inline const char *next_substr(
+        size_t n_char_pattern, const char *p_pattern,
+        const char **pp_string, const char * const p_last,
+        const size_t msb_factor, const size_t h_pattern, size_t *p_h_string)
+{
+    const char *p_string = *pp_string;
+
+    /* Working values are kept reduced so that the arithmetic below can
+     * neither overflow nor go below zero */
+    const size_t factor = msb_factor % KR_MODULUS;
+    size_t h_string = *p_h_string % KR_MODULUS;
+
+    while (p_string < p_last) {
+        /* Update hash for next starting point at p_string + 1: remove
+         * the contribution of the character leaving the window, shift
+         * by one base-256 digit, and append the character entering it.
+         * KR_MODULUS is added before subtracting because the values are
+         * unsigned and must not wrap around. */
+        const size_t ch_out = (unsigned char) p_string[0];
+        const size_t ch_in = (unsigned char) p_string[n_char_pattern];
+        const size_t h_out = (ch_out * factor) % KR_MODULUS;
+        h_string = (h_string + KR_MODULUS - h_out) % KR_MODULUS;
+        h_string = ((h_string << 8) + ch_in) % KR_MODULUS;
+        ++p_string; /* step to next starting point */
+
+        /* Compare at current starting point. If hashes match,
+         * do full compare */
+        if (h_pattern == h_string &&
+                memcmp(p_pattern, p_string, n_char_pattern) == 0) {
+            *pp_string = p_string; /* Update string location */
+            *p_h_string = h_string; /* and hash for another call */
+            return p_string; /* match here */
+        }
+    } /* end of loop over starting points in string */
+
+    return (char *) NULL; /* no match found */
+} /* end of function next_substr */
+
+
+
+/* This function returns TRUE if '\0' is among the n characters at p and
+ * FALSE otherwise. If n is 0, no character is examined and FALSE is
+ * returned.
+ *
+ * Parameters
+ * n: Number of characters to examine
+ * p: Address of the first of the n characters
+ */
+static inline bool have_null(size_t n, const char *p)
+{
+    /* Scan backwards to make the common case of using a null termination
+     * of a string for the null char be faster. An index is counted down
+     * from n instead of stepping a pointer down to p - 1 because forming
+     * an address before the start of an array is undefined behavior */
+    size_t i = n;
+    while (i > 0) { /* Locate '\0' among the chars */
+        --i;
+        if (p[i] == '\0') { /* found */
+            return TRUE;
+        }
+    }
+    return FALSE;
+} /* end of function have_null */
+
+
+
+/* This function "finds a needle in a haystack", i.e., it returns the
+ * address of the first character of haystack that equals any one of the
+ * n_needle characters at p_needle. NULL is returned if there is no such
+ * character or if n_needle is 0. haystack must be terminated with '\0'.
+ *
+ * Parameters
+ * haystack: Null-terminated string to search
+ * n_needle: Number of characters at p_needle
+ * p_needle: Characters to locate. Need not be null-terminated
+ *
+ * Remarks
+ * A '\0' may be included among the characters to be located. In that case
+ * the end of haystack is itself a match, so the address of the terminating
+ * null is returned when none of the other characters is present, and the
+ * returned value is never NULL.
+ *
+ * The case of a '\0' among the characters to locate is handled by a
+ * separate loop that omits the end-of-haystack test, since the terminating
+ * null is certain to match.
+ *
+ * For a sufficiently large haystack, further gains in performance could be
+ * achieved by analyzing the needle values and comparing by bit values or
+ * by ranges. As a trivial example, for the needle string "01234567", the
+ * 8 comparisons could be replaced by the 2 comparisons >= '0' and <= '7'.
+ * Without a large enough haystack, the time spent on the analysis would
+ * not be recovered.
+ */
+char *find_first_of(const char *haystack,
+        unsigned int n_needle, const char *p_needle)
+{
+    unsigned int i_needle;
+
+    /* Handle case of nothing to find */
+    if (n_needle == 0) {
+        return (char *) NULL;
+    }
+
+    if (have_null(n_needle, p_needle)) { /* '\0' is being located */
+        /* The terminating null of haystack will match, so the loop ends
+         * there at the latest */
+        for ( ; ; ) {
+            const char ch_cur = *haystack;
+            for (i_needle = 0; i_needle < n_needle; ++i_needle) {
+                if (p_needle[i_needle] == ch_cur) { /* found */
+                    return (char *) haystack;
+                }
+            } /* end of loop over chars to locate */
+            ++haystack;
+        } /* end of loop over chars in haystack */
+    } /* end of case that '\0' is among the chars to locate */
+
+    /* Else '\0' is not being located, so the end of haystack must be
+     * tested after each char has been compared */
+    for ( ; ; ) {
+        const char ch_cur = *haystack;
+        for (i_needle = 0; i_needle < n_needle; ++i_needle) {
+            if (p_needle[i_needle] == ch_cur) { /* found */
+                return (char *) haystack;
+            }
+        } /* end of loop over chars to locate */
+        if (ch_cur == '\0') { /* entire haystack searched */
+            return (char *) NULL;
+        }
+        ++haystack;
+    } /* end of loop over chars in haystack */
+} /* end of function find_first_of */
+
+
+
+/* This function returns TRUE if any of the n characters at str is a
+ * double quote '"', a single quote '\'' or a backslash '\\', and FALSE
+ * otherwise.
+ *
+ * Parameters
+ * n: Number of characters to examine
+ * str: Address of the first character. All n characters are examined
+ *      unless a match is found first; a '\0' does not end the scan.
+ */
+bool has_escape_or_quote(size_t n, const char *str)
+{
+    size_t i;
+    for (i = 0; i < n; ++i) {
+        switch (str[i]) {
+        case '"':
+        case '\'':
+        case '\\':
+            return TRUE;
+        default:
+            break;
+        }
+    } /* end of loop over chars in string */
+
+    return FALSE;
+} /* end of function has_escape_or_quote */
+