Fixed potential infinite loop (until memory is exhausted) and added several utility functions for processing strings.

This commit is contained in:
Jim Monte 2019-12-06 19:39:08 -05:00
parent e6c14b3eb0
commit 7496060d80
3 changed files with 794 additions and 157 deletions

View File

@ -241,6 +241,7 @@ extern double x_atanh(double);
#define HUGE HUGE_VAL
#endif
void findtok_noparen(char **p_str, char **p_token, char **p_token_end);
extern char *gettok_noparens(char **s);
extern char *gettok_node(char **s);
extern char *gettok_iv(char **s);

View File

@ -6,37 +6,117 @@
#ifndef ngspice_STRINGUTIL_H
#define ngspice_STRINGUTIL_H
#include <stdarg.h>
#include <string.h>
#include "ngspice/config.h"
#include "ngspice/bool.h"
#include <stdarg.h>
int prefix(const char *p, const char *s);
char * copy(const char *str);
char * copy_substring(const char *str, const char *end);
int substring(const char *sub, const char *str);
void appendc(char *s, char c);
int scannum(char *str);
int cieq(const char *p, const char *s);
int ciprefix(const char *p, const char *s);
void strtolower(char *str);
void strtoupper(char *str);
char * stripWhiteSpacesInsideParens(const char *str);
char * gettok(char **s);
char * gettok_instance(char **);
char * gettok_char(char **s, char p, bool inc_p, bool nested);
int model_name_match(const char *token, const char *model_name);
extern char *tvprintf(const char *fmt, va_list args);
#ifdef __GNUC__
extern char *tprintf(const char *fmt, ...) __attribute__ ((format (__printf__, 1, 2)));
#define ATTR_TPRINTF __attribute__ ((format (__printf__, 1, 2)))
#else
extern char *tprintf(const char *fmt, ...);
#define ATTR_TPRINTF
#endif
/* Structure for storing state to find substring matches in a string.
 * It is filled in by substring_match_init() and then handed to
 * substring_match_next() once per match wanted. The pattern and the
 * string are referenced, not copied, so both must stay valid and
 * unmodified for as long as the structure is in use. */
struct substring_match_info {
/* Input data (copied from the arguments of substring_match_init()) */
size_t n_char_pattern; /* length of pattern being located */
const char *p_pattern; /* pattern to find. Final null not required */
size_t n_char_string; /* length of string to search */
const char *p_string; /* String to search. Final null not required.
                       * Moved forward by substring_match_next() as
                       * matches are located. */
bool f_overlap; /* flag that substring matches can overlap. Set by
                 * substring_match_init() from the caller's flag
                 * combined with the result of can_overlap() */
/* Intermediate results (only set when a scan is actually possible) */
size_t n_char_pattern_1; /* length of pattern being located - 1 */
size_t msb_factor; /* value from get_kr_msb_factor(), used when the
                    * rolling hash drops its leading character */
size_t h_pattern; /* hash value of pattern */
size_t h_string; /* current hash value of string */
const char *p_last; /* last possible substring match location */
bool f_done; /* flag that the scan is finished: set at init for an
              * empty pattern or a string shorter than the pattern,
              * and later when no further match is found */
};
void appendc(char *s, char c);
int cieq(const char *p, const char *s);
int cieqn(const char *p, const char *s, size_t n);
int ciprefix(const char *p, const char *s);
char *dup_string(const char *str, size_t n_char);
char *find_first_of(const char *haystack,
unsigned int n_needle, const char *p_needle);
int get_comma_separated_values(char *values[], char *str);
int get_int_n(const char *str, size_t n, int *p_value);
#ifdef COMPILE_UNUSED_FUNCTIONS
size_t get_substring_matches(size_t n_char_pattern, const char *p_pattern,
size_t n_char_string, const char *p_string,
size_t n_elem_buf, char *p_match_buf, bool f_overlap);
#endif
char *gettok(char **s);
char *gettok_char(char **s, char p, bool inc_p, bool nested);
char *gettok_instance(char **);
bool has_escape_or_quote(size_t n, const char *str);
bool is_arith_char(char c);
bool isquote(char ch);
int model_name_match(const char *token, const char *model_name);
int prefix(const char *p, const char *s);
int prefix_n(size_t n_char_prefix, const char *prefix,
size_t n_char_string, const char *string);
int scannum_adv(char **p_str);
bool str_has_arith_char(char *s);
char *stripWhiteSpacesInsideParens(const char *str);
void strtolower(char *str);
void strtoupper(char *str);
void substring_match_init(size_t n_char_pattern, const char *p_pattern,
size_t n_char_string, const char *p_string, bool f_overlap,
struct substring_match_info *p_scan_state);
char *substring_match_next(struct substring_match_info *p_scan_state);
int substring_n(size_t n_char_pattern, const char *p_pattern,
size_t n_char_str, const char *p_str);
char *tprintf(const char *fmt, ...) ATTR_TPRINTF;
char *tvprintf(const char *fmt, va_list args);
/* Duplicate a null-terminated string on the heap.
 *
 * A NULL argument is passed straight through as NULL. Otherwise the
 * result is whatever dup_string() returns for the full length of str. */
inline char *copy(const char *str)
{
    if (str == NULL) {
        return NULL;
    }

    return dup_string(str, strlen(str));
} /* end of function copy */
/* Duplicate the characters in the half-open range [str, end): *str is
 * the first character copied and *end is not copied. The work is done
 * by dup_string(). */
inline char *copy_substring(const char *str, const char *end)
{
    const size_t n_char = (size_t) (end - str);

    return dup_string(str, n_char);
} /* end of function copy_substring */
/* Read the unsigned integer that begins a string, stopping at the first
 * non-numeric character. The value comes from scannum_adv(); because the
 * end position is discarded here, a result of 0 cannot be told apart
 * from a string that does not start with a number. */
inline int scannum(const char *str)
{
    char *p_scan = (char *) str; /* scratch cursor; caller's string
                                  * pointer is not affected */

    return scannum_adv(&p_scan);
} /* end of function scannum */
/* Determine whether sub is a substring of str.
 *
 * Parameters
 * sub: Null-terminated text to look for
 * str: Null-terminated string to search
 *
 * Return value
 * 1 if sub occurs somewhere in str and 0 otherwise. As with strstr(),
 * an empty sub is found in every str.
 *
 * Remarks
 * Declared static inline so that every translation unit including this
 * header gets a usable definition even when the call is not inlined. */
static inline int substring(const char *sub, const char *str)
{
    /* strstr() takes the string being searched first and the text
     * being looked for second */
    return strstr(str, sub) != NULL;
} /* end of function substring */
#ifdef CIDER
/* cider integration */
@ -44,9 +124,5 @@ int cinprefix(register char *p, register char *s, register int n);
int cimatch(register char *p, register char *s);
#endif
bool isquote(char ch);
bool is_arith_char(char c);
bool str_has_arith_char(char *s);
int get_comma_separated_values( char *values[], char *str );
#endif
#endif /* include guard */

View File

@ -5,97 +5,132 @@ Copyright 1990 Regents of the University of California. All rights reserved.
/*
* String functions
*/
#include <ctype.h>
#include <stdarg.h>
#include "ngspice/ngspice.h"
#include "ngspice/stringutil.h"
#include "ngspice/stringskip.h"
#include "ngspice/dstring.h"
#include <stdarg.h>
/* Instantiations of string functions in case inlining is not performed */
char *copy(const char *str);
char *copy_substring(const char *str, const char *end);
int scannum(const char *str);
int substring(const char *sub, const char *str);
static size_t get_kr_msb_factor(size_t n);
static size_t kr_hash(size_t n, const char *p);
static inline const char *next_substr(
        size_t n_char_pattern, const char *p_pattern,
        const char **pp_string, const char * const p_last,
        const size_t msb_factor, const size_t h_pattern, size_t *p_h_string);
static bool can_overlap(size_t n_char_pattern, const char * const p_pattern);



/* This function returns true (1) if the string s begins with the
 * string p and false (0) otherwise. Both strings must be null
 * terminated. An empty p is a prefix of every s. */
int prefix(const char *p, const char *s)
{
    /* Walk both strings together while the characters agree */
    while (*p != '\0' && *p == *s) {
        ++p;
        ++s;
    }

    /* s begins with p exactly when all of p was consumed */
    return *p == '\0';
} /* end of function prefix */
/* This function returns 1 if string begins with prefix and 0 otherwise.
 * Neither the prefix nor string needs a null termination.
 *
 * Parameters
 * n_char_prefix: Number of characters in prefix
 * prefix: Characters that must appear at the start of string
 * n_char_string: Number of characters in string
 * string: Characters being tested
 */
int prefix_n(size_t n_char_prefix, const char *prefix,
        size_t n_char_string, const char *string)
{
    /* Test that string is long enough */
    if (n_char_prefix > n_char_string) {
        return 0;
    }

    return memcmp(prefix, string, n_char_prefix) == 0;
} /* end of function prefix_n */
/* copy a substring, from 'str' to 'end'
* including *str, excluding *end
/* This function allocates a buffer and copies the specified number of
* characters from the input string into the buffer followed by a
* terminating null.
*
* Paramters
* str: String to copy
* n_char: Number of characters to copy
*
* Return values
* NULL: Allocation failure
* otherwise: The initialized string.
*/
char *
copy_substring(const char *str, const char *end)
char *dup_string(const char *str, size_t n_char)
{
size_t n = (size_t) (end - str);
char *p;
if ((p = TMALLOC(char, n + 1)) != NULL) {
(void) strncpy(p, str, n);
p[n] = '\0';
if ((p = TMALLOC(char, n_char + 1)) != NULL) {
(void) strncpy(p, str, n_char);
p[n_char] = '\0';
}
return p;
}
} /* end of function dup_string */
char *
tvprintf(const char *fmt, va_list args)
char *tvprintf(const char *fmt, va_list args)
{
char buf[1024];
char *p = buf;
int size = sizeof(buf);
int nchars;
for (;;) {
int nchars;
va_list ap;
va_copy(ap, args);
nchars = vsnprintf(p, (size_t) size, fmt, ap);
va_end(ap);
if (nchars == -1) { // compatibility to old implementations
size *= 2;
/* This case was previously handled by doubling the size of
* the buffer for "compatibility to old implementations."
* However, vsnprintf is defined in both C99 and SUSv2 from 1997.
* There is a slight difference which does not affect this
* usage, but both return negative values (possibly -1) on an
* encoding error, which would lead to an infinte loop (until
* memory was exhausted) with the old behavior */
if (nchars < 0) {
controlled_exit(-1);
}
else if (nchars >= size) {
/* Output was truncated. Returned value is the number of chars
* that would have been written if the buffer were large enough
* excluding the terminiating null. */
size = nchars + 1; /* min required allocation size */
}
else { /* String formatted OK */
if (nchars < size) { /* String formatted OK */
break;
}
/* Output was truncated. Returned value is the number of chars
* that would have been written if the buffer were large enough
* excluding the terminiating null. */
size = nchars + 1; /* min required allocation size */
/* Allocate a larger buffer */
if (p == buf)
if (p == buf) {
p = TMALLOC(char, size);
else
}
else {
p = TREALLOC(char, p, size);
}
}
/* Return the formatted string, making a copy on the heap if the
* stack's buffer (buf) contains the string */
return (p == buf) ? copy(p) : p;
return (p == buf) ? dup_string(p, (size_t) nchars) : p;
} /* end of function tvprintf */
@ -103,8 +138,7 @@ tvprintf(const char *fmt, va_list args)
/* This function returns an allocation containing the string formatted
* according to fmt and the variadic argument list provided. It is a wrapper
* around tvprintf() which processes the argumens as a va_list. */
char *
tprintf(const char *fmt, ...)
char *tprintf(const char *fmt, ...)
{
char *rv;
va_list ap;
@ -117,103 +151,173 @@ tprintf(const char *fmt, ...)
} /* end of function tprintf */
/* Report whether sub occurs anywhere inside str. Both are null
 * terminated. An empty sub is never reported as found. */
/* Like strstr( ) XXX */
int
substring(const char *sub, const char *str)
{
    const char *start;

    /* Try every starting position in str in turn */
    for (start = str; *start != '\0'; start++) {
        const char *want = sub;
        const char *have = start;

        if (*have != *want) { /* first character differs */
            continue;
        }

        /* Advance while the two agree and neither has ended */
        while (*want != '\0' && *have != '\0' && *want == *have) {
            want++;
            have++;
        }

        if (*want == '\0') { /* all of sub was matched */
            return TRUE;
        }
    }

    return FALSE;
}
/* Append one character to a string. Don't check for overflow: the
 * caller must guarantee that the buffer at s has room for one more
 * character plus the terminating null. */
/* Almost like strcat( ) XXX */
void appendc(char *s, char c)
{
    s += strlen(s); /* locate the terminating null */

    s[0] = c;
    s[1] = '\0';
} /* end of function appendc */
/* Returns the unsigned number at *p_str or 0 if there is none. *p_str
 * points to the first character after the number that was read, so
 * it is possible to distinguish between the value 0 and a missing number
 * by testing if the string has been advanced.
 *
 * Remarks
 * No check is made that the accumulated value fits in an int. */
int scannum_adv(char **p_str)
{
    const char *str = *p_str;
    int i = 0;

    while (isdigit_c(*str)) {
        i = i * 10 + (*str - '0');
        ++str;
    }

    *p_str = (char *) str; /* locate end of number */
    return i;
} /* end of function scannum_adv */
/* This function returns the integer at the current string location.
 * The string does not need to be null-terminated.
 *
 * Parameters
 * str: String containing the integer to return at the beginning. An
 *      optional leading '-' is accepted.
 * n: Number of characters in the string
 * p_value: Address where the integer is returned. It is written only
 *      when the return value is positive.
 *
 * Return values
 * -1: No integer present
 * -2: Overflow (value does not fit in an int)
 * >0: Number of characters in the integer, including any '-'
 */
int get_int_n(const char *str, size_t n, int *p_value)
{
    if (n == 0) { /* no string */
        return -1;
    }

    const char *p_cur = str;
    const char * const p_end = str + n;
    int f_neg = 0;

    if (*p_cur == '-') { /* Check for leading negative sign */
        f_neg = 1;
        ++p_cur;
    }

    const char * const p_digits = p_cur; /* where the digits must begin */

    /* Largest magnitude that can be represented.
     * If negative, can be 1 greater (-2**n vs 2**n - 1) */
    const unsigned int limit = (unsigned int) INT_MAX + (unsigned int) f_neg;
    unsigned int value = 0;

    /* Iterate over chars until end or char that is not numeric */
    for ( ; p_cur != p_end; ++p_cur) {
        /* unsigned char keeps isdigit() defined for every input byte */
        const unsigned char ch_cur = (unsigned char) *p_cur;
        if (!isdigit(ch_cur)) { /* Test for exit due to non-numeric char */
            break;
        }

        /* Check for overflow before computing the new value so that
         * the test cannot itself wrap around */
        const unsigned int digit = (unsigned int) (ch_cur - '0');
        if (value > (limit - digit) / 10) {
            return -2;
        }
        value = 10 * value + digit;
    } /* end of loop over digits */

    /* Test for at least one digit */
    if (p_cur == p_digits) {
        return -1; /* no digit */
    }

    /* Apply the sign. A magnitude above INT_MAX can only occur for a
     * negative number and then denotes INT_MIN */
    if (f_neg) {
        *p_value = value > (unsigned int) INT_MAX ? INT_MIN : -(int) value;
    }
    else {
        *p_value = (int) value;
    }

    return (int) (p_cur - str); /* number of chars in the number */
} /* end of function get_int_n */
/* Case insensitive str eq. */
/* Like strcasecmp( ) XXX */
int
cieq(const char *p, const char *s)
int cieq(const char *p, const char *s)
{
for (; *p; p++, s++)
if (tolower_c(*p) != tolower_c(*s))
for (; *p; p++, s++) {
if (tolower_c(*p) != tolower_c(*s)) {
return FALSE;
}
}
return *s == '\0';
}
} /* end of function cieq */
/* Case-insensitive comparison for equality of the first n characters of
 * two character arrays. Neither array needs to be null terminated, and
 * a null character receives no special treatment. Passing the length of
 * a candidate prefix as n turns this into a prefix test, which avoids
 * copying to temporary buffers and the overruns that can occur when
 * such buffers are used without checking lengths.
 *
 * Returns TRUE if all n characters match and FALSE otherwise. */
int cieqn(const char *p, const char *s, size_t n)
{
    const char * const p_end = p + n;

    while (p != p_end) {
        if (tolower_c(*p) != tolower_c(*s)) {
            return FALSE;
        }
        ++p;
        ++s;
    }

    return TRUE; /* all chars matched */
} /* end of function cieqn */
/* Case insensitive prefix. */
int
ciprefix(const char *p, const char *s)
int ciprefix(const char *p, const char *s)
{
for (; *p; p++, s++)
if (tolower_c(*p) != tolower_c(*s))
if (tolower_c(*p) != tolower_c(*s)) {
return FALSE;
}
return TRUE;
}
} /* end of function ciprefix */
/* Convert a null-terminated string in place using tolower_c() on each
 * character. A NULL argument is ignored. */
void strtolower(char *str)
{
    if (!str) {
        return;
    }

    for (; *str; str++) {
        *str = tolower_c(*str);
    }
} /* end of function strtolower */
/* Convert a null-terminated string in place using toupper_c() on each
 * character. A NULL argument is ignored. */
void strtoupper(char *str)
{
    if (!str) {
        return;
    }

    for (; *str; str++) {
        *str = toupper_c(*str);
    }
} /* end of function strtoupper */
#ifdef CIDER
@ -230,18 +334,21 @@ strtoupper(char *str)
* first n characters are the same
*/
/* Returns 0 if either argument is NULL or if a character of p differs
 * from the corresponding character of s (compared via tolower_c()).
 * Otherwise all of p matched, and the result is nonzero only if at
 * least n characters were compared. */
int cinprefix(char *p, char *s, int n)
{
    if (!p || !s) {
        return 0;
    }

    /* n counts down once per matched character */
    for (; *p; p++, s++, n--) {
        if (tolower_c(*p) != tolower_c(*s)) {
            return 0;
        }
    }

    return n <= 0;
} /* end of function cinprefix */
/*
@ -410,6 +517,41 @@ gettok_iv(char **s)
/* findtok_noparen() does the string scanning for gettok_noparens() but
 * does not allocate a token. Hence it is useful when a copy of the token
 * is not required.
 *
 * Parameters
 * p_str: Address of the scan position. On return it points past the
 *      token and any whitespace that follows it.
 * p_token: Receives the start of the token, or NULL if only whitespace
 *      remained in the string
 * p_token_end: Receives the address one past the last character of the
 *      token. It is not written when *p_token is set to NULL.
 *
 * Remarks
 * A token ends at whitespace, '(', ')', ',' or the end of the string.
 */
void findtok_noparen(char **p_str, char **p_token, char **p_token_end)
{
    char *cur = skip_ws(*p_str);

    if (*cur == '\0') { /* nothing left but the end of the string */
        *p_str = cur;
        *p_token = (char *) NULL;
        return;
    }

    *p_token = cur; /* Token starts after whitespace */

    /* Step forward to the first character that ends a token */
    for ( ; *cur != '\0'; ++cur) {
        const char c = *cur;
        if (isspace_c(c) || c == '(' || c == ')' || c == ',') {
            break;
        }
    }

    *p_token_end = cur;
    *p_str = skip_ws(cur);
} /* end of function findtok_noparen */
/*-------------------------------------------------------------------------*
* gettok_noparens was added by SDB on 4.21.2003.
* It acts like gettok, except that it treats parens and commas like
@ -417,33 +559,17 @@ gettok_iv(char **s)
* parsing and returns when it finds one of those chars. It is called from
* 'translate' (subckt.c).
*-------------------------------------------------------------------------*/
/* Locate the next token with findtok_noparen() and return a newly
 * allocated copy of it made by copy_substring(). *s is advanced as
 * described for findtok_noparen(). NULL is returned if no token was
 * found. */
char *gettok_noparens(char **s)
{
    char *token, *token_e;

    findtok_noparen(s, &token, &token_e);
    if (token == (char *) NULL) {
        return (char *) NULL; /* return NULL if we come to end of line */
    }

    return copy_substring(token, token_e);
} /* end of function gettok_noparens */
/*-------------------------------------------------------------------------*
* gettok_model acts like gettok_noparens, however when it encounters a '{',
@ -746,8 +872,8 @@ str_has_arith_char(char *s)
}
int
get_comma_separated_values(char *values[], char *str) {
int get_comma_separated_values(char *values[], char *str)
{
int count = 0;
char *comma_ptr;
@ -769,9 +895,7 @@ get_comma_separated_values(char *values[], char *str) {
modulo a trailing model binning extension '\.[0-9]+'
then return 2
*/
int
model_name_match(const char *token, const char *model_name)
int model_name_match(const char *token, const char *model_name)
{
const char *p;
size_t token_len = strlen(token);
@ -799,7 +923,443 @@ model_name_match(const char *token, const char *model_name)
return 0;
return 2;
}
} /* end of funtion model_name_match */
/* This function returns 1 if pattern is a substring anywhere in str and
 * 0 otherwise. A null (zero-length) pattern is considered a mismatch.
 * Neither the pattern nor the string needs a null termination.
 *
 * Parameters
 * n_char_pattern: Number of characters in the pattern
 * p_pattern: Pattern to find
 * n_char_string: Number of characters in the string
 * p_string: String to search
 *
 * Remarks
 * Every candidate starting position, including the last one, is tested
 * and no character outside the n_char_string given is ever read. The
 * scan uses memchr() to jump to the next occurrence of the first
 * pattern character and memcmp() to confirm the rest.
 */
int substring_n(size_t n_char_pattern, const char *p_pattern,
        size_t n_char_string, const char *p_string)
{
    /* Test for a pattern to match */
    if (n_char_pattern == 0) {
        return 0;
    }

    /* Test for a string of sufficient length */
    if (n_char_pattern > n_char_string) {
        return 0;
    }

    const char *p_cur = p_string;

    /* Number of starting positions that leave room for the pattern */
    size_t n_start = n_char_string - n_char_pattern + 1;

    while (n_start > 0) {
        /* Jump to the next place where the first character occurs */
        const char * const p_hit = memchr(p_cur, p_pattern[0], n_start);
        if (p_hit == NULL) {
            return 0; /* first char absent from all remaining starts */
        }

        if (memcmp(p_hit, p_pattern, n_char_pattern) == 0) {
            return 1; /* match here */
        }

        /* Resume just after the failed candidate */
        n_start -= (size_t) (p_hit - p_cur) + 1;
        p_cur = p_hit + 1;
    }

    return 0; /* no match found */
} /* end of function substring_n */
/* This function initializes a scan for substring matches.
 *
 * Parameters
 * n_char_pattern: Number of characters in the pattern
 * p_pattern: Pattern to find. Need not be null-terminated
 * n_char_string: Number of characters in the string
 * p_string: String to search. Need not be null-terminated
 * f_overlap: TRUE if matches that overlap each other are to be reported
 * p_scan_state: Address of the structure that receives the scan state
 *
 * Remarks
 * The strings are not copied, so they must remain allocated and
 * unaltered while the search is in progress.
 */
void substring_match_init(size_t n_char_pattern, const char *p_pattern,
        size_t n_char_string, const char *p_string, bool f_overlap,
        struct substring_match_info *p_scan_state)
{
    /* Save input info into structure. */
    p_scan_state->n_char_pattern = n_char_pattern;
    p_scan_state->p_pattern = p_pattern;
    p_scan_state->n_char_string = n_char_string;
    p_scan_state->p_string = p_string;
    p_scan_state->f_overlap = FALSE; /* defined value on every path */

    /* Test for a pattern to match and for a string of sufficient
     * length. If either fails, there is nothing to scan. */
    if (n_char_pattern == 0 || n_char_pattern > n_char_string) {
        p_scan_state->f_done = TRUE;
        return;
    }

    /*** Calculate intermediate data ***/
    p_scan_state->f_done = FALSE;

    /* Look for overlaps only if requested and if the pattern is able
     * to overlap a copy of itself */
    p_scan_state->f_overlap = f_overlap ?
            can_overlap(n_char_pattern, p_pattern) : FALSE;

    p_scan_state->n_char_pattern_1 = n_char_pattern - 1;
    p_scan_state->msb_factor = get_kr_msb_factor(n_char_pattern);
    p_scan_state->h_pattern = kr_hash(n_char_pattern, p_pattern);
    p_scan_state->h_string = kr_hash(n_char_pattern, p_string);

    /* Last location where the whole pattern still fits in the string */
    p_scan_state->p_last = p_string + (n_char_string - n_char_pattern);
} /* end of function substring_match_init */
/* This function finds the next substring match
 *
 * Parameter
 * p_scan_state: Address of struct substring_match_info initialized by
 *      substring_match_init()
 *
 * Return value
 * NULL if there is no match or the address of the next match otherwise
 *
 * Remarks
 * p_string in the scan state is the next location to be tested, so the
 * very first location of the string is a candidate on the first call.
 * After a match it is moved one character on if overlapping matches are
 * being reported and past the whole match if they are not. Candidates
 * are confirmed directly with memcmp(); the hash values in the scan
 * state are not consulted, and no location beyond p_last is examined.
 */
char *substring_match_next(struct substring_match_info *p_scan_state)
{
    /* First test if there are no more possible matches */
    if (p_scan_state->f_done) {
        return (char *) NULL;
    }

    const size_t n_char_pattern = p_scan_state->n_char_pattern;
    const char * const p_pattern = p_scan_state->p_pattern;
    const char * const p_last = p_scan_state->p_last;
    const char *p_cur = p_scan_state->p_string;

    /* Find next match, if any */
    for ( ; p_cur <= p_last; ++p_cur) {
        if (memcmp(p_cur, p_pattern, n_char_pattern) == 0) {
            /* Save where the following call must resume */
            p_scan_state->p_string = p_cur +
                    (p_scan_state->f_overlap ? 1 : n_char_pattern);
            return (char *) p_cur; /* Return result */
        }
    }

    /* String exhausted, so update done status */
    p_scan_state->p_string = p_cur;
    p_scan_state->f_done = TRUE;
    return (char *) NULL;
} /* end of function substring_match_next */
#ifdef COMPILE_UNUSED_FUNCTIONS
/* This function returns the locations of optionally non-overlapping
 * substring matches. For example, in the string aaaaa, aa is found in
 * non-overlapping locations at 0-based offsets 0 and 2 and with
 * overlapping allowed at offsets 0, 1, 2, and 3.
 *
 * Parameters
 * n_char_pattern: Number of characters in the pattern
 * p_pattern: Pattern to find. Need not be null-terminated
 * n_char_string: Number of characters in the string
 * p_string: String to search. Need not be null-terminated
 * n_elem_buf: Number of match addresses that the buffer can hold
 * p_match_buf: Buffer receiving the match addresses. It is used as an
 *      array of n_elem_buf elements of type char *.
 * f_overlap: TRUE if overlapping matches are to be reported
 *
 * Return value
 * Number of match addresses stored in the buffer. Scanning stops when
 * the buffer is full.
 *
 * NOTE(review): the buffer holds pointers, so the parameter would be
 * better declared as char **. It is kept as char * here to agree with
 * the existing prototype; the storage passed must be suitably aligned
 * for char * values.
 */
size_t get_substring_matches(size_t n_char_pattern, const char *p_pattern,
        size_t n_char_string, const char *p_string,
        size_t n_elem_buf, char *p_match_buf, bool f_overlap)
{
    /* Test for a pattern to match */
    if (n_char_pattern == 0) {
        return 0;
    }

    /* Test for a string of sufficient length */
    if (n_char_pattern > n_char_string) {
        return 0;
    }

    /* Handle 0-sized buffer */
    if (n_elem_buf == 0) {
        return 0;
    }

    /* View the caller's buffer as the array of addresses to fill */
    char ** const pp_match_buf = (char **) (void *) p_match_buf;

    /* Last location where the whole pattern still fits in the string */
    const char * const p_last = p_string + (n_char_string - n_char_pattern);

    /* If overlapping is not allowed, continue the search after the
     * match; otherwise continue at the very next character */
    const size_t n_step = f_overlap ? 1 : n_char_pattern;

    size_t n_match = 0;
    const char *p_cur = p_string;
    while (p_cur <= p_last && n_match < n_elem_buf) {
        if (memcmp(p_cur, p_pattern, n_char_pattern) == 0) {
            pp_match_buf[n_match++] = (char *) p_cur; /* Save result */
            p_cur += n_step;
        }
        else {
            ++p_cur;
        }
    } /* end of loop over string */

    return n_match;
} /* end of function get_substring_matches */
#endif /* COMPILE_UNUSED_FUNCTIONS */
/* This function determines if a pattern can allow overlapping matches.
 * For example, the pattern "starts" would have overlapped matches in the
 * string "startstarts".
 *
 * Parameters
 * n_char_pattern: Number of characters in the pattern
 * p_pattern: Pattern to test. Need not be null-terminated
 *
 * Return value
 * Nonzero if some proper suffix of the pattern is equal to the prefix
 * of the same length, so that a second match could begin inside a first
 * one. Patterns shorter than 2 characters always give a nonzero result
 * since the answer does not matter for them.
 *
 * Remarks
 * Every possible offset of the second match is tested. Testing only the
 * last occurrence of the first character is not sufficient: "abacabac"
 * overlaps itself at offset 4 although the tail "ac" starting at the
 * last 'a' does not match the beginning of the pattern.
 *
 * There is only a binary yes/no interest regarding overlap rather than
 * the offset at which the overlap occurs.
 */
static bool can_overlap(size_t n_char_pattern, const char * const p_pattern)
{
    bool f_overlap = n_char_pattern < 2; /* does not matter if short */
    size_t n_shift;

    /* Slide a copy of the pattern along itself and compare the part
     * that the two copies have in common */
    for (n_shift = 1; !f_overlap && n_shift < n_char_pattern; ++n_shift) {
        f_overlap = memcmp(p_pattern, p_pattern + n_shift,
                n_char_pattern - n_shift) == 0;
    }

    return f_overlap;
} /* end of function can_overlap */
/* Prime number of Karp-Rabin hashing. Every hash value is kept reduced
 * by this modulus, so all intermediate products stay far below the
 * range of size_t. */
#define KR_MODULUS 1009



/* Compute (256^(n-1))%KR_MODULUS. This is the weight of the leading
 * character of an n-character window in the hash of kr_hash(). The
 * result is 1 for n <= 1. */
static size_t get_kr_msb_factor(size_t n)
{
    size_t factor = 1;
    size_t i;

    for (i = 1; i < n; ++i) {
        factor = (factor << 8) % KR_MODULUS;
    } /* end of loop building factor */

    return factor;
} /* end of function get_kr_msb_factor */



/* Compute KR hash of the n characters at p. The characters are taken as
 * the base-256 digits of a number, most significant first, and the hash
 * is that number modulo KR_MODULUS. The result is 0 for n == 0. */
static size_t kr_hash(size_t n, const char *p)
{
    const char * const p_end = p + n;
    size_t hash = 0;

    for ( ; p != p_end; ++p) {
        /* Reduce at every step so that the value never overflows */
        hash = ((hash << 8) + (unsigned char) *p) % KR_MODULUS;
    } /* end of loop hashing chars */

    return hash;
} /* end of function kr_hash */



/* This function locates the next substring match. It is intended to be called
 * as part of the scanning of a string for a substring
 *
 * Parameters
 * n_char_pattern: Length of pattern to find
 * p_pattern: Pattern to find. Need not be null-terminated
 * pp_string: Address containing the current location in the string. Updated
 *      if a match is found.
 * p_last: Address of last possible location of a match
 * msb_factor: Constant related to hash update, from get_kr_msb_factor()
 * h_pattern: Computed hash of pattern, from kr_hash()
 * p_h_string: Address containing the current hash value of the location
 *      in the string being considered, which must be a reduced value as
 *      returned by kr_hash(). Updated if a match is found.
 *
 * Return value
 * NULL if no substring, or the address of the substring if one exists.
 *
 * Remarks
 * The location at *pp_string itself is not tested; the search starts one
 * character after it. If *pp_string is already at or beyond p_last,
 * NULL is returned without reading anything.
 */
static inline const char *next_substr(
        size_t n_char_pattern, const char *p_pattern,
        const char **pp_string, const char * const p_last,
        const size_t msb_factor, const size_t h_pattern, size_t *p_h_string)
{
    const char *p_string = *pp_string;
    size_t h_string = *p_h_string;

    /* Step only while another starting point remains */
    while (p_string < p_last) {
        /* Update hash for next starting point at p_string + 1. The
         * leading character is removed, the rest is shifted up by one
         * position and the newly covered character is added. Adding
         * KR_MODULUS before subtracting keeps the value non-negative. */
        const size_t ch_out = (unsigned char) p_string[0];
        const size_t ch_in = (unsigned char) p_string[n_char_pattern];
        h_string = (h_string + KR_MODULUS -
                (ch_out * msb_factor) % KR_MODULUS) % KR_MODULUS;
        h_string = ((h_string << 8) + ch_in) % KR_MODULUS;

        ++p_string; /* step to next starting point */

        /* Compare at current starting point. If hashes match,
         * do full compare */
        if (h_pattern == h_string &&
                memcmp(p_pattern, p_string, n_char_pattern) == 0) {
            *pp_string = p_string; /* Update string location */
            *p_h_string = h_string; /* and hash for another call */
            return p_string; /* match here */
        }
    } /* end of loop over starting points in string */

    return (char *) NULL; /* no match found */
} /* end of function next_substr */
/* This function returns TRUE (nonzero) if '\0' is among the n
 * characters at p and FALSE (zero) otherwise. The result is FALSE for
 * n == 0. */
static inline bool have_null(size_t n, const char *p)
{
    /* memchr() never forms an address outside the n characters given */
    return memchr(p, '\0', n) != NULL;
} /* end of function have_null */
/* This function "finds a needle in a haystack" aka the first occurrence of
 * any character of needle in haystack. NULL is returned if none is found.
 * haystack must be terminated with '\0'.
 *
 * Parameters
 * haystack: Null-terminated string to search
 * n_needle: Number of characters at p_needle
 * p_needle: Characters to locate
 *
 * Remarks
 * p_needle does not need to be null terminated. In fact, a null can be
 * included among the characters to be located so that this function will
 * locate the end of haystack if none of the other characters is found and
 * would guarantee that the returned value is not NULL.
 *
 * Each character of haystack is looked up among the needle characters
 * with memchr(). The lookup is done before the test for the end of
 * haystack, which is what allows '\0' itself to be located.
 */
char *find_first_of(const char *haystack,
        unsigned int n_needle, const char *p_needle)
{
    /* Handle case of nothing to find */
    if (n_needle == 0) {
        return (char *) NULL;
    }

    for ( ; ; ++haystack) { /* iterate over straws in haystack */
        const char straw = *haystack;

        if (memchr(p_needle, straw, n_needle) != NULL) { /* found needle */
            return (char *) haystack;
        }

        if (straw == '\0') { /* entire haystack searched */
            return (char *) NULL;
        }
    } /* end of loop over straws in haystack */
} /* end of function find_first_of */
/* This function returns TRUE if any of the first n characters of str is
 * a double quote, a single quote or a backslash, and FALSE otherwise.
 * The string does not need to be null-terminated. */
bool has_escape_or_quote(size_t n, const char *str)
{
    size_t i;

    for (i = 0; i < n; ++i) {
        switch (str[i]) {
        case '"':
        case '\'':
        case '\\':
            return TRUE;
        default:
            break;
        }
    } /* end of loop over chars in string */

    return FALSE;
} /* end of function has_escape_or_quote */