/********** Copyright 1990 Regents of the University of California. All rights reserved. **********/ /* * String functions */ #include #include #include "ngspice/ngspice.h" #include "ngspice/stringutil.h" #include "ngspice/stringskip.h" #include "ngspice/dstring.h" /* Instantiations of string functions */ extern inline char *copy(const char *str); extern inline char *copy_substring(const char *str, const char *end); extern inline int scannum(const char *str); extern inline int substring(const char *sub, const char *str); static size_t get_kr_msb_factor(size_t n); static size_t kr_hash(size_t n, const char *p); static inline const char *next_substr( size_t n_char_pattern, const char *p_pattern, const char **pp_string, const char * const p_last, const size_t msb_factor, const size_t h_pattern, size_t *p_h_string); static bool can_overlap(size_t n_char_pattern, const char * const p_pattern); static void findtok_np(char** p_str, char** p_token, char** p_token_end); /* This function returns true if the string s begins with the * string p and false otherwise. */ int prefix(const char *p, const char *s) { while (*p && (*p == *s)) p++, s++; return *p == '\0'; } /* end of function prefix */ /* This function returns 1 if string begins with prefix and 0 otherwise. * Neither the prefix nor string needs a null termination. */ int prefix_n(size_t n_char_prefix, const char *prefix, size_t n_char_string, const char *string) { /*Test that string is long enough */ if (n_char_prefix > n_char_string) { return 0; } return memcmp(prefix, string, n_char_prefix) == 0; } /* end of function prefix_n */ /* This function allocates a buffer and copies the specified number of * characters from the input string into the buffer followed by a * terminating null. * * Paramters * str: String to copy * n_char: Number of characters to copy * * Return values * NULL: Allocation failure * otherwise: The initialized string. 
*/ char *dup_string(const char *str, size_t n_char) { char *p = TMALLOC(char, n_char + 1); if (p != NULL) { (void) memcpy(p, str, n_char + 1); p[n_char] = '\0'; } return p; } /* end of function dup_string */ char *tvprintf(const char *fmt, va_list args) { char buf[1024]; char *p = buf; int size = sizeof(buf); int nchars; for (;;) { va_list ap; va_copy(ap, args); nchars = vsnprintf(p, (size_t) size, fmt, ap); va_end(ap); /* This case was previously handled by doubling the size of * the buffer for "compatibility to old implementations." * However, vsnprintf is defined in both C99 and SUSv2 from 1997. * There is a slight difference which does not affect this * usage, but both return negative values (possibly -1) on an * encoding error, which would lead to an infinte loop (until * memory was exhausted) with the old behavior */ if (nchars < 0) { controlled_exit(-1); } if (nchars < size) { /* String formatted OK */ break; } /* Output was truncated. Returned value is the number of chars * that would have been written if the buffer were large enough * excluding the terminiating null. */ size = nchars + 1; /* min required allocation size */ /* Allocate a larger buffer */ if (p == buf) { p = TMALLOC(char, size); } else { p = TREALLOC(char, p, size); } } /* Return the formatted string, making a copy on the heap if the * stack's buffer (buf) contains the string */ return (p == buf) ? dup_string(p, (size_t) nchars) : p; } /* end of function tvprintf */ /* This function returns an allocation containing the string formatted * according to fmt and the variadic argument list provided. It is a wrapper * around tvprintf() which processes the argumens as a va_list. */ char *tprintf(const char *fmt, ...) { char *rv; va_list ap; va_start(ap, fmt); rv = tvprintf(fmt, ap); va_end(ap); return rv; } /* end of function tprintf */ /* Append one character to a string. Don't check for overflow. 
*/ /* Almost like strcat( ) XXX */ void appendc(char *s, char c) { while (*s) { s++; } *s++ = c; *s = '\0'; } /* end of function appendc */ /* Returns the unsigned number at *p_str or 0 if there is none. *p_str * points to the first character after the number that was read, so * it is possible to distingish between the value 0 and a missing number * by testing if the string has been advanced. */ int scannum_adv(char **p_str) { const char *str = *p_str; int i = 0; while (isdigit_c(*str)) { i = i * 10 + *(str++) - '0'; } *p_str = (char *) str; /* locate end of number */ return i; } /* end of function scannum_adv */ /* This function returns the integer at the current string location. * The string does not need to be null-terminated. * * Parameters * str: String containing the integer to return at the beginning * n: Number of characters in the string * p_value: Address where the integer is returned * * Return values * -1: No integer present * -2: Overflow * >0: Number of characters in the integer */ int get_int_n(const char *str, size_t n, int *p_value) { if (n == 0) { /* no string */ return -1; } unsigned int value = 0; const char *p_cur = str; const char * const p_end = str + n; bool f_neg; if (*p_cur == '-') { /* Check for leading negative sign */ f_neg = 1; ++p_cur; } else { f_neg = 0; } /* Iterate over chars until end or char that is not numeric */ for ( ; p_cur != p_end; ++p_cur) { char ch_cur = *p_cur; if (!isdigit(ch_cur)) { /* Test for exit due to non-numeric char */ break; } /* Compute new value and check for overflow. */ const unsigned int value_new = 10 * value + (unsigned int) (ch_cur - '0'); if (value_new < value) { return -2; } value = value_new; } /* end of loop over digits */ /* Test for at least one digit */ if (p_cur == str + f_neg) { return -1; /* no digit */ } /* Test for overflow. 
* If negative, can be 1 greater (-2**n vs 2**n -1) */ if (value - (unsigned int) f_neg > (unsigned int) INT_MAX) { return -2; } /* Take negative if negative sign present. (This operation works * correctly if value == INT_MIN since -INT_MIN == INT_MIN */ *p_value = f_neg ? -(int) value : (int) value; return (int) (p_cur - str); /* number of chars in the number */ } /* end of function get_int_n */ /* Case insensitive str eq. */ /* Like strcasecmp( ) XXX */ int cieq(const char *p, const char *s) { for (; *p; p++, s++) { if (tolower_c(*p) != tolower_c(*s)) { return FALSE; } } return *s == '\0'; } /* end of function cieq */ /* Case-insensitive string compare fore equialty with explicit length * given. Neither character array needs to be null terminated. By not * including the trailing null in the count, it can be used to check * for a prefix. This function is useful for avoiding string copies * to temporary buffers and the potential for buffer overruns that * can occur when using temporary buffers without checking lengths. */ int cieqn(const char *p, const char *s, size_t n) { size_t i; for (i = 0; i < n; ++i) { if (tolower_c(p[i]) != tolower_c(s[i])) { return FALSE; } } return TRUE; /* all chars matched */ } /* end of function cineq */ /* Case insensitive prefix. */ int ciprefix(const char *p, const char *s) { for (; *p; p++, s++) if (tolower_c(*p) != tolower_c(*s)) { return FALSE; } return TRUE; } /* end of function ciprefix */ void strtolower(char *str) { if (!str) { return; } for (; *str; str++) { *str = tolower_c(*str); } } /* end of function strtolower */ void strtoupper(char *str) { if (!str) { return; } for (; *str; str++) { *str = toupper_c(*str); } } /* end of function strtoupper */ #ifdef CIDER /* * Imported from cider file support/strmatch.c * Original copyright notice: * Author: 1991 David A. Gates, U. C. 
Berkeley CAD Group * */ /* * Case-insensitive test of whether p is a prefix of s and at least the * first n characters are the same */ int cinprefix(char *p, char *s, int n) { if (!p || !s) { return 0; } for (; *p; p++, s++, n--) { if (tolower_c(*p) != tolower_c(*s)) { return 0; } } return n <= 0; } /* end of function cinprefix */ /* * Case-insensitive match of prefix string p against string s * returns the number of matching characters * */ int cimatch(char *p, char *s) { int n = 0; if (!p || !s) return 0; for (; *p; p++, s++, n++) if (tolower_c(*p) != tolower_c(*s)) return n; return n; } #endif /* CIDER */ /*-------------------------------------------------------------------------* * gettok skips over whitespace and returns the next token found. This is * the original version. It does not "do the right thing" when you have * parens or commas anywhere in the nodelist. Note that I left this unmodified * since I didn't want to break any fcns which called it from elsewhere than * subckt.c. -- SDB 12.3.2003. *-------------------------------------------------------------------------*/ char * gettok(char **s) { char c; int paren; const char *token, *token_e; if (!*s) return NULL; paren = 0; *s = skip_ws(*s); if (!**s) return NULL; token = *s; while ((c = **s) != '\0' && !isspace_c(c)) { if (c == '(') paren += 1; else if (c == ')') paren -= 1; else if (c == ',' && paren < 1) break; (*s)++; } token_e = *s; while (isspace_c(**s) || **s == ',') (*s)++; return copy_substring(token, token_e); } /*-------------------------------------------------------------------------* * nexttok skips over whitespaces and the next token in s * returns NULL if there is nothing left to skip. * It replaces constructs like txfree(gettok(&actstring)) by * actstring = nexttok(actstring). This is derived from the original gettok version. * It does not "do the right thing" when * you have parens or commas anywhere in the nodelist. 
*-------------------------------------------------------------------------*/ char * nexttok(const char *s) { if (!s) return NULL; int paren = 0; s = skip_ws(s); if (!*s) return NULL; for (; *s && !isspace_c(*s); s++) if (*s == '(') paren += 1; else if (*s == ')') paren -= 1; else if (*s == ',' && paren < 1) break; while (isspace_c(*s) || *s == ',') s++; return (char *) s; } /*-------------------------------------------------------------------------* * nexttok skips over whitespaces and the next token in s * returns NULL if there is nothing left to skip. * It replaces constructs like txfree(gettok(&actstring)) by * actstring = nexttok(actstring). This is derived from the gettok_np version. * It acts like gettok, except that it treats parens and commas like * whitespace. *-------------------------------------------------------------------------*/ char* nexttok_noparens(const char* s) { if (!s) return NULL; s = skip_ws(s); if (!*s) return NULL; for (; *s && !isspace_c(*s); s++) if (*s == '(') break; else if (*s == ')') break; else if (*s == ',') break; while (isspace_c(*s) || *s == ',' || *s == '(' || *s == ')') s++; return (char*)s; } /*-------------------------------------------------------------------------* * gettok skips over whitespaces or '=' and returns the next token found, * if the token is something like i(xxx), v(yyy), or v(xxx,yyy) * -- h_vogt 10.07.2010. 
*-------------------------------------------------------------------------*/ char * gettok_iv(char **s) { char *p_src = *s; /* location in source string */ char c; /* current char */ /* Step past whitespace and '=' */ while (isspace_c(c = *p_src) || (c == '=')) { p_src++; } /* Test for valid leading character */ if (((c =*p_src) == '\0') || ((c != 'v') && (c != 'i') && (c != 'V') && (c != 'I'))) { *s = p_src; /* update position in string */ return (char *) NULL; } /* Allocate buffer for token being returned */ char * const token = TMALLOC(char, strlen(p_src) + 1); char *p_dst = token; /* location in token */ // add v or i to buf *p_dst++ = *p_src++; { int n_paren = 0; /* Skip any space between v/V/i/I and '(' */ p_src = skip_ws(p_src); while ((c = *p_src) != '\0') { /* Keep track of nesting level */ if (c == '(') { n_paren++; } else if (c == ')') { n_paren--; } if (isspace_c(c)) { /* Do not copy whitespace to output */ p_src++; } else { *p_dst++ = *p_src++; if (n_paren == 0) { break; } } } } /* Step past whitespace and ',' */ while (isspace_c(c = *p_src) || (c == ',')) { p_src++; } *s = p_src; /* update position in string */ return token; } /* end of function gettok_iv */ /* findtok_noparen() does the string scanning for gettok_noparens() but * does not allocate a token. Hence it is useful when a copy of the token * is not required */ void findtok_noparen(char **p_str, char **p_token, char **p_token_end) { char *str = *p_str; str = skip_ws(str); if (!*str) { *p_str = str; *p_token = (char *) NULL; return; } *p_token = str; /* Token starts after whitespace */ { char c; while ((c = *str) != '\0' && !isspace_c(c) && (c != '(') && (c != ')') && (c != ',') ) { str++; } } *p_token_end = str; str = skip_ws(str); *p_str = str; } /* end of function findtok_noparen */ /*-------------------------------------------------------------------------* * gettok_noparens was added by SDB on 4.21.2003. 
* It acts like gettok, except that it treats parens and commas like
 * whitespace while looking for the POLY token. That is, it stops
 * parsing and returns when it finds one of those chars. It is called from
 * 'translate' (subckt.c).
 *-------------------------------------------------------------------------*/
char *gettok_noparens(char **s)
{
    if (*s == NULL) {
        return NULL;
    }

    char *p_token = (char *) NULL;
    char *p_token_end = (char *) NULL;
    findtok_noparen(s, &p_token, &p_token_end);

    /* A NULL token means that the end of the line was reached */
    return p_token == (char *) NULL ?
            (char *) NULL : copy_substring(p_token, p_token_end);
} /* end of function gettok_noparens */



/* findtok_np() does the string scanning for gettok_np() but
 * does not allocate a token. It skips over all white spaces, ',', '('and ')'
 *
 * On return *p_str is after the token and the separators that follow it.
 * *p_token is the start of the token or NULL if there is none, in which
 * case *p_token_end is not written. Otherwise *p_token_end is the location
 * after the last char of the token. */
static void findtok_np(char** p_str, char** p_token, char** p_token_end)
{
    char *p_cur = *p_str;

    /* Leading separators */
    while (isspace_c(*p_cur) || *p_cur == ',' ||
            *p_cur == '(' || *p_cur == ')') {
        p_cur++;
    }

    if (*p_cur == '\0') { /* nothing but separators */
        *p_str = p_cur;
        *p_token = (char*) NULL;
        return;
    }

    /* The token runs up to the next separator or the end of the string */
    *p_token = p_cur;
    while (*p_cur != '\0' && !isspace_c(*p_cur) &&
            *p_cur != '(' && *p_cur != ')' && *p_cur != ',') {
        p_cur++;
    }
    *p_token_end = p_cur;

    /* Trailing separators */
    while (isspace_c(*p_cur) || *p_cur == ',' ||
            *p_cur == '(' || *p_cur == ')') {
        p_cur++;
    }
    *p_str = p_cur;
} /* end of function findtok_np */



/*-------------------------------------------------------------------------*
 * gettok_np acts like gettok, except that it treats parens and commas like
 * whitespace. That is, it stops parsing and returns when it finds one of
 * those chars. It then moves s beyond all white spaces, ',', '('and ')'.
*-------------------------------------------------------------------------*/ char* gettok_np(char** s) { char* token, * token_e; if (!*s) return NULL; findtok_np(s, &token, &token_e); if (token == (char*)NULL) { return (char*)NULL; /* return NULL if we come to end of line */ } return copy_substring(token, token_e); } /* end of function gettok_noparens */ /*-------------------------------------------------------------------------* * gettok_model acts like gettok_noparens, however when it encounters a '{', * it searches for the corresponding '}' and adds the string to the output * token. *-------------------------------------------------------------------------*/ char * gettok_model(char **s) { char c; const char *token, *token_e; if (!*s) return NULL; *s = skip_ws(*s); if (!**s) return NULL; /* return NULL if we come to end of line */ token = *s; while ((c = **s) != '\0' && !isspace_c(c) && (**s != '(') && (**s != ')') && (**s != ',') ) { (*s)++; if (**s == '{') { char *tmpstr = gettok_char(s, '}', FALSE, TRUE); tfree(tmpstr); } } token_e = *s; *s = skip_ws(*s); return copy_substring(token, token_e); } char * gettok_instance(char **s) { char c; const char *token, *token_e; if (!*s) return NULL; *s = skip_ws(*s); if (!**s) return NULL; /* return NULL if we come to end of line */ token = *s; while ((c = **s) != '\0' && !isspace_c(c) && (**s != '(') && (**s != ')') ) { (*s)++; } token_e = *s; /* Now iterate up to next non-whitespace char */ *s = skip_ws(*s); return copy_substring(token, token_e); } /* get the next token starting at next non white space, stopping at p. If inc_p is true, then including p, else excluding p. Return NULL if p is not found. If '}', ']' or ')' and nested is true, find corresponding p. 
*/
char *
gettok_char(char **s, char p, bool inc_p, bool nested)
{
    if (*s == NULL) {
        return NULL;
    }

    char *p_cur = skip_ws(*s);
    *s = p_cur;
    if (*p_cur == '\0') {
        return NULL; /* return NULL if we come to end of line */
    }

    const char * const p_token = p_cur;

    if (nested && (p == '}' || p == ')' || p == ']')) {
        /* Opening bracket that pairs with p */
        const char q = (p == '}') ? '{' : (p == ']') ? '[' : '(';
        int depth = 0;

        /* Everything in front of the first q belongs to the token */
        while (*p_cur != '\0' && *p_cur != q) {
            p_cur++;
        }

        /* Advance to the p that closes that q */
        for ( ; *p_cur != '\0'; p_cur++) {
            if (*p_cur == q) {
                depth++;
            }
            else if (*p_cur == p) {
                depth--;
            }
            if (depth == 0) {
                break;
            }
        }
    }
    else {
        /* just look for p */
        while (*p_cur != '\0' && *p_cur != p) {
            p_cur++;
        }
    }

    if (*p_cur == '\0') { /* p not found. *s is left at the end */
        *s = p_cur;
        return NULL;
    }

    if (inc_p) { /* add p */
        p_cur++;
    }
    const char * const p_token_end = p_cur;

    /* Now iterate up to next non-whitespace char */
    *s = skip_ws(p_cur);

    return copy_substring(p_token, p_token_end);
} /* end of function gettok_char */



/*-------------------------------------------------------------------------*
 * gettok_node was added by SDB on 12.3.2003
 * It acts like gettok, except that it treats parens and commas like
 * whitespace (i.e. it ignores them). Use it when parsing through netnames
 * (node names) since they may be grouped using ( , ).
*-------------------------------------------------------------------------*/
char *
gettok_node(char **s)
{
    if (*s == NULL) {
        return NULL;
    }

    char *p_cur = *s;

    /* iterate over whitespace and ( , ) */
    while (isspace_c(*p_cur) ||
            *p_cur == '(' || *p_cur == ')' || *p_cur == ',') {
        p_cur++;
    }
    *s = p_cur;

    if (*p_cur == '\0') {
        return NULL; /* return NULL if we come to end of line */
    }

    /* collect chars until whitespace or ( , ) */
    const char * const p_token = p_cur;
    while (*p_cur != '\0' && !isspace_c(*p_cur) &&
            *p_cur != '(' && *p_cur != ')' && *p_cur != ',') {
        p_cur++;
    }
    const char * const p_token_end = p_cur;

    /* Now iterate up to next char that is not whitespace or ( , ) */
    while (isspace_c(*p_cur) ||
            *p_cur == '(' || *p_cur == ')' || *p_cur == ',') {
        p_cur++;
    }
    *s = p_cur;

    return copy_substring(p_token, p_token_end);
} /* end of function gettok_node */



/*-------------------------------------------------------------------------*
 * get_l_paren iterates the pointer forward in a string until it hits
 * the position after the next left paren "(". It returns 0 if it found a left
 * paren, 1 if no left paren is found, -1 if left paren is the last character.
 * It is called from 'translate' (subckt.c).
 *-------------------------------------------------------------------------*/
int
get_l_paren(char **s)
{
    char *p_cur = *s;

    while (*p_cur != '\0' && *p_cur != '(') {
        p_cur++;
    }

    if (*p_cur == '\0') { /* no left paren. *s is left at the end */
        *s = p_cur;
        return 1;
    }

    p_cur++; /* step past the paren */
    *s = p_cur;

    return (*p_cur == '\0') ? -1 : 0;
} /* end of function get_l_paren */



/*-------------------------------------------------------------------------*
 * get_r_paren iterates the pointer forward in a string until it hits
 * the position after the next right paren ")". It returns 0 if it found a right
 * paren, 1 if no right paren is found, and -1 if right paren is the last
 * character. It is called from 'translate' (subckt.c).
*-------------------------------------------------------------------------*/ int get_r_paren(char **s) { while (**s && (**s != ')')) (*s)++; if (!**s) return 1; (*s)++; if (**s == '\0') return -1; return 0; } /*-------------------------------------------------------------------------* * this function strips all white space inside parens * is needed in gettoks (dotcards.c) for correct processing of expressions * like " .plot v( 5 , 4 ) v( 6 )" -> .plot v(5,4) v(6)" *-------------------------------------------------------------------------*/ char * stripWhiteSpacesInsideParens(const char *str) { str = skip_ws(str); /* Skip leading whitespace */ const size_t n_char_str = strlen(str); /* Allocate buffer for string being built */ char * const str_out = TMALLOC(char, n_char_str + 1); char *p_dst = str_out; /* location in str_out */ char ch; /* current char */ /* Process input string until its end */ for ( ; ; ) { /* Add char. If at end of input string, return the string * that was built */ if ((*p_dst++ = (ch = *str++)) == '\0') { return str_out; } /* If the char is a ')' add all non-whitespace until ')' or, * if the string is malformed, until '\0' */ if (ch == '(') { for ( ; ; ) { /* If at end of input string, the closing ') was missing. * The caller will need to resolve this issue. 
*/ if ((ch = *str++) == '\0') { *p_dst = '\0'; return str_out; } if (isspace((int) ch)) { /* skip whitespace */ continue; } /* Not whitespace, so add next character */ *p_dst++ = ch; /* If the char that was added was ')', done */ if (ch == ')') { break; } } /* end of loop processing () */ } /* end of case of '(' found */ } /* end of loop over chars in input string */ } /* end of function stripWhiteSpacesInsideParens */ bool isquote(char ch) { return ch == '\'' || ch == '"'; } bool is_arith_char(char c) { return c != '\0' && strchr("+-*/()<>?:|&^!%\\", c); } bool str_has_arith_char(char *s) { for (; *s; s++) if (is_arith_char(*s)) return TRUE; return FALSE; } int get_comma_separated_values(char *values[], char *str) { int count = 0; char *comma_ptr; while ((comma_ptr = strchr(str, ',')) != NULL) { char *ptr = skip_back_ws(comma_ptr, str); values[count++] = copy_substring(str, ptr); str = skip_ws(comma_ptr + 1); } values[count++] = copy(str); return count; } /* check if the given token matches a model name either exact then return 1 or modulo a trailing model binning extension '\.[0-9]+' then return 2 */ int model_name_match(const char *token, const char *model_name) { const char *p; size_t token_len = strlen(token); if (strncmp(token, model_name, token_len) != 0) return 0; p = model_name + token_len; // exact match if (*p == '\0') return 1; // check for . if (*p++ != '.') return 0; // minimum one trailing char if (*p == '\0') return 0; // all of them digits for (; *p; p++) if (!isdigit_c(*p)) return 0; return 2; } /* end of funtion model_name_match */ /* This function returns 1 if pattern is a substring anywhere in str and * 0 otherwise. A null pattern is considered a mismatch. 
* * Uses Karp-Rabin substring matching with base=256 and modulus=1009 */ int substring_n(size_t n_char_pattern, const char *p_pattern, size_t n_char_string, const char *p_string) { /* Test for a pattern to match */ if (n_char_pattern == 0) { return 0; } /* Test for a string of sufficient length */ if (n_char_pattern > n_char_string) { return 0; } /* Factor for rolling hash computation */ const size_t msb_factor = get_kr_msb_factor(n_char_pattern); const size_t h_pattern = kr_hash(n_char_pattern, p_pattern); size_t h_string = kr_hash(n_char_pattern, p_string); /* Compare at beginning. If hashes match, do full compare */ if (h_pattern == h_string && memcmp(p_pattern, p_string, n_char_pattern) == 0) { return 1; /* match at start */ } /* Compare at each possible starting point in the string */ const char *p_last = p_string + (n_char_string - n_char_pattern - 1); return next_substr(n_char_pattern, p_pattern, &p_string, p_last, msb_factor, h_pattern, &h_string) == (char *) NULL ? 0 : 1; } /* end of function substring_n */ /* This function initializes a scan for substring matches */ void substring_match_init(size_t n_char_pattern, const char *p_pattern, size_t n_char_string, const char *p_string, bool f_overlap, struct substring_match_info *p_scan_state) { /* Save input info into structure. Note that the strings are not * copied, so they must remain allocated and unaltered while the * search is in progress. */ p_scan_state->n_char_pattern = n_char_pattern; p_scan_state->p_pattern = p_pattern; p_scan_state->n_char_string = n_char_string; p_scan_state->p_string = p_string; /*** Calculate intermediate data ***/ /* Test for a pattern to match */ if (n_char_pattern == 0) { p_scan_state->f_done = TRUE; } /* Test for a string of sufficient length */ else if (n_char_pattern > n_char_string) { p_scan_state->f_done = TRUE; } else { p_scan_state->f_done = FALSE; /* Look for overlaps only if possible */ p_scan_state->f_overlap= f_overlap ? 
!can_overlap(n_char_pattern, p_pattern) : FALSE; p_scan_state->n_char_pattern_1 = n_char_pattern - 1; p_scan_state->msb_factor = get_kr_msb_factor(n_char_pattern); p_scan_state->h_pattern = kr_hash(n_char_pattern, p_pattern); p_scan_state->h_string = kr_hash(n_char_pattern, p_string); p_scan_state->p_last = p_string + (n_char_string - n_char_pattern - 1); } return; } /* end of function substring_match_init */ /* This function finds the next substring match * * Parameter * p_scan_state: Address of struct substring_match_info initialized by * substring_match_init() * * Return value * NULL if there is no match or the address of the next match otherwise */ char *substring_match_next(struct substring_match_info *p_scan_state) { /* First test if there are no more possible matches */ if (p_scan_state->f_done) { return (char *) NULL; } /* Find next match, if any */ const char * const p_match = next_substr( p_scan_state->n_char_pattern, p_scan_state->p_pattern, &p_scan_state->p_string, p_scan_state->p_last, p_scan_state->msb_factor,p_scan_state->h_pattern, &p_scan_state->h_string); /* Update done status if changed */ if (p_match == (char *) NULL) { p_scan_state->f_done = TRUE; } else { if (!p_scan_state->f_overlap) { p_scan_state->p_string += p_scan_state->n_char_pattern_1; /* end of match */ p_scan_state->h_string = p_scan_state->h_pattern; } } return (char *) p_match; /* Return result */ } /* end of function substring_match_next */ #ifdef COMPILE_UNUSED_FUNCTIONS /* This funtion returns the locations of optionally non-overlapping substring * matches. 
For example, in the string aaaaa, aa is found in non-overlapping * locations at 0-based offsets 0 and 2 ahd with overlapping allowed atr * offsets 0, 1, 2, and 3 */ size_t get_substring_matches(size_t n_char_pattern, const char *p_pattern, size_t n_char_string, const char *p_string, size_t n_elem_buf, char *p_match_buf, bool f_overlap) { /* Test for a pattern to match */ if (n_char_pattern == 0) { return 0; } /* Test for a string of sufficient length */ if (n_char_pattern > n_char_string) { return 0; } /* Handle 0-sized buffer */ if (n_elem_buf == 0) { return 0; } /* Factor for rolling hash computation */ const size_t msb_factor = get_kr_msb_factor(n_char_pattern); const size_t h_pattern = kr_hash(n_char_pattern, p_pattern); size_t h_string = kr_hash(n_char_pattern, p_string); /* Compare at beginning. If hashes match, do full compare */ if (h_pattern == h_string && memcmp(p_pattern, p_string, n_char_pattern) == 0) { return 1; /* match at start */ } /* Compare at each possible starting point in the string */ const char *p_last = p_string + (n_char_string - n_char_pattern - 1); const size_t n_char_pattern_1 = n_char_pattern - 1; char **pp_match_buf_cur = &p_match_buf; char * const * const pp_match_buf_end = pp_match_buf_cur + n_elem_buf; /* Look for overlaps only if possible */ f_overlap = f_overlap ? !can_overlap(n_char_pattern, p_pattern) : FALSE; for ( ; pp_match_buf_cur < pp_match_buf_end; pp_match_buf_cur++) { const char *p_match = next_substr(n_char_pattern, p_pattern, &p_string, p_last, msb_factor, h_pattern, &h_string); if (p_match == (char *) NULL) { /* if no match, done */ return (int) (pp_match_buf_cur - &p_match_buf); } /* Save result */ *pp_match_buf_cur = (char *) p_match; /* If overlapping is not allowed, contniue search after the match. * Note that in this case, the string hash is the pattern hash. 
*/ if (!f_overlap) { p_string += n_char_pattern_1; /* end of match */ h_string = h_pattern; } } /* end of loop over string */ return n_elem_buf; /* full buffer */ } /* end of funtion get_substring_matches */ #endif /* COMPILE_UNUSED_FUNCTIONS */ /* This function determines if a pattern can allow overlapping matches. * For example, the pattern "starts" would have overlapped matches in the * string "startstarts". * * Remarks * While not directly related to this function, there is only a binary yes/no * interest regarding overlap rather than an offset into the the string where * such overlap may occur. That is because the hash value is being computed * incremetally, so the only time when there is substantial computational * savings in this approach is when the hash value is known, as it would be * at the end of a match (since the hash of the pattern is knonw.) */ static bool can_overlap(size_t n_char_pattern, const char * const p_pattern) { if (n_char_pattern < 2) { /* does not matter */ return TRUE; } /* Find the last occurrance of the first character */ const char * const p_end = p_pattern + n_char_pattern; const char *p_cur = p_end - 1; const char ch_first = *p_pattern; for ( ; p_cur > p_pattern; --p_cur) { if (*p_cur == ch_first) { break; } } /* end of loop finding the first char */ /* Test for no duplicate */ if (p_cur == p_pattern) { /* not found */ return FALSE; /* no duplicate so cannot overlap */ } /* Now must match from this char onward to overlap */ const char *p_src = p_pattern; for ( ; p_cur != p_end; ++p_cur, ++p_src) { if (*p_cur != *p_src) { /* comparing 'b' to 'd' in "abcad" * for example */ return FALSE; /* Mismatch, so not an overlap */ } } /* end of loop finding the first char */ return TRUE; /* Matched to end of word */ } /* end of function can_overlap */ /* Prime number of Karp-Rabin hashing. Tradeoff between number of hash * collisions and number of times modulus must be taken. 
/* KR_MODULUS is the modulus of all Karp-Rabin hash values in this file. */
#define KR_MODULUS 1009



/* Compute (256^(n-1))%KR_MODULUS. This is the weight that the first char
 * of an n-char window has in the hash of that window. 1 is returned for
 * n < 2. */
static size_t get_kr_msb_factor(size_t n)
{
    size_t factor = 1;
    size_t i;

    /* Reducing after every step keeps the value below KR_MODULUS, so the
     * shift cannot overflow */
    for (i = 1; i < n; ++i) {
        factor = (factor << 8) % KR_MODULUS;
    } /* end of loop building factor */

    return factor;
} /* end of function get_kr_msb_factor */



/* Compute KR hash assuming n >= 1. The n chars at p are taken as the digits
 * of a base-256 number, most significant first, and the hash is that number
 * modulo KR_MODULUS. Chars are used as unsigned values. */
static size_t kr_hash(size_t n, const char *p)
{
    const unsigned char *p_cur = (const unsigned char *) p;
    const unsigned char * const p_end = p_cur + n;
    size_t hash = *p_cur % KR_MODULUS;

    /* Reducing after every char keeps the value below KR_MODULUS, so the
     * shift cannot overflow */
    for (p_cur++; p_cur < p_end; p_cur++) {
        hash = ((hash << 8) + *p_cur) % KR_MODULUS;
    } /* end of loop hashing chars */

    return hash;
} /* end of function kr_hash */



/* This function locates the next substring match. It is intended to be called
 * as part of the scanning of a string for a substring
 *
 * Parameters
 * n_char_pattern: Length of pattern to find
 * p_pattern: Pattern to find. Need not be null-terminated
 * pp_string: Address containing the current location in the string. Updated
 *      if a match is found.
 * p_last: Address of last possible location of a match
 * msb_factor: Constant related to hash update, as returned by
 *      get_kr_msb_factor(n_char_pattern)
 * h_pattern: Computed hash of pattern
 * p_h_string: Address containing the current hash value of the location
 *      in the string being considered. It is updated if a match is found.
 *
 * Return value
 * NULL if no substring, or the address of the substring if one exists.
 *
 * Remarks
 * The current location itself is not tested. The function steps to the
 * next location before it compares, so the caller should ensure that the
 * current location is before p_last. Nothing is examined if p_last is
 * before the current location.
 */
static inline const char *next_substr(
        size_t n_char_pattern, const char *p_pattern,
        const char **pp_string, const char * const p_last,
        const size_t msb_factor, const size_t h_pattern, size_t *p_h_string)
{
    const char *p_string = *pp_string;
    size_t h_string = *p_h_string;

    /* Already beyond the last location, so there is nothing to scan */
    if (p_last < p_string) {
        return (char *) NULL;
    }

    for ( ; ; ) {
        /* Update hash for next starting point at p_string + 1. The
         * contribution of the char that leaves the window is reduced
         * modulo KR_MODULUS and subtracted from h_string + KR_MODULUS so
         * that the difference cannot become negative. Chars are used as
         * unsigned values to agree with kr_hash(). */
        const size_t ch_out = (unsigned char) p_string[0];
        const size_t ch_in = (unsigned char) p_string[n_char_pattern];
        const size_t h_out = (ch_out * msb_factor) % KR_MODULUS;
        h_string = (((h_string + KR_MODULUS - h_out) << 8) + ch_in) %
                KR_MODULUS;
        ++p_string; /* step to next starting point */

        /* Compare at current starting point. If hashes match,
         * do full compare */
        if (h_pattern == h_string &&
                memcmp(p_pattern, p_string, n_char_pattern) == 0) {
            *pp_string = p_string; /* Update string location */
            *p_h_string = h_string; /* and hash for another call */
            return p_string; /* match here */
        }

        /* Exit with no match if at (or beyond) last starting point */
        if (p_string >= p_last) {
            return (char *) NULL; /* no match found */
        }
    } /* end of loop over starting points in string */
} /* end of function next_substr */



/* This function returns true if '\0' is among the n characters at p and
 * false otherwise. */
static inline bool have_null(size_t n, const char *p)
{
    return memchr(p, '\0', n) != NULL;
} /* end of function have_null */



/* This function "finds a needle in a haystack" aka the first occurrence of
 * any character of needle in haystack. NULL is returned if none is found.
 * haystack must be terminated with '\0'.
 *
 * Parameters
 * haystack: Null-terminated string to search
 * n_needle: Number of characters at p_needle
 * p_needle: Characters to locate
 *
 * Remarks
 * p_needle does not need to be null terminated. In fact, a null can be
 * included among the characters to be located so that this function will
 * locate the end of haystack if none of the other characters is found and
 * would guarantee that the returned value is not NULL.
 *
 * The case of a '\0' included among the chars to locate is treated as a
 * special case, since no separate test for the end of haystack is then
 * needed.
 */
char *find_first_of(const char *haystack, unsigned int n_needle,
        const char *p_needle)
{
    /* Handle case of nothing to find */
    if (n_needle == 0) {
        return (char *) NULL;
    }

    if (have_null(n_needle, p_needle)) { /* searching for '\0' */
        /* The terminating null of haystack is itself a needle, so the
         * scan always ends at or before it */
        while (memchr(p_needle, *haystack, n_needle) == NULL) {
            ++haystack;
        }
        return (char *) haystack;
    } /* end of case that '\0' among items being located */

    /* Else '\0' is not among the items being located */
    for ( ; *haystack != '\0'; ++haystack) { /* iterate over straws */
        if (memchr(p_needle, *haystack, n_needle) != NULL) {
            return (char *) haystack; /* found needle */
        }
    } /* end of loop over straws in haystack */

    return (char *) NULL; /* entire haystack searched */
} /* end of function find_first_of */



/* This function returns true if any of the first n characters of str is
 * one of '"', '\'' or '\\' and false otherwise. str does not need to be
 * null-terminated. */
bool has_escape_or_quote(size_t n, const char *str)
{
    const char * const str_end = str + n;

    for ( ; str != str_end; ++str) {
        const char ch_cur = *str;
        if (ch_cur == '"' || ch_cur == '\'' || ch_cur == '\\') {
            break;
        }
    } /* end of loop over chars in string */

    /* The loop ended early if and only if such a char was found */
    return str != str_end;
} /* end of function has_escape_or_quote */



/* Converts integer to string. Return the result string. Only 10 radix is
 * supported.
 *
 * Parameters
 * n: Value to convert
 * s: Buffer that receives the digits, a leading '-' if n is negative and
 *      the terminating null. Its size is not known here, so the caller
 *      must provide room for the longest possible value.
 */
char *itoa10(int n, char s[])
{
    /* Work with the magnitude as an unsigned value. INT_MIN has no
     * positive counterpart of type int, so negating n itself is not
     * defined for it. */
    unsigned int magnitude =
            (n < 0) ? 0u - (unsigned int) n : (unsigned int) n;
    int n_char = 0;
    int i_lo, i_hi;

    do { /* generate digits in reverse order */
        s[n_char++] = (char) ('0' + magnitude % 10); /* get next digit */
    } while ((magnitude /= 10) > 0); /* delete it */

    if (n < 0) {
        s[n_char++] = '-';
    }
    s[n_char] = '\0';

    /* reverse string */
    for (i_lo = 0, i_hi = n_char - 1; i_lo < i_hi; i_lo++, i_hi--) {
        const char ch = s[i_lo];
        s[i_lo] = s[i_hi];
        s[i_hi] = ch;
    }

    return s;
} /* end of function itoa10 */