klayout/src/tl/tl/tlString.cc

2044 lines
41 KiB
C++

/*
KLayout Layout Viewer
Copyright (C) 2006-2024 Matthias Koefferlein
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <cstdlib>
#include <cstdio>
#include <cmath>
#include <cstring>
#include <cctype>
#include <limits>
#include <vector>
#include <cwctype>
#include <algorithm>
#include "tlString.h"
#include "tlExpression.h"
#include "tlInternational.h"
namespace tl
{
// The "C" locale. It is imbued into the output streams used for number
// formatting in this file instead of the stream's default locale.
static std::locale c_locale ("C");
// -------------------------------------------------------------------------
// Exception classes
// Builds the exception message from a translated prefix followed by ti.name ().
ExtractorNotImplementedException::ExtractorNotImplementedException (const std::type_info &ti)
: Exception (tl::to_string (tr ("No string extractor available for type: ")) + ti.name ())
{
// .. nothing yet ..
}
// Builds the exception message from a translated prefix followed by ti.name ().
StringConversionException::StringConversionException (const std::type_info &ti)
: Exception (tl::to_string (tr ("No string conversion available for type: ")) + ti.name ())
{
// .. nothing yet ..
}
// -------------------------------------------------------------------------
// lower and upper case for wchar_t and uint32_t
#include "utf_casefolding.h"
// Maps a wide character through the paged table "uc_tab": the bits above the
// low byte select the page, the low byte selects the entry inside the page.
// Characters whose page is outside the table or not populated are returned as is.
wchar_t wdowncase (wchar_t c)
{
  const int page = c >> 8;
  const int pages = int (sizeof (uc_tab) / sizeof (uc_tab[0]));

  if (page < 0 || page >= pages || ! uc_tab[page]) {
    return c;
  }

  return uc_tab[page][c & 0xff];
}
// Maps a wide character through the paged table "lc_tab": the bits above the
// low byte select the page, the low byte selects the entry inside the page.
// Characters whose page is outside the table or not populated are returned as is.
wchar_t wupcase (wchar_t c)
{
  const int page = c >> 8;
  const int pages = int (sizeof (lc_tab) / sizeof (lc_tab[0]));

  if (page < 0 || page >= pages || ! lc_tab[page]) {
    return c;
  }

  return lc_tab[page][c & 0xff];
}
// UTF-32 front end for wdowncase. With a 16 bit wchar_t, code points from
// 0x10000 on do not fit into a wchar_t and are returned unchanged.
uint32_t utf32_downcase (uint32_t c32)
{
  const bool exceeds_wchar = (sizeof (wchar_t) == 2 && c32 >= 0x10000);
  return exceeds_wchar ? c32 : uint32_t (wdowncase (wchar_t (c32)));
}
// UTF-32 front end for wupcase. With a 16 bit wchar_t, code points from
// 0x10000 on do not fit into a wchar_t and are returned unchanged.
uint32_t utf32_upcase (uint32_t c32)
{
  const bool exceeds_wchar = (sizeof (wchar_t) == 2 && c32 >= 0x10000);
  return exceeds_wchar ? c32 : uint32_t (wupcase (wchar_t (c32)));
}
// -------------------------------------------------------------------------
// Conversion of UTF8 to wchar_t
// Decodes one UTF-8 sequence starting at cp and advances cp behind it.
//
// cpe is the end of the buffer or null for a NUL-terminated string. A multi-byte
// form is only decoded if enough bytes follow the lead byte (inside [cp,cpe) or,
// without cpe, before the terminating NUL); otherwise the next shorter form is
// tried and finally the lead byte itself is returned. Continuation bytes are not
// validated.
uint32_t utf32_from_utf8 (const char *&cp, const char *cpe)
{
  const uint32_t lead = (unsigned char) *cp++;

  //  true if "n" more bytes can be read at cp
  auto available = [&] (int n) -> bool {
    if (cpe) {
      return cp + (n - 1) < cpe;
    }
    for (int i = 0; i < n; ++i) {
      if (! cp [i]) {
        return false;
      }
    }
    return true;
  };

  //  payload (lower 6 bits) of the i-th byte following the lead byte
  auto payload = [&] (int i) -> uint32_t {
    return uint32_t (cp [i]) & 0x3f;
  };

  uint32_t c32 = lead;

  if (lead >= 0xf0 && available (3)) {
    c32 = ((lead & 0x7) << 18) | (payload (0) << 12) | (payload (1) << 6) | payload (2);
    cp += 3;
  } else if (lead >= 0xe0 && available (2)) {
    c32 = ((lead & 0xf) << 12) | (payload (0) << 6) | payload (1);
    cp += 2;
  } else if (lead >= 0xc0 && available (1)) {
    c32 = ((lead & 0x1f) << 6) | payload (0);
    cp += 1;
  }

  return c32;
}
// Converts a UTF-8 string into a wide string. With a 16 bit wchar_t, code
// points from 0x10000 on are written as a surrogate pair.
std::wstring to_wstring (const std::string &s)
{
  std::wstring wide;

  const char *p = s.c_str ();
  const char *const end = p + s.size ();

  while (p < end) {
    const uint32_t c32 = utf32_from_utf8 (p, end);
    if (sizeof (wchar_t) == 2 && c32 >= 0x10000) {
      const uint32_t v = c32 - 0x10000;
      wide += wchar_t (0xd800 + (v >> 10));
      wide += wchar_t (0xdc00 + (v & 0x3ff));
    } else {
      wide += wchar_t (c32);
    }
  }

  return wide;
}
// Converts a wide string into a UTF-8 string.
//
// With a 16 bit wchar_t the input is taken as UTF-16: a high surrogate
// (0xD800..0xDBFF) directly followed by a low surrogate (0xDC00..0xDFFF) is
// combined into one code point. All other units - specifically the BMP characters
// 0xE000..0xFFFF and unpaired surrogates - are encoded individually.
std::string to_string (const std::wstring &ws)
{
  std::string s;

  for (std::wstring::const_iterator c = ws.begin (); c != ws.end (); ++c) {

    uint32_t c32 = *c;

    if (sizeof (wchar_t) == 2 && c32 >= 0xd800 && c32 < 0xdc00 && c + 1 != ws.end ()) {
      //  only merge if the next unit really is the low half of the pair
      const uint32_t low = uint32_t (c [1]) & 0xffff;
      if (low >= 0xdc00 && low < 0xe000) {
        ++c;
        c32 = 0x10000 + (((c32 & 0x3ff) << 10) | (low & 0x3ff));
      }
    }

    if (c32 >= 0x10000) {
      s.push_back (char (0xf0 | ((c32 >> 18) & 0x7)));
      s.push_back (char (0x80 | ((c32 >> 12) & 0x3f)));
      s.push_back (char (0x80 | ((c32 >> 6) & 0x3f)));
      s.push_back (char (0x80 | (c32 & 0x3f)));
    } else if (c32 >= 0x800) {
      s.push_back (char (0xe0 | ((c32 >> 12) & 0xf)));
      s.push_back (char (0x80 | ((c32 >> 6) & 0x3f)));
      s.push_back (char (0x80 | (c32 & 0x3f)));
    } else if (c32 >= 0x80) {
      s.push_back (char (0xc0 | ((c32 >> 6) & 0x1f)));
      s.push_back (char (0x80 | (c32 & 0x3f)));
    } else {
      s.push_back (char (c32));
    }

  }

  return s;
}
// -------------------------------------------------------------------------
// safe (assertion-less) versions of isdigit, isalnum, isalpha, isprint and isspace
// (required for debug mode of MSVC)
// The safe_isXXX functions forward to the corresponding <cctype> classifier
// for non-NUL 7 bit characters only. NUL and every character from 0x80 on are
// never classified, so the classifier is not called with such values.

inline bool safe_isdigit (char c)
{
  const unsigned char uc = static_cast<unsigned char> (c);
  return uc != 0 && uc < 0x80 && isdigit (uc) != 0;
}

inline bool safe_isalnum (char c)
{
  const unsigned char uc = static_cast<unsigned char> (c);
  return uc != 0 && uc < 0x80 && isalnum (uc) != 0;
}

inline bool safe_isalpha (char c)
{
  const unsigned char uc = static_cast<unsigned char> (c);
  return uc != 0 && uc < 0x80 && isalpha (uc) != 0;
}

inline bool safe_isprint (char c)
{
  const unsigned char uc = static_cast<unsigned char> (c);
  return uc != 0 && uc < 0x80 && isprint (uc) != 0;
}

inline bool safe_isspace (char c)
{
  const unsigned char uc = static_cast<unsigned char> (c);
  return uc != 0 && uc < 0x80 && isspace (uc) != 0;
}
// -------------------------------------------------------------------------
// Utility: skip a newline
// Advances cp over one line break (LF, CR or CR+LF) and returns true if there
// was one. cp is left untouched otherwise.
bool skip_newline (const char *&cp)
{
  const char lf = '\012';
  const char cr = '\015';

  if (*cp != lf && *cp != cr) {
    return false;
  }

  //  CR+LF counts as a single line break
  cp += (*cp == cr && cp[1] == lf) ? 2 : 1;
  return true;
}
// -------------------------------------------------------------------------
// Utility: case-insensitive compare of the first characters
// Returns true if s1 starts with s2, ignoring case. Both strings are
// NUL-terminated UTF-8.
//
// The token s2 has to be matched entirely: a text ending before the token is
// not a match. Callers advance their read pointer by the token's length after
// a match, so accepting a shorter text would move them behind the terminator.
static bool local_compare (const char *s1, const char *s2)
{
  while (*s1 && *s2) {
    uint32_t c1 = utf32_downcase (utf32_from_utf8 (s1));
    uint32_t c2 = utf32_downcase (utf32_from_utf8 (s2));
    if (c1 != c2) {
      return false;
    }
  }
  return ! *s2;
}
// -------------------------------------------------------------------------
// Utility: strings for special values, resolution-dependent formatting and case/locale conversion
// Text representations of the special floating-point values. They are produced
// by the to_string functions and recognized (case-insensitively) by local_strtod.
static std::string inf_string = "inf";
static std::string ninf_string = "-inf";
static std::string nan_string = "nan";
// printf-style formats used by micron_to_string and db_to_string.
// The number of digits is changed through set_micron_resolution and set_db_resolution.
static std::string micron_format ("%.5f");
static std::string dbu_format ("%.2f");
// Sets the number of decimal digits micron_to_string produces
// (rebuilds the format as "%.<ndigits>f").
void set_micron_resolution (unsigned int ndigits)
{
micron_format = "%." + tl::to_string (ndigits) + "f";
}
// Sets the number of decimal digits db_to_string produces
// (rebuilds the format as "%.<ndigits>f").
void set_db_resolution (unsigned int ndigits)
{
dbu_format = "%." + tl::to_string (ndigits) + "f";
}
// Formats d with the current micron format. NaN and the infinities are
// rendered as "nan", "inf" and "-inf".
std::string micron_to_string (double d)
{
  if (std::isnan (d)) {
    return nan_string;
  }
  if (std::isinf (d)) {
    return (d < 0) ? ninf_string : inf_string;
  }
  return tl::sprintf (micron_format.c_str (), d);
}
// Formats d with the current database unit format. NaN and the infinities are
// rendered as "nan", "inf" and "-inf".
std::string db_to_string (double d)
{
  if (std::isnan (d)) {
    return nan_string;
  }
  if (std::isinf (d)) {
    return (d < 0) ? ninf_string : inf_string;
  }
  return tl::sprintf (dbu_format.c_str (), d);
}
// Returns a copy of the UTF-8 string s with wupcase applied to every wide character.
std::string to_upper_case (const std::string &s)
{
  std::wstring wide = to_wstring (s);
  for (wchar_t &ch : wide) {
    ch = wupcase (ch);
  }
  return to_string (wide);
}
// Returns a copy of the UTF-8 string s with wdowncase applied to every wide character.
std::string to_lower_case (const std::string &s)
{
  std::wstring wide = to_wstring (s);
  for (wchar_t &ch : wide) {
    ch = wdowncase (ch);
  }
  return to_string (wide);
}
// Converts a UTF-8 string into the multi-byte encoding of the current C locale
// (through wctomb). Characters wctomb cannot convert are dropped.
std::string to_local (const std::string &s)
{
  //  MB_CUR_MAX isn't a constant, hence the dynamic buffer
  std::unique_ptr<char []> mb (new char [MB_CUR_MAX]);

  const std::wstring wide = to_wstring (s);

  std::string res;
  for (wchar_t wc : wide) {
    const int n = wctomb (mb.get (), wc);
    if (n > 0) {
      res.append (mb.get (), size_t (n));
    }
  }

  return res;
}
// Converts a NUL-terminated string in the multi-byte encoding of the current
// locale into UTF-8. The conversion stops silently at the first sequence
// mbrtowc does not convert.
std::string to_string_from_local (const char *cp)
{
  mbstate_t state;
  memset ((void *) &state, 0, sizeof (mbstate_t));

  std::wstring wide;

  for (size_t remaining = strlen (cp); remaining > 0; ) {

    wchar_t wc;
    //  NOTE: mbrtowc uses the current LOCALE, hence "local"
    const int n = int (mbrtowc (&wc, cp, remaining, &state));
    if (n < 1) {
      break;
    }

    wide += wc;
    cp += n;
    remaining -= n;

  }

  return to_string (wide);
}
// -------------------------------------------------------------------------
// Utility: a strtod version that is independent of the locale
// Parses a floating-point number starting at cp without using the locale.
//
// Accepted are the special values ("nan", "inf", "-inf", compared through
// local_compare) or an optional sign, integer digits, an optional "." with
// fraction digits and an optional exponent ("e"/"E", optional sign, digits).
// cp_new receives the position behind the consumed text. If nothing was
// consumed, cp_new equals cp and the result is 0.
// NOTE(review): a sign or a "." alone is consumed and yields 0 - callers that
// test "cp_new == cp" treat this as a successful read.
static double local_strtod (const char *cp, const char *&cp_new)
{
const char *cp0 = cp;
// special numerical values
if (local_compare (cp, nan_string.c_str ())) {
cp_new = cp + nan_string.size ();
return NAN;
} else if (local_compare (cp, inf_string.c_str ())) {
cp_new = cp + inf_string.size ();
return INFINITY;
} else if (local_compare (cp, ninf_string.c_str ())) {
cp_new = cp + ninf_string.size ();
return -INFINITY;
}
// Extract sign
double s = 1.0;
if (*cp == '-') {
s = -1.0;
++cp;
} else if (*cp == '+') {
++cp;
}
// Extract upper digits
int exponent = 0;
double mant = 0.0;
while (safe_isdigit (*cp)) {
mant = mant * 10.0 + double (*cp - '0');
++cp;
}
// Extract lower digits: they are accumulated into the mantissa too and
// compensated by decrementing the decimal exponent per digit
if (*cp == '.') {
++cp;
while (safe_isdigit (*cp)) {
mant = mant * 10.0 + double (*cp - '0');
++cp;
--exponent;
}
}
// Extract exponent (unless we're at the beginning)
if (cp != cp0 && (*cp == 'e' || *cp == 'E')) {
++cp;
bool epos = true;
if (*cp == '-') {
epos = false;
++cp;
} else if (*cp == '+') {
++cp;
}
int en = 0;
while (safe_isdigit (*cp)) {
en = en * 10 + int (*cp - '0');
++cp;
}
if (! epos) {
en = -en;
}
exponent += en;
}
cp_new = cp;
// sign * mantissa * 10^(explicit exponent - number of fraction digits)
return s * mant * pow(10.0, exponent);
}
// -------------------------------------------------------------------------
// Implementation
// Converts a double to a string with "prec" significant digits, using the
// "C" locale and the stream's default (general) floating-point notation.
// NaN and the infinities give "nan", "inf" and "-inf".
std::string
to_string (double d, int prec)
{
  if (std::isnan (d)) {
    return nan_string;
  }
  if (std::isinf (d)) {
    return d < 0 ? ninf_string : inf_string;
  }

  // For small values less than 1e-(prec) simply return "0" to avoid ugly values like "1.2321716e-14".
  const double threshold = pow (10.0, -prec);
  if (fabs (d) < threshold) {
    return "0";
  }

  std::ostringstream out;
  out.imbue (c_locale);
  //  clear base and float format flags to get the default notation
  out.setf (std::ios_base::fmtflags (0), std::ios::basefield);
  out.setf (std::ios_base::fmtflags (0), std::ios::floatfield);
  out.precision (prec);
  out << d;

  return out.str ();
}
// Converts a float to a string with "prec" significant digits.
//
// Stream output formats a float after converting it to double, and the
// NaN/infinity/threshold checks give the same result on the widened value.
// Hence this overload forwards to the double overload.
std::string
to_string (float d, int prec)
{
  return to_string (static_cast<double> (d), prec);
}
// Integer specializations of to_string: all of them write the value to a
// string stream imbued with the "C" locale.

// int
template <>
std::string
to_string (const int &d)
{
std::ostringstream os;
os.imbue (c_locale);
os << d;
return os.str ();
}
// unsigned int
template <>
std::string
to_string (const unsigned int &d)
{
std::ostringstream os;
os.imbue (c_locale);
os << d;
return os.str ();
}
// long
template <>
std::string
to_string (const long &d)
{
std::ostringstream os;
os.imbue (c_locale);
os << d;
return os.str ();
}
// long long
template <>
std::string
to_string (const long long &d)
{
std::ostringstream os;
os.imbue (c_locale);
os << d;
return os.str ();
}
// unsigned long
template <>
std::string
to_string (const unsigned long &d)
{
std::ostringstream os;
os.imbue (c_locale);
os << d;
return os.str ();
}
// unsigned long long
template <>
std::string
to_string (const unsigned long long &d)
{
std::ostringstream os;
os.imbue (c_locale);
os << d;
return os.str ();
}
#if defined(HAVE_64BIT_COORD)
// Signed 128 bit specialization: writes the sign and delegates the magnitude
// to the unsigned 128 bit specialization.
template <>
std::string
to_string (const __int128 &d)
{
  if (d < 0) {
    //  Negate in the unsigned domain: "-d" overflows for the most negative value
    //  while the unsigned subtraction wraps to the correct magnitude.
    return "-" + tl::to_string (static_cast<unsigned __int128> (0) - static_cast<unsigned __int128> (d));
  } else {
    return tl::to_string (static_cast<unsigned __int128> (d));
  }
}
// Unsigned 128 bit specialization. The decimal digits are produced from the
// most significant one downwards: for each power of ten "m" the digit is
// found by repeated subtraction.
template <>
std::string
to_string (const unsigned __int128 &_x)
{
std::string r;
unsigned __int128 x = _x;
// this is the max. power of 10 that can be represented with __int128
// (assembled from two 64 bit halves)
unsigned __int128 m = (unsigned long long) 0x4b3b4ca85a86c47a;
m <<= 64;
m |= (unsigned long long) 0x98a224000000000;
if (x == 0) {
return "0";
}
// "first" stays true until the first non-zero digit was written - this suppresses leading zeros
bool first = true;
while (m > 1) {
int d = 0;
while (x >= m) {
d += 1;
x -= m;
}
if (d > 0 || !first) {
r += char ('0' + d);
first = false;
}
m /= 10;
}
// the remainder is the last digit
r += char('0' + int(x));
return r;
}
#endif
// Character pointer specializations of to_string: the pointer is taken as a
// NUL-terminated string and copied into a std::string.
// NOTE(review): there is no null pointer check - presumably callers never pass one.

// char *
template <>
std::string
to_string (char * const &cp)
{
return std::string (cp);
}
// const char *
template <>
std::string
to_string (const char * const &cp)
{
return std::string (cp);
}
// unsigned char *
template <>
std::string
to_string (unsigned char * const &cp)
{
return std::string ((const char *) cp);
}
// const unsigned char *
template <>
std::string
to_string (const unsigned char * const &cp)
{
return std::string ((const char *) cp);
}
// Creates a string from the first "length" characters at cp
std::string
to_string (const char *cp, int length)
{
return std::string (cp, length);
}
// Creates a string from the first "length" characters at cp (unsigned char version)
std::string
to_string (const unsigned char *cp, int length)
{
return std::string ((const char *) cp, length);
}
// bool specialization: gives "true" or "false"
template <>
std::string
to_string (const bool &b)
{
return b ? "true" : "false";
}
// Computes the edit distance between a and b with unit costs for inserting,
// deleting and substituting one character (bytes are compared, not code points).
//
// Only two rows of the dynamic programming matrix are kept: "prev" holds the
// distances for the prefix of b handled so far, "cur" is the row being computed.
int
edit_distance (const std::string &a, const std::string &b)
{
  const size_t na = a.size ();

  std::vector<int> prev (na + 1, 0), cur (na + 1, 0);
  for (size_t j = 0; j <= na; ++j) {
    prev[j] = int (j);
  }

  for (size_t i = 0; i < b.size (); ++i) {

    cur[0] = int (i) + 1;

    for (size_t j = 0; j < na; ++j) {
      const int substitute = prev[j] + (b[i] == a[j] ? 0 : 1);
      const int insert_or_delete = std::min (prev[j + 1], cur[j]) + 1;
      cur[j + 1] = std::min (substitute, insert_or_delete);
    }

    prev.swap (cur);

  }

  return prev[na];
}
// Puts s into single quotes. Quotes and backslashes are escaped with a
// backslash, line feed, carriage return and tab become "\n", "\r" and "\t" and
// every other non-printable or non-ASCII byte becomes a three-digit octal escape.
// The string is processed up to the first NUL character.
std::string
to_quoted_string (const std::string &s)
{
  std::string quoted;
  quoted.reserve (s.size () + 2);

  quoted += '\'';

  for (const char *p = s.c_str (); *p; ++p) {

    const char ch = *p;

    switch (ch) {
    case '\'':
    case '\\':
      quoted += '\\';
      quoted += ch;
      break;
    case '\n':
      quoted += "\\n";
      break;
    case '\r':
      quoted += "\\r";
      break;
    case '\t':
      quoted += "\\t";
      break;
    default:
      if ((unsigned char) ch >= 0x80 || ! safe_isprint (ch)) {
        char oct [20];
        ::sprintf (oct, "\\%03o", int ((unsigned char) ch));
        quoted += oct;
      } else {
        quoted += ch;
      }
      break;
    }

  }

  quoted += '\'';
  return quoted;
}
// Escapes s without quoting it: backslashes are doubled, line feed, carriage
// return and tab become "\n", "\r" and "\t" and every other byte that is not
// printable 7 bit ASCII becomes a three-digit octal escape.
// The string is processed up to the first NUL character.
std::string
escape_string (const std::string &s)
{
  std::string escaped;

  for (const char *p = s.c_str (); *p; ++p) {

    const char ch = *p;

    switch (ch) {
    case '\\':
      escaped += '\\';
      escaped += ch;
      break;
    case '\n':
      escaped += "\\n";
      break;
    case '\r':
      escaped += "\\r";
      break;
    case '\t':
      escaped += "\\t";
      break;
    default:
      if (safe_isprint (ch)) {
        escaped += ch;
      } else {
        char oct [20];
        ::sprintf (oct, "\\%03o", int ((unsigned char) ch));
        escaped += oct;
      }
      break;
    }

  }

  return escaped;
}
// Decodes the escape sequence whose first character (the one after the
// backslash) is at cp and returns the resulting character.
//
// On return, cp points to the LAST character of the sequence - the caller is
// expected to step over it.
//
// Numeric escapes consist of at most three digits. This matches the "\ooo"
// form the escape functions of this file write and makes sure a digit that
// follows such an escape literally is not taken as part of it.
// "r", "n" and "t" give CR, LF and tab, every other character stands for itself.
inline char unescape_char (const char * &cp)
{
  if (*cp >= '0' && *cp <= '9') {

    int c = 0;
    for (int n = 0; n < 3 && *cp >= '0' && *cp <= '9'; ++n) {
      c = c * 8 + int (*cp - '0');
      ++cp;
    }

    //  step back to the last digit consumed
    --cp;
    return char (c);

  } else if (*cp == 'r') {
    return '\r';
  } else if (*cp == 'n') {
    return '\n';
  } else if (*cp == 't') {
    return '\t';
  } else {
    return *cp;
  }
}
// Resolves the backslash escape sequences in "value" (see unescape_char).
// A backslash at the very end is kept literally. The string is processed up
// to the first NUL character.
std::string
unescape_string (const std::string &value)
{
  std::string res;

  const char *p = value.c_str ();
  while (*p) {
    if (*p == '\\' && p[1]) {
      ++p;
      //  leaves p at the last character of the escape sequence
      res += unescape_char (p);
    } else {
      res += *p;
    }
    ++p;
  }

  return res;
}
// Returns s unchanged if it is a plain word and a quoted version otherwise.
//
// A word starts with a letter or one of the "non_term" characters and
// continues with letters, digits or "non_term" characters. Empty strings and
// everything else is quoted.
std::string
to_word_or_quoted_string (const std::string &s, const char *non_term)
{
  const char *p = s.c_str ();

  //  skip over the leading word part - p ends up at the first character which does not fit
  if (*p && (safe_isalpha (*p) || strchr (non_term, *p) != NULL)) {
    ++p;
    while (*p && (safe_isalnum (*p) || strchr (non_term, *p) != NULL)) {
      ++p;
    }
  }

  const bool needs_quotes = (*p || s.empty ());
  return needs_quotes ? to_quoted_string (s) : s;
}
// Appends "in" to "out", replacing "<", ">", "&" and the double quote by the
// corresponding HTML entities. With replace_newlines, line feeds become "<br/>".
// The input is processed up to the first NUL character.
void
escape_to_html (std::string &out, const std::string &in, bool replace_newlines)
{
  for (const char *p = in.c_str (); *p; ++p) {
    switch (*p) {
    case '<':
      out += "&lt;";
      break;
    case '>':
      out += "&gt;";
      break;
    case '&':
      out += "&amp;";
      break;
    case '\"':
      out += "&quot;";
      break;
    case '\n':
      if (replace_newlines) {
        out += "<br/>";
      } else {
        out += *p;
      }
      break;
    default:
      out += *p;
      break;
    }
  }
}
// Convenience version of escape_to_html which returns the escaped text as a new string
std::string
escaped_to_html (const std::string &in, bool replace_newlines)
{
std::string s;
escape_to_html (s, in, replace_newlines);
return s;
}
// Returns s repeated n times (an empty string for n == 0)
std::string
replicate (const std::string &s, unsigned int n)
{
  std::string res;

  if (n > 0) {
    res.reserve (s.size () * n);
    for (unsigned int i = 0; i < n; ++i) {
      res.append (s);
    }
  }

  return res;
}
// Fills up "text" with blanks at the end until it is "columns" bytes long.
// Longer texts are returned unchanged.
std::string
pad_string_right (unsigned int columns, const std::string &text)
{
  const size_t width = size_t (columns);

  std::string padded (text);
  if (padded.size () < width) {
    padded.append (width - padded.size (), ' ');
  }

  return padded;
}
// Puts blanks in front of "text" until the result is "columns" bytes long.
// Longer texts are returned unchanged.
std::string
pad_string_left (unsigned int columns, const std::string &text)
{
  const size_t width = size_t (columns);

  std::string padded;
  if (text.size () < width) {
    padded.assign (width - text.size (), ' ');
  }
  padded += text;

  return padded;
}
// Returns a copy of "subject" with every non-overlapping occurrence of
// "before" (searched from left to right) replaced by "after". With an empty
// "before", the subject is returned unchanged.
std::string
replaced (const std::string &subject, const std::string &before, const std::string &after)
{
  if (before.empty ()) {
    return subject;
  }

  std::string res;
  std::string::size_type from = 0;

  for (std::string::size_type hit = subject.find (before, from);
       hit != std::string::npos;
       hit = subject.find (before, from)) {
    //  text between the previous match and this one, then the replacement
    res.append (subject, from, hit - from);
    res += after;
    from = hit + before.size ();
  }

  //  remaining text behind the last match
  res.append (subject, from, std::string::npos);

  return res;
}
// "Converts" a string to a C string pointer. The result points into s and is
// not a copy.
void
from_string (const std::string &s, const char * &result)
{
result = s.c_str ();
}
// Same for unsigned char pointers. The result points into s and is not a copy.
void
from_string (const std::string &s, const unsigned char * &result)
{
result = (unsigned char *) s.c_str ();
}
// Converts a string to a double value.
//
// Leading and trailing whitespace is ignored. An empty (or blank) string
// raises an exception. If text is left behind the number, the whole string is
// evaluated as an expression when "eval" is set - otherwise an exception is raised.
static void
from_string_numeric (const std::string &s, double &v, bool eval)
{
  const char *p = s.c_str ();
  for ( ; safe_isspace (*p); ++p) {
    ;
  }

  if (*p == 0) {
    throw tl::Exception (tl::to_string (tr ("Got empty string where a real number was expected")));
  }

  const char *rest = p;
  v = local_strtod (p, rest);

  for ( ; safe_isspace (*rest); ++rest) {
    ;
  }

  if (*rest == 0) {
    //  the string was consumed entirely
    return;
  }

  if (! eval) {
    throw tl::Exception (tl::to_string (tr ("Unexpected text after numeric value: '...")) + rest + "'");
  }

  // try using an expression (using a clean environment disables all global features and leaves
  // only some static functions)
  v = tl::Eval (0, 0, false).parse (s).execute ().to_double ();
}
// Converts a string to an integer of type T by reading it as a double
// (see from_string_numeric) and converting that value.
//
// Raises an exception if the value is below or above the range of T or not
// an integer value (this includes NaN).
template <class T>
static void
convert_string_to_int (const std::string &s, T &v, bool eval)
{
  double x;
  // HACK: this should be some real string-to-int conversion
  tl::from_string_numeric (s, x, eval);

  //  NaN passes every range comparison, but cannot be converted to an integer
  if (std::isnan (x)) {
    throw tl::Exception (tl::to_string (tr ("Number cannot be represented precisely: ")) + s);
  }

  if (x < std::numeric_limits <T>::min ()) {
    throw tl::Exception (tl::to_string (tr ("Range underflow: ")) + s);
  }

  //  max() of the 64 bit types is not representable as double and is rounded up to
  //  2^digits. Hence "x > max()" does not catch x == 2^digits which is the first value
  //  that does not fit into T. 2^digits itself is exactly representable.
  const double first_too_large = std::ldexp (1.0, std::numeric_limits <T>::digits);
  if (x > std::numeric_limits <T>::max () || x >= first_too_large) {
    throw tl::Exception (tl::to_string (tr ("Range overflow: ")) + s);
  }

  v = T (x);
  if (x != v) {
    throw tl::Exception (tl::to_string (tr ("Number cannot be represented precisely: ")) + s);
  }
}
// from_string for numeric types: strict conversion (expressions are not
// evaluated). The integer versions go through convert_string_to_int and
// raise an exception for values outside the target type's range.

// double
void
from_string (const std::string &s, double &v)
{
return from_string_numeric (s, v, false);
}
// int
void
from_string (const std::string &s, int &v)
{
convert_string_to_int (s, v, false);
}
// long
void
from_string (const std::string &s, long &v)
{
convert_string_to_int (s, v, false);
}
// long long
void
from_string (const std::string &s, long long &v)
{
convert_string_to_int (s, v, false);
}
// unsigned int
void
from_string (const std::string &s, unsigned int &v)
{
convert_string_to_int (s, v, false);
}
// unsigned long
void
from_string (const std::string &s, unsigned long &v)
{
convert_string_to_int (s, v, false);
}
// unsigned long long
void
from_string (const std::string &s, unsigned long long &v)
{
convert_string_to_int (s, v, false);
}
// from_string_ext for numeric types: like from_string, but a string that is
// not a plain number is evaluated as an expression ("eval" is true).

// double
void
from_string_ext (const std::string &s, double &v)
{
return from_string_numeric (s, v, true);
}
// int
void
from_string_ext (const std::string &s, int &v)
{
convert_string_to_int (s, v, true);
}
// long
void
from_string_ext (const std::string &s, long &v)
{
convert_string_to_int (s, v, true);
}
// long long
void
from_string_ext (const std::string &s, long long &v)
{
convert_string_to_int (s, v, true);
}
// unsigned int
void
from_string_ext (const std::string &s, unsigned int &v)
{
convert_string_to_int (s, v, true);
}
// unsigned long
void
from_string_ext (const std::string &s, unsigned long &v)
{
convert_string_to_int (s, v, true);
}
// unsigned long long
void
from_string_ext (const std::string &s, unsigned long long &v)
{
convert_string_to_int (s, v, true);
}
// Converts a string to a bool: "true" and "1" give true, "false" and "0"
// give false (surrounding whitespace is ignored). Everything else raises an exception.
void
from_string (const std::string &s, bool &b)
{
  const std::string t = tl::trim (s);

  if (t == "true" || t == "1") {
    b = true;
  } else if (t == "false" || t == "0") {
    b = false;
  } else {
    throw tl::Exception (tl::to_string (tr ("Invalid boolean value: ")) + s);
  }
}
// Splits t at every occurrence of the separator s and returns the parts.
// Empty parts are kept, so the result has one element more than there are
// separators.
//
// An empty separator never splits: the result is the single element t.
// (find() reports an empty string at every position without making progress,
// so searching for it would not terminate.)
std::vector<std::string>
split (const std::string &t, const std::string &s)
{
  std::vector<std::string> r;

  if (s.empty ()) {
    r.push_back (t);
    return r;
  }

  size_t p = 0;
  for (size_t pp = 0; (pp = t.find (s, p)) != std::string::npos; p = pp + s.size ()) {
    r.push_back (std::string (t, p, pp - p));
  }

  //  the part behind the last separator
  r.push_back (std::string (t, p));
  return r;
}
// Returns s without leading and trailing (7 bit ASCII) whitespace
std::string
trim (const std::string &s)
{
  const char *first = s.c_str ();
  const char *last = first + s.size ();

  for ( ; safe_isspace (*first); ++first) {
    ;
  }
  for ( ; last > first && safe_isspace (last [-1]); --last) {
    ;
  }

  return std::string (first, last - first);
}
// -------------------------------------------------------------------
// tl::Extractor implementation
// Creates an extractor reading from the C string s. The pointer is stored,
// the text is not copied.
Extractor::Extractor (const char *s)
: m_cp (s)
{
// .. nothing yet ..
}
// Creates an extractor reading from a private copy of str
Extractor::Extractor (const std::string &str)
: m_str (str)
{
m_cp = m_str.c_str ();
}
// The read methods are the throwing counterparts of the try_read methods:
// they call the corresponding try_read method and raise an error (see
// Extractor::error) with a type-specific message if that returns false.
// All of them return *this.

// unsigned int
Extractor &
Extractor::read (unsigned int &value)
{
if (! try_read (value)) {
error (tl::to_string (tr ("Expected an unsigned integer value")));
}
return *this;
}
// unsigned char
Extractor &
Extractor::read (unsigned char &value)
{
if (! try_read (value)) {
error (tl::to_string (tr ("Expected an unsigned byte value")));
}
return *this;
}
// unsigned long
Extractor &
Extractor::read (unsigned long &value)
{
if (! try_read (value)) {
error (tl::to_string (tr ("Expected an unsigned long integer value")));
}
return *this;
}
// unsigned long long (uses the same message as unsigned long)
Extractor &
Extractor::read (unsigned long long &value)
{
if (! try_read (value)) {
error (tl::to_string (tr ("Expected an unsigned long integer value")));
}
return *this;
}
// double
Extractor &
Extractor::read (double &value)
{
if (! try_read (value)) {
error (tl::to_string (tr ("Expected a real number")));
}
return *this;
}
// float
Extractor &
Extractor::read (float &value)
{
if (! try_read (value)) {
error (tl::to_string (tr ("Expected a real number")));
}
return *this;
}
// int
Extractor &
Extractor::read (int &value)
{
if (! try_read (value)) {
error (tl::to_string (tr ("Expected an integer value")));
}
return *this;
}
// long
Extractor &
Extractor::read (long &value)
{
if (! try_read (value)) {
error (tl::to_string (tr ("Expected a long integer value")));
}
return *this;
}
// long long (uses the same message as long)
Extractor &
Extractor::read (long long &value)
{
if (! try_read (value)) {
error (tl::to_string (tr ("Expected a long integer value")));
}
return *this;
}
// bool
Extractor &
Extractor::read (bool &value)
{
if (! try_read (value)) {
error (tl::to_string (tr ("Expected a boolean value ('true', 'false')")));
}
return *this;
}
// string up to one of the "term" characters
Extractor &
Extractor::read (std::string &value, const char *term)
{
if (! try_read (value, term)) {
error (tl::to_string (tr ("Expected a string")));
}
return *this;
}
// word (see try_read_word)
Extractor &
Extractor::read_word (std::string &value, const char *non_term)
{
if (! try_read_word (value, non_term)) {
error (tl::to_string (tr ("Expected a word string")));
}
return *this;
}
// name (see try_read_name)
Extractor &
Extractor::read_name (std::string &value, const char *non_term)
{
if (! try_read_name (value, non_term)) {
error (tl::to_string (tr ("Expected a name string")));
}
return *this;
}
// word or - if there is no word - quoted string
Extractor &
Extractor::read_word_or_quoted (std::string &value, const char *non_term)
{
if (! try_read_word (value, non_term) && ! try_read_quoted (value)) {
error (tl::to_string (tr ("Expected a word or quoted string")));
}
return *this;
}
// quoted string (see try_read_quoted)
Extractor &
Extractor::read_quoted (std::string &value)
{
if (! try_read_quoted (value)) {
error (tl::to_string (tr ("Expected a quoted string")));
}
return *this;
}
namespace
{
// A function object delivering the translated "range overflow" message for
// the integer type T. Only the specializations below are defined, so using
// it with another type does not compile.
template <class T> struct overflow_msg_func;
template <> struct overflow_msg_func<long long>
{
std::string operator() () const
{
return tl::to_string (tr ("Range overflow on long long integer"));
}
};
template <> struct overflow_msg_func<unsigned long long>
{
std::string operator() () const
{
return tl::to_string (tr ("Range overflow on unsigned long long integer"));
}
};
template <> struct overflow_msg_func<long>
{
std::string operator() () const
{
return tl::to_string (tr ("Range overflow on long integer"));
}
};
template <> struct overflow_msg_func<unsigned long>
{
std::string operator() () const
{
return tl::to_string (tr ("Range overflow on unsigned long integer"));
}
};
template <> struct overflow_msg_func<int>
{
std::string operator() () const
{
return tl::to_string (tr ("Range overflow on integer"));
}
};
template <> struct overflow_msg_func<unsigned int>
{
std::string operator() () const
{
return tl::to_string (tr ("Range overflow on unsigned integer"));
}
};
template <> struct overflow_msg_func<unsigned char>
{
std::string operator() () const
{
return tl::to_string (tr ("Range overflow on unsigned byte"));
}
};
}
// Tries to read a signed integer of type T (optional sign followed by digits).
//
// Returns false if there is no integer at the current position. In that case
// nothing but leading whitespace is consumed - specifically a sign which is
// not followed by a digit stays in the text.
// Raises an exception if the number does not fit into T.
//
// Negative numbers are accumulated as negative values: this way the most
// negative value of T - whose magnitude is not representable in T - can be read.
template <class T> bool
Extractor::try_read_signed_int (T &value)
{
  if (! *skip ()) {
    return false;
  }

  const char *cp0 = m_cp;

  bool minus = false;
  if (*m_cp == '-') {
    minus = true;
    ++m_cp;
  } else if (*m_cp == '+') {
    ++m_cp;
  }

  if (! safe_isdigit (*m_cp)) {
    //  not a number: give back the sign
    m_cp = cp0;
    return false;
  }

  const T vmin = std::numeric_limits<T>::min ();
  const T vmax = std::numeric_limits<T>::max ();

  value = 0;
  while (safe_isdigit (*m_cp)) {

    const T digit = T (*m_cp - '0');

    if (minus) {
      if (value < vmin / 10) {
        throw tl::Exception (overflow_msg_func<T> () ());
      }
      value *= 10;
      if (value < vmin + digit) {
        throw tl::Exception (overflow_msg_func<T> () ());
      }
      value -= digit;
    } else {
      if (value > vmax / 10) {
        throw tl::Exception (overflow_msg_func<T> () ());
      }
      value *= 10;
      if (value > vmax - digit) {
        throw tl::Exception (overflow_msg_func<T> () ());
      }
      value += digit;
    }

    ++m_cp;

  }

  return true;
}
// Tries to read an unsigned integer of type T (a sequence of digits).
// Returns false if the text (after whitespace) does not start with a digit.
// Raises an exception if the number does not fit into T.
template <class T> bool
Extractor::try_read_unsigned_int (T &value)
{
  if (! *skip () || ! safe_isdigit (*m_cp)) {
    return false;
  }

  value = 0;
  for ( ; safe_isdigit (*m_cp); ++m_cp) {

    //  check before each step so the value never exceeds the range of T
    if (value > std::numeric_limits<T>::max () / 10) {
      throw tl::Exception (overflow_msg_func<T> () ());
    }
    value *= 10;

    if (value > std::numeric_limits<T>::max () - (*m_cp - '0')) {
      throw tl::Exception (overflow_msg_func<T> () ());
    }
    value += (*m_cp - '0');

  }

  return true;
}
// try_read for the integer types: these forward to the unsigned or signed
// template implementation and return false if no number could be read.

// unsigned char
bool
Extractor::try_read (unsigned char &value)
{
return try_read_unsigned_int (value);
}
// unsigned int
bool
Extractor::try_read (unsigned int &value)
{
return try_read_unsigned_int (value);
}
// unsigned long
bool
Extractor::try_read (unsigned long &value)
{
return try_read_unsigned_int (value);
}
// unsigned long long
bool
Extractor::try_read (unsigned long long &value)
{
return try_read_unsigned_int (value);
}
// int
bool
Extractor::try_read (int &value)
{
return try_read_signed_int (value);
}
// long
bool
Extractor::try_read (long &value)
{
return try_read_signed_int (value);
}
// long long
bool
Extractor::try_read (long long &value)
{
return try_read_signed_int (value);
}
// float: reads a double and narrows it. "value" is only modified on success.
bool
Extractor::try_read (float &value)
{
double d = value;
if (try_read (d)) {
value = d;
return true;
} else {
return false;
}
}
// Tries to read a floating-point value (see local_strtod for the syntax).
// Returns false if the text has ended or no character was consumed by the
// number parser. Note that "value" is overwritten in the latter case too.
bool
Extractor::try_read (double &value)
{
  if (! *skip ()) {
    return false;
  }

  const char *next = m_cp;
  value = local_strtod (m_cp, next);

  const bool consumed = (next != m_cp);
  if (consumed) {
    m_cp = next;
  }
  return consumed;
}
// Tries to read a boolean value: "0" or "false" give false, "1" or "true"
// give true. The tokens are matched with test(), i.e. as text prefixes.
bool
Extractor::try_read (bool &value)
{
  const bool is_false = test ("0") || test ("false");
  const bool is_true = ! is_false && (test ("1") || test ("true"));

  if (! is_false && ! is_true) {
    return false;
  }

  value = is_true;
  return true;
}
// Tries to read a name: a letter or "non_term" character followed by any
// number of letters, digits or "non_term" characters.
// Returns false if the text has ended ("string" is untouched then) or does not
// start with a valid first character ("string" is empty then).
bool
Extractor::try_read_name (std::string &string, const char *non_term)
{
  if (! *skip ()) {
    return false;
  }

  string.clear ();

  // first character must not be a digit
  const bool valid_start = *m_cp && (safe_isalpha (*m_cp) || strchr (non_term, *m_cp) != NULL);
  if (! valid_start) {
    return false;
  }

  string += *m_cp++;

  for ( ; *m_cp && (safe_isalnum (*m_cp) || strchr (non_term, *m_cp) != NULL); ++m_cp) {
    string += *m_cp;
  }

  return ! string.empty ();
}
// Tries to read a word: a non-empty sequence of letters, digits or
// "non_term" characters. Unlike a name, a word may start with a digit.
// Returns false if the text has ended ("string" is untouched then) or no
// word character follows ("string" is empty then).
bool
Extractor::try_read_word (std::string &string, const char *non_term)
{
  if (! *skip ()) {
    return false;
  }

  string.clear ();

  for ( ; *m_cp && (safe_isalnum (*m_cp) || strchr (non_term, *m_cp) != NULL); ++m_cp) {
    string += *m_cp;
  }

  return ! string.empty ();
}
// Tries to read a word first and - if there is none - a quoted string
bool
Extractor::try_read_word_or_quoted (std::string &string, const char *non_term)
{
return try_read_word (string, non_term) || try_read_quoted (string);
}
// Tries to read a string enclosed in single or double quotes.
// Returns false (consuming whitespace only) if the text does not start with
// a quote character. Backslash escape sequences are resolved through
// unescape_char. A missing closing quote is tolerated: the string then
// extends to the end of the text.
bool
Extractor::try_read_quoted (std::string &string)
{
char q = *skip ();
if (q != '\'' && q != '\"') {
return false;
}
++m_cp;
string.clear ();
while (*m_cp && *m_cp != q) {
if (*m_cp == '\\' && m_cp[1]) {
++m_cp;
// unescape_char leaves m_cp at the last character of the escape sequence -
// the increment below steps over it
string += unescape_char (m_cp);
} else {
string += *m_cp;
}
++m_cp;
}
// skip the closing quote if there is one
if (*m_cp == q) {
++m_cp;
}
return true;
}
// Tries to read a string up to (not including) one of the "term" characters.
// Whitespace terminates the string too, unless "term" itself contains a
// whitespace character. Returns false if the text has ended. The string read
// may be empty.
bool
Extractor::try_read (std::string &string, const char *term)
{
// if the terminating characters contain line feed or blank, we must not skip over them
if (strchr (term, '\n') || strchr (term, ' ')) {
while (safe_isspace (*m_cp) && strchr (term, *m_cp) == 0) {
++m_cp;
}
if (! *m_cp) {
return false;
}
} else if (! *skip ()) {
return false;
}
// term_is_space: true, if at least one of the terminating characters is whitespace
bool term_is_space = false;
for (const char *t = term; *t && ! term_is_space; ++t) {
term_is_space = safe_isspace (*t);
}
string.clear ();
while (*m_cp && (term_is_space || ! safe_isspace (*m_cp)) && strchr (term, *m_cp) == NULL) {
string += *m_cp;
++m_cp;
}
return true;
}
// Raises an error unless at_end () reports the end of the text
Extractor &
Extractor::expect_end ()
{
if (! at_end ()) {
error (tl::to_string (tr ("Expected end of text")));
}
return *this;
}
// Raises an error if at_end () reports the end of the text
Extractor &
Extractor::expect_more ()
{
if (at_end ()) {
error (tl::to_string (tr ("Expected more text")));
}
return *this;
}
// Consumes the given token or raises an error if it is not present (see test)
Extractor &
Extractor::expect (const char *token)
{
if (! test (token)) {
error (tl::sprintf (tl::to_string (tr ("Expected '%s'")).c_str (), token));
}
return *this;
}
bool
Extractor::test (const char *token)
{
skip ();
const char *cp = m_cp;
while (*cp && *token) {
if (*cp != *token) {
return false;
}
++cp;
++token;
}
if (! *token) {
m_cp = cp;
return true;
} else {
return false;
}
}
bool
Extractor::test_without_case (const char *token)
{
skip ();
const char *cp = m_cp;
while (*cp && *token) {
uint32_t c = utf32_downcase (utf32_from_utf8 (cp));
uint32_t ct = utf32_downcase (utf32_from_utf8 (token));
if (c != ct) {
return false;
}
}
if (! *token) {
m_cp = cp;
return true;
} else {
return false;
}
}
// Skips whitespace and returns the new read position
// (which points to the terminating NUL if the text has ended)
const char *
Extractor::skip ()
{
while (safe_isspace (*m_cp)) {
++m_cp;
}
return m_cp;
}
// Raises a tl::Exception with the given message plus some context: either a
// note that the text has ended or up to 10 bytes of the text at the current
// position (followed by " .." if more text follows).
void
Extractor::error (const std::string &msg) const
{
  std::string m (msg);

  if (*m_cp) {

    m += tl::to_string (tr (" here: "));

    const char *p = m_cp;
    unsigned int n = 0;
    while (n < 10 && *p) {
      m += *p;
      ++p;
      ++n;
    }

    if (*p) {
      m += " ..";
    }

  } else {
    m += tl::to_string (tr (", but text ended"));
  }

  throw tl::Exception (m);
}
// -------------------------------------------------------------------
// string implementation
// tl::string constructors. The representation is a buffer of m_capacity + 1
// characters (mp_rep) holding m_size characters plus the terminating NUL.
// Empty strings do not allocate a buffer (mp_rep is null).

// Creates a string from a C string (null and "" both give an empty string)
string::string (const char *c)
{
if (c && *c) {
m_capacity = m_size = strlen (c);
allocator_t alloc;
mp_rep = alloc.allocate (m_capacity + 1);
strcpy (mp_rep, c);
} else {
mp_rep = 0;
m_capacity = m_size = 0;
}
}
// Creates a string from the characters [from,to) of the C string c
// NOTE(review): "to >= from" is not checked - presumably a precondition.
string::string (const char *c, size_t from, size_t to)
{
m_capacity = m_size = to - from;
if (m_size > 0) {
allocator_t alloc;
mp_rep = alloc.allocate (m_capacity + 1);
strncpy (mp_rep, c + from, m_size);
mp_rep [m_size] = 0;
} else {
mp_rep = 0;
}
}
// Copy constructor. The capacity of the copy is the size of s.
string::string (const tl::string &s)
{
m_capacity = m_size = s.size ();
if (m_size > 0) {
allocator_t alloc;
mp_rep = alloc.allocate (m_capacity + 1);
strncpy (mp_rep, s.c_str (), m_size);
mp_rep [m_size] = 0;
} else {
mp_rep = 0;
}
}
// Creates a string from the characters [from,to) of s
string::string (const tl::string &s, size_t from, size_t to)
{
m_capacity = m_size = to - from;
if (m_size > 0) {
allocator_t alloc;
mp_rep = alloc.allocate (m_capacity + 1);
strncpy (mp_rep, s.c_str () + from, m_size);
mp_rep [m_size] = 0;
} else {
mp_rep = 0;
}
}
// Creates a string from a std::string
string::string (const std::string &s)
{
m_capacity = m_size = s.size ();
if (m_size > 0) {
allocator_t alloc;
mp_rep = alloc.allocate (m_capacity + 1);
strncpy (mp_rep, s.c_str (), m_size);
mp_rep [m_size] = 0;
} else {
mp_rep = 0;
}
}
// Creates a string from the characters [from,to) of the std::string s
string::string (const std::string &s, size_t from, size_t to)
{
m_capacity = m_size = to - from;
if (m_size > 0) {
allocator_t alloc;
mp_rep = alloc.allocate (m_capacity + 1);
strncpy (mp_rep, s.c_str () + from, m_size);
mp_rep [m_size] = 0;
} else {
mp_rep = 0;
}
}
// Releases the buffer (if there is one)
string::~string ()
{
if (mp_rep) {
allocator_t alloc;
alloc.deallocate (mp_rep, m_capacity + 1);
}
mp_rep = 0;
}
// Assignment of a C string. Assigning null or "" makes the string empty, but
// keeps the buffer.
string &
string::operator= (const char *c)
{
if (c && *c) {
assign (c, 0, strlen (c));
} else {
m_size = 0;
if (mp_rep) {
mp_rep [0] = 0;
}
}
return *this;
}
// Assigns the characters [from,to) of the C string c.
// The buffer is reused if it is large enough and replaced otherwise
// (the old content is not needed in that case, so it's not copied).
void
string::assign (const char *c, size_t from, size_t to)
{
m_size = to - from;
if (m_size > 0) {
if (m_capacity < m_size) {
allocator_t alloc;
if (mp_rep) {
alloc.deallocate (mp_rep, m_capacity + 1);
}
mp_rep = alloc.allocate (m_size + 1);
m_capacity = m_size;
}
strncpy (mp_rep, c + from, m_size);
mp_rep [m_size] = 0;
} else {
if (mp_rep) {
mp_rep [0] = 0;
}
}
}
// Copy assignment. Self-assignment does nothing. The buffer is reused if it
// is large enough.
string &
string::operator= (const string &s)
{
if (&s != this) {
m_size = s.size ();
if (m_size > 0) {
if (m_capacity < m_size) {
allocator_t alloc;
if (mp_rep) {
alloc.deallocate (mp_rep, m_capacity + 1);
}
mp_rep = alloc.allocate (m_size + 1);
m_capacity = m_size;
}
strncpy (mp_rep, s.mp_rep, m_size);
mp_rep [m_size] = 0;
} else {
if (mp_rep) {
mp_rep [0] = 0;
}
}
}
return *this;
}
// Assigns the characters [from,to) of s. Assigning a part of the string to
// itself goes through a temporary, as the source buffer must not be
// overwritten while it is read.
void
string::assign (const tl::string &s, size_t from, size_t to)
{
if (&s != this) {
assign (s.c_str (), from, to);
} else if (from != 0 || to != m_size) {
tl::string substr (s, from, to);
swap (substr);
}
}
// Assignment of a std::string
string &
string::operator= (const std::string &s)
{
assign (s.c_str (), 0, s.size ());
return *this;
}
// Assigns the characters [from,to) of the std::string s
void
string::assign (const std::string &s, size_t from, size_t to)
{
assign (s.c_str (), from, to);
}
// Makes the string empty and releases the buffer
void
string::clear ()
{
if (mp_rep) {
allocator_t alloc;
alloc.deallocate (mp_rep, m_capacity + 1);
mp_rep = 0;
}
m_size = 0;
m_capacity = 0;
}
// Makes sure the buffer can hold at least n characters (plus the terminating
// NUL). The content is kept. The capacity never shrinks.
void
string::reserve (size_t n)
{
  if (m_capacity < n) {

    allocator_t alloc;
    char *nrep = alloc.allocate (n + 1);

    //  An empty string may not have a buffer yet: there is nothing to copy then.
    if (mp_rep) {
      strncpy (nrep, mp_rep, m_size);
      alloc.deallocate (mp_rep, m_capacity + 1);
    }

    //  strncpy does not write a terminator if the source has m_size characters,
    //  hence it is set explicitly (m_size <= m_capacity < n, so the index is valid)
    nrep [m_size] = 0;

    mp_rep = nrep;
    m_capacity = n;

  }
}
//  Equality with a C string: first characters are compared before the full strcmp.
bool
string::operator== (const char *c) const
{
  const char *self = c_str ();
  if (*c != *self) {
    return false;
  }
  return strcmp (c, self) == 0;
}
//  Equality with another tl::string: first characters are compared before the full strcmp.
bool
string::operator== (const tl::string &s) const
{
  const char *lhs = c_str ();
  const char *rhs = s.c_str ();
  if (*lhs != *rhs) {
    return false;
  }
  return strcmp (lhs, rhs) == 0;
}
//  Inequality with a C string: differing first characters decide without a full strcmp.
bool
string::operator!= (const char *c) const
{
  const char *self = c_str ();
  if (*c != *self) {
    return true;
  }
  return strcmp (c, self) != 0;
}
//  Inequality with another tl::string: differing first characters decide without a full strcmp.
bool
string::operator!= (const tl::string &s) const
{
  const char *lhs = c_str ();
  const char *rhs = s.c_str ();
  if (*lhs != *rhs) {
    return true;
  }
  return strcmp (lhs, rhs) != 0;
}
//  "Less than" a C string, in strcmp order.
bool
string::operator< (const char *c) const
{
  const int order = strcmp (c_str (), c);
  return order < 0;
}
//  "Less than" another tl::string, in strcmp order.
bool
string::operator< (const tl::string &s) const
{
  const int order = strcmp (c_str (), s.c_str ());
  return order < 0;
}
//  "Less than or equal to" a C string, in strcmp order.
bool
string::operator<= (const char *c) const
{
  const int order = strcmp (c_str (), c);
  return order <= 0;
}
//  "Less than or equal to" another tl::string, in strcmp order.
bool
string::operator<= (const tl::string &s) const
{
  const int order = strcmp (c_str (), s.c_str ());
  return order <= 0;
}
//  "Greater than" a C string, in strcmp order.
bool
string::operator> (const char *c) const
{
  const int order = strcmp (c_str (), c);
  return order > 0;
}
//  "Greater than" another tl::string, in strcmp order.
bool
string::operator> (const tl::string &s) const
{
  const int order = strcmp (c_str (), s.c_str ());
  return order > 0;
}
//  "Greater than or equal to" a C string, in strcmp order.
bool
string::operator>= (const char *c) const
{
  const int order = strcmp (c_str (), c);
  return order >= 0;
}
//  "Greater than or equal to" another tl::string, in strcmp order.
bool
string::operator>= (const tl::string &s) const
{
  const int order = strcmp (c_str (), s.c_str ());
  return order >= 0;
}
// -------------------------------------------------------------------
// sprintf implementation
#if defined(_STLPORT_VERSION) && _STLPORT_VERSION == 0x521 && defined(_MSC_VER)
/**
 *  @brief Workaround for STLPort 5.2.1 bug with scientific formatting
 *  In that version, the scientific formatting produces one digit less precision
 *  and uses 0 for the last digit.
 *  To work around that problem we first create one digit too much and delete the
 *  trailing '0' in front of 'E' or 'e'.
 *
 *  @param f The value to format
 *  @param prec The precision wanted (the stream is set to prec + 1)
 *  @param flags The format flags to apply to the stream
 *  @return The formatted value with the '0' in front of the exponent character removed
 */
std::string format_sci_stlport_fix (double f, int prec, unsigned int flags)
{
  std::ostringstream os;
  os.setf (flags);
  os.precision (prec + 1);
  os << f;

  //  os.str () returns a temporary string - it must be kept in a named object
  //  as otherwise the character pointer used below dangles.
  const std::string formatted = os.str ();

  std::string res;
  res.reserve (formatted.size ());
  for (const char *cp = formatted.c_str (); *cp; ++cp) {
    //  skip a '0' which is directly followed by the exponent character
    if (*cp == '0' && (cp[1] == 'e' || cp[1] == 'E')) {
      ++cp;
    }
    res += *cp;
  }
  return res;
}
#endif
/**
 *  @brief A printf-like formatting function taking the arguments from a variant list
 *
 *  The output is produced through a string stream imbued with the "C" locale.
 *  "%%" produces a single '%'. Every other '%' starts a directive of the form
 *
 *    %[-][0][width][.precision][l[l]]<conversion>
 *
 *  with "-" for left alignment, "0" for zero fill and one of these conversions
 *  (upper and lower case are accepted):
 *    c        the argument's to_long () value as a character
 *    x, X     to_ulong () in hex ("X" gives upper case digits)
 *    u        to_ulong () in decimal
 *    d        to_long () in decimal
 *    s        to_string ()
 *    g, G     to_double () in the stream's default float format ("G": upper case)
 *    e, E     to_double () in scientific format ("E": upper case)
 *    f        to_double () in fixed format
 *
 *  Every directive consumes one argument slot, even if the conversion character
 *  is not known. A directive whose argument is missing produces no output.
 *
 *  @param f The format string
 *  @param vv The arguments
 *  @param a0 The index of the first argument in vv to use
 *  @return The formatted string
 */
std::string
sprintf (const char *f, const std::vector <tl::Variant> &vv, unsigned int a0)
{
  std::ostringstream os;
  os.imbue (c_locale);

  //  the precision to go back to for directives without an explicit one
  int def_prec = os.precision();

  unsigned int a = a0;

  for (const char *cp = f; *cp; ) {

    if (*cp == '%' && cp[1] == '%') {

      os << '%';
      cp += 2;

    } else if (*cp == '%') {

      ++cp;

      //  alignment flag
      if (*cp == '-') {
        ++cp;
        os << std::left;
      } else {
        os << std::right;
      }

      //  fill character flag
      if (*cp == '0') {
        ++cp;
        os.fill('0');
      } else {
        os.fill(' ');
      }

      //  field width
      unsigned int width = 0;
      while (safe_isdigit (*cp) && *cp) {
        width = (width * 10) + (unsigned int)(*cp - '0');
        ++cp;
      }
      os.width(width);

      //  optional precision
      if (*cp == '.') {
        ++cp;
        unsigned int prec = 0;
        while (safe_isdigit (*cp) && *cp) {
          prec = (prec * 10) + (unsigned int)(*cp - '0');
          ++cp;
        }
        os.precision(prec);
      } else {
        os.precision(def_prec);
      }

      // allow up to two 'l' for compatibility
      if (*cp == 'l') {
        ++cp;
        if (*cp == 'l') {
          ++cp;
        }
      }

      if (*cp == 'c' || *cp == 'C') {
        if (a < vv.size ()) {
          os << char (vv [a].to_long ());
        }
      } else if (*cp == 'x' || *cp == 'X') {
        //  the mask includes "uppercase" so a previous "X" or "E" does not stick
        os.setf (std::ios::hex, std::ios::basefield | std::ios::uppercase);
        if (*cp == 'X') {
          os.setf (std::ios::uppercase);
        }
        if (a < vv.size ()) {
          os << vv [a].to_ulong ();
        }
      } else if (*cp == 'u' || *cp == 'U') {
        os.setf (std::ios_base::fmtflags (0), std::ios::basefield);
        if (a < vv.size ()) {
          os << vv [a].to_ulong ();
        }
      } else if (*cp == 'd' || *cp == 'D') {
        os.setf (std::ios_base::fmtflags (0), std::ios::basefield);
        if (a < vv.size ()) {
          os << vv [a].to_long ();
        }
      } else if (*cp == 's' || *cp == 'S') {
        os.setf (std::ios_base::fmtflags (0), std::ios::basefield);
        if (a < vv.size ()) {
          os << vv [a].to_string ();
        }
      } else if (*cp == 'g' || *cp == 'G') {
        os.setf (std::ios_base::fmtflags (0), std::ios::floatfield | std::ios::basefield | std::ios::uppercase);
        if (*cp == 'G') {
          os.setf (std::ios::uppercase);
        }
        if (a < vv.size ()) {
          os << vv [a].to_double ();
        }
      } else if (*cp == 'e' || *cp == 'E') {
        os.setf (std::ios::scientific, std::ios::floatfield | std::ios::basefield | std::ios::uppercase);
        if (*cp == 'E') {
          os.setf (std::ios::uppercase);
        }
        if (a < vv.size ()) {
#if defined(_STLPORT_VERSION) && _STLPORT_VERSION == 0x521 && defined(_MSC_VER)
          os << format_sci_stlport_fix (vv [a].to_double (), os.precision (), os.flags ()).c_str ();
#else
          os << vv [a].to_double ();
#endif
        }
      } else if (*cp == 'f' || *cp == 'F') {
        os.setf (std::ios::fixed, std::ios::floatfield | std::ios::basefield);
        if (a < vv.size ()) {
          os << vv [a].to_double ();
        }
      }

      //  A formatted output resets the field width. If nothing was written (missing
      //  argument or unknown conversion), the width would still be pending and pad the
      //  next literal character - hence reset it explicitly.
      os.width (0);

      //  skip the conversion character unless the format string ended inside the directive
      if (*cp) {
        ++cp;
      }

      ++a;

    } else {
      //  plain character: copy to the output
      os << *cp;
      ++cp;
    }

  }

  return os.str ();
}
//  Convenience overload taking the format as a std::string: forwards to the
//  version taking a C string.
std::string
sprintf (const std::string &f, const std::vector <tl::Variant> &vv, unsigned int a0)
{
  const char *format = f.c_str ();
  return tl::sprintf (format, vv, a0);
}
}