WIP: more powerful glob pattern

This commit is contained in:
Matthias Koefferlein 2019-05-11 02:29:38 +02:00
parent 252b1551dc
commit 72cadf6d5d
3 changed files with 581 additions and 181 deletions

View File

@ -22,234 +22,609 @@
#include "tlGlobPattern.h" #include "tlGlobPattern.h"
#include "tlString.h"
#include <cstring>
namespace tl namespace tl
{ {
static bool // TODO: take from tlString.h
do_match (const char *p, const char *s, bool cs, bool exact, bool hm, std::vector<std::string> *o, std::vector<std::pair<unsigned int, const char *> > &bstart) inline uint32_t utf32_from_utf8 (const char *&cp, const char *cpe = 0)
{ {
while (*p) { uint32_t c32 = (unsigned char) *cp++;
if (c32 >= 0xf0 && ((cpe && cp + 2 < cpe) || (! cpe && cp [0] && cp [1] && cp [2]))) {
c32 = ((c32 & 0x7) << 18) | ((uint32_t (cp [0]) & 0x3f) << 12) | ((uint32_t (cp [1]) & 0x3f) << 6) | (uint32_t (cp [2]) & 0x3f);
cp += 3;
} else if (c32 >= 0xe0 && ((cpe && cp + 1 < cpe) || (! cpe && cp [0] && cp [1]))) {
c32 = ((c32 & 0xf) << 12) | ((uint32_t (cp [0]) & 0x3f) << 6) | (uint32_t (cp [1]) & 0x3f);
cp += 2;
} else if (c32 >= 0xc0 && ((cpe && cp < cpe) || (! cpe && cp [0]))) {
c32 = ((c32 & 0x1f) << 6) | (uint32_t (*cp) & 0x3f);
++cp;
}
if (!exact && *p == '\\') { return c32;
}
++p; class GlobPatternOp
if (!*s || *s != *p) { {
public:
GlobPatternOp () : m_next_owned (false), mp_next (0) { }
virtual ~GlobPatternOp ()
{
if (m_next_owned) {
delete mp_next;
}
mp_next = 0;
}
virtual GlobPatternOp *clone () const
{
GlobPatternOp *op = new GlobPatternOp ();
if (next ()) {
op->set_next (next ()->clone ());
}
return op;
}
virtual bool match (const char *s, std::vector<std::string> *e) const
{
size_t n = e ? e->size () : 0;
if (mp_next && mp_next->match (s, e)) {
return true;
} else if (! mp_next && ! *s) {
return true;
} else if (e) {
e->erase (e->begin () + n, e->end ());
return false;
} else {
return false;
}
}
void set_next (GlobPatternOp *next)
{
m_next_owned = true;
mp_next = next;
}
GlobPatternOp *next ()
{
return mp_next;
}
const GlobPatternOp *next () const
{
return mp_next;
}
void set_tail (GlobPatternOp *op)
{
GlobPatternOp *n = this;
while (n->mp_next) {
n = n->mp_next;
}
n->mp_next = op;
n->m_next_owned = false;
}
private:
bool m_next_owned;
GlobPatternOp *mp_next;
};
/**
 * @brief A pattern operation matching a fixed literal string
 *
 * The subject must begin with the stored literal, compared with or without
 * case sensitivity depending on the "cs" flag given to the constructor.
 * The text behind the literal is handed to the base class match, which
 * forwards it to the next operation of the chain.
 */
class GlobPatternString
  : public GlobPatternOp
{
public:
  /**
   * @brief Creates a literal matcher
   * @param s The literal text the subject must start with
   * @param cs true for case sensitive comparison
   */
  GlobPatternString (const std::string &s, bool cs)
    : GlobPatternOp (), m_s (s), m_cs (cs)
  {
    // .. nothing yet ..
  }

  /**
   * @brief Creates a deep copy of this operation and the chain behind it
   */
  virtual GlobPatternOp *clone () const
  {
    GlobPatternString *op = new GlobPatternString (m_s, m_cs);
    //  the last operation of a chain has no successor - do not dereference a null pointer then
    if (next ()) {
      op->set_next (next ()->clone ());
    }
    return op;
  }

  /**
   * @brief Matches the literal at the start of "s" and continues with the remainder
   * @param s The subject string (null-terminated)
   * @param e Optional output vector, passed through to the following operations
   * @return true if the literal and the rest of the chain match
   */
  virtual bool match (const char *s, std::vector<std::string> *e) const
  {
    const size_t n = m_s.size ();
    const int diff = m_cs ? strncmp (s, m_s.c_str (), n) : strncasecmp (s, m_s.c_str (), n);
    if (diff != 0) {
      return false;
    }
    return GlobPatternOp::match (s + n, e);
  }

private:
  std::string m_s;
  bool m_cs;
};
/**
 * @brief A pattern operation that accepts any remaining subject
 *
 * The match implementation returns true unconditionally without looking
 * at the subject or at a following operation.
 */
class GlobPatternPass
  : public GlobPatternOp
{
public:
  GlobPatternPass ()
    : GlobPatternOp ()
  {
    // .. nothing yet ..
  }

  /**
   * @brief Creates a copy of this operation, including a copy of the successor chain if there is one
   */
  virtual GlobPatternOp *clone () const
  {
    GlobPatternPass *copy = new GlobPatternPass ();
    const GlobPatternOp *successor = next ();
    if (successor != 0) {
      copy->set_next (successor->clone ());
    }
    return copy;
  }

  /**
   * @brief Always succeeds - both arguments are ignored
   */
  virtual bool match (const char * /*s*/, std::vector<std::string> * /*e*/) const
  {
    return true;
  }
};
class GlobPatternAny
: public GlobPatternOp
{
public:
GlobPatternAny (size_t min, size_t max)
: GlobPatternOp (), m_min (min), m_max (max)
{
// .. nothing yet ..
}
virtual GlobPatternOp *clone () const
{
GlobPatternAny *op = new GlobPatternAny (m_min, m_max);
if (next ()) {
op->set_next (next ()->clone ());
}
return op;
}
virtual bool match (const char *s, std::vector<std::string> *e) const
{
size_t i = 0;
while (i <= m_max) {
if (i >= m_min && GlobPatternOp::match (s, e)) {
return true;
} else if (! *s) {
return false; return false;
} }
if (*p) { utf32_from_utf8 (s);
++p; ++i;
} }
++s;
} else if (!exact && *p == '?') { return false;
}
++p; private:
if (! *s) { size_t m_min, m_max;
return false; };
}
++s;
} else if (!exact && *p == '*') { class GlobPatternBranch;
++p; template <class T>
class GlobPatternContinuator
: public GlobPatternOp
{
public:
GlobPatternContinuator (T *br)
: mp_br (br)
{
// .. nothing yet ..
}
// a trailing '*' always matches virtual GlobPatternOp *clone () const { return 0; }
if (!*p) {
virtual bool match (const char *s, std::vector<std::string> *e) const
{
return mp_br->continue_match (s, e);
}
private:
T *mp_br;
};
class GlobPatternBranch
: public GlobPatternOp
{
public:
GlobPatternBranch ()
: GlobPatternOp (), m_cont (this)
{
// .. nothing yet ..
}
~GlobPatternBranch ()
{
for (std::vector<GlobPatternOp *>::const_iterator i = m_choices.begin (); i != m_choices.end (); ++i) {
delete *i;
}
m_choices.clear ();
}
void add_choice (GlobPatternOp *op)
{
op->set_tail (&m_cont);
m_choices.push_back (op);
}
virtual GlobPatternOp *clone () const
{
GlobPatternBranch *br = new GlobPatternBranch ();
if (next ()) {
br->set_next (next ()->clone ());
}
for (std::vector<GlobPatternOp *>::const_iterator i = m_choices.begin (); i != m_choices.end (); ++i) {
br->add_choice ((*i)->clone ());
}
return br;
}
virtual bool match (const char *s, std::vector<std::string> *e) const
{
for (std::vector<GlobPatternOp *>::const_iterator i = m_choices.begin (); i != m_choices.end (); ++i) {
if ((*i)->match (s, e)) {
return true; return true;
} }
}
return false;
}
std::vector<std::pair<unsigned int, const char *> > bs = bstart; virtual bool continue_match (const char *s, std::vector<std::string> *e) const
size_t no = o ? o->size () : 0; {
return GlobPatternOp::match (s, e);
}
while (*s) { private:
if (do_match (p, s, cs, exact, hm, o, bstart)) { std::vector<GlobPatternOp *> m_choices;
return true; GlobPatternContinuator<GlobPatternBranch> m_cont;
} };
bstart = bs;
if (o && o->begin () + no < o->end ()) {
o->erase (o->begin () + no, o->end ());
}
++s;
}
} else if (!exact && *p == '[') { class GlobPatternBracket
: public GlobPatternOp
{
public:
GlobPatternBracket ()
: GlobPatternOp (), mp_inner (0), mp_s0 (0), m_cont (this)
{
// .. nothing yet ..
}
if (! *s) { ~GlobPatternBracket ()
return false; {
} delete mp_inner;
mp_inner = 0;
}
bool negate = false; void set_inner (GlobPatternOp *op)
++p; {
if (*p && *p == '^') { delete mp_inner;
++p; op->set_tail (& m_cont);
negate = true; mp_inner = op;
} }
bool hit = false; virtual GlobPatternOp *clone () const
{
GlobPatternBracket *br = new GlobPatternBracket ();
if (next ()) {
br->set_next (next ()->clone ());
}
if (mp_inner) {
br->set_inner (mp_inner->clone ());
}
return br;
}
while (*p != ']' && *p) { virtual bool match (const char *s, std::vector<std::string> *e) const
{
if (mp_inner) {
mp_s0 = s;
bool res = mp_inner->match (s, e);
mp_s0 = 0;
return res;
}
return false;
}
char c1 = *p; virtual bool continue_match (const char *s, std::vector<std::string> *e) const
if (c1 == '\\') { {
c1 = *++p; if (mp_s0 && e) {
} e->push_back (std::string (mp_s0, 0, s - mp_s0));
if (*p) { }
++p; return GlobPatternOp::match (s, e);
} }
char c2 = c1; private:
if (*p == '-') { GlobPatternOp *mp_inner;
++p; // NOTE: this isn't thread-safe unless GlobPattern objects live in different threads
c2 = *p; mutable const char *mp_s0;
if (c2 == '\\') { GlobPatternContinuator<GlobPatternBracket> m_cont;
c2 = *++p; };
}
if (*p) {
++p;
}
}
if (! hit) { class GlobPatternCharClass
if (cs && *s >= c1 && *s <= c2) { : public GlobPatternOp
hit = true; {
// TODO: implement UTF-8 support public:
} else if (!cs && tolower (*s) >= tolower (c1) && tolower (*s) <= tolower (c2)) { GlobPatternCharClass (bool negate, bool cs)
hit = true; : m_negate (negate), m_cs (cs)
} {
} // .. nothing yet ..
}
} GlobPatternCharClass (const std::vector<std::pair<uint32_t, uint32_t> > &intervals, bool negate, bool cs)
: m_negate (negate), m_cs (cs), m_intervals (intervals)
{
// .. nothing yet ..
}
if (negate == hit) { void add_interval (uint32_t c1, uint32_t c2)
return false; {
} m_intervals.push_back (std::make_pair (c1, c2));
}
++s; virtual GlobPatternOp *clone () const
if (*p) { {
++p; GlobPatternCharClass *op = new GlobPatternCharClass (m_intervals, m_negate, m_cs);
} if (next ()) {
op->set_next (next ()->clone ());
}
return op;
}
} else if (!exact && *p == '{') { virtual bool match (const char *s, std::vector<std::string> *e) const
{
++p; uint32_t c = utf32_from_utf8 (s);
bool hit = false;
const char *s0 = s;
while (*p) {
if (hit) {
while (*p && *p != ',' && *p != '}') {
if (*p == '\\') {
++p;
}
if (*p) {
++p;
}
}
for (std::vector<std::pair<uint32_t, uint32_t> >::const_iterator i = m_intervals.begin (); i != m_intervals.end (); ++i) {
if (c >= i->first && c <= i->second) {
if (m_negate) {
return false;
} else { } else {
return GlobPatternOp::match (s, e);
s = s0;
hit = true;
while (*p && *p != ',' && *p != '}') {
if (*p == '\\') {
++p;
}
if (hit) {
if (! *s) {
hit = false;
} else if (cs && *p != *s) {
hit = false;
// TODO: implement UTF-8 support
} else if (!cs && tolower (*p) != tolower (*s)) {
hit = false;
} else {
++s;
}
}
if (*p) {
++p;
}
}
} }
}
}
if (*p == ',') { if (! m_negate) {
++p; return false;
} else if (*p == '}') { } else {
++p; return GlobPatternOp::match (s, e);
break; }
} }
private:
bool m_negate, m_cs;
std::vector<std::pair<uint32_t, uint32_t> > m_intervals;
};
static
GlobPatternOp *compile (const char *&p, bool exact, bool cs, bool hm, bool for_brace);
/**
 * @brief Appends a new operation to the chain under construction
 *
 * @param op_head Receives "no" if the chain is still empty (i.e. "op" is null)
 * @param op The current last operation; becomes "no" on return
 * @param no The operation to append; it is attached through set_next of the previous last operation
 */
void
compile_emit_op (GlobPatternOp *&op_head, GlobPatternOp *&op, GlobPatternOp *no)
{
  if (! op) {
    //  first operation: it becomes the head of the chain
    op_head = no;
  } else {
    op->set_next (no);
  }

  op = no;
}
void
compile_emit_string (std::string &str, GlobPatternOp *&op_head, GlobPatternOp *&op, bool cs)
{
if (! str.empty ()) {
compile_emit_op (op_head, op, new GlobPatternString (str, cs));
str.clear ();
}
}
/**
 * @brief Compiles a bracket character class ("[...]") into a GlobPatternCharClass operation
 *
 * A leading '^' negates the class. Each entry is a single character or a
 * range "a-b"; a backslash takes the following character literally.
 * Characters are decoded with utf32_from_utf8.
 *
 * @param op_head Head of the chain under construction
 * @param op Current last operation of the chain
 * @param p Pattern cursor; the caller is expected to have advanced it past the opening '['.
 *          On return it is positioned behind the closing ']', or on the terminating
 *          NUL if the class is not closed.
 * @param cs Case sensitivity flag handed to the GlobPatternCharClass operation
 */
void
compile_emit_char_class (GlobPatternOp *&op_head, GlobPatternOp *&op, const char *&p, bool cs)
{
  bool negate = false;
  if (*p == '^') {
    ++p;
    negate = true;
  }

  GlobPatternCharClass *cc = new GlobPatternCharClass (negate, cs);

  while (*p != ']' && *p) {

    uint32_t c1 = utf32_from_utf8 (p);
    //  a backslash escapes the next character - but never step over the terminating NUL
    if (c1 == '\\' && *p) {
      c1 = utf32_from_utf8 (p);
    }

    uint32_t c2 = c1;
    if (*p == '-') {
      ++p;
      //  a dangling '-' at the end of the pattern leaves a single-character interval
      if (*p) {
        c2 = utf32_from_utf8 (p);
        if (c2 == '\\' && *p) {
          c2 = utf32_from_utf8 (p);
        }
      }
    }

    cc->add_interval (c1, c2);

  }

  //  consume the closing bracket so the caller does not take it as literal text
  if (*p == ']') {
    ++p;
  }

  compile_emit_op (op_head, op, cc);
}
/**
 * @brief Compiles a brace alternative list ("{a,b,...}") into a GlobPatternBranch operation
 *
 * Each alternative is compiled with "compile" in brace mode and added as a
 * choice. An empty alternative (as in "{a,}" or "{}") is represented by a
 * plain GlobPatternOp, which consumes nothing and continues with whatever
 * follows the brace expression.
 *
 * @param op_head Head of the chain under construction
 * @param op Current last operation of the chain
 * @param p Pattern cursor; the caller is expected to have advanced it past the opening '{'.
 *          On return it is positioned behind the closing '}', or on the terminating
 *          NUL if the list is not closed.
 * @param cs Case sensitivity flag handed to the compiled alternatives
 */
void
compile_emit_alt (GlobPatternOp *&op_head, GlobPatternOp *&op, const char *&p, bool cs)
{
  GlobPatternBranch *alt_op = new GlobPatternBranch ();

  while (*p) {

    GlobPatternOp *alt = compile (p, false, cs, false, true);
    if (! alt) {
      //  nothing was compiled: keep the empty alternative instead of dropping it
      alt = new GlobPatternOp ();
    }
    alt_op->add_choice (alt);

    if (*p == ',') {
      ++p;
    } else if (*p == '}') {
      ++p;
      break;
    }

  }

  compile_emit_op (op_head, op, alt_op);
}
static
GlobPatternOp *compile (const char *&p, bool exact, bool cs, bool hm, bool for_brace)
{
std::string str;
GlobPatternOp *op = 0, *op_head = 0;
while (*p) {
if (exact) {
str += *++p;
} else if (*p == '\\') {
++p;
if (*p) {
str += *++p;
} }
if (! hit) { } else if (*p == '?') {
return false;
}
} else if (!exact && *p == ')') { compile_emit_string (str, op_head, op, cs);
compile_emit_op (op_head, op, new GlobPatternAny (1, 1));
++p; ++p;
if (! bstart.empty ()) { } else if (*p == '*') {
if (o) {
(*o)[bstart.back ().first] = std::string (bstart.back ().second, s - bstart.back ().second);
}
bstart.pop_back ();
}
} else if (!exact && *p == '(') { compile_emit_string (str, op_head, op, cs);
if (p[1]) {
compile_emit_op (op_head, op, new GlobPatternAny (0, std::numeric_limits<size_t>::max ()));
} else {
compile_emit_op (op_head, op, new GlobPatternPass ());
}
++p; ++p;
if (o) {
bstart.push_back (std::make_pair ((unsigned int) o->size (), s)); } else if (*p == '[') {
o->push_back (std::string ());
} compile_emit_string (str, op_head, op, cs);
++p;
compile_emit_char_class (op_head, op, p, cs);
} else if (*p == '{') {
compile_emit_string (str, op_head, op, cs);
++p;
compile_emit_alt (op_head, op, p, cs);
} else if (for_brace && (*p == ',' || *p == '}')) {
break;
} else { } else {
if (cs) { str += *p++;
if (*s != *p) {
return false;
} else {
++s;
++p;
}
} else {
// TODO: implement UTF-8 support
if (tolower (*s) != tolower (*p)) {
return false;
} else {
++s;
++p;
}
}
} }
} }
return (hm || *s == 0); compile_emit_string (str, op_head, op, cs);
if (hm) {
compile_emit_op (op_head, op, new GlobPatternPass ());
}
return op_head;
} }
GlobPattern::GlobPattern () GlobPattern::GlobPattern ()
: m_case_sensitive (true), m_exact (false), m_header_match (false) : m_case_sensitive (true), m_exact (false), m_header_match (false)
{ {
// .. nothing yet .. mp_op = 0;
m_needs_compile = true;
} }
GlobPattern::GlobPattern (const std::string &p) GlobPattern::GlobPattern (const std::string &p)
: m_p (p), m_case_sensitive (true), m_exact (false), m_header_match (false) : m_p (p), m_case_sensitive (true), m_exact (false), m_header_match (false)
{ {
// .. nothing yet .. mp_op = 0;
m_needs_compile = true;
}
/**
 * @brief Copy constructor
 *
 * Copies the pattern string and the matching flags of "other". The compiled
 * operation tree is not shared or copied; the new object is flagged as
 * needing compilation and builds its own tree on first use.
 */
GlobPattern::GlobPattern (const GlobPattern &other)
  : m_p (other.m_p),
    mp_op (0),
    m_case_sensitive (other.m_case_sensitive),
    m_exact (other.m_exact),
    m_header_match (other.m_header_match),
    m_needs_compile (true)
{
  // .. nothing yet ..
}
/**
 * @brief Assignment
 *
 * Takes over the pattern string and the matching flags of "other". The
 * compiled operation tree is not copied; this object is flagged as needing
 * compilation so the tree is rebuilt on the next use.
 *
 * @return A reference to this object
 */
GlobPattern &
GlobPattern::operator= (const GlobPattern &other)
{
  if (this != &other) {
    //  the pattern text itself must be copied too - otherwise the old pattern would be recompiled
    m_p = other.m_p;
    m_case_sensitive = other.m_case_sensitive;
    m_exact = other.m_exact;
    m_header_match = other.m_header_match;
    m_needs_compile = true;
  }
  return *this;
}
void
GlobPattern::do_compile ()
{
delete mp_op;
const char *p = m_p.c_str ();
mp_op = compile (p, m_exact, m_case_sensitive, m_header_match, false);
if (! mp_op) {
mp_op = new GlobPatternOp ();
}
m_needs_compile = false;
}
GlobPattern &GlobPattern::operator= (const std::string &p)
{
if (m_p != p) {
m_p = p;
m_needs_compile = true;
}
return *this;
} }
void GlobPattern::set_case_sensitive (bool f) void GlobPattern::set_case_sensitive (bool f)
{ {
m_case_sensitive = f; if (f != m_case_sensitive) {
m_case_sensitive = f;
m_needs_compile = true;
}
} }
bool GlobPattern::case_sensitive () const bool GlobPattern::case_sensitive () const
@ -259,7 +634,10 @@ bool GlobPattern::case_sensitive () const
void GlobPattern::set_exact (bool f) void GlobPattern::set_exact (bool f)
{ {
m_exact = f; if (f != m_exact) {
m_exact = f;
m_needs_compile = true;
}
} }
bool GlobPattern::exact () const bool GlobPattern::exact () const
@ -269,7 +647,10 @@ bool GlobPattern::exact () const
void GlobPattern::set_header_match (bool f) void GlobPattern::set_header_match (bool f)
{ {
m_header_match = f; if (f != m_header_match) {
m_header_match = f;
m_needs_compile = true;
}
} }
bool GlobPattern::header_match () const bool GlobPattern::header_match () const
@ -277,10 +658,19 @@ bool GlobPattern::header_match () const
return m_header_match; return m_header_match;
} }
/**
 * @brief Gets the compiled operation tree, compiling the pattern first if required
 *
 * Compilation is triggered from this const method by casting away constness.
 *
 * @return The root operation of the compiled pattern
 */
GlobPatternOp *GlobPattern::op () const
{
  if (! m_needs_compile) {
    return mp_op;
  }

  const_cast<GlobPattern *> (this)->do_compile ();
  return mp_op;
}
bool GlobPattern::match (const char *s) const bool GlobPattern::match (const char *s) const
{ {
std::vector<std::pair<unsigned int, const char *> > bstart; return op ()->match (s, 0);
return do_match (m_p.c_str (), s, m_case_sensitive, m_exact, m_header_match, 0, bstart);
} }
bool GlobPattern::match (const char *s, std::vector<std::string> &e) const bool GlobPattern::match (const char *s, std::vector<std::string> &e) const
@ -288,14 +678,13 @@ bool GlobPattern::match (const char *s, std::vector<std::string> &e) const
if (! e.empty ()) { if (! e.empty ()) {
e.clear (); e.clear ();
} }
std::vector<std::pair<unsigned int, const char *> > bstart;
return do_match (m_p.c_str (), s, m_case_sensitive, m_exact, m_header_match, &e, bstart); return op ()->match (s, &e);
} }
bool GlobPattern::match (const std::string &s) const bool GlobPattern::match (const std::string &s) const
{ {
std::vector<std::pair<unsigned int, const char *> > bstart; return op ()->match (s.c_str (), 0);
return do_match (m_p.c_str (), s.c_str (), m_case_sensitive, m_exact, m_header_match, 0, bstart);
} }
bool GlobPattern::match (const std::string &s, std::vector<std::string> &e) const bool GlobPattern::match (const std::string &s, std::vector<std::string> &e) const
@ -303,8 +692,8 @@ bool GlobPattern::match (const std::string &s, std::vector<std::string> &e) cons
if (! e.empty ()) { if (! e.empty ()) {
e.clear (); e.clear ();
} }
std::vector<std::pair<unsigned int, const char *> > bstart;
return do_match (m_p.c_str (), s.c_str (), m_case_sensitive, m_exact, m_header_match, &e, bstart); return op ()->match (s.c_str (), &e);
} }
} }

View File

@ -32,6 +32,8 @@
namespace tl namespace tl
{ {
class GlobPatternOp;
/** /**
* @brief A class representing a glob pattern * @brief A class representing a glob pattern
*/ */
@ -51,14 +53,20 @@ public:
*/ */
GlobPattern (const std::string &p); GlobPattern (const std::string &p);
/**
* @brief Copy constructor
*/
GlobPattern (const GlobPattern &other);
/**
* @brief Assignment
*/
GlobPattern &operator= (const GlobPattern &other);
/** /**
* @brief Assignment of a string * @brief Assignment of a string
*/ */
GlobPattern &operator= (const std::string &p) GlobPattern &operator= (const std::string &s);
{
m_p = p;
return *this;
}
/** /**
* @brief Sets a value indicating whether to treat the match case sensitive * @brief Sets a value indicating whether to treat the match case sensitive
@ -124,9 +132,14 @@ public:
private: private:
std::string m_p; std::string m_p;
GlobPatternOp *mp_op;
bool m_case_sensitive; bool m_case_sensitive;
bool m_exact; bool m_exact;
bool m_header_match; bool m_header_match;
bool m_needs_compile;
void do_compile ();
GlobPatternOp *op () const;
}; };
} // namespace tl } // namespace tl

View File

@ -386,11 +386,9 @@ main_cont (int &argc, char **argv)
std::vector<std::string> inst_modules = tl::dir_entries (inst_dir, true, false); std::vector<std::string> inst_modules = tl::dir_entries (inst_dir, true, false);
std::sort (inst_modules.begin (), inst_modules.end ()); std::sort (inst_modules.begin (), inst_modules.end ());
tl::GlobPattern pat ("*.ut");
for (std::vector<std::string>::const_iterator im = inst_modules.begin (); im != inst_modules.end (); ++im) { for (std::vector<std::string>::const_iterator im = inst_modules.begin (); im != inst_modules.end (); ++im) {
if (! pat.match (*im)) { if (tl::extension_last (*im) != "ut") {
continue; continue;
} }