/* * * This file is part of the PCRE++ Class Library. * * By accessing this software, PCRE++, you are duly informed * of and agree to be bound by the conditions described below * in this notice: * * This software product, PCRE++, is developed by Thomas Linden * and copyrighted (C) 2002-2003 by Thomas Linden,with all rights * reserved. * * There is no charge for PCRE++ software. You can redistribute * it and/or modify it under the terms of the GNU Lesser General * Public License, which is incorporated by reference herein. * * PCRE++ is distributed WITHOUT ANY WARRANTY, IMPLIED OR EXPRESS, * OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE or that * the use of it will not infringe on any third party's intellec- * tual property rights. * * You should have received a copy of the GNU Lesser General Public * License along with PCRE++. Copies can also be obtained from: * * http://www.gnu.org/licenses/lgpl.txt * * or by writing to: * * Free Software Foundation, Inc. * 59 Temple Place, Suite 330 * Boston, MA 02111-1307 * USA * * Or contact: * * "Thomas Linden" * * */ #ifndef HAVE_PCRE_PP_H #define HAVE_PCRE_PP_H #include #include #include #include #include #include extern "C" { #include #include } namespace pcrepp { #ifdef DEBUG #define __pcredebug cerr << "(pcre++ DEBUG) " << __LINE__ << ": " #else #define __pcredebug if(0) cerr #endif /** additional binary flag in the hope Philip never ever uses the same in the future * @see Pcre(const std::string& expression, unsigned int flags) */ #define PCRE_GLOBAL 0x10000 /** * The Pcre class is a wrapper around the PCRE library. * * The library "pcre++" defines a class named "Pcre" which you * can use to search in strings using reular expressions as * well as getting matched sub strings. It does currently not * support all features, which the underlying PCRE library * provides, but the most important stuff is implemented. * * Please study this example code to learn how to use this class: * @include examples/demo.cc * * Compile your programs which use the prce++ class using the following * command line: * @code * g++ -c yourcode.o `pcre-config --cflags` `pcre++-config --cflags` * g++ yourcode.o `pcre-config --libs` `pcre++-config --libs` -o yourprogram * @endcode * * If you want to learn more about regular expressions which can be used * with pcre++, then please read the following documentation: * perlre - Perl regular expressions * * The pcre library itself does also contain some usefull documentation, * which maybe interesting for you: * PCRE manual page */ class Pcre { private: std::string _expression; /* the given regular expression */ unsigned int _flags; /* the given flags, 0 if not defined */ bool case_t, global_t; /* internal compile flags, used by replace() and split() */ pcre *p_pcre; /* pcre object pointer */ pcre_extra *p_pcre_extra; /* stuff required by pcre lib */ int sub_len; int *sub_vec; int erroffset; char *err_str; std::vector *resultset; /* store substrings, if any */ bool _have_paren; /* indicate wether we have already parentesis applied in replace */ const unsigned char *tables; /* locale tables */ bool did_match; /** true if the expression produced a match */ int num_matches; /** number of matches if std::vector* expected */ /* reset all counters and free objects, prepare for another search */ void reset(); /* compile the pattern */ void Compile(int flags); /* do the actual search, will be called by the public ::search(..) methods */ bool dosearch(const std::string& stuff, int OffSet); /* do the actual split() job, called by the various wrapper split() methods */ std::vector _split(const std::string& piece, int limit, int start_offset, int end_offset); /* replace $1 .. $n with the corresponding substring, used by replace() */ std::string _replace_vars(const std::string& piece); /* init pointers with NULL */ void zero(); std::map info(); std::string info(int what); public: /** * Exception wrapper class. * * All errors which may occur inside the Pcre class will * throw an exception of this type("Pcre::exception"). * * You can catch such exceptions like this: * * @code * try { * .. * } * catch(Pcre::exception &e) { * cout << "Pcre++ error: " << e.what() << endl; * } * @endcode */ class exception : public std::runtime_error { private: std::string translate(int num) { std::string msg; switch(num) { case -1: msg = "PCRE_ERROR_NOMATCH"; break; case -2: msg = "PCRE_ERROR_NULL"; break; case -3: msg = "PCRE_ERROR_BADOPTION"; break; case -4: msg = "PCRE_ERROR_BADMAGIC"; break; case -5: msg = "PCRE_ERROR_UNKNOWN_NODE"; break; case -6: msg = "PCRE_ERROR_NOMEMORY"; break; case -7: msg = "PCRE_ERROR_NOSUBSTRING"; break; // pcre4-HINT: add PCRE_ERROR_MATCHLIMIT support } return msg; } public: exception(const std::string & msg) : runtime_error(msg) { } exception(int num) : runtime_error(translate(num)) { } }; /** Empty Constructor. * Create a new empty Pcre object. This is the simplest * constructor available, you might consider one of the other * constructors as a better solution. * You need to initialize thie Pcre object, if you use the * empty constructor. You can use one of the two available * operator= operators to assign it an expression or a Pcre * copy. * * @return A new empty Pcre object */ Pcre(); /** Constructor. * Compile the given pattern. An Pcre object created this way can * be used multiple times to do searches. * * @param "expression" a string, which must be a valid perl regular expression. * @return A new Pcre object, which holds te compiled pattern. * @see Pcre(const std::string& expression, const std::string& flags) * @see Pcre(const std::string& expression, unsigned int flags) */ Pcre(const std::string& expression); /** Constructor. * Compile the given pattern. An Pcre object created this way can * be used multiple times to do searches. * * @param "expression" a string, which must be a valid perl regular expression. * @param "flags" can be one or more of the following letters: * *- i Search case insensitive. * *- m Match on multiple lines, thus ^ and $ are interpreted * as the start and end of the entire string, not of a * single line. * *- s A dot in an expression matches newlines too(which is * normally not the case). * *- x Whitespace characters will be ignored (except within * character classes or if escaped). * *- g Match multiple times. This flags affects only the behavior of the * replace(const std::string& piece, const std::string& with) method. * * @return A new Pcre object, which holds te compiled pattern. * @see Pcre(const std::string& expression) * @see Pcre(const std::string& expression, unsigned int flags) */ Pcre(const std::string& expression, const std::string& flags); /** Constructor. * Compile the given pattern. An Pcre object created this way can * be used multiple times to do searches. * * @param "expression" a string, which must be a valid perl regular expression. * @param "flags" option bits can be one or more of the following bits: * * - PCRE_ANCHORED anchored pattern. * - PCRE_CASELESS case insensitive search. * - PCRE_DOLLAR_ENDONLY dollar sign matches only at end. * - PCRE_DOTALL newline is contained in . * - PCRE_EXTENDED whitespace characters will be ignored. * - PCRE_EXTRA use perl incompatible pcre extensions. * - PCRE_MULTILINE match on multiple lines. * - PCRE_NO_AUTO_CAPTURE disable the use of numbered capturing parentheses in the pattern. * - PCRE_UNGREEDY qunatifiers behave not greedy by default. * - PCRE_UTF8 use utf8 support. * - PCRE_GLOBAL (PCRE++ internal flag) match multiple times used only in the * replace(const std::string& piece, const std::string& with) method. * * @return A new Pcre object, which holds te compiled pattern. * @see Pcre(const std::string& expression) * @see Pcre(const std::string& expression, const std::string& flags) * @see pcreapi(3) manpage */ Pcre(const std::string& expression, unsigned int flags); /** Copy Constructor * Creates a new Pcre object of an existing one. * @param "P" an existing Pcre object. * @return A new Pcre object, which holds te compiled pattern. * @see Pcre(const std::string& expression) * @see Pcre(const std::string& expression, const std::string& flags) */ Pcre(const Pcre &P); /** Operator =. * @param "expression" a valid regular expression. * @return a new Pcre object. * * Example: * * @code * Pcre regex = "(A+?)"; * @endcode */ const Pcre& operator = (const std::string& expression); /** Operator =. * @param "&P" an Pcre object * @return a new Pcre object * * Example: * * @code * Pcre reg1("^[a-z]+?"); * Pcre reg2; * reg2 = reg1; * @endcode */ const Pcre& operator = (const Pcre &P); /** Destructor. * The desturcor will automatically invoked if the object * is no more used. It frees all the memory allocated by * pcre++. */ ~Pcre(); /** Do a search on the given string. * This method does the actual search on the given string. * @param "stuff" the string in which you want to search for something. * @return boolean true if the regular expression matched. false if not. * @see bool search(const std::string& stuff, int OffSet) */ bool search(const std::string& stuff); /** Do a search on the given string beginning at the given offset. * This method does the actual search on the given string. * @param "stuff" the string in which you want to search for something. * @param "OffSet" the offset where to start the search. * @return boolean true if the regular expression matched. false if not. * @see bool search(const std::string& stuff) */ bool search(const std::string& stuff, int OffSet); /** Return a vector of substrings, if any. * @return a pointer to an std::vector, which may be NULL, if no substrings has been found. * @see std::vector */ std::vector* get_sub_strings() const; /** Get a substring at a known position. * This method throws an out-of-range exception if the given position * is invalid. * @param "pos" the position of the substring to return. Identical to perl's $1..$n. * @return the substring at the given position. * * Example: * @code * * std::string mysub = regex.get_match(1); * * @endcode * Get the first substring that matched the expression in the "regex" object. */ std::string get_match(int pos) const; /** Get the start position of a substring within the searched string. * This method returns the character position of the first character of * a substring withing the searched string. * @param "pos" the position of the substring. Identical to perl's $1..$n. * @return the integer character position of the first character of a substring. Positions are starting at 0. * * Example: * @code * * Pcre regex("([0-9]+)"); // search for numerical characters * regex.search("The 11th september."); // do the search on this string * std::string day = regex.get_match(1); // returns "11" * int pos = regex.get_match_start(1); // returns 4, because "11" begins at the * // 4th character inside the search string. * * @endcode * @see int get_match_end(int pos) * @see int get_match_end() * @see int get_match_start() */ int get_match_start(int pos) const; /** Get the end position of a substring within the searched string. * This method returns the character position of the last character of * a substring withing the searched string. * @param "pos" the position of the substring. Identical to perl's $1..$n. * @return the integer character position of the last character of a substring. Positions are starting at 0. * * Example: * @code * * Pcre regex("([0-9]+)"); // search for numerical characters * regex.search("The 11th september."); // do the search on this string * std::string day = regex.get_match(1); // returns "11" * int pos = regex.get_match_end(1); // returns 5, because "11" ends at the * // 5th character inside the search string. * * @endcode * @see int get_match_start(int pos) * @see int get_match_start() * @see int get_match_end() */ int get_match_end(int pos) const; /** Get the start position of the entire match within the searched string. * This method returns the character position of the first character of * the entire match within the searched string. * @return the integer character position of the first character of the entire match. * * Example: * @code * * Pcre regex("([0-9]+)\s([a-z]+)"); // search for the date(makes 2 substrings * regex.search("The 11th september."); // do the search on this string * int pos = regex.get_match_start(); // returns 4, because "11th september" begins at the * // 4th character inside the search string. * * @endcode * @see int get_match_start(int pos) * @see int get_match_end(int pos) * @see int get_match_end() */ int get_match_start() const; /** Get the end position of the entire match within the searched string. * This method returns the character position of the last character of * the entire match within the searched string. * @return the integer character position of the last character of the entire match. * * Example: * @code * * Pcre regex("([0-9]+)\s([a-z]+)"); // search for the date(makes 2 substrings * regex.search("The 11th september."); // do the search on this string * int pos = regex.get_match_end(); // returns 17, because "11th september", which is // the entire match, ends at the * // 17th character inside the search string. * * @endcode * @see int get_match_start() * @see int get_match_start(int pos) * @see int get_match_end(int pos) */ int get_match_end() const; /** Get the length of a substring at a known position. * This method throws an out-of-range exception if the given position * is invalid. * @param "pos" the position of the substring-length to return. Identical to perl's $1..$n. * @return the length substring at the given position. * */ size_t get_match_length(int pos) const; /** Test if a search was successfull. * This method must be invoked after calling search(). * @return boolean true if the search was successfull at all, or false if not. */ bool matched() const { return did_match; }; /** Get the number of substrings generated by pcre++. * @return the number of substrings generated by pcre++. */ int matches() const { return num_matches; } /** split a string into pieces * This method will split the given string into a vector * of strings using the compiled expression (given to the constructor). * * @param "piece" The string you want to split into it's parts. * @return an vector of strings * @see std::vector * @see std::vector split(const std::string& piece, int limit) * @see std::vector split(const std::string& piece, int limit, int start_offset) * @see std::vector split(const std::string& piece, int limit, int start_offset, int end_offset) * @see std::vector split(const std::string& piece, std::vector positions) */ std::vector split(const std::string& piece); /** split a string into pieces * This method will split the given string into a vector * of strings using the compiled expression (given to the constructor). * * @param "piece" The string you want to split into it's parts. * @param "limit" the maximum number of elements you want to get back from split(). * @return an vector of strings * @see std::vector * @see std::vector split(const std::string& piece) * @see std::vector split(const std::string& piece, int limit, int start_offset) * @see std::vector split(const std::string& piece, int limit, int start_offset, int end_offset) * @see std::vector split(const std::string& piece, std::vector positions) */ std::vector split(const std::string& piece, int limit); /** split a string into pieces * This method will split the given string into a vector * of strings using the compiled expression (given to the constructor). * * @param "piece" The string you want to split into it's parts. * @param "limit" the maximum number of elements you want to get back from split(). * @param "start_offset" at which substring the returned vector should start. * @return an vector of strings * @see std::vector * @see std::vector split(const std::string& piece) * @see std::vector split(const std::string& piece, int limit) * @see std::vector split(const std::string& piece, int limit, int start_offset, int end_offset) * @see std::vector split(const std::string& piece, std::vector positions) */ std::vector split(const std::string& piece, int limit, int start_offset); /** split a string into pieces * This method will split the given string into a vector * of strings using the compiled expression (given to the constructor). * * @param "piece" The string you want to split into it's parts. * @param "limit" the maximum number of elements you want to get back from split(). * @param "start_offset" at which substring the returned vector should start. * @param "end_offset" at which substring the returned vector should end. * @return an vector of strings * @see std::vector split(const std::string& piece) * @see std::vector split(const std::string& piece, int limit) * @see std::vector split(const std::string& piece, int limit, int start_offset) * @see std::vector split(const std::string& piece, std::vector positions) * @see std::vector */ std::vector split(const std::string& piece, int limit, int start_offset, int end_offset); /** split a string into pieces * This method will split the given string into a vector * of strings using the compiled expression (given to the constructor). * * @param "piece" The string you want to split into it's parts. * @param "positions" a std::vector of positions, which the returned vector should contain. * @return an vector of strings * @see std::vector split(const std::string& piece) * @see std::vector split(const std::string& piece, int limit) * @see std::vector split(const std::string& piece, int limit, int start_offset) * @see std::vector split(const std::string& piece, int limit, int start_offset) * @see std::vector */ std::vector split(const std::string& piece, std::vector positions); /** Replace parts of a string using regular expressions. * This method is the counterpart of the perl s/// operator. * It replaces the substrings which matched the given regular expression * (given to the constructor) with the supplied string. * * @param "piece" the string in which you want to search and replace. * @param "with" the string which you want to place on the positions which match the expression (given to the constructor). */ std::string replace(const std::string& piece, const std::string& with); /** Return pointer to underlying pcre object. * The pcre object allows you to access the pcre API directly. * E.g. if your are using pcre version 4.x and want to use the * new functionality which is currently not supported by pcre++. * An example would be: pcre_fullinfo(), pcre_study() or the * callout functionality. * * @return "pcre*" pointer to pcre object. * @see man pcre * @see pcre_extra* get_pcre_extra() */ pcre* get_pcre(); /** Return pointer to underlying pcre_extra structure. * The returned pcre_extra structure can be used in conjunction * with the pcre* object returned by pcre(). * * @return "pcre_extra*" pointer to pcre_extra structure. * @see pcre* get_pcre() */ pcre_extra* get_pcre_extra(); /** Analyze pattern for speeding up the matching process. * When a pattern is going to be used several times, it is worth spending more * time analyzing it in order to speed up the time taken for matching. * * An excpetion will be thrown if analyzing the pattern failed. */ void study(); /** Sets locale for all character operations * Returns false if locale can't be set. Otherwise returns true * @param "locale" locale alias name you want to use. * @return true if setting locale were successful * * @see locale(1) */ bool setlocale(const char* locale); /** Return substring of a match at a known possition using the array notation. * This method throws an out-of-range exception if the given position * is invalid. * @param "index" the position of the substring to return. Identical to perl's $1..$n. * @return the substring at the given position. * * Example: * @code * * std::string mysub = regex[1]; * * @endcode * Get the first substring that matched the expression in the "regex" object. * * @see std::string get_match(int pos) */ std::string operator[](int index) { return get_match(index); } }; } // end namespace pcre #endif // HAVE_PCRE_PP_H