// Generated by gmmproc 2.66.3 -- DO NOT MODIFY! #ifndef _GLIBMM_REGEX_H #define _GLIBMM_REGEX_H /* Copyright (C) 2007 The glibmm Development Team * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . */ #include #include #include #include #include #include #ifndef DOXYGEN_SHOULD_SKIP_THIS typedef struct _GRegex GRegex; #endif namespace Glib { /** @addtogroup glibmmEnums glibmm Enums and Flags */ /** * @var RegexCompileFlags REGEX_CASELESS * Letters in the pattern match both upper- and * lowercase letters. This option can be changed within a pattern * by a "(?i)" option setting. * * @var RegexCompileFlags REGEX_MULTILINE * By default, GRegex treats the strings as consisting * of a single line of characters (even if it actually contains * newlines). The "start of line" metacharacter ("^") matches only * at the start of the string, while the "end of line" metacharacter * ("$") matches only at the end of the string, or before a terminating * newline (unless REGEX_DOLLAR_ENDONLY is set). When * REGEX_MULTILINE is set, the "start of line" and "end of line" * constructs match immediately following or immediately before any * newline in the string, respectively, as well as at the very start * and end. This can be changed within a pattern by a "(?m)" option * setting. * * @var RegexCompileFlags REGEX_DOTALL * A dot metacharacter (".") in the pattern matches all * characters, including newlines. 
Without it, newlines are excluded. * This option can be changed within a pattern by a "(?s)" option setting. * * @var RegexCompileFlags REGEX_EXTENDED * Whitespace data characters in the pattern are * totally ignored except when escaped or inside a character class. * Whitespace does not include the VT character (code 11). In addition, * characters between an unescaped "#" outside a character class and * the next newline character, inclusive, are also ignored. This can * be changed within a pattern by a "(?x)" option setting. * * @var RegexCompileFlags REGEX_ANCHORED * The pattern is forced to be "anchored", that is, * it is constrained to match only at the first matching point in the * string that is being searched. This effect can also be achieved by * appropriate constructs in the pattern itself such as the "^" * metacharacter. * * @var RegexCompileFlags REGEX_DOLLAR_ENDONLY * A dollar metacharacter ("$") in the pattern * matches only at the end of the string. Without this option, a * dollar also matches immediately before the final character if * it is a newline (but not before any other newlines). This option * is ignored if REGEX_MULTILINE is set. * * @var RegexCompileFlags REGEX_UNGREEDY * Inverts the "greediness" of the quantifiers so that * they are not greedy by default, but become greedy if followed by "?". * It can also be set by a "(?U)" option setting within the pattern. * * @var RegexCompileFlags REGEX_RAW * Usually strings must be valid UTF-8 strings; using this * flag they are considered as a raw sequence of bytes. * * @var RegexCompileFlags REGEX_NO_AUTO_CAPTURE * Disables the use of numbered capturing * parentheses in the pattern. Any opening parenthesis that is not * followed by "?" behaves as if it were followed by "?:" but named * parentheses can still be used for capturing (and they acquire numbers * in the usual way). * * @var RegexCompileFlags REGEX_OPTIMIZE * Optimize the regular expression. 
If the pattern will * be used many times, then it may be worth the effort to optimize it * to improve the speed of matches. * * @var RegexCompileFlags REGEX_FIRSTLINE * Limits an unanchored pattern to match before (or at) the * first newline. @newin{2,34} * * @var RegexCompileFlags REGEX_DUPNAMES * Names used to identify capturing subpatterns need not * be unique. This can be helpful for certain types of pattern when it * is known that only one instance of the named subpattern can ever be * matched. * * @var RegexCompileFlags REGEX_NEWLINE_CR * Usually any newline character or character sequence is * recognized. If this option is set, the only recognized newline character * is '\\r'. * * @var RegexCompileFlags REGEX_NEWLINE_LF * Usually any newline character or character sequence is * recognized. If this option is set, the only recognized newline character * is '\\n'. * * @var RegexCompileFlags REGEX_NEWLINE_CRLF * Usually any newline character or character sequence is * recognized. If this option is set, the only recognized newline character * sequence is '\\r\\n'. * * @var RegexCompileFlags REGEX_NEWLINE_ANYCRLF * Usually any newline character or character sequence * is recognized. If this option is set, the only recognized newline character * sequences are '\\r', '\\n', and '\\r\\n'. @newin{2,34} * * @var RegexCompileFlags REGEX_BSR_ANYCRLF * Usually any newline character or character sequence * is recognised. If this option is set, then "\\R" only recognizes the newline * characters '\\r', '\\n' and '\\r\\n'. @newin{2,34} * * @var RegexCompileFlags REGEX_JAVASCRIPT_COMPAT * Changes behaviour so that it is compatible with * JavaScript rather than PCRE. @newin{2,34} * * @enum RegexCompileFlags * * Flags specifying compile-time options. * * @newin{2,14} * * @ingroup glibmmEnums * @par Bitwise operators: * %RegexCompileFlags operator|(RegexCompileFlags, RegexCompileFlags)
* %RegexCompileFlags operator&(RegexCompileFlags, RegexCompileFlags)
* %RegexCompileFlags operator^(RegexCompileFlags, RegexCompileFlags)
* %RegexCompileFlags operator~(RegexCompileFlags)
* %RegexCompileFlags& operator|=(RegexCompileFlags&, RegexCompileFlags)
* %RegexCompileFlags& operator&=(RegexCompileFlags&, RegexCompileFlags)
* %RegexCompileFlags& operator^=(RegexCompileFlags&, RegexCompileFlags)
*/ enum RegexCompileFlags { REGEX_CASELESS = 1 << 0, REGEX_MULTILINE = 1 << 1, REGEX_DOTALL = 1 << 2, REGEX_EXTENDED = 1 << 3, REGEX_ANCHORED = 1 << 4, REGEX_DOLLAR_ENDONLY = 1 << 5, REGEX_UNGREEDY = 1 << 9, REGEX_RAW = 1 << 11, REGEX_NO_AUTO_CAPTURE = 1 << 12, REGEX_OPTIMIZE = 1 << 13, REGEX_FIRSTLINE = 1 << 18, REGEX_DUPNAMES = 1 << 19, REGEX_NEWLINE_CR = 1 << 20, REGEX_NEWLINE_LF = 1 << 21, REGEX_NEWLINE_CRLF = 0x300000, REGEX_NEWLINE_ANYCRLF = 0x500000, REGEX_BSR_ANYCRLF = 1 << 23, REGEX_JAVASCRIPT_COMPAT = 1 << 25 }; /** @ingroup glibmmEnums */ inline RegexCompileFlags operator|(RegexCompileFlags lhs, RegexCompileFlags rhs) { return static_cast(static_cast(lhs) | static_cast(rhs)); } /** @ingroup glibmmEnums */ inline RegexCompileFlags operator&(RegexCompileFlags lhs, RegexCompileFlags rhs) { return static_cast(static_cast(lhs) & static_cast(rhs)); } /** @ingroup glibmmEnums */ inline RegexCompileFlags operator^(RegexCompileFlags lhs, RegexCompileFlags rhs) { return static_cast(static_cast(lhs) ^ static_cast(rhs)); } /** @ingroup glibmmEnums */ inline RegexCompileFlags operator~(RegexCompileFlags flags) { return static_cast(~static_cast(flags)); } /** @ingroup glibmmEnums */ inline RegexCompileFlags& operator|=(RegexCompileFlags& lhs, RegexCompileFlags rhs) { return (lhs = static_cast(static_cast(lhs) | static_cast(rhs))); } /** @ingroup glibmmEnums */ inline RegexCompileFlags& operator&=(RegexCompileFlags& lhs, RegexCompileFlags rhs) { return (lhs = static_cast(static_cast(lhs) & static_cast(rhs))); } /** @ingroup glibmmEnums */ inline RegexCompileFlags& operator^=(RegexCompileFlags& lhs, RegexCompileFlags rhs) { return (lhs = static_cast(static_cast(lhs) ^ static_cast(rhs))); } /** * @var RegexMatchFlags REGEX_MATCH_ANCHORED * The pattern is forced to be "anchored", that is, * it is constrained to match only at the first matching point in the * string that is being searched. 
This effect can also be achieved by * appropriate constructs in the pattern itself such as the "^" * metacharacter. * * @var RegexMatchFlags REGEX_MATCH_NOTBOL * Specifies that first character of the string is * not the beginning of a line, so the circumflex metacharacter should * not match before it. Setting this without REGEX_MULTILINE (at * compile time) causes circumflex never to match. This option affects * only the behaviour of the circumflex metacharacter, it does not * affect "\\A". * * @var RegexMatchFlags REGEX_MATCH_NOTEOL * Specifies that the end of the subject string is * not the end of a line, so the dollar metacharacter should not match * it nor (except in multiline mode) a newline immediately before it. * Setting this without REGEX_MULTILINE (at compile time) causes * dollar never to match. This option affects only the behaviour of * the dollar metacharacter, it does not affect "\\Z" or "\\z". * * @var RegexMatchFlags REGEX_MATCH_NOTEMPTY * An empty string is not considered to be a valid * match if this option is set. If there are alternatives in the pattern, * they are tried. If all the alternatives match the empty string, the * entire match fails. For example, if the pattern "a?b?" is applied to * a string not beginning with "a" or "b", it matches the empty string * at the start of the string. With this flag set, this match is not * valid, so GRegex searches further into the string for occurrences * of "a" or "b". * * @var RegexMatchFlags REGEX_MATCH_PARTIAL * Turns on the partial matching feature, for more * documentation on partial matching see g_match_info_is_partial_match(). * * @var RegexMatchFlags REGEX_MATCH_NEWLINE_CR * Overrides the newline definition set when * creating a new Regex, setting the '\\r' character as line terminator. * * @var RegexMatchFlags REGEX_MATCH_NEWLINE_LF * Overrides the newline definition set when * creating a new Regex, setting the '\\n' character as line terminator. 
 * * @var RegexMatchFlags REGEX_MATCH_NEWLINE_CRLF * Overrides the newline definition set when * creating a new Regex, setting the '\\r\\n' character sequence as line terminator. * * @var RegexMatchFlags REGEX_MATCH_NEWLINE_ANY * Overrides the newline definition set when * creating a new Regex, any Unicode newline sequence * is recognised as a newline. These are '\\r', '\\n' and '\\r\\n', and the * single characters U+000B LINE TABULATION, U+000C FORM FEED (FF), * U+0085 NEXT LINE (NEL), U+2028 LINE SEPARATOR and * U+2029 PARAGRAPH SEPARATOR. * * @var RegexMatchFlags REGEX_MATCH_NEWLINE_ANYCRLF * Overrides the newline definition set when * creating a new Regex; any '\\r', '\\n', or '\\r\\n' character sequence * is recognized as a newline. @newin{2,34} * * @var RegexMatchFlags REGEX_MATCH_BSR_ANYCRLF * Overrides the newline definition for "\\R" set when * creating a new Regex; only '\\r', '\\n', or '\\r\\n' character sequences * are recognized as a newline by "\\R". @newin{2,34} * * @var RegexMatchFlags REGEX_MATCH_BSR_ANY * Overrides the newline definition for "\\R" set when * creating a new Regex; any Unicode newline character or character sequence * is recognized as a newline by "\\R". These are '\\r', '\\n' and '\\r\\n', and the * single characters U+000B LINE TABULATION, U+000C FORM FEED (FF), * U+0085 NEXT LINE (NEL), U+2028 LINE SEPARATOR and * U+2029 PARAGRAPH SEPARATOR. @newin{2,34} * * @var RegexMatchFlags REGEX_MATCH_PARTIAL_SOFT * An alias for REGEX_MATCH_PARTIAL. @newin{2,34} * * @var RegexMatchFlags REGEX_MATCH_PARTIAL_HARD * Turns on the partial matching feature. In contrast * to REGEX_MATCH_PARTIAL_SOFT, this stops matching as soon as a partial match * is found, without continuing to search for a possible complete match. See * g_match_info_is_partial_match() for more information. @newin{2,34} * * @var RegexMatchFlags REGEX_MATCH_NOTEMPTY_ATSTART * Like REGEX_MATCH_NOTEMPTY, but only applied to * the start of the matched string. 
For anchored * patterns this can only happen for pattern containing "\\K". @newin{2,34} * * @enum RegexMatchFlags * * Flags specifying match-time options. * * @newin{2,14} * * @ingroup glibmmEnums * @par Bitwise operators: * %RegexMatchFlags operator|(RegexMatchFlags, RegexMatchFlags)
* %RegexMatchFlags operator&(RegexMatchFlags, RegexMatchFlags)
* %RegexMatchFlags operator^(RegexMatchFlags, RegexMatchFlags)
* %RegexMatchFlags operator~(RegexMatchFlags)
* %RegexMatchFlags& operator|=(RegexMatchFlags&, RegexMatchFlags)
* %RegexMatchFlags& operator&=(RegexMatchFlags&, RegexMatchFlags)
* %RegexMatchFlags& operator^=(RegexMatchFlags&, RegexMatchFlags)
*/ enum RegexMatchFlags { REGEX_MATCH_ANCHORED = 1 << 4, REGEX_MATCH_NOTBOL = 1 << 7, REGEX_MATCH_NOTEOL = 1 << 8, REGEX_MATCH_NOTEMPTY = 1 << 10, REGEX_MATCH_PARTIAL = 1 << 15, REGEX_MATCH_NEWLINE_CR = 1 << 20, REGEX_MATCH_NEWLINE_LF = 1 << 21, REGEX_MATCH_NEWLINE_CRLF = 0x300000, REGEX_MATCH_NEWLINE_ANY = 1 << 22, REGEX_MATCH_NEWLINE_ANYCRLF = 0x500000, REGEX_MATCH_BSR_ANYCRLF = 1 << 23, REGEX_MATCH_BSR_ANY = 1 << 24, REGEX_MATCH_PARTIAL_SOFT = 0x8000, REGEX_MATCH_PARTIAL_HARD = 1 << 27, REGEX_MATCH_NOTEMPTY_ATSTART = 1 << 28 }; /** @ingroup glibmmEnums */ inline RegexMatchFlags operator|(RegexMatchFlags lhs, RegexMatchFlags rhs) { return static_cast(static_cast(lhs) | static_cast(rhs)); } /** @ingroup glibmmEnums */ inline RegexMatchFlags operator&(RegexMatchFlags lhs, RegexMatchFlags rhs) { return static_cast(static_cast(lhs) & static_cast(rhs)); } /** @ingroup glibmmEnums */ inline RegexMatchFlags operator^(RegexMatchFlags lhs, RegexMatchFlags rhs) { return static_cast(static_cast(lhs) ^ static_cast(rhs)); } /** @ingroup glibmmEnums */ inline RegexMatchFlags operator~(RegexMatchFlags flags) { return static_cast(~static_cast(flags)); } /** @ingroup glibmmEnums */ inline RegexMatchFlags& operator|=(RegexMatchFlags& lhs, RegexMatchFlags rhs) { return (lhs = static_cast(static_cast(lhs) | static_cast(rhs))); } /** @ingroup glibmmEnums */ inline RegexMatchFlags& operator&=(RegexMatchFlags& lhs, RegexMatchFlags rhs) { return (lhs = static_cast(static_cast(lhs) & static_cast(rhs))); } /** @ingroup glibmmEnums */ inline RegexMatchFlags& operator^=(RegexMatchFlags& lhs, RegexMatchFlags rhs) { return (lhs = static_cast(static_cast(lhs) ^ static_cast(rhs))); } /** %Exception class for Regex */ class GLIBMM_API RegexError : public Glib::Error { public: /** @var Code COMPILE * Compilation of the regular expression failed. * * @var Code OPTIMIZE * Optimization of the regular expression failed. 
* * @var Code REPLACE * Replacement failed due to an ill-formed replacement * string. * * @var Code MATCH * The match process failed. * * @var Code INTERNAL * Internal error of the regular expression engine. * @newin{2,16} * * @var Code STRAY_BACKSLASH * "\\\\" at end of pattern. @newin{2,16} * * @var Code MISSING_CONTROL_CHAR * "\\\\c" at end of pattern. @newin{2,16} * * @var Code UNRECOGNIZED_ESCAPE * Unrecognized character follows "\\\\". * @newin{2,16} * * @var Code QUANTIFIERS_OUT_OF_ORDER * Numbers out of order in "{}" * quantifier. @newin{2,16} * * @var Code QUANTIFIER_TOO_BIG * Number too big in "{}" quantifier. * @newin{2,16} * * @var Code UNTERMINATED_CHARACTER_CLASS * Missing terminating "]" for * character class. @newin{2,16} * * @var Code INVALID_ESCAPE_IN_CHARACTER_CLASS * Invalid escape sequence * in character class. @newin{2,16} * * @var Code RANGE_OUT_OF_ORDER * Range out of order in character class. * @newin{2,16} * * @var Code NOTHING_TO_REPEAT * Nothing to repeat. @newin{2,16} * * @var Code UNRECOGNIZED_CHARACTER * Unrecognized character after "(?", * "(?<" or "(?P". @newin{2,16} * * @var Code POSIX_NAMED_CLASS_OUTSIDE_CLASS * POSIX named classes are * supported only within a class. @newin{2,16} * * @var Code UNMATCHED_PARENTHESIS * Missing terminating ")" or ")" * without opening "(". @newin{2,16} * * @var Code INEXISTENT_SUBPATTERN_REFERENCE * Reference to non-existent * subpattern. @newin{2,16} * * @var Code UNTERMINATED_COMMENT * Missing terminating ")" after comment. * @newin{2,16} * * @var Code EXPRESSION_TOO_LARGE * Regular expression too large. * @newin{2,16} * * @var Code MEMORY_ERROR * Failed to get memory. @newin{2,16} * * @var Code VARIABLE_LENGTH_LOOKBEHIND * Lookbehind assertion is not * fixed length. @newin{2,16} * * @var Code MALFORMED_CONDITION * Malformed number or name after "(?(". * @newin{2,16} * * @var Code TOO_MANY_CONDITIONAL_BRANCHES * Conditional group contains * more than two branches. 
@newin{2,16} * * @var Code ASSERTION_EXPECTED * Assertion expected after "(?(". * @newin{2,16} * * @var Code UNKNOWN_POSIX_CLASS_NAME * Unknown POSIX class name. * @newin{2,16} * * @var Code POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED * POSIX collating * elements are not supported. @newin{2,16} * * @var Code HEX_CODE_TOO_LARGE * Character value in "\\\\x{...}" sequence * is too large. @newin{2,16} * * @var Code INVALID_CONDITION * Invalid condition "(?(0)". @newin{2,16} * * @var Code SINGLE_BYTE_MATCH_IN_LOOKBEHIND * \\\\C not allowed in * lookbehind assertion. @newin{2,16} * * @var Code INFINITE_LOOP * Recursive call could loop indefinitely. * @newin{2,16} * * @var Code MISSING_SUBPATTERN_NAME_TERMINATOR * Missing terminator * in subpattern name. @newin{2,16} * * @var Code DUPLICATE_SUBPATTERN_NAME * Two named subpatterns have * the same name. @newin{2,16} * * @var Code MALFORMED_PROPERTY * Malformed "\\\\P" or "\\\\p" sequence. * @newin{2,16} * * @var Code UNKNOWN_PROPERTY * Unknown property name after "\\\\P" or * "\\\\p". @newin{2,16} * * @var Code SUBPATTERN_NAME_TOO_LONG * Subpattern name is too long * (maximum 32 characters). @newin{2,16} * * @var Code TOO_MANY_SUBPATTERNS * Too many named subpatterns (maximum * 10,000). @newin{2,16} * * @var Code INVALID_OCTAL_VALUE * Octal value is greater than "\\\\377". * @newin{2,16} * * @var Code TOO_MANY_BRANCHES_IN_DEFINE * "DEFINE" group contains more * than one branch. @newin{2,16} * * @var Code DEFINE_REPETION * Repeating a "DEFINE" group is not allowed. * This error is never raised. @newin{2,16} Deprecated: 2.34. * * @var Code INCONSISTENT_NEWLINE_OPTIONS * Inconsistent newline options. * @newin{2,16} * * @var Code MISSING_BACK_REFERENCE * "\\\\g" is not followed by a braced, * angle-bracketed, or quoted name or number, or by a plain number. @newin{2,16} * * @var Code INVALID_RELATIVE_REFERENCE * Relative reference must not be zero. 
@newin{2,34} * * @var Code BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN * The backtracing * control verb used does not allow an argument. @newin{2,34} * * @var Code UNKNOWN_BACKTRACKING_CONTROL_VERB * Unknown backtracing * control verb. @newin{2,34} * * @var Code NUMBER_TOO_BIG * Number is too big in escape sequence. @newin{2,34} * * @var Code MISSING_SUBPATTERN_NAME * Missing subpattern name. @newin{2,34} * * @var Code MISSING_DIGIT * Missing digit. @newin{2,34} * * @var Code INVALID_DATA_CHARACTER * In JavaScript compatibility mode, * "[" is an invalid data character. @newin{2,34} * * @var Code EXTRA_SUBPATTERN_NAME * Different names for subpatterns of the * same number are not allowed. @newin{2,34} * * @var Code BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED * The backtracing control * verb requires an argument. @newin{2,34} * * @var Code INVALID_CONTROL_CHAR * "\\\\c" must be followed by an ASCII * character. @newin{2,34} * * @var Code MISSING_NAME * "\\\\k" is not followed by a braced, angle-bracketed, or * quoted name. @newin{2,34} * * @var Code NOT_SUPPORTED_IN_CLASS * "\\\\N" is not supported in a class. @newin{2,34} * * @var Code TOO_MANY_FORWARD_REFERENCES * Too many forward references. @newin{2,34} * * @var Code NAME_TOO_LONG * The name is too long in "(*MARK)", "(*PRUNE)", * "(*SKIP)", or "(*THEN)". @newin{2,34} * * @var Code CHARACTER_VALUE_TOO_LARGE * The character value in the \\\\u sequence is * too large. @newin{2,34} * * @enum Code * * %Error codes returned by regular expressions functions. 
* * @newin{2,14} */ enum Code { COMPILE = 0, OPTIMIZE = 1, REPLACE = 2, MATCH = 3, INTERNAL = 4, STRAY_BACKSLASH = 101, MISSING_CONTROL_CHAR = 102, UNRECOGNIZED_ESCAPE = 103, QUANTIFIERS_OUT_OF_ORDER = 104, QUANTIFIER_TOO_BIG = 105, UNTERMINATED_CHARACTER_CLASS = 106, INVALID_ESCAPE_IN_CHARACTER_CLASS = 107, RANGE_OUT_OF_ORDER = 108, NOTHING_TO_REPEAT = 109, UNRECOGNIZED_CHARACTER = 112, POSIX_NAMED_CLASS_OUTSIDE_CLASS = 113, UNMATCHED_PARENTHESIS = 114, INEXISTENT_SUBPATTERN_REFERENCE = 115, UNTERMINATED_COMMENT = 118, EXPRESSION_TOO_LARGE = 120, MEMORY_ERROR = 121, VARIABLE_LENGTH_LOOKBEHIND = 125, MALFORMED_CONDITION = 126, TOO_MANY_CONDITIONAL_BRANCHES = 127, ASSERTION_EXPECTED = 128, UNKNOWN_POSIX_CLASS_NAME = 130, POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED = 131, HEX_CODE_TOO_LARGE = 134, INVALID_CONDITION = 135, SINGLE_BYTE_MATCH_IN_LOOKBEHIND = 136, INFINITE_LOOP = 140, MISSING_SUBPATTERN_NAME_TERMINATOR = 142, DUPLICATE_SUBPATTERN_NAME = 143, MALFORMED_PROPERTY = 146, UNKNOWN_PROPERTY = 147, SUBPATTERN_NAME_TOO_LONG = 148, TOO_MANY_SUBPATTERNS = 149, INVALID_OCTAL_VALUE = 151, TOO_MANY_BRANCHES_IN_DEFINE = 154, DEFINE_REPETION = 155, INCONSISTENT_NEWLINE_OPTIONS = 156, MISSING_BACK_REFERENCE = 157, INVALID_RELATIVE_REFERENCE = 158, BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN = 159, UNKNOWN_BACKTRACKING_CONTROL_VERB = 160, NUMBER_TOO_BIG = 161, MISSING_SUBPATTERN_NAME = 162, MISSING_DIGIT = 163, INVALID_DATA_CHARACTER = 164, EXTRA_SUBPATTERN_NAME = 165, BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED = 166, INVALID_CONTROL_CHAR = 168, MISSING_NAME = 169, NOT_SUPPORTED_IN_CLASS = 171, TOO_MANY_FORWARD_REFERENCES = 172, NAME_TOO_LONG = 175, CHARACTER_VALUE_TOO_LARGE = 176 }; RegexError(Code error_code, const Glib::ustring& error_message); explicit RegexError(GError* gobject); Code code() const; #ifndef DOXYGEN_SHOULD_SKIP_THIS private: static void throw_func(GError* gobject); friend GLIBMM_API void wrap_init(); // uses throw_func() #endif //DOXYGEN_SHOULD_SKIP_THIS 
}; class GLIBMM_API MatchInfo; /** Perl-compatible regular expressions - matches strings against regular expressions. * * The Glib::Regex functions implement regular expression pattern matching using * syntax and semantics similar to Perl regular expression. * * Some functions accept a start_position argument, setting it differs from just * passing over a shortened string and setting REGEX_MATCH_NOTBOL in the case * of a pattern that begins with any kind of lookbehind assertion. For example, * consider the pattern "\Biss\B" which finds occurrences of "iss" in the middle * of words. ("\B" matches only if the current position in the subject is not a * word boundary.) When applied to the string "Mississipi" from the fourth byte, * namely "issipi", it does not match, because "\B" is always false at the * start of the subject, which is deemed to be a word boundary. However, if * the entire string is passed , but with start_position set to 4, it finds the * second occurrence of "iss" because it is able to look behind the starting point * to discover that it is preceded by a letter. * * Note that, unless you set the REGEX_RAW flag, all the strings passed to these * functions must be encoded in UTF-8. The lengths and the positions inside the * strings are in bytes and not in characters, so, for instance, * "\xc3\xa0" (i.e. "à") is two bytes long but it is treated as a single * character. If you set REGEX_RAW the strings can be non-valid UTF-8 strings * and a byte is treated as a character, so "\xc3\xa0" is two bytes and * two characters long. * * When matching a pattern, "\n" matches only against a "\n" character in the * string, and "\r" matches only a "\r" character. To match any newline sequence * use "\R". 
This particular group matches either the two-character sequence * CR + LF ("\r\n"), or one of the single characters LF (linefeed, U+000A, "\n"), * VT (vertical tab, U+000B, "\v"), FF (formfeed, U+000C, "\f"), CR (carriage * return, U+000D, "\r"), NEL (next line, U+0085), LS (line separator, U+2028), * or PS (paragraph separator, U+2029). * * The behaviour of the dot, circumflex, and dollar metacharacters are affected * by newline characters, the default is to recognize any newline character (the * same characters recognized by "\R"). This can be changed with REGEX_NEWLINE_CR, * REGEX_NEWLINE_LF and REGEX_NEWLINE_CRLF compile options, and with * REGEX_MATCH_NEWLINE_ANY, REGEX_MATCH_NEWLINE_CR, REGEX_MATCH_NEWLINE_LF * and REGEX_MATCH_NEWLINE_CRLF match options. These settings are also * relevant when compiling a pattern if REGEX_EXTENDED is set, and an unescaped * "#" outside a character class is encountered. This indicates a comment that * lasts until after the next newline. * * Creating and manipulating the same Glib::Regex class from different threads is * not a problem as Glib::Regex does not modify its internal state between creation and * destruction, on the other hand Glib::MatchInfo is not threadsafe. * * The regular expressions low level functionalities are obtained through the * excellent PCRE library written by Philip Hazel. * * @note When you call a match() or a match_all() method taking a MatchInfo, * use a Glib::ustring that still exists when you later call MatchInfo methods. * If you call match() or match_all() with a std::string or a string literal, * the method will internally use a temporary copy of the string. The copy will * not exist when you call MatchInfo methods. The use of temporary string values * is deprecated in all match() and match_all() methods since glibmm 2.64. * @code * Glib::ustring str1 = "String to search through"; * regex->match(str1, match_info); * // ... 
* if (match_info.matches()) // str1 must still exist here * @endcode * * @newin{2,14} */ class GLIBMM_API Regex final { public: #ifndef DOXYGEN_SHOULD_SKIP_THIS using CppObjectType = Regex; using BaseObjectType = GRegex; #endif /* DOXYGEN_SHOULD_SKIP_THIS */ /** Increment the reference count for this object. * You should never need to do this manually - use the object via a RefPtr instead. */ void reference() const; /** Decrement the reference count for this object. * You should never need to do this manually - use the object via a RefPtr instead. */ void unreference() const; ///Provides access to the underlying C instance. GRegex* gobj(); ///Provides access to the underlying C instance. const GRegex* gobj() const; ///Provides access to the underlying C instance. The caller is responsible for unrefing it. Use when directly setting fields in structs. GRegex* gobj_copy() const; Regex() = delete; // noncopyable Regex(const Regex&) = delete; Regex& operator=(const Regex&) = delete; protected: // Do not derive this. Glib::Regex can neither be constructed nor deleted. void operator delete(void*, std::size_t); private: public: /// @throws Glib::RegexError static Glib::RefPtr create(const Glib::ustring& pattern, RegexCompileFlags compile_options = static_cast(0), RegexMatchFlags match_options = static_cast(0)); /** Gets the pattern string associated with @a regex, i.e.\ a copy of * the string passed to g_regex_new(). * * @newin{2,14} * * @return The pattern of @a regex. */ Glib::ustring get_pattern() const; /** Returns the number of the highest back reference * in the pattern, or 0 if the pattern does not contain * back references. * * @newin{2,14} * * @return The number of the highest back reference. */ int get_max_backref() const; /** Returns the number of capturing subpatterns in the pattern. * * @newin{2,14} * * @return The number of capturing subpatterns. */ int get_capture_count() const; /** Checks whether the pattern contains explicit CR or LF references. 
* * @newin{2,34} * * @return true if the pattern contains explicit CR or LF references. */ bool get_has_cr_or_lf() const; /** Gets the number of characters in the longest lookbehind assertion in the * pattern. This information is useful when doing multi-segment matching using * the partial matching facilities. * * @newin{2,38} * * @return The number of characters in the longest lookbehind assertion. */ int get_max_lookbehind() const; /** Retrieves the number of the subexpression named @a name. * * @newin{2,14} * * @param name Name of the subexpression. * @return The number of the subexpression or -1 if @a name * does not exists. */ int get_string_number(const Glib::ustring& name) const; /** Returns the compile options that @a regex was created with. * * Depending on the version of PCRE that is used, this may or may not * include flags set by option expressions such as `(?i)` found at the * top-level within the compiled pattern. * * @newin{2,26} * * @return Flags from RegexCompileFlags. */ RegexCompileFlags get_compile_flags() const; /** Returns the match options that @a regex was created with. * * @newin{2,26} * * @return Flags from RegexMatchFlags. */ RegexMatchFlags get_match_flags() const; static Glib::ustring escape_string(const Glib::ustring& string); /** Scans for a match in @a string for @a pattern. * * This function is equivalent to g_regex_match() but it does not * require to compile the pattern with g_regex_new(), avoiding some * lines of code when you need just to do a match without extracting * substrings, capture counts, and so on. * * If this function is to be called on the same @a pattern more than * once, it's more efficient to compile the pattern once with * g_regex_new() and then use g_regex_match(). * * @newin{2,14} * * @param pattern The regular expression. * @param string The string to scan for matches. * @param compile_options Compile options for the regular expression, or 0. * @param match_options Match options, or 0. 
* @return true if the string matched, false otherwise. */ static bool match_simple(const Glib::ustring& pattern, const Glib::ustring& string, RegexCompileFlags compile_options = static_cast(0), RegexMatchFlags match_options = static_cast(0)); /** Scans for a match in @a string for the pattern in @a regex. * The @a match_options are combined with the match options specified * when the @a regex structure was created, letting you have more * flexibility in reusing Regex structures. * * Unless REGEX_RAW is specified in the options, @a string must be valid UTF-8. * * A MatchInfo structure, used to get information on the match, * is stored in @a match_info if not nullptr. Note that if @a match_info * is not nullptr then it is created even if the function returns false, * i.e. you must free it regardless if regular expression actually matched. * * To retrieve all the non-overlapping matches of the pattern in * string you can use g_match_info_next(). * * * [C example ellipted] * * @a string is not copied and is used in MatchInfo internally. If * you use any MatchInfo method (except g_match_info_free()) after * freeing or modifying @a string then the behaviour is undefined. * * @newin{2,14} * * @param string The string to scan for matches. * @param match_options Match options. * @param match_info Pointer to location where to store * the MatchInfo, or nullptr if you do not need it. * @return true is the string matched, false otherwise. */ bool match( const Glib::ustring& string, Glib::MatchInfo& match_info, RegexMatchFlags match_options = static_cast(0) ); /// A match() method not requiring a Glib::MatchInfo. bool match(const Glib::ustring& string, RegexMatchFlags match_options = static_cast(0)); /** A match() method with a start position and a Glib::MatchInfo. 
* @throws Glib::RegexError */ bool match( const Glib::ustring& string, int start_position, Glib::MatchInfo& match_info, RegexMatchFlags match_options = static_cast(0) ); /** Scans for a match in @a string for the pattern in @a regex. * The @a match_options are combined with the match options specified * when the @a regex structure was created, letting you have more * flexibility in reusing Regex structures. * * Setting @a start_position differs from just passing over a shortened * string and setting REGEX_MATCH_NOTBOL in the case of a pattern * that begins with any kind of lookbehind assertion, such as "\\b". * * Unless REGEX_RAW is specified in the options, @a string must be valid UTF-8. * * A MatchInfo structure, used to get information on the match, is * stored in @a match_info if not nullptr. Note that if @a match_info is * not nullptr then it is created even if the function returns false, * i.e. you must free it regardless if regular expression actually * matched. * * @a string is not copied and is used in MatchInfo internally. If * you use any MatchInfo method (except g_match_info_free()) after * freeing or modifying @a string then the behaviour is undefined. * * To retrieve all the non-overlapping matches of the pattern in * string you can use g_match_info_next(). * * * [C example ellipted] * * @newin{2,14} * * @param string The string to scan for matches. * @param string_len The length of @a string, in bytes, or -1 if @a string is nul-terminated. * @param start_position Starting index of the string to match, in bytes. * @param match_options Match options. * @param match_info Pointer to location where to store * the MatchInfo, or nullptr if you do not need it. * @return true is the string matched, false otherwise. 
* * @throws Glib::RegexError */ bool match( const Glib::ustring& string, gssize string_len, int start_position, Glib::MatchInfo& match_info, RegexMatchFlags match_options = static_cast(0) ); /** A match() method with a start position not requiring a Glib::MatchInfo. * @throws Glib::RegexError */ bool match(const Glib::ustring& string, int start_position, RegexMatchFlags match_options); /** A match() method with a string length and start position not requiring a * Glib::MatchInfo. */ bool match(const Glib::ustring& string, gssize string_len, int start_position, RegexMatchFlags match_options); #if defined(GLIBMM_DISABLE_DEPRECATED) || defined(DOXYGEN_SHOULD_SKIP_THIS) /** Rvalue references, such as temporary values, are deprecated. * This declaration is enabled if the preprocessor constant * GLIBMM_DISABLE_DEPRECATED is defined. */ bool match(Glib::ustring&& string, ...) = delete; #endif // GLIBMM_DISABLE_DEPRECATED || DOXYGEN_SHOULD_SKIP_THIS /** Using the standard algorithm for regular expression matching only * the longest match in the string is retrieved. This function uses * a different algorithm so it can retrieve all the possible matches. * For more documentation see g_regex_match_all_full(). * * A MatchInfo structure, used to get information on the match, is * stored in @a match_info if not nullptr. Note that if @a match_info is * not nullptr then it is created even if the function returns false, * i.e. you must free it regardless if regular expression actually * matched. * * @a string is not copied and is used in MatchInfo internally. If * you use any MatchInfo method (except g_match_info_free()) after * freeing or modifying @a string then the behaviour is undefined. * * @newin{2,14} * * @param string The string to scan for matches. * @param match_options Match options. * @param match_info Pointer to location where to store * the MatchInfo, or nullptr if you do not need it. * @return true is the string matched, false otherwise. 
*/ bool match_all( const Glib::ustring& string, Glib::MatchInfo& match_info, RegexMatchFlags match_options = static_cast(0) ); /// A match_all() method not requiring a Glib::MatchInfo. bool match_all(const Glib::ustring& string, RegexMatchFlags match_options = static_cast(0)); /** A match_all() method with a start positon and a Glib::MatchInfo. * @throws Glib::RegexError */ bool match_all( const Glib::ustring& string, int start_position, Glib::MatchInfo& match_info, RegexMatchFlags match_options = static_cast(0) ); /** Using the standard algorithm for regular expression matching only * the longest match in the @a string is retrieved, it is not possible * to obtain all the available matches. For instance matching * " " against the pattern "<.*>" * you get " ". * * This function uses a different algorithm (called DFA, i.e. deterministic * finite automaton), so it can retrieve all the possible matches, all * starting at the same point in the string. For instance matching * " " against the pattern "<.*>;" * you would obtain three matches: " ", * " " and "". * * The number of matched strings is retrieved using * g_match_info_get_match_count(). To obtain the matched strings and * their position you can use, respectively, g_match_info_fetch() and * g_match_info_fetch_pos(). Note that the strings are returned in * reverse order of length; that is, the longest matching string is * given first. * * Note that the DFA algorithm is slower than the standard one and it * is not able to capture substrings, so backreferences do not work. * * Setting @a start_position differs from just passing over a shortened * string and setting REGEX_MATCH_NOTBOL in the case of a pattern * that begins with any kind of lookbehind assertion, such as "\\b". * * Unless REGEX_RAW is specified in the options, @a string must be valid UTF-8. * * A MatchInfo structure, used to get information on the match, is * stored in @a match_info if not nullptr. 
Note that if @a match_info is * not nullptr then it is created even if the function returns false, * i.e. you must free it regardless if regular expression actually * matched. * * @a string is not copied and is used in MatchInfo internally. If * you use any MatchInfo method (except g_match_info_free()) after * freeing or modifying @a string then the behaviour is undefined. * * @newin{2,14} * * @param string The string to scan for matches. * @param string_len The length of @a string, in bytes, or -1 if @a string is nul-terminated. * @param start_position Starting index of the string to match, in bytes. * @param match_options Match options. * @param match_info Pointer to location where to store * the MatchInfo, or nullptr if you do not need it. * @return true is the string matched, false otherwise. * * @throws Glib::RegexError */ bool match_all( const Glib::ustring& string, gssize string_len, int start_position, Glib::MatchInfo& match_info, RegexMatchFlags match_options = static_cast(0) ); /** A match_all() method with a start position not requiring a Glib::MatchInfo. * @throws Glib::RegexError */ bool match_all(const Glib::ustring& string, int start_position, RegexMatchFlags match_options); /** A match_all() method with a start position and a string length not * requiring a Glib::MatchInfo. * @throws Glib::RegexError */ bool match_all(const Glib::ustring& string, gssize string_len, int start_position, RegexMatchFlags match_options); #if defined(GLIBMM_DISABLE_DEPRECATED) || defined(DOXYGEN_SHOULD_SKIP_THIS) /** Rvalue references, such as temporary values, are deprecated. * This declaration is enabled if the preprocessor constant * GLIBMM_DISABLE_DEPRECATED is defined. */ bool match_all(Glib::ustring&& string, ...) = delete; #endif // GLIBMM_DISABLE_DEPRECATED || DOXYGEN_SHOULD_SKIP_THIS /** Breaks the string on the pattern, and returns an array of * the tokens. 
If the pattern contains capturing parentheses, * then the text for each of the substrings will also be returned. * If the pattern does not match anywhere in the string, then the * whole string is returned as the first token. * * This function is equivalent to g_regex_split() but it does * not require to compile the pattern with g_regex_new(), avoiding * some lines of code when you need just to do a split without * extracting substrings, capture counts, and so on. * * If this function is to be called on the same @a pattern more than * once, it's more efficient to compile the pattern once with * g_regex_new() and then use g_regex_split(). * * As a special case, the result of splitting the empty string "" * is an empty vector, not a vector containing a single string. * The reason for this special case is that being able to represent * an empty vector is typically more useful than consistent handling * of empty elements. If you do need to represent empty elements, * you'll need to check for the empty string before calling this * function. * * A pattern that can match empty strings splits @a string into * separate characters wherever it matches the empty string between * characters. For example splitting "ab c" using as a separator * "\\s*", you will get "a", "b" and "c". * * @newin{2,14} * * @param pattern The regular expression. * @param string The string to scan for matches. * @param compile_options Compile options for the regular expression, or 0. * @param match_options Match options, or 0. * @return A nullptr-terminated array of strings. */ static Glib::StringArrayHandle split_simple(const Glib::ustring& pattern, const Glib::ustring& string, RegexCompileFlags compile_options = static_cast(0), RegexMatchFlags match_options = static_cast(0)); /** Breaks the string on the pattern, and returns an array of the tokens. * If the pattern contains capturing parentheses, then the text for each * of the substrings will also be returned. 
If the pattern does not match * anywhere in the string, then the whole string is returned as the first * token. * * As a special case, the result of splitting the empty string "" is an * empty vector, not a vector containing a single string. The reason for * this special case is that being able to represent an empty vector is * typically more useful than consistent handling of empty elements. If * you do need to represent empty elements, you'll need to check for the * empty string before calling this function. * * A pattern that can match empty strings splits @a string into separate * characters wherever it matches the empty string between characters. * For example splitting "ab c" using as a separator "\\s*", you will get * "a", "b" and "c". * * @newin{2,14} * * @param string The string to split with the pattern. * @param match_options Match time option flags. * @return A nullptr-terminated gchar ** array. */ Glib::StringArrayHandle split(const Glib::ustring& string, RegexMatchFlags match_options = static_cast(0)); /** Breaks the string on the pattern, and returns an array of the tokens. * If the pattern contains capturing parentheses, then the text for each * of the substrings will also be returned. If the pattern does not match * anywhere in the string, then the whole string is returned as the first * token. * * As a special case, the result of splitting the empty string "" is an * empty vector, not a vector containing a single string. The reason for * this special case is that being able to represent an empty vector is * typically more useful than consistent handling of empty elements. If * you do need to represent empty elements, you'll need to check for the * empty string before calling this function. * * A pattern that can match empty strings splits @a string into separate * characters wherever it matches the empty string between characters. * For example splitting "ab c" using as a separator "\\s*", you will get * "a", "b" and "c". 
* * Setting @a start_position differs from just passing over a shortened * string and setting REGEX_MATCH_NOTBOL in the case of a pattern * that begins with any kind of lookbehind assertion, such as "\\b". * * @newin{2,14} * * @param string The string to split with the pattern. * @param string_len The length of @a string, in bytes, or -1 if @a string is nul-terminated. * @param start_position Starting index of the string to match, in bytes. * @param match_options Match time option flags. * @param max_tokens The maximum number of tokens to split @a string into. * If this is less than 1, the string is split completely. * @return A nullptr-terminated gchar ** array. * * @throws Glib::RegexError */ Glib::StringArrayHandle split(const gchar* string, gssize string_len, int start_position, RegexMatchFlags match_options = static_cast(0), int max_tokens = 0) const; /// @throws Glib::RegexError Glib::StringArrayHandle split(const Glib::ustring& string, int start_position, RegexMatchFlags match_options, int max_tokens) const; /** Replaces all occurrences of the pattern in @a regex with the * replacement text. Backreferences of the form '\\number' or * '\\g' in the replacement text are interpolated by the * number-th captured subexpression of the match, '\\g' refers * to the captured subexpression with the given name. '\\0' refers * to the complete match, but '\\0' followed by a number is the octal * representation of a character. To include a literal '\\' in the * replacement, write '\\\\\\\\'. * * There are also escapes that changes the case of the following text: * * - \\l: Convert to lower case the next character * - \\u: Convert to upper case the next character * - \\L: Convert to lower case till \\E * - \\U: Convert to upper case till \\E * - \\E: End case modification * * If you do not need to use backreferences use g_regex_replace_literal(). * * The @a replacement string must be UTF-8 encoded even if REGEX_RAW was * passed to g_regex_new(). 
If you want to use not UTF-8 encoded strings * you can use g_regex_replace_literal(). * * Setting @a start_position differs from just passing over a shortened * string and setting REGEX_MATCH_NOTBOL in the case of a pattern that * begins with any kind of lookbehind assertion, such as "\\b". * * @newin{2,14} * * @param string The string to perform matches against. * @param string_len The length of @a string, in bytes, or -1 if @a string is nul-terminated. * @param start_position Starting index of the string to match, in bytes. * @param replacement Text to replace each match with. * @param match_options Options for the match. * @return A newly allocated string containing the replacements. * * @throws Glib::RegexError */ Glib::ustring replace(const gchar* string, gssize string_len, int start_position, const Glib::ustring& replacement, RegexMatchFlags match_options = static_cast(0)); /// @throws Glib::RegexError Glib::ustring replace(const Glib::ustring& string, int start_position, const Glib::ustring& replacement, RegexMatchFlags match_options); /** Replaces all occurrences of the pattern in @a regex with the * replacement text. @a replacement is replaced literally, to * include backreferences use g_regex_replace(). * * Setting @a start_position differs from just passing over a * shortened string and setting REGEX_MATCH_NOTBOL in the * case of a pattern that begins with any kind of lookbehind * assertion, such as "\\b". * * @newin{2,14} * * @param string The string to perform matches against. * @param string_len The length of @a string, in bytes, or -1 if @a string is nul-terminated. * @param start_position Starting index of the string to match, in bytes. * @param replacement Text to replace each match with. * @param match_options Options for the match. * @return A newly allocated string containing the replacements. 
* * @throws Glib::RegexError */ Glib::ustring replace_literal(const gchar * string, gssize string_len, int start_position, const Glib::ustring& replacement, RegexMatchFlags match_options = static_cast(0)); /// @throws Glib::RegexError Glib::ustring replace_literal(const Glib::ustring& string, int start_position, const Glib::ustring& replacement, RegexMatchFlags match_options); /** Replaces occurrences of the pattern in regex with the output of * @a eval for that occurrence. * * Setting @a start_position differs from just passing over a shortened * string and setting REGEX_MATCH_NOTBOL in the case of a pattern * that begins with any kind of lookbehind assertion, such as "\\b". * * The following example uses g_regex_replace_eval() to replace multiple * strings at once: * * [C example ellipted] * * @newin{2,14} * * @param string String to perform matches against. * @param string_len The length of @a string, in bytes, or -1 if @a string is nul-terminated. * @param start_position Starting index of the string to match, in bytes. * @param match_options Options for the match. * @param eval A function to call for each match. * @param user_data User data to pass to the function. * @return A newly allocated string containing the replacements. * * @throws Glib::RegexError */ Glib::ustring replace_eval(const Glib::ustring& string, gssize string_len, int start_position, RegexMatchFlags match_options, GRegexEvalCallback eval, gpointer user_data); /** Checks whether @a replacement is a valid replacement string * (see g_regex_replace()), i.e.\ that all escape sequences in * it are valid. * * If @a has_references is not nullptr then @a replacement is checked * for pattern references. For instance, replacement text 'foo\\n' * does not contain references and may be evaluated without information * about actual match, but '\\0\\1' (whole match followed by first * subpattern) requires valid MatchInfo object. * * @newin{2,14} * * @param replacement The replacement string. 
* @param has_references Location to store information about * references in @a replacement or nullptr. * @return Whether @a replacement is a valid replacement string. * * @throws Glib::RegexError */ static bool check_replacement(const Glib::ustring& replacement, gboolean* has_references); }; //TODO: Add C++ iterator like functionality for this class. /** MatchInfo - MatchInfo is used to retrieve information about the regular * expression match which created it. * @newin{2,28} */ class GLIBMM_API MatchInfo { public: #ifndef DOXYGEN_SHOULD_SKIP_THIS using CppObjectType = MatchInfo; using BaseObjectType = GMatchInfo; #endif /* DOXYGEN_SHOULD_SKIP_THIS */ private: public: /// Default constructor. MatchInfo(); /** C object constructor. * @param castitem The C object. * @param take_the_ownership Whether to destroy the C object with the wrapper or * not. */ explicit MatchInfo(GMatchInfo* castitem, bool take_the_ownership = true); //TODO: Rename to take_ownership when we can rename the member variable. MatchInfo(const MatchInfo& other) = delete; MatchInfo& operator=(const MatchInfo& other) = delete; MatchInfo(MatchInfo&& other) noexcept; MatchInfo& operator=(MatchInfo&& other) noexcept; /// Destructor. virtual ~MatchInfo(); /// Provides access to the underlying C object. GMatchInfo* gobj() { return reinterpret_cast(gobject_); } /// Provides access to the underlying C object. const GMatchInfo* gobj() const { return reinterpret_cast(gobject_); } private: friend class Regex; public: /** Returns Regex object used in @a match_info. It belongs to Glib * and must not be freed. Use g_regex_ref() if you need to keep it * after you free @a match_info object. * * @newin{2,14} * * @return Regex object used in @a match_info. */ Glib::RefPtr get_regex(); /** Returns Regex object used in @a match_info. It belongs to Glib * and must not be freed. Use g_regex_ref() if you need to keep it * after you free @a match_info object. * * @newin{2,14} * * @return Regex object used in @a match_info. 
*/ Glib::RefPtr get_regex() const; /** Returns the string searched with @a match_info. This is the * string passed to g_regex_match() or g_regex_replace() so * you may not free it before calling this function. * * @newin{2,14} * * @return The string searched with @a match_info. */ Glib::ustring get_string() const; /** Returns whether the previous match operation succeeded. * * @newin{2,14} * * @return true if the previous match operation succeeded, * false otherwise. */ bool matches() const; /** Scans for the next match using the same parameters of the previous * call to g_regex_match_full() or g_regex_match() that returned * @a match_info. * * The match is done on the string passed to the match function, so you * cannot free it before calling this function. * * @newin{2,14} * * @return true is the string matched, false otherwise. * * @throws Glib::RegexError */ bool next(); /** Retrieves the number of matched substrings (including substring 0, * that is the whole matched text), so 1 is returned if the pattern * has no substrings in it and 0 is returned if the match failed. * * If the last match was obtained using the DFA algorithm, that is * using g_regex_match_all() or g_regex_match_all_full(), the retrieved * count is not that of the number of capturing parentheses but that of * the number of matched substrings. * * @newin{2,14} * * @return Number of matched substrings, or -1 if an error occurred. */ int get_match_count() const; /** Usually if the string passed to g_regex_match*() matches as far as * it goes, but is too short to match the entire pattern, false is * returned. There are circumstances where it might be helpful to * distinguish this case from other cases in which there is no match. * * Consider, for example, an application where a human is required to * type in data for a field with specific formatting requirements. 
An * example might be a date in the form ddmmmyy, defined by the pattern * "^\\d?\\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\\d\\d$". * If the application sees the user’s keystrokes one by one, and can * check that what has been typed so far is potentially valid, it is * able to raise an error as soon as a mistake is made. * * GRegex supports the concept of partial matching by means of the * REGEX_MATCH_PARTIAL_SOFT and REGEX_MATCH_PARTIAL_HARD flags. * When they are used, the return code for * g_regex_match() or g_regex_match_full() is, as usual, true * for a complete match, false otherwise. But, when these functions * return false, you can check if the match was partial calling * g_match_info_is_partial_match(). * * The difference between REGEX_MATCH_PARTIAL_SOFT and * REGEX_MATCH_PARTIAL_HARD is that when a partial match is encountered * with REGEX_MATCH_PARTIAL_SOFT, matching continues to search for a * possible complete match, while with REGEX_MATCH_PARTIAL_HARD matching * stops at the partial match. * When both REGEX_MATCH_PARTIAL_SOFT and REGEX_MATCH_PARTIAL_HARD * are set, the latter takes precedence. * * There were formerly some restrictions on the pattern for partial matching. * The restrictions no longer apply. * * See pcrepartial(3) for more information on partial matching. * * @newin{2,14} * * @return true if the match was partial, false otherwise. */ bool is_partial_match() const; /** Returns a new string containing the text in @a string_to_expand with * references and escape sequences expanded. References refer to the last * match done with @a string against @a regex and have the same syntax used by * g_regex_replace(). * * The @a string_to_expand must be UTF-8 encoded even if REGEX_RAW was * passed to g_regex_new(). * * The backreferences are extracted from the string passed to the match * function, so you cannot call this function after freeing the string. 
* * @a match_info may be nullptr in which case @a string_to_expand must not * contain references. For instance "foo\\n" does not refer to an actual * pattern and '\\n' merely will be replaced with \\n character, * while to expand "\\0" (whole match) one needs the result of a match. * Use g_regex_check_replacement() to find out whether @a string_to_expand * contains references. * * @newin{2,14} * * @param string_to_expand The string to expand. * @return The expanded string, or nullptr if an error occurred. * * @throws Glib::RegexError */ Glib::ustring expand_references(const Glib::ustring& string_to_expand); /** Retrieves the text matching the @a match_num'th capturing * parentheses. 0 is the full text of the match, 1 is the first paren * set, 2 the second, and so on. * * If @a match_num is a valid sub pattern but it didn't match anything * (e.g. sub pattern 1, matching "b" against "(a)?b") then an empty * string is returned. * * If the match was obtained using the DFA algorithm, that is using * g_regex_match_all() or g_regex_match_all_full(), the retrieved * string is not that of a set of parentheses but that of a matched * substring. Substrings are matched in reverse order of length, so * 0 is the longest match. * * The string is fetched from the string passed to the match function, * so you cannot call this function after freeing the string. * * @newin{2,14} * * @param match_num Number of the sub expression. * @return The matched substring, or nullptr if an error * occurred. You have to free the string yourself. */ Glib::ustring fetch(int match_num); /** Retrieves the position in bytes of the @a match_num'th capturing * parentheses. 0 is the full text of the match, 1 is the first * paren set, 2 the second, and so on. * * If @a match_num is a valid sub pattern but it didn't match anything * (e.g. sub pattern 1, matching "b" against "(a)?b") then @a start_pos * and @a end_pos are set to -1 and true is returned. 
* * If the match was obtained using the DFA algorithm, that is using * g_regex_match_all() or g_regex_match_all_full(), the retrieved * position is not that of a set of parentheses but that of a matched * substring. Substrings are matched in reverse order of length, so * 0 is the longest match. * * @newin{2,14} * * @param match_num Number of the sub expression. * @param start_pos Pointer to location where to store * the start position, or nullptr. * @param end_pos Pointer to location where to store * the end position, or nullptr. * @return true if the position was fetched, false otherwise. If * the position cannot be fetched, @a start_pos and @a end_pos are left * unchanged. */ bool fetch_pos(int match_num, int& start_pos, int& end_pos); /** Retrieves the text matching the capturing parentheses named @a name. * * If @a name is a valid sub pattern name but it didn't match anything * (e.g. sub pattern "X", matching "b" against "(?Pa)?b") * then an empty string is returned. * * The string is fetched from the string passed to the match function, * so you cannot call this function after freeing the string. * * @newin{2,14} * * @param name Name of the subexpression. * @return The matched substring, or nullptr if an error * occurred. You have to free the string yourself. */ Glib::ustring fetch_named(const Glib::ustring& name); /** Retrieves the position in bytes of the capturing parentheses named @a name. * * If @a name is a valid sub pattern name but it didn't match anything * (e.g. sub pattern "X", matching "b" against "(?Pa)?b") * then @a start_pos and @a end_pos are set to -1 and true is returned. * * @newin{2,14} * * @param name Name of the subexpression. * @param start_pos Pointer to location where to store * the start position, or nullptr. * @param end_pos Pointer to location where to store * the end position, or nullptr. * @return true if the position was fetched, false otherwise. 
* If the position cannot be fetched, @a start_pos and @a end_pos * are left unchanged. */ bool fetch_named_pos(const Glib::ustring& name, int& start_pos, int& end_pos); /** Bundles up pointers to each of the matching substrings from a match * and stores them in an array of gchar pointers. The first element in * the returned array is the match number 0, i.e. the entire matched * text. * * If a sub pattern didn't match anything (e.g. sub pattern 1, matching * "b" against "(a)?b") then an empty string is inserted. * * If the last match was obtained using the DFA algorithm, that is using * g_regex_match_all() or g_regex_match_all_full(), the retrieved * strings are not that matched by sets of parentheses but that of the * matched substring. Substrings are matched in reverse order of length, * so the first one is the longest match. * * The strings are fetched from the string passed to the match function, * so you cannot call this function after freeing the string. * * @newin{2,14} * * @return A nullptr-terminated array of gchar * * pointers. If the previous * match failed nullptr is returned. */ Glib::StringArrayHandle fetch_all(); protected: GMatchInfo* gobject_; // The C object. bool take_ownership; // Bool signaling ownership. //TODO: Give this a _ suffix when we can break API. protected: // So that Glib::Regex::match() can set the C object. void set_gobject(GMatchInfo* castitem, bool take_ownership = true); }; } // namespace Glib namespace Glib { /** A Glib::wrap() method for this object. * * @param object The C instance. * @param take_copy False if the result should take ownership of the C instance. True if it should take a new copy or ref. * @result A C++ instance that wraps this C instance. * * @relates Glib::Regex */ GLIBMM_API Glib::RefPtr wrap(GRegex* object, bool take_copy = false); } // namespace Glib #endif /* _GLIBMM_REGEX_H */