CSRegEx Class Reference
The main class for Regular Expressions.
More...
#include <csregex.h>
List of all members.
|
Public Types |
enum | RGError {
rge_ok = 0,
rge_too_many_refs = 1,
rge_missing_round_bracket = 2,
rge_overlapping_chars = 3,
rge_esc_eof = 4,
rge_missing_square_bracket = 5,
rge_invalid_esc_hex = 6,
rge_invalid_repeat_format = 7,
rge_invalid_repeat_range = 8,
rge_unbalanced_round_bracket = 9,
rge_invalid_range = 10,
rge_invalid_backreference = 11,
rge_regex_too_long = 12
} |
| Error codes for compilation. More...
|
Public Member Functions |
| CSRegEx () |
| ~CSRegEx () |
bool | Compile (const unsigned char *str) |
| Compile a regex before scanning.
|
bool | Compile (const char *str) |
bool | MatchRE (const unsigned char *str, const unsigned char *re) |
| Match a string. re is the plaintext regular expression.
|
bool | MatchRE (const char *str, const char *re) |
bool | MatchRE (const unsigned char *str, const char *re) |
bool | MatchRE (const char *str, const unsigned char *re) |
bool | Match (const unsigned char *str) |
| Match a string.
|
bool | Match (const char *str) |
bool | Match (const unsigned char *str, const unsigned char *cmp) |
| Match a string. cmp is the compiled regular expression string.
|
bool | Match (const char *str, const char *cmp) |
bool | Match (const unsigned char *str, const char *cmp) |
bool | Match (const char *str, const unsigned char *cmp) |
unsigned char * | GetCompiledString () const |
| Returns a copy of the compiled string. Must delete[] return pointer.
|
void | SetCompiledString (unsigned char *str) |
| Sets the compiled string.
|
Public Attributes |
int | error |
| -1 is ok, anything else is index in str where error occurred.
|
enum RGError | error_code |
| Error code.
|
char * | error_str |
| Human readable error information.
|
bool | bMatchHead |
| Set to true if you want to always match start of search string.
|
int | MatchStart |
| Results from a match. Index into search string.
|
int | MatchEnd |
| Results from a match. Non-inclusive. Index into search string.
|
int | BackStart [10] |
| Backreferences.
|
int | BackEnd [10] |
| End of Backreferences. Non-inclusive.
|
Private Types |
enum | RGTok {
rg_end = 0,
rg_str = 1,
rg_grp = 2,
rg_grn = 3,
rg_hed = 32,
rg_tal = 33,
rg_dot = 34,
rg_cpn = 35,
rg_cpa = 36,
rg_cpa1 = 37,
rg_cpa2 = 38,
rg_cpa3 = 39,
rg_cpa4 = 40,
rg_cpa5 = 41,
rg_cpa6 = 42,
rg_cpa7 = 43,
rg_cpa8 = 44,
rg_cpa9 = 45,
rg_bck = 46,
rg_bck1 = 47,
rg_bck2 = 48,
rg_bck3 = 49,
rg_bck4 = 50,
rg_bck5 = 51,
rg_bck6 = 52,
rg_bck7 = 53,
rg_bck8 = 54,
rg_bck9 = 55,
rg_chr = 64,
rg_opa = 96,
rg_opa1 = 97,
rg_opa2 = 98,
rg_opa3 = 99,
rg_opa4 = 100,
rg_opa5 = 101,
rg_opa6 = 102,
rg_opa7 = 103,
rg_opa8 = 104,
rg_opa9 = 105,
rg_orm = 106,
rg_opn = 108,
rg_sta = 109,
rg_pls = 110,
rg_qes = 111,
rg_sta_lazy = 112,
rg_pls_lazy = 113,
rg_qes_lazy = 114,
rg_rpt = 224,
rg_rpt_lazy = 225
} |
| Internal use for buf. Specifies item types. More...
|
Private Member Functions |
void | Put (char c) |
| Inserts a value in buf, the compiled string.
|
void | PutValue16 (unsigned short c) |
| Same as Put(), but for 16 bit values.
|
int | Compile (int pos) |
| Recursive compiling.
|
int | Escape (int pos, unsigned char &c) |
| Escape character if necessary.
|
int | CompileModifiers (int pos, int st_pos) |
| Compiles quantifiers for *?+{start,end}.
|
bool | RangeAddChar (vector< unsigned char > &chars, vector< unsigned char > &start, vector< unsigned char > &end, unsigned char c) |
| Adds a character to the set [].
|
bool | RangeAddRange (vector< unsigned char > &chars, vector< unsigned char > &start, vector< unsigned char > &end, unsigned char cstart, unsigned char cend) |
| Add a range to the set [].
|
void | CompressRange (vector< unsigned char > &chars, vector< unsigned char > &start, vector< unsigned char > &end) |
| Remove redundant info such as a char that is 1 below a range.
|
int | CompileRange (int pos) |
| Compiles sets of square brackets [].
|
void | CheckString (int str_idx) |
| Converts 1 char strings into a char. rg_str -> rg_chr conversion at end of buf only.
|
unsigned int | Next (unsigned int pos) const |
| Finds location of next item in compiled string.
|
unsigned int | GetRpt (unsigned int buf_idx, int &low, int &high, bool &lazy, unsigned int &prev) const |
| Gets repeat ranges and returns next item pos.
|
bool | Match1 (const unsigned char *str) |
| The main matching algorithm.
|
Private Attributes |
const unsigned char * | str |
| String that we are matching or compiling.
|
int | str_len |
| length of string.
|
unsigned char * | buf |
| Compiled regular expression.
|
int | buf_len |
| How many chars used in buf.
|
int | buf_size |
| Size of buffer allocated in _TCHAR's.
|
bool | tal |
| Is $ used?
|
int | opa_cnt |
| Round bracket count. Used with match_start and match_end.
|
vector< int > | last_par |
| Used for inserting 'or' indexes. Only used during compilation.
|
vector< CSMatch > | matches |
| Vector of items.
|
vector< CSPar > | par |
| Info for each currently open round bracket.
|
vector< int > | match_start [10] |
| Backreferences.
|
vector< int > | match_end [10] |
| Backreferences.
|
Detailed Description
The main class for Regular Expressions.
Implementation of regular expressions. It supports both compiling and matching. Compiling a regular expression converts it into a form that the matching engine can use. This compiled string is stored internally for future use so that there's no need for recompiling the regular expression every time.
All functions support UNICODE! Simply compile with UNICODE support on and all functions will use wchar_t instead of char.
Matching is done non-recursively.
Here is a list of its features.
Elements of a regular expression.
- letters letters are matched on a one on one basis.
- . matches any char, even newlines.
- \c characters can be escaped at any time and will have no special meaning except for below.
- \# Exceptions to the above are the numbers # = 0 to 9 which are reserved for backreferences.
- [] matches a single character from a list. - signifies a range. ^ as 1st char negates the sets. anything immediately after the opening [ or ^ is treated as a normal character and has no special meaning unless it is a backslash (ie. becomes an escape sequence). This means you can put [ ] - or any other character and that character will be inserted into the set. [[] and []] are valid sets although they look strange. The first one consists of only '[' and the other ']'.
Special escape characters:
- \n 0x0A newline (unix & dos differences not parsed. You have to handle that on your own).
- \r 0x0D carriage return
- \t 0x09 tab
- \a 0x07 bell
- \b 0x08 backspace
- \f 0x0C formfeed
- \v 0x0B vertical tab
- \x## or \X## hex code for character.
Grouping
( ) You can group any items together. These can be backreferenced with \1 to \9 depending on the opening bracket count. Backreferences will match the exact string matched inside the ().
(?: ) If you don't need backreferences, place a ?: after the opening round bracket. This will save time and space during the matching process.
| Match an alternate set of items if the previous set did not match.
Quantifiers
? match previous item 0 or 1 time.
* match previous item 0 to infinite times.
+ match previous item 1 to infinite times.
{n} match previous item exactly n times. n is a number from 1 to 253.
{n,} match previous item n to infinite times. n is a number from 0 to 253.
{n,m} match previous item n to m times. n and m are numbers from 1 to 253. n can also be 0.
The ranges for n and m in the last 3 quantifiers have a maximum of 253 because one byte is used for the range. Also, 255 is used for infinite, so that's reserved. And if you want to be able to use the compiled string just as a normal string, 0 can't be used internally either. So that makes 2 numbers that are reserved. So 255-2 = 253 and that's the largest value you can use.
? can be used after the above quantifiers to make them lazy, i.e. they'll only match as little as necessary. Ex: c.*?t will return "cat" from "catabctch" instead of the usual "catabct".
^ anchors to the front of the string. Can only be used as 1st char, otherwise, normal char.
$ anchors to the end of the string. Can only be used as last char, otherwise, normal char.
Note about the following examples: I use printf. Sorry if you're used to cout, but I despise streams. I think they are a bad idea that should have never seen the light of day.
Simple example:
CSRegEx re;
if (re.MatchRE("bla blae 0.457","[-+]?([0-9]*\\.)?[0-9]+([eE][-+]?[0-9]+)?"))
printf("Found Match!\n");
else printf("Fail!\n");
Example with full error checking:
char *regex = "[-+]?([0-9]*\\.)?[0-9]+([eE][-+]?[0-9]+)?";
char *mystr = "bla blae 0.457";
CSRegEx *re = new CSRegEx();
if (!re->Compile(regex))
{
printf("Error: %s\n",re->error_str);
printf("Reg expr: %s\n",regex);
printf(" ");
for(int i=0;i<re->error;i++) printf(" ");
printf("^\n");
}
else if (!re->Match(mystr))
{
printf("No match");
}
else
{
printf("Match: ");
for(int i=re->MatchStart;i<re->MatchEnd;i++) printf("%c",mystr[i]);
printf("\n");
}
delete re;
Definition at line 240 of file csregex.h.
Member Enumeration Documentation
|
Error codes for compilation.
After compilation, error_code will be set to one of these values. If an error occurred (any value other than rge_ok), error will hold the location of the error in the source string. - Enumerator:
-
rge_ok |
OK. |
rge_too_many_refs |
Too many references. ie. More than 9. |
rge_missing_round_bracket |
Missing closing round bracket. |
rge_overlapping_chars |
Inside a set [], some characters have been specified multiple times. |
rge_esc_eof |
Escape char at end of string is not allowed. |
rge_missing_square_bracket |
Closing square bracket for set not found. |
rge_invalid_esc_hex |
Invalid hex characters in escape sequence. |
rge_invalid_repeat_format |
Repeat format {} is invalid. |
rge_invalid_repeat_range |
Ranges in repeat {} are invalid. |
rge_unbalanced_round_bracket |
Unbalanced round brackets. |
rge_invalid_range |
Invalid range in set []. Usually because 1st item is higher than 2nd. ie. [z-a]. |
rge_invalid_backreference |
Backreference specified not yet defined. |
rge_regex_too_long |
Regular expression is too long (>65535 chars) when compiled. |
Definition at line 339 of file csregex.h. |
|
Internal use for buf. Specifies item types.
The ID's are numbered so that the high 3 bits are how many bytes it takes for the whole item. High order of zero is a special case. - Enumerator:
-
rg_end |
END : End of compiled string. |
rg_str |
STR len chars... : Matches a string. |
rg_grp |
GRP chars ranges chars... ranges... : Set for square brackets []. |
rg_grn |
GRN chars ranges chars... ranges... : Negated set. |
rg_hed |
HED : Matches start of string. ie. ^. |
rg_tal |
TAL : Matches end of string. ie. $. |
rg_dot |
DOT : Matches any char. |
rg_cpn |
CPN : Close round bracket without backreference. ie. ). |
rg_cpa |
CPA : Close round bracket for backreference 0 (entire match string). |
rg_cpa1 |
CPA : Close round bracket for backreference 1. |
rg_cpa2 |
CPA : Close round bracket for backreference 2. |
rg_cpa3 |
CPA : Close round bracket for backreference 3. |
rg_cpa4 |
CPA : Close round bracket for backreference 4. |
rg_cpa5 |
CPA : Close round bracket for backreference 5. |
rg_cpa6 |
CPA : Close round bracket for backreference 6. |
rg_cpa7 |
CPA : Close round bracket for backreference 7. |
rg_cpa8 |
CPA : Close round bracket for backreference 8. |
rg_cpa9 |
CPA : Close round bracket for backreference 9. |
rg_bck |
BCK : Matches backindex 0. |
rg_bck1 |
BCK : Matches backindex 1. |
rg_bck2 |
BCK : Matches backindex 2. |
rg_bck3 |
BCK : Matches backindex 3. |
rg_bck4 |
BCK : Matches backindex 4. |
rg_bck5 |
BCK : Matches backindex 5. |
rg_bck6 |
BCK : Matches backindex 6. |
rg_bck7 |
BCK : Matches backindex 7. |
rg_bck8 |
BCK : Matches backindex 8. |
rg_bck9 |
BCK : Matches backindex 9. |
rg_chr |
CHR char : Single char. |
rg_opa |
OPA next : Open round bracket with backreference 0 (entire matched string). |
rg_opa1 |
OPA1 next : Open round bracket with backreference 1. |
rg_opa2 |
OPA2 next : Open round bracket with backreference 2. |
rg_opa3 |
OPA3 next : Open round bracket with backreference 3. |
rg_opa4 |
OPA4 next : Open round bracket with backreference 4. |
rg_opa5 |
OPA5 next : Open round bracket with backreference 5. |
rg_opa6 |
OPA6 next : Open round bracket with backreference 6. |
rg_opa7 |
OPA7 next : Open round bracket with backreference 7. |
rg_opa8 |
OPA8 next : Open round bracket with backreference 8. |
rg_opa9 |
OPA9 next : Open round bracket with backreference 9. |
rg_orm |
ORM next : Alternative. ie. | next points to next | or ) item. |
rg_opn |
OPN next : Open round bracket without backreference. ie. (?: next points to the next | or ) item from the location of next. |
rg_sta |
STA prev : Repeat 0 to infinite times. ie. * prev points to start of item being repeated. prev is positive and must be subtracted from the position of prev. |
rg_pls |
PLS prev : Repeat 1 to infinite times. ie. + prev points to start of item being repeated. |
rg_qes |
QES prev : Repeat 0 or 1 time. ie. ? prev points to start of item being repeated. |
rg_sta_lazy |
STA_LAZY prev : Lazy repeat 0 to infinite times. ie. *? prev points to start of item being repeated. prev is positive and must be subtracted from the position of prev. |
rg_pls_lazy |
PLS_LAZY prev : Lazy repeat 1 to infinite times. ie. +? prev points to start of item being repeated. |
rg_qes_lazy |
QES_LAZY prev : Lazy repeat 0 or 1 time. ie. ?? prev points to start of item being repeated. |
rg_rpt |
RPT prev start end : Repeat count from start to end. ie. {start,end} end of 255 is infinite. start and end must have 1 subtracted before using. prev points to start of item being repeated. |
rg_rpt_lazy |
RPT_LAZY prev start end : Lazy repeat count from start to end. ie. {start,end}? end of 255 is infinite. start and end must have 1 subtracted before using. prev points to start of item being repeated. |
Definition at line 249 of file csregex.h. |
Member Function Documentation
void CSRegEx::CheckString |
( |
int |
str_idx |
) |
[private] |
|
|
Converts 1 char strings into a char. rg_str -> rg_chr conversion at end of buf only.
Only used on end of buffer. - Parameters:
-
| str_idx | Index of STR item in buf, the compiled string. |
- Note:
- Used only during compilation.
Definition at line 265 of file csregex.cpp.
References buf, buf_len, CSREGEX_OFSWIDTH, GetValue16, Put(), and rg_chr.
Referenced by Compile(). |
bool CSRegEx::Compile |
( |
const unsigned char * |
str |
) |
|
|
|
Compile a regex before scanning.
After successful compilation, the compiled version of the regular expression is stored internally. You can use GetCompiledString() to retrieve a copy. Also, after success of this function, you can use the Match() function with only the search string.
On error (return false), CSRegEx::error_code and CSRegEx::error_str will be set. Also, CSRegEx::error will point to the location in the plaintext regular expression where the error occurred. - Parameters:
-
- Returns:
- True if success.
- See also:
- GetCompiledString()
Match()
Definition at line 210 of file csregex.cpp.
References _T, _tcslen, _TUCHAR, buf, buf_len, buf_size, Compile(), CSRegExErrorStr, error, error_code, error_str, last_par, opa_cnt, Put(), PUT_OFFSET, PutValue16(), rg_cpa, rg_end, rg_hed, rg_opa, rg_tal, rge_ok, rge_unbalanced_round_bracket, str_len, and tal. |
int CSRegEx::Compile |
( |
int |
pos |
) |
[private] |
|
|
Recursive compiling.
This is recursive on round brackets. - Parameters:
-
| pos | Position in regular expression string to start compiling. |
- Returns:
- Position in regular expression where compilation was completed. -1 on error.
Definition at line 281 of file csregex.cpp.
References _T, _TUCHAR, buf_len, CheckString(), CompileModifiers(), CompileRange(), error, error_code, Escape(), GetValue16, last_par, opa_cnt, Put(), PUT_OFFSET, PutValue16(), rg_bck, rg_chr, rg_cpa, rg_dot, rg_opa, rg_opn, rg_orm, rg_str, rge_invalid_backreference, rge_missing_round_bracket, rge_regex_too_long, rge_too_many_refs, str, str_len, and tal.
Referenced by Compile(), and MatchRE(). |
int CSRegEx::CompileModifiers |
( |
int |
pos, |
|
|
int |
st_pos |
|
) |
[private] |
|
|
Compiles quantifiers for *?+{start,end}.
- Parameters:
-
| pos | Current position in str, the plaintext regular expression. |
| st_pos | The start position in buf, the compiled regular expression, of the item being quantified. |
- Returns:
- New current position in str, the plaintext regular expression. This will be the same as pos if no quantifier found.
- Note:
- Used only during compilation.
Definition at line 568 of file csregex.cpp.
References _istdigit, _T, _TUCHAR, buf, buf_len, error, error_code, Put(), PutValue16(), rg_pls, rg_pls_lazy, rg_qes, rg_qes_lazy, rg_rpt, rg_rpt_lazy, rg_sta, rg_sta_lazy, rge_invalid_repeat_format, rge_invalid_repeat_range, str, and str_len.
Referenced by Compile(). |
int CSRegEx::CompileRange |
( |
int |
pos |
) |
[private] |
|
|
Compiles sets of square brackets [].
- Parameters:
-
| pos | Position in str, the plaintext regular expression, to start compiling the set. This is 1 position after the opening '['. |
- Returns:
- Position in str after the closing square bracket ']' unless error.
- Warning:
- On error, CSRegEx::error will not be -1 and CSRegEx::error_code will be set. So check CSRegEx::error after calling this.
- Note:
- Used only during compilation.
Definition at line 988 of file csregex.cpp.
References _T, _TUCHAR, CompressRange(), error, error_code, Escape(), Put(), PutValue16(), RangeAddChar(), RangeAddRange(), rge_invalid_range, rge_missing_square_bracket, str, and str_len.
Referenced by Compile(). |
void CSRegEx::CompressRange |
( |
vector< unsigned char > & |
chars, |
|
|
vector< unsigned char > & |
start, |
|
|
vector< unsigned char > & |
end |
|
) |
[private] |
|
|
Remove redundant info such as a char that is 1 below a range.
[6-90-45] will be converted to [0-9]
[0123456789] will also be converted to [0-9]. - Parameters:
-
| chars | Vector of characters. |
| start | Vector of start characters for ranges. |
| end | Vector of end characters for ranges. (Inclusive) |
- Note:
- Used only during compilation.
Definition at line 855 of file csregex.cpp.
References _TUCHAR.
Referenced by CompileRange(). |
int CSRegEx::Escape |
( |
int |
pos, |
|
|
unsigned char & |
c |
|
) |
[private] |
|
|
Escape character if necessary.
Update c to be the actual character. - Parameters:
-
| pos | One position after the '\' in str, the plaintext regular expression. |
| c | The unescaped character (This is written to only). Original value is not used. |
- Returns:
- New string current position in str.
- Note:
- Used only during compilation.
Definition at line 486 of file csregex.cpp.
References _istdigit, _T, _totlower, _TUCHAR, error, error_code, rge_esc_eof, rge_invalid_esc_hex, str, and str_len.
Referenced by Compile(), and CompileRange(). |
unsigned char * CSRegEx::GetCompiledString |
( |
|
) |
const |
|
|
Returns a copy of the compiled string. Must delete[] return pointer.
- Returns:
- Copy of compiled string.
- Warning:
- You must delete[] the returned pointer when done.
Definition at line 102 of file csregex.cpp.
References _tcscpy, _tcslen, _TUCHAR, and buf. |
unsigned int CSRegEx::GetRpt |
( |
unsigned int |
buf_idx, |
|
|
int & |
low, |
|
|
int & |
high, |
|
|
bool & |
lazy, |
|
|
unsigned int & |
prev |
|
) |
const [private] |
|
|
Gets repeat ranges and returns next item pos.
Very important function. Used during matching.
Call this with buf_idx being the position in the compiled string.
This can be called after any item, even if it doesn't have a quantifier. In this case, the return value will be the same as buf_idx and low and high will be 1. Also lazy will be false, and prev will be undefined. - Parameters:
-
| buf_idx | Position in buf, the compiled regular expression, to check for quantifiers. |
| low | This will be set to the low range. |
| high | This will be set to the high range. In case of infinite 0x7fffffff. |
| lazy | This will be true if lazy quantifier, false otherwise. |
| prev | This will be the position in buf of the item this quantifier belongs to. In case of round brackets, this will point to the very beginning ie. opening round bracket. |
- Returns:
- Position after the quantifier in the compiled string.
- Note:
- Used only during matching.
Definition at line 1128 of file csregex.cpp.
References buf, CSREGEX_OFSWIDTH, GetValue16, rg_pls, rg_pls_lazy, rg_qes, rg_qes_lazy, rg_rpt, rg_rpt_lazy, rg_sta, and rg_sta_lazy.
Referenced by Match1(). |
bool CSRegEx::Match |
( |
const unsigned char * |
str, |
|
|
const unsigned char * |
cmp |
|
) |
|
|
|
Match a string. cmp is the compiled regular expression string.
Uses cmp for the regular expression instead of the internally stored one. The internal regular expression is not touched and can be reused after a call to this function.
See Match() to find out more about successful matching. - See also:
- Match()
- Parameters:
-
| str | Search string. |
| cmp | Compiled regular expression. |
- Returns:
- True if match found. False otherwise.
- Note:
- There exist overloaded versions of this function for signed versions of str and cmp.
Definition at line 1238 of file csregex.cpp.
References _TUCHAR, buf, and Match(). |
bool CSRegEx::Match |
( |
const unsigned char * |
str |
) |
|
|
|
Match a string.
On success (return true), CSRegEx::MatchStart and CSRegEx::MatchEnd (non-inclusive) will be indexes into the search string where the match was found.
CSRegEx::BackStart[10] and CSRegEx::BackEnd[10] are indexes like the above for backreferences. Array index 0 is the same as MatchStart and MatchEnd.
The regular expression used is the one stored internally. It can be set with Compile(), SetCompiledString() or MatchRE(). - Parameters:
-
- Returns:
- True if match found. False otherwise.
- See also:
- Compile()
SetCompiledString()
MatchRE()
Definition at line 1276 of file csregex.cpp.
References _tcslen, BackEnd, BackStart, bMatchHead, buf, Match1(), match_end, match_start, MatchEnd, MatchStart, par, and rg_hed.
Referenced by Match(), and MatchRE(). |
bool CSRegEx::Match1 |
( |
const unsigned char * |
str |
) |
[private] |
|
|
The main matching algorithm.
This function is non-recursive.
Something to note about CSMatch is that an end bracket will have different meaning for start and end members.
CSMatch::start will have the buf position of the opening round bracket.
CSMatch::end will be the index in matches[] of the opening round bracket.
- Parameters:
-
- Returns:
- True if match found. False otherwise.
Definition at line 1356 of file csregex.cpp.
References _T, _TUCHAR, buf, CSMatch::buf_idx, CHECK_MATCH, CSREGEX_OFSWIDTH, CSMatch::end, GetRpt(), GetValue16, match_end, match_start, MatchEnd, matches, MatchStart, Next(), par, rg_bck, rg_bck1, rg_bck2, rg_bck3, rg_bck4, rg_bck5, rg_bck6, rg_bck7, rg_bck8, rg_bck9, rg_chr, rg_cpa, rg_cpa1, rg_cpa2, rg_cpa3, rg_cpa4, rg_cpa5, rg_cpa6, rg_cpa7, rg_cpa8, rg_cpa9, rg_cpn, rg_dot, rg_end, rg_grn, rg_grp, rg_hed, rg_opa, rg_opa1, rg_opa2, rg_opa3, rg_opa4, rg_opa5, rg_opa6, rg_opa7, rg_opa8, rg_opa9, rg_opn, rg_orm, rg_pls, rg_pls_lazy, rg_qes, rg_qes_lazy, rg_rpt, rg_rpt_lazy, rg_sta, rg_sta_lazy, rg_str, rg_tal, CSMatch::rpt, and CSMatch::start.
Referenced by Match(). |
bool CSRegEx::MatchRE |
( |
const unsigned char * |
str, |
|
|
const unsigned char * |
re |
|
) |
|
|
|
Match a string. re is the plaintext regular expression.
The internal regular expression is replaced with the compiled version of re.
Because the internal regular expression is replaced, if you wish to use the same regular expression again, you can simply call Match() with only the search string after a call to this function.
On failure, check error_code: if error_code==rge_ok, compilation succeeded but no match was found. If error_code!=rge_ok, there was an error with the compilation.
See Compile() to find out more about the errors.
See Match() to find out more about successful matching. - See also:
- Compile()
Match()
- Parameters:
-
| str | Search string. |
| re | Plaintext regular expression. |
- Returns:
- True if match found. False otherwise.
- Note:
- There exist overloaded versions of this function for signed versions of str and re.
Definition at line 1188 of file csregex.cpp.
References Compile(), and Match().
Referenced by MatchRE(). |
unsigned int CSRegEx::Next |
( |
unsigned int |
pos |
) |
const [private] |
|
void CSRegEx::Put |
( |
char |
c |
) |
[private] |
|
void CSRegEx::PutValue16 |
( |
unsigned short |
c |
) |
[private] |
|
bool CSRegEx::RangeAddChar |
( |
vector< unsigned char > & |
chars, |
|
|
vector< unsigned char > & |
start, |
|
|
vector< unsigned char > & |
end, |
|
|
unsigned char |
c |
|
) |
[private] |
|
|
Adds a character to the set [].
Checks if already inserted. - Parameters:
-
| chars | Vector of characters. |
| start | Vector of start characters for ranges. |
| end | Vector of end characters for ranges. (Inclusive) |
| c | Character to insert in set. |
- Returns:
- True if success. False if character has already been inserted.
- Note:
- Used only during compilation.
Definition at line 717 of file csregex.cpp.
References error_code, and rge_overlapping_chars.
Referenced by CompileRange(). |
bool CSRegEx::RangeAddRange |
( |
vector< unsigned char > & |
chars, |
|
|
vector< unsigned char > & |
start, |
|
|
vector< unsigned char > & |
end, |
|
|
unsigned char |
cstart, |
|
|
unsigned char |
cend |
|
) |
[private] |
|
|
Add a range to the set [].
Checks if range overlaps. - Parameters:
-
| chars | Vector of characters. |
| start | Vector of start characters for ranges. |
| end | Vector of end characters for ranges. (Inclusive) |
| cstart | Start character of range to insert in set. |
| cend | End character of range to insert in set. (Inclusive) |
- Returns:
- True if success. False if any part of the range has already been inserted.
- Note:
- Used only during compilation.
Definition at line 784 of file csregex.cpp.
References error_code, and rge_overlapping_chars.
Referenced by CompileRange(). |
void CSRegEx::SetCompiledString |
( |
unsigned char * |
str |
) |
|
|
|
Sets the compiled string.
Allows the use of previously compiled strings. - Parameters:
-
- Note:
- The data is copied, so you retain ownership of the pointer and its data.
Definition at line 115 of file csregex.cpp.
References _tcscpy, _tcslen, _TUCHAR, buf, and buf_size. |
The documentation for this class was generated from the following files:
Docs for CSRegEx created on Tue Dec 11 14:36:54 2007 by Doxygen 1.4.3
Webmaster: Cléo Saulnier
|