00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #ifndef csregexH
00019 #define csregexH
00020
00021 #include <vector>
00022
00023 #ifdef _UNICODE
00024 #include <tchar.h>
00025 #endif
00026
00027 using namespace std;
00028
00029 #ifndef _UNICODE
00030 #ifndef __TCHAR_DEFINED
00031 #define _TCHAR char
00032 #define _TUCHAR unsigned char
00033 #endif
00034 #endif
00035
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
/// Plain record with four public fields; CSRegEx keeps a
/// std::vector<CSMatch> named `matches`.
///
/// The type is deliberately left as a simple aggregate (no constructors,
/// no default member initializers), so the fields of a default-constructed
/// object are uninitialized until they are assigned.
class CSMatch
{
public:
    unsigned int buf_idx;  ///< presumably an index into the compiled regex buffer -- TODO confirm
    unsigned int start;    ///< presumably the start position of the matched span -- TODO confirm
    unsigned int end;      ///< presumably the end position of the matched span -- TODO confirm
    int rpt;               ///< presumably a repeat counter for the element -- TODO confirm
};
00108
/// Plain record with two public index fields; CSRegEx keeps a
/// std::vector<CSPar> named `par`.
///
/// The type is deliberately left as a simple aggregate (no constructors,
/// no default member initializers), so the fields of a default-constructed
/// object are uninitialized until they are assigned.
class CSPar
{
public:
    unsigned int buf_idx;    ///< presumably an index into the compiled regex buffer -- TODO confirm
    unsigned int match_idx;  ///< presumably an index into CSRegEx's match list -- TODO confirm
};
00124
00240 class CSRegEx
00241 {
00242 private:
00249 enum RGTok {
00250 rg_end = 0,
00251 rg_str = 1,
00252 rg_grp = 2,
00253 rg_grn = 3,
00254 rg_hed = 32,
00255 rg_tal = 33,
00256 rg_dot = 34,
00257 rg_cpn = 35,
00258 rg_cpa = 36,
00259 rg_cpa1 = 37,
00260 rg_cpa2 = 38,
00261 rg_cpa3 = 39,
00262 rg_cpa4 = 40,
00263 rg_cpa5 = 41,
00264 rg_cpa6 = 42,
00265 rg_cpa7 = 43,
00266 rg_cpa8 = 44,
00267 rg_cpa9 = 45,
00268 rg_bck = 46,
00269 rg_bck1 = 47,
00270 rg_bck2 = 48,
00271 rg_bck3 = 49,
00272 rg_bck4 = 50,
00273 rg_bck5 = 51,
00274 rg_bck6 = 52,
00275 rg_bck7 = 53,
00276 rg_bck8 = 54,
00277 rg_bck9 = 55,
00278 rg_chr = 64,
00279 rg_opa = 96,
00280 rg_opa1 = 97,
00281 rg_opa2 = 98,
00282 rg_opa3 = 99,
00283 rg_opa4 = 100,
00284 rg_opa5 = 101,
00285 rg_opa6 = 102,
00286 rg_opa7 = 103,
00287 rg_opa8 = 104,
00288 rg_opa9 = 105,
00289 rg_orm = 106,
00290 rg_opn = 108,
00291 rg_sta = 109,
00292 rg_pls = 110,
00293 rg_qes = 111,
00294 rg_sta_lazy = 112,
00295 rg_pls_lazy = 113,
00296 rg_qes_lazy = 114,
00297 rg_rpt = 224,
00298 rg_rpt_lazy = 225,
00299 };
00300
00301 const _TUCHAR *str;
00302 int str_len;
00303 _TUCHAR *buf;
00304 int buf_len;
00305 int buf_size;
00306 bool tal;
00307 int opa_cnt;
00308 vector<int> last_par;
00309 vector<CSMatch> matches;
00310 vector<CSPar> par;
00311 vector<int> match_start[10];
00312 vector<int> match_end[10];
00313 void Put(_TCHAR c);
00314 #ifndef _UNICODE
00315 void PutValue16(unsigned short c);
00316 #endif
00317 int Compile(int pos);
00318 int Escape(int pos, _TUCHAR &c);
00319 int CompileModifiers(int pos, int st_pos);
00320 bool RangeAddChar(vector<_TUCHAR> &chars, vector<_TUCHAR> &start,
00321 vector<_TUCHAR> &end, _TUCHAR c);
00322 bool RangeAddRange(vector<_TUCHAR> &chars, vector<_TUCHAR> &start,
00323 vector<_TUCHAR> &end, _TUCHAR cstart, _TUCHAR cend);
00324 void CompressRange(vector<_TUCHAR> &chars, vector<_TUCHAR> &start,
00325 vector<_TUCHAR> &end);
00326 int CompileRange(int pos);
00327 void CheckString(int str_idx);
00328 unsigned int Next(unsigned int pos) const;
00329 unsigned int GetRpt(unsigned int buf_idx, int &low, int &high, bool &lazy, unsigned int &prev) const;
00330 bool Match1(const _TUCHAR *str);
00331
00332 public:
00339 enum RGError {
00340 rge_ok=0,
00341 rge_too_many_refs=1,
00342 rge_missing_round_bracket=2,
00343 rge_overlapping_chars=3,
00344 rge_esc_eof=4,
00345 rge_missing_square_bracket=5,
00346 rge_invalid_esc_hex=6,
00347 rge_invalid_repeat_format=7,
00348 rge_invalid_repeat_range=8,
00349 rge_unbalanced_round_bracket=9,
00350 rge_invalid_range=10,
00351 rge_invalid_backreference=11,
00352 rge_regex_too_long=12
00353 };
00354
00355 int error;
00356 enum RGError error_code;
00357 _TCHAR *error_str;
00358 bool bMatchHead;
00359 int MatchStart;
00360 int MatchEnd;
00361 int BackStart[10];
00362 int BackEnd[10];
00363
00364
00365 CSRegEx::CSRegEx();
00366 CSRegEx::~CSRegEx();
00367 bool Compile(const _TUCHAR *str);
00368 #ifndef _UNICODE
00369 bool Compile(const char *str);
00370 #endif
00371 bool MatchRE(const _TUCHAR *str, const _TUCHAR *re);
00372 #ifndef _UNICODE
00373 bool MatchRE(const char *str, const char *re);
00374 bool MatchRE(const unsigned char *str, const char *re);
00375 bool MatchRE(const char *str, const unsigned char *re);
00376 #endif
00377 bool Match(const _TUCHAR *str);
00378 #ifndef _UNICODE
00379 bool Match(const char *str);
00380 #endif
00381 bool Match(const _TUCHAR *str, const _TUCHAR *cmp);
00382 #ifndef _UNICODE
00383 bool Match(const char *str, const char *cmp);
00384 bool Match(const unsigned char *str, const char*cmp);
00385 bool Match(const char *str, const unsigned char *cmp);
00386 #endif
00387 _TUCHAR* GetCompiledString() const;
00388 void SetCompiledString(_TUCHAR *str);
00389 };
00390 #endif