00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00023
00024
00025
00026
00027
00028 #include <ctype.h>
00029 #include <string.h>
00030
00031 #ifdef __BCPLUSPLUS__
00032 #pragma hdrstop
00033 #endif
00034
00035 #include "csregex.h"
00036
00037
00038 #ifdef __BCPLUSPLUS__
00039 #pragma package(smart_init)
00040 #endif
00041
00042 #ifndef _UNICODE
00043 #ifndef __TCHAR_DEFINED
00044 #define _tcslen strlen
00045 #define _tcscpy strcpy
00046 #define _totlower tolower
00047 #define _istdigit isdigit
00048 #define _T(a) a
00049 #endif
00050 #endif
00051
00052
00053
00054
00055 #ifdef _UNICODE
00056 #define CSREGEX_OFSWIDTH 1
00057 #else
00058 #define CSREGEX_OFSWIDTH 2
00059 #endif
00060
00065 static _TCHAR* CSRegExErrorStr[] = {_T("OK."),
00066 _T("Back references (round brackets) exceed maximum of 9."),
00067 _T("Missing end round bracket."),
00068 _T("Overlapping/redundant character specs."),
00069 _T("Escape character cannot appear at end of string by itself."),
00070 _T("Missing end square bracket."),
00071 _T("Invalid hex characters in escape sequence."),
00072 _T("Invalid repeat format."),
00073 _T("Invalid repeat range."),
00074 _T("Unbalanced round brackets."),
00075 _T("Invalid range."),
00076 _T("Invalid backreference."),
00077 _T("Regular expression is too long.")};
00078
00079 CSRegEx::CSRegEx()
00080 {
00081 str = NULL;
00082 buf = NULL;
00083 str_len = 0;
00084 buf_len = 0;
00085 buf_size = 0;
00086 bMatchHead = false;
00087 error = -1;
00088 error_code = rge_ok;
00089 error_str = CSRegExErrorStr[0];
00090 }
00091
00092 CSRegEx::~CSRegEx()
00093 {
00094 if (buf) delete[] buf;
00095 }
00096
00102 _TUCHAR* CSRegEx::GetCompiledString() const
00103 {
00104 _TUCHAR *b = new _TUCHAR[_tcslen(reinterpret_cast<_TCHAR* const>(buf))+1];
00105 _tcscpy(reinterpret_cast<_TCHAR*>(b),reinterpret_cast<_TCHAR*>(buf));
00106 return b;
00107 }
00108
00115 void CSRegEx::SetCompiledString(_TUCHAR *str)
00116 {
00117 int len = _tcslen(reinterpret_cast<_TCHAR*>(str))+1;
00118 if (len>buf_size)
00119 {
00120 if (buf_size==0) buf_size = 1024;
00121 else delete[] buf;
00122 while(len>buf_size) buf_size*=2;
00123 buf = new _TUCHAR[buf_size];
00124 }
00125 _tcscpy(reinterpret_cast<_TCHAR*>(buf),reinterpret_cast<_TCHAR*>(str));
00126 }
00127
00133 void CSRegEx::Put(_TCHAR c)
00134 {
00135 if (buf_len+1==buf_size)
00136 {
00137 _TUCHAR *buf1 = new _TUCHAR[buf_size*2];
00138 memcpy(buf1,buf,buf_size*sizeof(_TUCHAR));
00139 delete[] buf;
00140 buf = buf1;
00141 buf_size*=2;
00142 }
00143 buf[buf_len++]=c;
00144 }
00145
00146
00147
00148
00149 #ifdef _UNICODE
00150 #define PutValue16 Put
00151 #define GetValue16(i) \
00152 static_cast<unsigned int>(buf[i])
00153 #else
00154
00159 void CSRegEx::PutValue16(unsigned short c)
00160 {
00161 if (buf_len+2>=buf_size)
00162 {
00163 unsigned char *buf1 = new unsigned char[buf_size*2];
00164 memcpy(buf1,buf,buf_size);
00165 delete[] buf;
00166 buf = buf1;
00167 buf_size*=2;
00168 }
00169
00170
00171 *(reinterpret_cast<unsigned short*>(&buf[buf_len])) = c+0x0100;
00172 buf_len+=2;
00173 }
00174
00175 #define GetValue16(i) \
00176 static_cast<unsigned int>((*(reinterpret_cast<unsigned short*>(&buf[i])))-0x0100)
00177
00178 #endif
00179
00180 #ifndef _UNICODE
00181 bool CSRegEx::Compile(const char *str)
00182 {
00183 return Compile((const unsigned char*)str);
00184 }
00185 #endif
00186
00187
00188 #ifndef _UNICODE
00189 #define PUT_OFFSET(a, v) \
00190 *(reinterpret_cast<unsigned short*>(&buf[(a)])) = (v)+0x0100
00191 #else
00192 #define PUT_OFFSET(a, v) \
00193 buf[(a)] = (v)
00194 #endif
00195
00210 bool CSRegEx::Compile(const _TUCHAR *str)
00211 {
00212 error_code = rge_ok;
00213 if (buf==NULL)
00214 {
00215 buf_size = 1024;
00216 buf = new _TUCHAR[buf_size];
00217 }
00218 buf_len = 0;
00219 tal = false;
00220 if (str[0]==_T('^'))
00221 {
00222 Put(rg_hed);
00223 str++;
00224 }
00225 str_len = _tcslen(reinterpret_cast<const _TCHAR*>(str));
00226 this->str = str;
00227 opa_cnt=0;
00228 error = -1;
00229 Put(rg_opa);
00230 last_par.push_back(buf_len);
00231 PutValue16(0);
00232 int pos = Compile(0);
00233 if ((last_par.size()!=1)||(pos!=str_len))
00234 {
00235 error = pos;
00236 error_code = rge_unbalanced_round_bracket;
00237 error_str = CSRegExErrorStr[error_code];
00238 last_par.clear();
00239 return false;
00240 }
00241 if (error!=-1)
00242 {
00243 error_str = CSRegExErrorStr[error_code];
00244 last_par.clear();
00245 return false;
00246 }
00247 int parpos = last_par.back();
00248 int offset = buf_len-parpos;
00249 PUT_OFFSET(parpos, offset);
00250 last_par.pop_back();
00251 Put(rg_cpa);
00252 if (tal) Put(rg_tal);
00253 Put(rg_end);
00254 error_str = CSRegExErrorStr[0];
00255 last_par.clear();
00256 return true;
00257 }
00258
00265 void CSRegEx::CheckString(int str_idx)
00266 {
00267 if (GetValue16(str_idx)==1)
00268 {
00269 buf[str_idx-1] = rg_chr;
00270 buf_len = str_idx;
00271 Put(buf[str_idx+CSREGEX_OFSWIDTH]);
00272 }
00273 }
00274
00281 int CSRegEx::Compile(int pos)
00282 {
00283
00284
00285
00286 bool str_started = false;
00287 int str_idx = 0;
00288 int st_pos;
00289 while(pos<str_len)
00290 {
00291 _TUCHAR c = str[pos];
00292 pos++;
00293 if (buf_len>=65279)
00294 {
00295 error = pos-1;
00296 error_code = rge_regex_too_long;
00297 return pos-1;
00298 }
00299 bool esc = false;
00300 switch(c)
00301 {
00302 case _T('.'):
00303 if (str_started)
00304 {
00305 CheckString(str_idx);
00306 str_started = false;
00307 }
00308 st_pos = buf_len;
00309 Put(rg_dot);
00310 pos=CompileModifiers(pos, st_pos);
00311 if (error!=-1) return pos;
00312 break;
00313 case _T('('):
00314 {
00315 if (str_started)
00316 {
00317 CheckString(str_idx);
00318 str_started = false;
00319 }
00320 st_pos = buf_len;
00321
00322 int old_pos;
00323 int type = 0;
00324 old_pos = pos-1;
00325 int opa_ofs;
00326 if ((pos+1<str_len)&&(str[pos]==_T('?'))&&(str[pos+1]==_T(':')))
00327 {
00328 Put(rg_opn);
00329 last_par.push_back(buf_len);
00330 PutValue16(0);
00331 pos+=2;
00332 }
00333 else
00334 {
00335 opa_cnt++;
00336 opa_ofs = opa_cnt;
00337 if (opa_cnt==10)
00338 {
00339 error = pos;
00340 error_code = rge_too_many_refs;
00341 return old_pos;
00342 }
00343 Put(rg_opa+opa_cnt);
00344 last_par.push_back(buf_len);
00345 PutValue16(0);
00346 type = 1;
00347 }
00348 pos = Compile(pos);
00349 if (error!=-1) return pos;
00350 if ((pos>=str_len)||(str[pos]!=_T(')')))
00351 {
00352
00353 error = old_pos;
00354 error = rge_missing_round_bracket;
00355 return old_pos;
00356 }
00357 pos++;
00358 int parpos = last_par.back();
00359 int offset = buf_len-parpos;
00360 PUT_OFFSET(parpos, offset);
00361 last_par.pop_back();
00362 Put(type?rg_cpa+opa_ofs:rg_cpn);
00363 pos=CompileModifiers(pos, st_pos);
00364 if (error!=-1) return pos;
00365 break;
00366 }
00367 case _T('['):
00368 if (str_started)
00369 {
00370 CheckString(str_idx);
00371 str_started = false;
00372 }
00373 st_pos = buf_len;
00374 pos = CompileRange(pos);
00375 if (error!=-1) return pos;
00376 pos = CompileModifiers(pos,st_pos);
00377 if (error!=-1) return pos;
00378 break;
00379 case _T('|'):
00380 {
00381 if (str_started)
00382 {
00383 CheckString(str_idx);
00384 str_started = false;
00385 }
00386 int parpos = last_par.back();
00387 int offset = buf_len-parpos;
00388 PUT_OFFSET(parpos, offset);
00389 Put(rg_orm);
00390 last_par.back()=buf_len;
00391 PutValue16(0);
00392 break;
00393 }
00394 case _T(')'):
00395 if (str_started)
00396 {
00397 CheckString(str_idx);
00398 str_started = false;
00399 }
00400 return pos-1;
00401 case _T('\\'):
00402
00403
00404 if ((str[pos]>=_T('1'))&&(str[pos]<=_T('9')))
00405 {
00406 if (str_started)
00407 {
00408 CheckString(str_idx);
00409 str_started = false;
00410 }
00411 st_pos = buf_len;
00412 Put(rg_bck+(str[pos++]-_T('0')));
00413 pos = CompileModifiers(pos,st_pos);
00414 if (error!=-1) return pos;
00415 break;
00416 }
00417 else if (str[pos]==_T('0'))
00418 {
00419 error = pos-1;
00420 error_code = rge_invalid_backreference;
00421 return pos-1;
00422 }
00423 pos = Escape(pos, c);
00424 if (error!=-1) return pos;
00425 esc = true;
00426 default:
00427 {
00428 if ((!esc)&&(c==_T('$'))&&(pos==str_len))
00429 {
00430 tal=true;
00431 break;
00432 }
00433
00434
00435 _TUCHAR cc;
00436 if ((pos<str_len)&&(((cc=str[pos])==_T('?'))||(cc==_T('*'))||(cc==_T('+'))))
00437 {
00438 if (str_started)
00439 {
00440 CheckString(str_idx);
00441 str_started = false;
00442 }
00443 st_pos = buf_len;
00444 Put(rg_chr);
00445 Put(c);
00446 pos = CompileModifiers(pos,st_pos);
00447 if (error!=-1) return pos;
00448 break;
00449 }
00450 if (str_started)
00451 {
00452 unsigned int cnt = GetValue16(str_idx);
00453
00454
00455 Put(c);
00456 cnt++;
00457 PUT_OFFSET(str_idx, cnt);
00458 }
00459 else
00460 {
00461 Put(rg_str);
00462 str_idx = buf_len;
00463 PutValue16(1);
00464 Put(c);
00465 str_started = true;
00466 }
00467 }
00468 }
00469 }
00470 if (str_started)
00471 {
00472 CheckString(str_idx);
00473 str_started = false;
00474 }
00475 return pos;
00476 }
00477
00486 int CSRegEx::Escape(int pos, _TUCHAR &c)
00487 {
00488 _TUCHAR c1 = str[pos];
00489 switch(c1)
00490 {
00491 case _T('n'):
00492 c = (_TUCHAR)10;
00493 break;
00494 case _T('r'):
00495 c = (_TUCHAR)13;
00496 break;
00497 case _T('t'):
00498 c = (_TUCHAR)9;
00499 break;
00500 case _T('a'):
00501 c = (_TUCHAR)7;
00502 break;
00503 case _T('b'):
00504 c = (_TUCHAR)8;
00505 break;
00506 case _T('f'):
00507 c = (_TUCHAR)12;
00508 break;
00509 case _T('v'):
00510 c = (_TUCHAR)11;
00511 break;
00512 case _T('x'):
00513 case _T('X'):
00514
00515 #ifdef _UNICODE
00516 #define ESC_CHARS 4
00517 #else
00518 #define ESC_CHARS 2
00519 #endif
00520 if (pos+ESC_CHARS>=str_len)
00521 {
00522 error = pos-1;
00523 error_code = rge_esc_eof;
00524 return pos;
00525 }
00526 c = 0;
00527 unsigned char h[ESC_CHARS];
00528 h[0] = _totlower(str[pos+1]);
00529 h[1] = _totlower(str[pos+2]);
00530 #ifdef _UNICODE
00531 h[2] = _totlower(str[pos+2]);
00532 h[3] = _totlower(str[pos+3]);
00533 #endif
00534 for(int k=0;k<ESC_CHARS;k++)
00535 {
00536 c<<=4;
00537 if (_istdigit(h[k]))
00538 {
00539 c += h[k]-_T('0');
00540 }
00541 else if ((h[k]>='a')&&(h[k]<='f'))
00542 {
00543 c += h[k]-_T('a')+(_TUCHAR)10;
00544 }
00545 else
00546 {
00547 error = pos+k+1;
00548 error_code = rge_invalid_esc_hex;
00549 return pos;
00550 }
00551 }
00552 pos+=ESC_CHARS;
00553 break;
00554 default:
00555 c=c1;
00556 }
00557 pos++;
00558 return pos;
00559 }
00560
00568 int CSRegEx::CompileModifiers(int pos, int st_pos)
00569 {
00570 if (pos>=str_len) return pos;
00571 int old_pos = pos;
00572 int buf_idx = buf_len;
00573 _TUCHAR c = str[pos];
00574 switch(c)
00575 {
00576 case _T('?'):
00577 Put(rg_qes);
00578 PutValue16(buf_len-st_pos);
00579 pos++;
00580 break;
00581 case _T('*'):
00582 Put(rg_sta);
00583 PutValue16(buf_len-st_pos);
00584 pos++;
00585 break;
00586 case _T('+'):
00587 Put(rg_pls);
00588 PutValue16(buf_len-st_pos);
00589 pos++;
00590 break;
00591 case _T('{'):
00592
00593
00594
00595
00596
00597 int start=0;
00598 int end = 0;
00599 bool inf = false;
00600 pos++;
00601 if (!_istdigit(str[pos]))
00602 {
00603 error = pos;
00604 error_code = rge_invalid_repeat_format;
00605 return pos;
00606 }
00607 do {start=start*10+static_cast<int>(str[pos++]-_T('0'));}
00608 while(_istdigit(str[pos]));
00609 if (str[pos]==_T('}'))
00610 {
00611
00612 end = start;
00613 pos++;
00614 }
00615 else
00616 {
00617 if (str[pos]!=_T(','))
00618 {
00619 error = pos;
00620 error_code = rge_invalid_repeat_format;
00621 return pos;
00622 }
00623 pos++;
00624 if (str[pos]==_T('}'))
00625 {
00626
00627 inf = true;
00628 end = 0;
00629 pos++;
00630 }
00631 else
00632 {
00633 if (!_istdigit(str[pos]))
00634 {
00635 error = pos;
00636 error_code = rge_invalid_repeat_format;
00637 return pos;
00638 }
00639 do {end=end*10+static_cast<int>(str[pos++]-_T('0'));}
00640 while(_istdigit(str[pos]));
00641 if (str[pos]!=_T('}'))
00642 {
00643 error = pos;
00644 error_code = rge_invalid_repeat_format;
00645 return pos;
00646 }
00647
00648 pos++;
00649 }
00650 }
00651 if ((start==0)&&(end==1))
00652 {
00653
00654 Put(rg_qes);
00655 PutValue16(buf_len-st_pos);
00656 }
00657 else if ((start==0)&&(inf))
00658 {
00659
00660 Put(rg_sta);
00661 PutValue16(buf_len-st_pos);
00662 }
00663 else if ((start==1)&&(inf))
00664 {
00665
00666 Put(rg_pls);
00667 PutValue16(buf_len-st_pos);
00668 }
00669 else if (((start==0)&&(end==0))||(start>end)||(start>65277)||(end>65277)||((!inf)&&(end==0)))
00670 {
00671
00672 error = old_pos;
00673 error_code = rge_invalid_repeat_range;
00674 return pos;
00675 }
00676 else
00677 {
00678 Put(rg_rpt);
00679 PutValue16(buf_len-st_pos);
00680 PutValue16(start+1);
00681 PutValue16((inf)?65279:end+1);
00682 }
00683 }
00684
00685 if (str[pos]==_T('?'))
00686 {
00687
00688 RGTok t = static_cast<RGTok>(buf[buf_idx]);
00689 RGTok tt = static_cast<RGTok>(0);
00690 if (t==rg_qes)
00691 tt=rg_qes_lazy;
00692 else if (t==rg_sta)
00693 tt=rg_sta_lazy;
00694 else if (t==rg_pls)
00695 tt=rg_pls_lazy;
00696 else if (t==rg_rpt)
00697 tt=rg_rpt_lazy;
00698 if (tt)
00699 {
00700 buf[buf_idx] = tt;
00701 pos++;
00702 }
00703 }
00704 return pos;
00705 }
00706
00717 bool CSRegEx::RangeAddChar(vector<_TUCHAR> &chars, vector<_TUCHAR> &start, vector<_TUCHAR> &end, _TUCHAR c)
00718 {
00719 #ifdef _UNICODE
00720 unsigned int xc = static_cast<unsigned int>(c);
00721 #else
00722 #define xc c
00723 #define xi *i
00724 #define xj *j
00725 #endif
00726
00727
00728 for(vector<_TUCHAR>::iterator i=chars.begin();i!=chars.end();i++)
00729 {
00730 #ifdef _UNICODE
00731 unsigned int xi = static_cast<unsigned int>(*i);
00732 #endif
00733 if (xc==xi)
00734 {
00735 error_code = rge_overlapping_chars;
00736 return false;
00737 }
00738 }
00739 bool added = false;
00740 vector<_TUCHAR>::iterator i = start.begin();
00741 vector<_TUCHAR>::iterator j = end.begin();
00742 for(;i!=start.end();i++,j++)
00743 {
00744 #ifdef _UNICODE
00745 unsigned int xi = static_cast<unsigned int>(*i);
00746 unsigned int xj = static_cast<unsigned int>(*j);
00747 #endif
00748 if ((xc>=xi)&&(xc<=xj))
00749 {
00750 error_code = rge_overlapping_chars;
00751 return false;
00752 }
00753 if ((!added)&&(xc==xi-1))
00754 {
00755 *i = c;
00756 added = true;
00757 }
00758 else if ((!added)&&(xc==xj+1))
00759 {
00760 *j = c;
00761 added = true;
00762 }
00763 }
00764 if (!added) chars.push_back(c);
00765 return true;
00766 #ifndef _UNICODE
00767 #undef xc
00768 #undef xi
00769 #undef xj
00770 #endif
00771 }
00772
00784 bool CSRegEx::RangeAddRange(vector<_TUCHAR> &chars, vector<_TUCHAR> &start, vector<_TUCHAR> &end, _TUCHAR cstart, _TUCHAR cend)
00785 {
00786 #ifdef _UNICODE
00787 unsigned int xcstart = static_cast<unsigned int>(cstart);
00788 unsigned int xcend = static_cast<unsigned int>(cend);
00789 #else
00790 #define xcstart cstart
00791 #define xcend cend
00792 #define xi *i
00793 #define xj *j
00794 #endif
00795
00796 for(vector<_TUCHAR>::iterator i=chars.begin();i!=chars.end();i++)
00797 {
00798 #ifdef _UNICODE
00799 unsigned int xi = static_cast<unsigned int>(*i);
00800 #endif
00801 if ((xi>=xcstart)&&(xi<=xcend))
00802 {
00803 error_code = rge_overlapping_chars;
00804 return false;
00805 }
00806 }
00807 bool added = false;
00808 vector<_TUCHAR>::iterator i = start.begin();
00809 vector<_TUCHAR>::iterator j = end.begin();
00810 for(;i!=start.end();i++,j++)
00811 {
00812 #ifdef _UNICODE
00813 unsigned int xi = static_cast<unsigned int>(*i);
00814 unsigned int xj = static_cast<unsigned int>(*j);
00815 #endif
00816 if (((xcstart>=xi)&&(xcstart<=xj))||((xcend>=xi)&&(xcend<=xj)))
00817 {
00818 error_code = rge_overlapping_chars;
00819 return false;
00820 }
00821 if ((!added)&&(xcend==xi-1))
00822 {
00823 *i = cstart;
00824 added = true;
00825 }
00826 else if ((!added)&&(xcstart==xj+1))
00827 {
00828 *j = cend;
00829 added = true;
00830 }
00831 }
00832 if (!added)
00833 {
00834 start.push_back(cstart);
00835 end.push_back(cend);
00836 }
00837 return true;
00838 #ifndef _UNICODE
00839 #undef xcstart
00840 #undef xcend
00841 #undef xi
00842 #undef xj
00843 #endif
00844 }
00845
00855 void CSRegEx::CompressRange(vector<_TUCHAR> &chars, vector<_TUCHAR> &start, vector<_TUCHAR> &end)
00856 {
00857 #ifndef _UNICODE
00858 #define xc c
00859 #define xc1 c1
00860 #define xst st
00861 #define xen en
00862 #define xs1 s1
00863 #define xe1 e1
00864 #define xs2 s2
00865 #define xe2 e2
00866 #endif
00867
00868 for(unsigned int i=0;i<chars.size();i++)
00869 {
00870 _TUCHAR c = chars[i];
00871 #ifdef _UNICODE
00872 unsigned int xc = static_cast<unsigned int>(c);
00873 #endif
00874 for (unsigned int j=i+1;j<chars.size();j++)
00875 {
00876 _TUCHAR c1 = chars[j];
00877 #ifdef _UNICODE
00878 unsigned int xc1 = static_cast<unsigned int>(c1);
00879 #endif
00880 if (xc+1==xc1)
00881 {
00882 start.push_back(c);
00883 end.push_back(c1);
00884
00885 chars.erase(chars.begin()+j);
00886 chars.erase(chars.begin()+i);
00887 i--;
00888 break;
00889 }
00890 else if (xc1+1==xc)
00891 {
00892 start.push_back(c1);
00893 end.push_back(c);
00894 chars.erase(chars.begin()+j);
00895 chars.erase(chars.begin()+i);
00896 i--;
00897 break;
00898 }
00899 }
00900 }
00901
00902
00903 for(unsigned int i=0;i<chars.size();i++)
00904 {
00905 _TUCHAR c = chars[i];
00906 #ifdef _UNICODE
00907 unsigned int xc = static_cast<unsigned int>(c);
00908 #endif
00909 for(unsigned int j=0;j<start.size();j++)
00910 {
00911 _TUCHAR st = start[j];
00912 _TUCHAR en = end[j];
00913 #ifdef _UNICODE
00914 unsigned int xst = static_cast<unsigned int>(st);
00915 unsigned int xen = static_cast<unsigned int>(en);
00916 #endif
00917 if (xc==xst-1)
00918 {
00919 start[j] = c;
00920 chars.erase(chars.begin()+i);
00921 i--;
00922 }
00923 else if (xc==xen+1)
00924 {
00925 end[j] = c;
00926 chars.erase(chars.begin()+i);
00927 i--;
00928 }
00929 }
00930 }
00931
00932
00933 for(unsigned int i=0;i<start.size();i++)
00934 {
00935 _TUCHAR s1 = start[i];
00936 _TUCHAR e1 = end[i];
00937 #ifdef _UNICODE
00938 unsigned int xs1 = static_cast<unsigned int>(s1);
00939 unsigned int xe1 = static_cast<unsigned int>(e1);
00940 #endif
00941 bool found = false;
00942 for(unsigned int j=i+1;j<end.size();j++)
00943 {
00944 _TUCHAR s2 = start[j];
00945 _TUCHAR e2 = end[j];
00946 #ifdef _UNICODE
00947 unsigned int xs2 = static_cast<unsigned int>(s2);
00948 unsigned int xe2 = static_cast<unsigned int>(e2);
00949 #endif
00950 if (xe2+1==xs1)
00951 {
00952 start[i] = s2;
00953 start.erase(start.begin()+j);
00954 end.erase(end.begin()+j);
00955 j--;
00956 found = true;
00957 }
00958 else if (xs2-1==xe1)
00959 {
00960 end[i] = e2;
00961 start.erase(start.begin()+j);
00962 end.erase(end.begin()+j);
00963 j--;
00964 found = true;
00965 }
00966 }
00967 if (found) i--;
00968 }
00969 #ifndef _UNICODE
00970 #undef xc
00971 #undef xc1
00972 #undef xst
00973 #undef xen
00974 #undef xs1
00975 #undef xe1
00976 #undef xs2
00977 #undef xe2
00978 #endif
00979 }
00980
00988 int CSRegEx::CompileRange(int pos)
00989 {
00990 vector<_TUCHAR> chars;
00991 vector<_TUCHAR> start;
00992 vector<_TUCHAR> end;
00993
00994 int old_pos = pos-1;
00995 int err_pos = pos;
00996 if (pos>=str_len)
00997 {
00998 error = old_pos;
00999 error_code = rge_missing_square_bracket;
01000 return old_pos;
01001 }
01002 _TUCHAR c = str[pos];
01003 bool neg = false;
01004 if (c==_T('^'))
01005 {
01006 neg = true;
01007 pos++;
01008 if (pos>=str_len)
01009 {
01010 error = pos-1;
01011 error_code = rge_missing_square_bracket;
01012 return old_pos;
01013 }
01014 err_pos = pos;
01015 c = str[pos];
01016 }
01017 pos++;
01018 if (c==_T('\\'))
01019 {
01020 pos = Escape(pos, c);
01021 if (error!=-1) return pos;
01022 }
01023 if (pos>=str_len)
01024 {
01025 error = old_pos;
01026 error_code = rge_missing_square_bracket;
01027 return old_pos;
01028 }
01029 for(;;)
01030 {
01031 bool range=false;
01032 if (pos+1<str_len)
01033 {
01034 int pos1 = pos;
01035
01036 _TUCHAR c1 = str[pos1++];
01037 _TUCHAR c2=static_cast<_TUCHAR>(0);
01038 if ((c1==_T('-'))&&(pos1<str_len))
01039 {
01040 c2 = str[pos1++];
01041 if (c2==_T('\\'))
01042 {
01043 pos1 = Escape(pos1, c2);
01044 if (error!=-1) return pos1;
01045 }
01046 if (c2!=_T(']'))
01047 {
01048 #ifdef _UNICODE
01049 if (static_cast<unsigned int>(c)>=static_cast<unsigned int>(c2))
01050 #else
01051 if (c>=c2)
01052 #endif
01053 {
01054 error = err_pos;
01055 error_code = rge_invalid_range;
01056 return pos1;
01057 }
01058 pos = pos1;
01059 bool b = RangeAddRange(chars,start,end,c,c2);
01060 if (!b)
01061 {
01062 error = err_pos;
01063 return pos;
01064 }
01065 range = true;
01066 }
01067 }
01068 }
01069 if (!range)
01070 {
01071 bool b = RangeAddChar(chars, start, end, c);
01072 if (!b)
01073 {
01074 error = err_pos;
01075 return pos;
01076 }
01077 }
01078
01079 if (pos>=str_len)
01080 {
01081 error = old_pos;
01082 error_code = rge_missing_square_bracket;
01083 return pos;
01084 }
01085 err_pos = pos;
01086 c = str[pos++];
01087 if (c==_T(']')) break;
01088 if (c==_T('\\'))
01089 {
01090 pos = Escape(pos, c);
01091 if (error!=-1) return pos;
01092 }
01093 }
01094 CompressRange(chars,start,end);
01095
01096 Put(neg?rg_grn:rg_grp);
01097 PutValue16(chars.size()+1);
01098 PutValue16(start.size()+1);
01099 for(unsigned int i=0;i<chars.size();i++)
01100 {
01101 Put(chars[i]);
01102 }
01103 for(unsigned int i=0;i<start.size();i++)
01104 {
01105 Put(start[i]);
01106 Put(end[i]);
01107 }
01108 return pos;
01109 }
01110
01128 unsigned int CSRegEx::GetRpt(unsigned int buf_idx, int &low, int &high, bool &lazy, unsigned int &prev) const
01129 {
01130 RGTok t = static_cast<RGTok>(buf[buf_idx]);
01131 lazy = false;
01132 switch(t)
01133 {
01134 case rg_qes_lazy:
01135 lazy = true;
01136 case rg_qes:
01137 low = 0;
01138 high = 1;
01139 prev = buf_idx+1-GetValue16(buf_idx+1);
01140 return buf_idx+1+CSREGEX_OFSWIDTH;
01141 case rg_sta_lazy:
01142 lazy = true;
01143 case rg_sta:
01144 low = 0;
01145 high = 0x7fffffff;
01146 prev = buf_idx+1-GetValue16(buf_idx+1);
01147 return buf_idx+1+CSREGEX_OFSWIDTH;
01148 case rg_pls_lazy:
01149 lazy = true;
01150 case rg_pls:
01151 low = 1;
01152 high = 0x7fffffff;
01153 prev = buf_idx+1-GetValue16(buf_idx+1);
01154 return buf_idx+1+CSREGEX_OFSWIDTH;
01155 case rg_rpt_lazy:
01156 lazy = true;
01157 case rg_rpt:
01158 low = GetValue16(buf_idx+1+CSREGEX_OFSWIDTH)-1;
01159 high = GetValue16(buf_idx+1+2*CSREGEX_OFSWIDTH)-1;
01160 if (high==65534) high = 0x7fffffff;
01161 prev = buf_idx+1-GetValue16(buf_idx+1);
01162 return buf_idx+1+3*CSREGEX_OFSWIDTH;
01163 default:
01164 low=1;
01165 high=1;
01166 lazy=false;
01167 return buf_idx;
01168 }
01169 }
01170
01188 bool CSRegEx::MatchRE(const _TUCHAR *str, const _TUCHAR *re)
01189 {
01190 if (!Compile(re)) return false;
01191 return Match(str);
01192 }
01193
01194 #ifndef _UNICODE
01195 bool CSRegEx::MatchRE(const char *str, const char *re)
01196 {
01197 return MatchRE((const unsigned char*)str,(const unsigned char*)re);
01198 }
01199
01200 bool CSRegEx::MatchRE(const unsigned char *str, const char *re)
01201 {
01202 return MatchRE(str,(const unsigned char*)re);
01203 }
01204
01205 bool CSRegEx::MatchRE(const char *str, const unsigned char *re)
01206 {
01207 return MatchRE((const unsigned char*)str,re);
01208 }
01209
01210 bool CSRegEx::Match(const unsigned char *str, const char *cmp)
01211 {
01212 return Match(str,(const unsigned char*)cmp);
01213 }
01214
01215 bool CSRegEx::Match(const char *str, const unsigned char *cmp)
01216 {
01217 return Match((const unsigned char*)str,cmp);
01218 }
01219
01220 bool CSRegEx::Match(const char *str, const char *cmp)
01221 {
01222 return Match((const unsigned char*)str,(const unsigned char*)cmp);
01223 }
01224 #endif
01225
01238 bool CSRegEx::Match(const _TUCHAR *str, const _TUCHAR *cmp)
01239 {
01240 _TUCHAR *buf1 = buf;
01241
01242
01243
01244
01245
01246
01247 buf = const_cast<_TUCHAR*>(cmp);
01248 bool b = Match(str);
01249 buf = buf1;
01250 return b;
01251 }
01252
01253 #ifndef _UNICODE
01254 bool CSRegEx::Match(const char *str)
01255 {
01256 return Match((const unsigned char*)str);
01257 }
01258 #endif
01259
01276 bool CSRegEx::Match(const _TUCHAR *str)
01277 {
01278 int max=1;
01279 if ((buf[0]!=rg_hed)&&(!bMatchHead)) max = _tcslen(reinterpret_cast<const _TCHAR*>(str));
01280
01281 for(int i=0;i<max;i++)
01282 {
01283 if (Match1(str+i))
01284 {
01285 MatchStart += i;
01286 MatchEnd += i;
01287
01288 for(int j=0;j<10;j++)
01289 {
01290 if (match_start[j].size()!=0)
01291 {
01292 BackStart[j] = match_start[j].back()+i;
01293 BackEnd[j] = match_end[j].back()+i;
01294 }
01295 else
01296 {
01297 BackStart[j] = 0;
01298 BackEnd[j] = 0;
01299 }
01300 match_start[j].clear();
01301 match_end[j].clear();
01302 }
01303 par.clear();
01304 return true;
01305 }
01306 }
01307
01308 for(int i=0;i<10;i++)
01309 {
01310 match_start[i].clear();
01311 match_end[i].clear();
01312 }
01313 par.clear();
01314 return false;
01315 }
01316
01317
01318
01319 #define CHECK_MATCH() \
01320 if (cnt>=low) \
01321 { \
01322 \
01323 \
01324 \
01325 if ((lazy)&&(cnt==0)) \
01326 { \
01327 m->rpt = -1; \
01328 idx++; \
01329 if (idx+1>=matches_size) {matches.resize(matches_size*=2); m=&matches[idx-1];} \
01330 m1 = &matches[idx]; \
01331 m1->start = m->end; \
01332 m = m1; \
01333 } \
01334 m->buf_idx = next; \
01335 m->rpt = 0; \
01336 } \
01337 else \
01338 { \
01339 \
01340 if (cnt==-1) cnt = min; \
01341 idx-=(cnt+1); \
01342 state=1; \
01343 }
01344
01345
01356 bool CSRegEx::Match1(const _TUCHAR *str)
01357 {
01358 unsigned int par_index=-1;
01359 int idx=0;
01360 int matches_size = 1024;
01361 bool backtrack=false;
01362
01363
01364 this->str = str;
01365
01366 matches.resize(matches_size);
01367 matches[0].buf_idx=0;
01368 matches[0].start = 0;
01369 matches[0].rpt = 0;
01370
01371 if (buf[0]==rg_hed) matches[0].buf_idx++;
01372
01373 int state = 0;
01374
01375 CSMatch *m1;
01376
01377 while(idx!=-1)
01378 {
01379 CSMatch *m = &matches[idx];
01380 RGTok t = static_cast<RGTok>(buf[m->buf_idx]);
01381 RGTok tt;
01382
01383 unsigned int rpt_idx;
01384 int low, high;
01385 bool lazy;
01386 unsigned int next, prev;
01387
01388 if (state==0)
01389 {
01390 m->end = m->start;
01391
01392 switch(t)
01393 {
01394 case rg_tal:
01395
01396 if (str[m->start]!=_T('\0'))
01397 {
01398 state = 1;
01399 idx--;
01400 break;
01401 }
01402 idx++;
01403 if (idx+1>=matches_size) {matches.resize(matches_size*=2); m=&matches[idx-1];}
01404 m1 = &matches[idx];
01405 m1->buf_idx = m->buf_idx+1;
01406 m1->start = m->end;
01407 m1->rpt = 0;
01408 break;
01409 case rg_chr:
01410 {
01411
01412 rpt_idx = m->buf_idx+2;
01413 next = GetRpt(rpt_idx,low,high,lazy, prev);
01414
01415 int cnt=(backtrack)?-1:0;
01416 unsigned int min=(backtrack)?m->rpt:0;
01417 unsigned int max=(lazy)?((backtrack)?m->rpt+1:low):high;
01418 for(unsigned int i=min;i<max;i++)
01419 {
01420
01421 if (str[m->start]!=buf[m->buf_idx+1]) break;
01422 cnt=i+1;
01423 m->end++;
01424 idx++;
01425 if (idx+1>=matches_size) {matches.resize(matches_size*=2); m=&matches[idx-1];}
01426 m1 = &matches[idx];
01427 m1->buf_idx = m->buf_idx;
01428 m1->start = m->end;
01429 m1->end = m1->start;
01430 m1->rpt = m->rpt+1;
01431 m = m1;
01432 }
01433 CHECK_MATCH();
01434 break;
01435 }
01436 case rg_str:
01437 {
01438
01439 unsigned int cnt = GetValue16(m->buf_idx+1);
01440 for(unsigned int i=0;i<cnt;i++)
01441 {
01442 if (str[m->end++]!=buf[m->buf_idx+1+CSREGEX_OFSWIDTH+i])
01443 {
01444 state = 1;
01445 idx--;
01446 break;
01447 }
01448 }
01449 if (state==1) break;
01450 idx++;
01451 if (idx+1>=matches_size) {matches.resize(matches_size*=2); m=&matches[idx-1];}
01452 m1 = &matches[idx];
01453 m1->buf_idx = m->buf_idx+1+CSREGEX_OFSWIDTH+cnt;
01454 m1->start = m->end;
01455 m1->rpt = 0;
01456 break;
01457 }
01458 case rg_dot:
01459 {
01460 rpt_idx = m->buf_idx+1;
01461 next = GetRpt(rpt_idx,low,high,lazy,prev);
01462
01463 int cnt=(backtrack)?-1:0;
01464 unsigned int min=(backtrack)?m->rpt:0;
01465 unsigned int max=(lazy)?((backtrack)?m->rpt+1:low):high;
01466 for(unsigned int i=min;i<max;i++)
01467 {
01468 if (str[m->start]==_T('\0')) break;
01469 cnt=i+1;
01470 m->end++;
01471 idx++;
01472 if (idx+1>=matches_size) {matches.resize(matches_size*=2); m=&matches[idx-1];}
01473 m1 = &matches[idx];
01474 m1->buf_idx = m->buf_idx;
01475 m1->start = m->end;
01476 m1->end = m1->start;
01477 m1->rpt = m->rpt+1;
01478 m = m1;
01479 }
01480 CHECK_MATCH();
01481 break;
01482 }
01483 case rg_bck:
01484 case rg_bck1: case rg_bck2: case rg_bck3:
01485 case rg_bck4: case rg_bck5: case rg_bck6:
01486 case rg_bck7: case rg_bck8: case rg_bck9:
01487 {
01488
01489 rpt_idx = m->buf_idx+1;
01490 next = GetRpt(rpt_idx,low,high,lazy,prev);
01491
01492 int cnt=(backtrack)?-1:0;
01493 unsigned int min=(backtrack)?m->rpt:0;
01494 unsigned int max=(lazy)?((backtrack)?m->rpt+1:low):high;
01495 unsigned int back_idx = t-rg_bck;
01496 unsigned int start = match_start[back_idx].back();
01497 unsigned int end = match_end[back_idx].back();
01498 if (start==end) max = low;
01499 for(unsigned int i=min;i<max;i++)
01500 {
01501 bool bok = true;
01502 for(unsigned int j=start;j<end;j++)
01503 {
01504 if (str[j]!=str[m->end++])
01505 {
01506 bok = false;
01507 break;
01508 }
01509 }
01510 if (!bok) break;
01511 cnt=i+1;
01512 idx++;
01513 if (idx+1>=matches_size) {matches.resize(matches_size*=2); m=&matches[idx-1];}
01514 m1 = &matches[idx];
01515 m1->buf_idx = m->buf_idx;
01516 m1->start = m->end;
01517 m1->end = m1->start;
01518 m1->rpt = m->rpt+1;
01519 m = m1;
01520 }
01521 CHECK_MATCH();
01522 break;
01523 }
01524 case rg_grp:
01525 case rg_grn:
01526 {
01527
01528 unsigned int char_cnt = GetValue16(m->buf_idx+1)-1;
01529 unsigned int rng_cnt = GetValue16(m->buf_idx+1+CSREGEX_OFSWIDTH)-1;
01530 _TUCHAR c = str[m->start];
01531
01532 rpt_idx = m->buf_idx+1+2*CSREGEX_OFSWIDTH+char_cnt+2*rng_cnt;
01533 next = GetRpt(rpt_idx,low,high,lazy,prev);
01534
01535 int cnt=(backtrack)?-1:0;
01536 unsigned int min=(backtrack)?m->rpt:0;
01537 unsigned int max=(lazy)?((backtrack)?m->rpt+1:low):high;
01538
01539 if (c!=_T('\0'))
01540 {
01541 for(unsigned int i=min;i<max;i++)
01542 {
01543 c = str[m->start];
01544 if (c==_T('\0')) break;
01545 bool found = false;
01546 unsigned int pos = m->buf_idx+1+2*CSREGEX_OFSWIDTH;
01547 for(unsigned int j=0;j<char_cnt;j++)
01548 {
01549 if (c==buf[pos++])
01550 {
01551 found = true;
01552 break;
01553 }
01554 }
01555 if (!found)
01556 {
01557 #ifdef _UNICODE
01558 unsigned int xc = static_cast<unsigned int>(c);
01559 #else
01560 #define xc c
01561 #define xlow1 low1
01562 #define xhigh1 high1
01563 #endif
01564 for(unsigned int j=0;j<rng_cnt;j++)
01565 {
01566 _TUCHAR low1 = buf[pos++];
01567 _TUCHAR high1 = buf[pos++];
01568 #ifdef _UNICODE
01569 unsigned int xlow1 = static_cast<unsigned int>(low1);
01570 unsigned int xhigh1 = static_cast<unsigned int>(high1);
01571 #endif
01572 if ((xc>=xlow1)&&(xc<=xhigh1))
01573 {
01574 found = true;
01575 break;
01576 }
01577 }
01578 }
01579 #ifndef _UNICODE
01580 #undef xc
01581 #undef xlow1
01582 #undef xhigh1
01583 #endif
01584
01585 bool b = (t==rg_grn);
01586 if (found==b) break;
01587 cnt=i+1;
01588 idx++;
01589 if (idx+1>=matches_size) {matches.resize(matches_size*=2); m=&matches[idx-1];}
01590 m1 = &matches[idx];
01591 m1->buf_idx = m->buf_idx;
01592 m->end++;
01593 m1->start = m->end;
01594 m1->end = m1->start;
01595 m1->rpt = m->rpt+1;
01596 m = m1;
01597 }
01598 }
01599 CHECK_MATCH();
01600 break;
01601 }
01602 case rg_opa:
01603 case rg_opa1: case rg_opa2: case rg_opa3:
01604 case rg_opa4: case rg_opa5: case rg_opa6:
01605 case rg_opa7: case rg_opa8: case rg_opa9:
01606 case rg_opn:
01607 {
01608 par_index++;
01609 if (par_index+1>par.size()) par.resize(par_index+1);
01610 par[par_index].match_idx = idx;
01611 par[par_index].buf_idx = m->buf_idx;
01612
01613
01614 idx++;
01615 if (idx+1>=matches_size) {matches.resize(matches_size*=2); m=&matches[idx-1];}
01616 m1 = &matches[idx];
01617 m1->buf_idx = m->buf_idx+1+CSREGEX_OFSWIDTH;
01618 m1->start = m->end;
01619 m1->rpt = 0;
01620
01621
01622 if (m->rpt==0)
01623 {
01624 unsigned int pos = m->buf_idx+1+GetValue16(m->buf_idx+1);
01625 tt = static_cast<RGTok>(buf[pos]);
01626 while(tt==rg_orm)
01627 {
01628 pos += GetValue16(pos+1)+1;
01629 tt = static_cast<RGTok>(buf[pos]);
01630 }
01631 pos++;
01632 int n = GetRpt(pos,low,high,lazy,prev);
01633 if ((lazy)&&(low==0))
01634 {
01635
01636 m->end = m->start;
01637 m->rpt = -1;
01638 idx++;
01639 if (idx+1>=matches_size) {matches.resize(matches_size*=2); m=&matches[idx-1];}
01640 m1 = &matches[idx];
01641 m1->buf_idx = n;
01642 m1->start = m->buf_idx;
01643 m1->end = par[par_index].match_idx;
01644 tt = (RGTok)buf[m->buf_idx];
01645 if ((tt>=rg_opa)&&(tt<=rg_opa9))
01646 {
01647 unsigned int k = tt-rg_opa;
01648 match_start[k].push_back(m->start);
01649 match_end[k].push_back(m->end);
01650 }
01651 par_index--;
01652 m1->rpt = -1;
01653 idx++;
01654 if (idx+1>=matches_size) {matches.resize(matches_size*=2); m=&matches[idx-2];}
01655 CSMatch *m2 = &matches[idx];
01656 m2->buf_idx = n;
01657 m2->start = m->start;
01658 m2->rpt = 0;
01659 }
01660 }
01661 break;
01662 }
01663 case rg_cpa:
01664 case rg_cpa1: case rg_cpa2: case rg_cpa3:
01665 case rg_cpa4: case rg_cpa5: case rg_cpa6:
01666 case rg_cpa7: case rg_cpa8: case rg_cpa9:
01667 case rg_cpn:
01668 {
01669 int k = par[par_index].match_idx;
01670 matches[k].end = m->start;
01671 m->rpt = matches[k].rpt;
01672 m->start = par[par_index].buf_idx;
01673 m->end = k;
01674 if (t!=rg_cpn)
01675 {
01676 int k1 = t-rg_cpa;
01677 match_start[k1].push_back(matches[k].start);
01678 match_end[k1].push_back(matches[k].end);
01679 }
01680 idx++;
01681 if (idx+1>=matches_size) {matches.resize(matches_size*=2); m=&matches[idx-1];}
01682 m1 = &matches[idx];
01683 m1->buf_idx = m->buf_idx+1;
01684 m1->start = matches[k].end;
01685 m1->rpt = 0;
01686 par_index--;
01687 break;
01688 }
01689 case rg_orm:
01690
01691 {
01692 unsigned int ofs = GetValue16(m->buf_idx+1);
01693 unsigned int pos = m->buf_idx+1+ofs;
01694 tt = static_cast<RGTok>(buf[pos]);
01695 while(tt==rg_orm)
01696 {
01697 ofs = GetValue16(pos+1);
01698 pos += ofs+1;
01699 tt = static_cast<RGTok>(buf[pos]);
01700 }
01701 m->buf_idx=pos;
01702 break;
01703 }
01704 case rg_pls:
01705 case rg_sta:
01706 case rg_qes:
01707 case rg_rpt:
01708 case rg_pls_lazy:
01709 case rg_sta_lazy:
01710 case rg_qes_lazy:
01711 case rg_rpt_lazy:
01712 {
01713
01714 next = GetRpt(m->buf_idx,low,high,lazy,prev);
01715 CSMatch *m2 = &matches[idx-1];
01716 m1 = &matches[m2->end];
01717 int rpt = m2->rpt+1;
01718 if ((rpt==high)||((lazy)&&(rpt>=low)))
01719 {
01720
01721 m->buf_idx = next;
01722 break;
01723 }
01724 else if (rpt==0)
01725 {
01726
01727
01728 idx--;
01729 state = 1;
01730 break;
01731 }
01732
01733 m->buf_idx = prev;
01734 m->rpt = m1->rpt+1;
01735 break;
01736 }
01737 case rg_end:
01738
01739 MatchStart = match_start[0].back();
01740 MatchEnd = match_end[0].back();
01741 return true;
01742 }
01743 backtrack = false;
01744 }
01745 else
01746 {
01747
01748
01749
01750
01751
01752
01753
01754
01755 unsigned int next = Next(m->buf_idx);
01756 unsigned int n = GetRpt(next,low,high,lazy,prev);
01757 tt = static_cast<RGTok>(buf[m->buf_idx]);
01758
01759 if ((tt==rg_cpn)||((tt>=rg_cpa)&&(tt<=rg_cpa9)))
01760 {
01761 if ((lazy)&&(m->rpt+1<high))
01762 {
01763 if (m->rpt==-1)
01764 {
01765
01766 idx--;
01767 m = &matches[idx];
01768 m->rpt = 0;
01769 }
01770 else
01771 {
01772
01773 idx++;
01774 if (idx+1>=matches_size) {matches.resize(matches_size*=2); m=&matches[idx-1];}
01775 m1 = &matches[idx];
01776 CSMatch *m2 = &matches[m->end];
01777 m1->buf_idx = prev;
01778 m1->start = m2->end;
01779 m1->rpt = m2->rpt+1;
01780 }
01781 state = 0;
01782 }
01783 else
01784 {
01785
01786
01787 if (m->rpt==-1)
01788 {
01789
01790
01791 idx-=2;
01792 }
01793 else
01794 {
01795 par_index++;
01796 if (par_index+1>par.size()) par.resize(par_index);
01797 par[par_index].buf_idx = m->start;
01798 par[par_index].match_idx = m->end;
01799 idx--;
01800 }
01801 if (tt!=rg_cpn)
01802 {
01803 int k = tt-rg_cpa;
01804 match_start[k].pop_back();
01805 match_end[k].pop_back();
01806 }
01807 }
01808 }
01809 else if ((tt==rg_orm)||(tt==rg_opn)||((tt>=rg_opa)&&(tt<=rg_opa9)))
01810 {
01811
01812 m->buf_idx = m->buf_idx+1+GetValue16(m->buf_idx+1);
01813 tt = static_cast<RGTok>(buf[m->buf_idx]);
01814 if (tt==rg_orm)
01815 {
01816
01817 idx++;
01818 if (idx+1>=matches_size) {matches.resize(matches_size*=2); m=&matches[idx-1];}
01819 m1 = &matches[idx];
01820 m1->buf_idx = m->buf_idx+1+CSREGEX_OFSWIDTH;
01821 m1->start = m->start;
01822 m1->rpt = 0;
01823 state = 0;
01824 }
01825 else
01826 {
01827
01828
01829 n = GetRpt(Next(m->buf_idx),low,high,lazy,prev);
01830 if (lazy)
01831 {
01832
01833
01834
01835
01836
01837
01838
01839
01840
01841
01842
01843
01844
01845
01846
01847
01848
01849 par_index--;
01850 idx--;
01851 if (m->rpt!=0)
01852 {
01853 m = &matches[idx];
01854 par_index++;
01855 if (par_index+1>par.size()) par.resize(par_index);
01856 par[par_index].buf_idx = m->start;
01857 par[par_index].match_idx = m->end;
01858 if (tt!=rg_cpn)
01859 {
01860 int k = tt-rg_cpa;
01861 match_start[k].pop_back();
01862 match_end[k].pop_back();
01863 }
01864 idx--;
01865 }
01866 }
01867 else if (m->rpt<low)
01868 {
01869
01870 par_index--;
01871 idx--;
01872 }
01873 else if ((m->rpt==0)&&(low==0))
01874 {
01875
01876
01877
01878
01879
01880
01881 m->end = m->start;
01882 m->rpt = -1;
01883
01884 idx++;
01885 if (idx+1>=matches_size) {matches.resize(matches_size*=2); m=&matches[idx-1];}
01886 m1 = &matches[idx];
01887 m1->buf_idx = m->buf_idx;
01888 m1->start = par[par_index].buf_idx;
01889 m1->end = par[par_index].match_idx;
01890 tt = static_cast<RGTok>(buf[par[par_index].buf_idx]);
01891 if ((tt>=rg_opa)&&(tt<=rg_opa9))
01892 {
01893 unsigned int k = tt-rg_opa;
01894 match_start[k].push_back(m->start);
01895 match_end[k].push_back(m->end);
01896 }
01897 par_index--;
01898 m1->rpt = -1;
01899
01900
01901 idx++;
01902 if (idx+1>=matches_size) {matches.resize(matches_size*=2); m=&matches[idx-2];}
01903 CSMatch *m2 = &matches[idx];
01904 m2->buf_idx = n;
01905 m2->start = m->start;
01906 m2->rpt = 0;
01907 state = 0;
01908 }
01909 else
01910 {
01911
01912
01913 par_index--;
01914 m1 = &matches[idx-1];
01915 m->buf_idx = GetRpt(Next(m1->buf_idx),low,high,lazy,prev);
01916 m1 = &matches[m1->end];
01917 m->start = m1->end;
01918 m->rpt = 0;
01919 state = 0;
01920 }
01921 }
01922 }
01923 else if ((lazy)&&(m->rpt+1==high))
01924 {
01925
01926 idx-=(m->rpt+1);
01927 }
01928 else if ((lazy)&&(m->rpt+1>=low))
01929 {
01930 if (m->rpt==-1)
01931 {
01932
01933
01934 m->rpt = 0;
01935 }
01936 else
01937 {
01938
01939 idx++;
01940 if (idx+1>=matches_size) {matches.resize(matches_size*=2); m=&matches[idx-1];}
01941 m1 = &matches[idx];
01942 m1->buf_idx = m->buf_idx;
01943 m1->start = m->end;
01944 m1->rpt = m->rpt+1;
01945 }
01946 backtrack = true;
01947 state = 0;
01948 }
01949 else if (m->rpt<low)
01950 {
01951
01952
01953 idx--;
01954 }
01955 else if ((m->rpt==0)&&(low==0))
01956 {
01957
01958
01959
01960
01961
01962 m->end = m->start;
01963 m->rpt = -1;
01964
01965 idx++;
01966 if (idx+1>=matches_size) {matches.resize(matches_size*=2); m=&matches[idx-1];}
01967 m1 = &matches[idx];
01968 m1->buf_idx = n;
01969 m1->start = m->end;
01970 m1->rpt = 0;
01971 state = 0;
01972 }
01973 else
01974 {
01975
01976
01977 m1 = &matches[idx-1];
01978 m->buf_idx = GetRpt(Next(m1->buf_idx),low,high,lazy,prev);
01979 m->start = m1->end;
01980 m->rpt = 0;
01981 state = 0;
01982 }
01983 }
01984 }
01985 return false;
01986 }
01987
01994 unsigned int CSRegEx::Next(unsigned int pos) const
01995 {
01996 RGTok t = static_cast<RGTok>(buf[pos]);
01997
01998 unsigned int cpy = t;
01999 cpy>>=5;
02000 #ifdef _UNICODE
02001
02002
02003
02004
02005
02006
02007
02008
02009
02010
02011
02012
02013
02014
02015 cpy = (cpy+1+(t==rg_chr))>>1;
02016 #endif
02017 if (cpy==0)
02018 {
02019 if (t==rg_str)
02020 {
02021 cpy = GetValue16(pos+1)+1+CSREGEX_OFSWIDTH;
02022 }
02023 else if ((t==rg_grp)||(t==rg_grn))
02024 {
02025 cpy = GetValue16(pos+1)-1+2*(GetValue16(pos+1+CSREGEX_OFSWIDTH)-1)+1+2*CSREGEX_OFSWIDTH;
02026 }
02027 }
02028 return pos+cpy;
02029 }