Jump to content
Janilabo

Find

Recommended Posts

Purpose/Functionality: Returns FIND_STR position in TEXT.

Supported Methods: mmAll, mmBackward, mmIgnoreCase, mmOverlap, mmWholeWords and mmStrictWW (Strict Whole Words) [TMatchMethod's]

 

This was 1 of my hardest functions to write... Took me nearly 4 days to get it working correctly (rewrote it couple times, few major massive structure changes, working around the regex, etc).

 

Included a small test with it. :)

 

const
 TEXT = 'TestesTESTTestest test TEST Test Test.';
 FIND_STR = 'test'; 

type
 TMatchMethod = (mmAll, mmBackward, mmIgnoreCase, mmOverlap, mmWholeWords, mmStrictWW);
 TMatchMethods = set of TMatchMethod; 

function Find(Text, FindStr: string; Methods: TMatchMethods; Offset: Integer): TIntArray;
var
 tL, fsL, d, i, t: Integer;
 rgx_fs: string;            
 r: TRegexMatchArray;
begin
 fsL := Length(FindStr);
 tL := Length(Text);
 if ((tL < 1) or (Length(FindStr) > tL) or (FindStr = '')) then
   Exit;             
 if (Offset < 1) then
   Offset := 1;     
 FindStr := PregQuote(FindStr);  
 if (mmWholeWords in Methods) then
 begin
   if (mmStrictWW in Methods) then  
     rgx_fs := '/(?<!\S)' + FindStr + '(?!\S)/'
   else
     rgx_fs := '/(^|\(?<=\s))' + FindStr + '((?=\s)|\$)/';
 end else
   rgx_fs := '/' + FindStr + '/';
 if (mmIgnoreCase in Methods) then
   rgx_fs := (rgx_fs + 'i');
 rgx_fs := (rgx_fs + 'm');
 if (mmOverlap in Methods) then
   SetLength(Result, (tL - (fsL - 1)))
 else
   SetLength(Result, ((tL div fsL) + 1));         
 case (mmBackward in Methods) of 
   True:
   begin
     Text := Copy(Text, 1, Offset); 
     while PregMatchEx(rgx_fs, Text, r) do
     begin
       Result[d] := (r[0].Offset + d); 
       Inc(d);
       Delete(Text, r[0].Offset, 1);
       SetLength(r, 0);
     end;         
     SetLength(Result, d);    
     if (mmAll in Methods) then   
       InvertTIA(Result)              
     else
       if (d > 0) then                         
         Result := [Result[(d - 1)]];
     if (mmAll in Methods) then
       if not (mmOverlap in Methods) then
         if (d > 1) then
           for i := (0 + t) to ((d - 2) - t) do
             if ((Result[i] - Result[(i + 1)]) <= fsL) then
             begin       
               Result[(i + 1)] := Result[i]; 
               Delete(Result, i, 1);
               Inc(t);
             end;  
   end;
   False:                    
   begin    
     if (Offset > 1) then
       Delete(Text, 1, (Offset - 1));
     if (mmAll in Methods) then 
     begin    
       while PregMatchEx(rgx_fs, Text, r) do
       begin
         Result[d] := ((r[0].Offset + d) + (Offset - 1)); 
         Inc(d);
         Delete(Text, r[0].Offset, 1);
         SetLength(r, 0);
       end; 
       SetLength(Result, d);
       if (mmAll in Methods) then
         if not (mmOverlap in Methods) then
           if (d > 1) then
             for i := (0 + t) to ((d - 2) - t) do
               if ((Result[(i + 1)] - Result[i]) <= fsL) then
               begin         
                 Result[(i + 1)] := Result[i]; 
                 Delete(Result, i, 1);
                 Inc(t);
               end; 
     end else
     if PregMatchEx(rgx_fs, Text, r) then
     begin
       Result := [((r[0].Offset + d) + (Offset - 1))];
       SetLength(r, 0);
     end;
   end;
 end;
 TIAUnique(Result);      
end;

var
 l, h, i, i2, s, o: Integer;
 TIA: TIntArray;
 m_sets: array of TMatchMethods;
 TSA: TStrArray;

procedure BuildSets(find_method: (fmBackward, fmForward));
begin
 case find_method of
   fmForward:
   begin
     TSA := ['[]', '[mmIgnoreCase]', '[mmIgnoreCase, mmAll]', '[mmIgnoreCase, mmAll, mmOverlap]', '[mmIgnoreCase, mmAll, mmOverlap, mmWholeWords]', '[mmIgnoreCase, mmAll, mmOverlap, mmWholeWords, mmStrictWW]'];
     l := Length(FIND_STR);
     o := 1;   
     s := High(TSA); 
     SetLength(m_sets, (s + 1));
     m_sets[0] := [];
     m_sets[1] := [mmIgnoreCase];
     m_sets[2] := [mmIgnoreCase, mmAll];
     m_sets[3] := [mmIgnoreCase, mmAll, mmOverlap];
     m_sets[4] := [mmIgnoreCase, mmAll, mmOverlap, mmWholeWords];
     m_sets[5] := [mmIgnoreCase, mmAll, mmOverlap, mmWholeWords, mmStrictWW];  
   end;
   fmBackward:
   begin
     TSA := ['[mmBackward]', '[mmBackward, mmIgnoreCase]', '[mmBackward, mmIgnoreCase, mmAll]', '[mmBackward, mmIgnoreCase, mmAll, mmOverlap]', '[mmBackward, mmIgnoreCase, mmAll, mmOverlap, mmWholeWords]', '[mmBackward, mmIgnoreCase, mmAll, mmOverlap, mmWholeWords, mmStrictWW]'];
     l := Length(FIND_STR);
     o := Length(TEXT);
     s := High(TSA); 
     SetLength(m_sets, (s + 1));
     m_sets[0] := [mmBackward];
     m_sets[1] := [mmBackward, mmIgnoreCase];
     m_sets[2] := [mmBackward, mmIgnoreCase, mmAll];
     m_sets[3] := [mmBackward, mmIgnoreCase, mmAll, mmOverlap];
     m_sets[4] := [mmBackward, mmIgnoreCase, mmAll, mmOverlap, mmWholeWords];
     m_sets[5] := [mmBackward, mmIgnoreCase, mmAll, mmOverlap, mmWholeWords, mmStrictWW];  
   end;    
 end;
end;

begin                
 ClearDebug;                              
 BuildSets(fmForward);      
 WriteLn('FORWARD:');
 for i := 0 to s do
 begin
   TIA := Find(TEXT, FIND_STR, m_sets[i], o);
   h := High(TIA);   
   WriteLn('Find(''' + FIND_STR + ''', ''' + TEXT + ''', ' + TSA[i] + ', ' + IntToStr(o) + ')')
   for i2 := 0 to h do
     WriteLn('Match[' + IntToStr(i2 + 1) + ']: ' + Copy(TEXT, TIA[i2], l) + ' (@POS.' + IntToStr(TIA[i2]) + ')');
   WriteLn('');
   SetLength(TIA, 0);
 end;                 
 BuildSets(fmBackward)
 WriteLn('BACKWARD:'); 
 for i := 0 to s do
 begin
   TIA := Find(TEXT, FIND_STR, m_sets[i], o);
   h := High(TIA);   
   WriteLn('Find(''' + FIND_STR + ''', ''' + TEXT + ''', ' + TSA[i] + ', ' + IntToStr(o) + ')')
   for i2 := 0 to h do
     WriteLn('Match[' + IntToStr(i2 + 1) + ']: ' + Copy(TEXT, TIA[i2], l) + ' (@POS.' + IntToStr(TIA[i2]) + ')');
   if (i < s) then 
     WriteLn('');
   SetLength(TIA, 0);
 end;
 SetLength(m_sets, 0);
 SetLength(TSA, 0);       
end.

Edited by Janilabo
Added TIAUnique to get rid of duplicate positions.
Link to comment
Share on other sites

So, essentially this is a more extended version of the FindStr you've been using in ReplaceEx?

Looks good!

Yep, exactly. Well, of course with an exception that this function is only used for finding the str positions inside text, whereas ReplaceEx is used for replacing.

 

I am working on ReplaceEx2 right now, which will be based on those TTextMethods. :)

 

Thanks, MarkD!

Link to comment
Share on other sites

Join the conversation

You can post now and register later. If you have an account, sign in now to post with your account.

Guest
Reply to this topic...

×   Pasted as rich text.   Paste as plain text instead

  Only 75 emoji are allowed.

×   Your link has been automatically embedded.   Display as a link instead

×   Your previous content has been restored.   Clear editor

×   You cannot paste images directly. Upload or insert images from URL.



×
  • Create New...