Jump to content
  • Sky
  • Blueberry
  • Slate
  • Blackcurrant
  • Watermelon
  • Strawberry
  • Orange
  • Banana
  • Apple
  • Emerald
  • Chocolate
  • Charcoal
sjesper

CountWords

Recommended Posts

Return the number of words in the given string

[sCAR]

function CountWords(S: String): Integer;

var

Len, i, ii, Index: Integer;

S2: String;

TSA: TStringArray;

begin

TSA := Explode(' ', S);

for ii := 0 to high(TSA) do begin

for i := low(TSA) to high(TSA) do begin

Index := Pos(TSA, S);

Len := PosEx(' ', S, Index);

S2 := Copy(S, Len + 1, 1);

if S2 = ' ' then

Delete(S, Len, 1);

end;

end;

result := length(Explode(' ', S));

for i := 1 to Length(S) do begin

if i = 1 then

if not StartsWith(' ', S) then

exit;

if PosEx(' ', S, i) = i then begin

result := result - 1;

end else begin

break;

end;

end;

end;

 

begin

writeln('The number of words: ' + IntToStr(CountWords(' Hello there ')));

end.[/sCAR]

Share this post


Link to post
Share on other sites

Hi sjesper,

 

You could also just do:

[scar]function CountWords(const S: String): Integer;

begin

Result := Length(Explode(#32, Trim(S)));

end;[/scar]

 

:)

Share this post


Link to post
Share on other sites

Ello sjesper & Daniel!

 

Small bump over here, as I wrote some alternative functions for this:

 

function GetWords(str: string): TStrArray;
var
 h, i, r: Integer;
begin
 Result := Explode(' ', Replace(Replace(str, #10, #13), #13, ' '));
 h := High(Result); 
 for i := 0 to h do  
   if (Result[i] <> '') then 
   begin 
     Result[r] := Result[i];
     Inc(r);
   end;
 SetLength(Result, r);
end;

function GetWordsR(str: string): TStrArray;
begin
 Result := Explode(#32, PregReplace('/\s{2,}/', ' ', Trim(str)));
end;

function CountWords(str: string): Integer;
var
 h, i: Integer;
 tmp: TStrArray;
begin
 tmp := Explode(' ', Replace(Replace(str, #10, #13), #13, ' '));
 h := High(tmp); 
 Result := (h + 1);
 for i := 0 to h do  
   if (tmp[i] = '') then 
     Dec(Result);
 if (h > -1) then  
   SetLength(tmp, 0);
end;

function CountWordsR(str: string): Integer;
begin
 Result := Length(Explode(#32, PregReplace('/\s{2,}/', ' ', Trim(str))));
end;

var
 text, tmp: string;
 words: TStrArray;
 h, i: Integer;

begin
 ClearDebug;
 text := '  This  is  a    TEST! ' + #13#10 +
         'Lets count some words, yay! ' + #13#10 +
         ' MULTILINE WORKS, TOO.';
 WriteLn('CountWords: ' + IntToStr(CountWords(text)));
 WriteLn('CountWordsR: ' + IntToStr(CountWordsR(text)));
 tmp := 'GetWords: ';
 words := GetWords(text);
 h := High(words);
 for i := 0 to h do
   if (i < h) then
     tmp := (tmp + '"' + (words[i]) + '", ')
   else
     tmp := (tmp + '"' + (words[i]) + '"');
 WriteLn(tmp);
 tmp := 'GetWordsR: ';
 words := GetWordsR(text);
 h := High(words);
 for i := 0 to h do
   if (i < h) then
     tmp := (tmp + '"' + (words[i]) + '", ')
   else
     tmp := (tmp + '"' + (words[i]) + '"');
 WriteLn(tmp);
end.

 

Not sure if anyone needs em, but I wanted to release em here anyways, so there you go. :)

 

-Jani

Edited by Janilabo
Tweaked GetWords & CountWords

Share this post


Link to post
Share on other sites

Hello Janilabo, sjesper and Daniel!

 

I just had to show you guys how this is REALLY done.. (with regular expression):

 

function PregMatchAll(pattern, str:string): TStrArray;
var
 Match: Boolean;
 Matches: TRegexMatchArray;  
 CurpossE : Integer;
begin
 match := True;
 while(match = True) do 
 begin  
   match := PregMatchEx(pattern, str, Matches);
   if match then                                
   begin                                                
     CurpossE :=  Length(Matches[0].MatchedText) + Matches[0].OffSet
     str := Right(str, Length(str) - CurpossE);   
     SetLength(Result, Length(Result)+1);
     Result[High(Result)] := Matches[0].MatchedText;
   end;
 end;    
end;

function CountWords(str: string): Integer;
var
 words: TStrArray;
begin
 words := PregMatchAll('/(\w+)/', str);
 Result := Length(Words);
end;

var
 mystr: string;

begin
 mystr := 'Ost World1 Amf#World2,but hello there.Are we        friends?';
 WriteLn(CountWords(mystr));
end.

 

A nifty thing you might take out of this is a PregMatchAll-function (finds ALL matches of a given string that fits your pattern)! :)

Edited by slacky

Share this post


Link to post
Share on other sites
I just had to show you guys how this is REALLY done..:

 

function PregMatchAll(pattern, str:string): TStrArray;
var
 Match: Boolean;
 Matches: TRegexMatchArray;  
 CurpossE : Integer;
begin
 match := True;
 while(match = True) do 
 begin  
   match := PregMatchEx(pattern, str, Matches);
   if match then                                
   begin                                                
     CurpossE :=  Length(Matches[0].MatchedText) + Matches[0].OffSet
     str := Right(str, Length(str) - CurpossE);   
     SetLength(Result, Length(Result)+1);
     Result[High(Result)] := Matches[0].MatchedText;
   end;
 end;    
end;

function CountWords(str: string): Integer;
var
 words: TStrArray;
begin
 words := PregMatchAll('/(\w+)/im', str);
 Result := Length(Words);
end;

var
 mystr: string;

begin
 mystr := 'Ost World1 Amf#World2,but hello there.Are we        friends?';
 WriteLn(CountWords(mystr));
end.

 

A nifty thing you might take out of this is a PregMatchAll-function (finds ALL matches of a given string that fits your pattern)! :)

It runs smoothly.

 

Nice job slacky!

 

I LIKE IT! :P

Share this post


Link to post
Share on other sites

Technically, regular expressions are more for convenience than performance, a custom algorithm for a simple case like this will probably perform better than bulky regular expressions, while still being fairly easy to construct. But in SCAR of course you have to take into account the performance of the script engine as well, so in that case, regular expressions would indeed be the way to go.

Share this post


Link to post
Share on other sites
Hello Janilabo, sjesper and Daniel!

 

I just had to show you guys how this is REALLY done.. (with regular expression):

 

function PregMatchAll(pattern, str:string): TStrArray;
var
 Match: Boolean;
 Matches: TRegexMatchArray;  
 CurpossE : Integer;
begin
 match := True;
 while(match = True) do 
 begin  
   match := PregMatchEx(pattern, str, Matches);
   if match then                                
   begin                                                
     CurpossE :=  Length(Matches[0].MatchedText) + Matches[0].OffSet
     str := Right(str, Length(str) - CurpossE);   
     SetLength(Result, Length(Result)+1);
     Result[High(Result)] := Matches[0].MatchedText;
   end;
 end;    
end;

function CountWords(str: string): Integer;
var
 words: TStrArray;
begin
 words := PregMatchAll('/(\w+)/', str);
 Result := Length(Words);
end;

var
 mystr: string;

begin
 mystr := 'Ost World1 Amf#World2,but hello there.Are we        friends?';
 WriteLn(CountWords(mystr));
end.

 

A nifty thing you might take out of this is a PregMatchAll-function (finds ALL matches of a given string that fits your pattern)! :)

 

Your algorithms after some tiny changes:

 

function PregMatchAll(pattern, str: string): TStrArray;
var
 matches: TRegexMatchArray;  
 CurpossE: Integer;
begin
 while PregMatchEx(pattern, str, matches) do 
 begin                                                 
   CurpossE := (Length(matches[0].MatchedText) + Matches[0].OffSet);
   str := Right(str, (Length(str) - CurpossE));   
   SetLength(Result, (Length(Result) + 1));
   Result[High(Result)] := matches[0].MatchedText;
 end;    
end;

function GetWords(str: string): TStrArray;
begin
 Result := PregMatchAll('/(\w+)/im', str);
end;

function CountWords(str: string): Integer;
begin
 Result := Length(PregMatchAll('/(\w+)/im', str));
end;

var
 mystr: string;
 words: TStrArray; 
 h, i: Integer;

begin
 mystr := 'Ost World1 Amf#World2,but hello there.Are we        friends?';
 WriteLn('CountWords: ' + IntToStr(CountWords(mystr)));
 words := GetWords(mystr);
 mystr := 'GetWords: ';
 h := High(words);
 for i := 0 to h do
   if (i < h) then
     mystr := (mystr + '"' + words[i] + '", ')
   else
     mystr := (mystr + '"' + words[i] + '"');  
 WriteLn(mystr);
end.

 

+Example of course..

 

Works well. :)

Share this post


Link to post
Share on other sites

Looks a lot smoother with them changes to PregMatchAll! :) Hope you, or someone will put it in there include, I've often found cases where I needed A PregMatchAll-kind of function.. I've ended up avoding am due to me being to lazy to create a function like that (until now) :)

Share this post


Link to post
Share on other sites
Looks a lot smoother with them changes to PregMatchAll! :) Hope you, or someone will put it in there include, I've often found cases where I needed A PregMatchAll-kind of function.. I've ended up avoding am due to me being to lazy to create a function like that (until now) :)
I think it's very useful little function.

So, I added it to MSSL, included those credits for you man!

Share this post


Link to post
Share on other sites

Couple more algorithms to dump here, non-regex ways:

 

const
 TEST_TEXT = 'Ost World1 Amf#World2,but hello there.Are we        friends?';

{==============================================================================]
 Explanation: Returns the words from text as TStringArray.
[==============================================================================}
function GetWords(text: string): TStringArray;
var
 l, i, r: Integer;
 w: string;
begin
 l := Length(text);
 if (l > 0) then
 begin
   w := 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_';
   SetLength(Result, l);
   for i := 1 to l do
     if (Pos(text[i], w) > 0) then
     begin
       Result[r] := text[i];
       for i := (i + 1) to l do
         if (Pos(text[i], w) > 0) then
           Result[r] := (Result[r] + text[i])
         else
           Break;
       Inc(r);
     end;
 end;
 SetLength(Result, r);
end;

{==============================================================================]
 Explanation: Returns the words from text as TStringArray.
              Supports custom character set (wordCharacters)
[==============================================================================}
function GetWordsEx(text, wordCharacters: string): TStringArray;
var
 l, i, r: Integer;
begin
 l := Length(text);
 if ((l > 0) and (wordCharacters <> '')) then
 begin
   SetLength(Result, l);
   for i := 1 to l do
     if (Pos(text[i], wordCharacters) > 0) then
     begin
       Result[r] := text[i];
       for i := (i + 1) to l do
         if (Pos(text[i], wordCharacters) > 0) then
           Result[r] := (Result[r] + text[i])
         else
           Break;
       Inc(r);
     end;
 end;
 SetLength(Result, r);
end;

{==============================================================================]
 Explanation: Returns count of words in text.
[==============================================================================}
function CountWords(text: string): Integer;
var
 l, i: Integer;
 w: string;
begin
 Result := 0;
 l := Length(text);
 if (l > 0) then
 begin
   w := 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_';
   for i := 1 to l do
     if (Pos(text[i], w) > 0) then
     begin       
       Inc(Result);
       for i := (i + 1) to l do
         if (Pos(text[i], w) < 1) then
           Break;
     end;
 end;
end;

{==============================================================================]
 Explanation: Returns count of words in text.
              Supports custom character set (wordCharacters)
[==============================================================================}
function CountWordsEx(text, wordCharacters: string): Integer;
var
 l, i: Integer;
begin
 Result := 0;
 l := Length(text);
 if ((l > 0) and (wordCharacters <> '')) then
 for i := 1 to l do
   if (Pos(text[i], wordCharacters) > 0) then
   begin       
     Inc(Result);
     for i := (i + 1) to l do
       if (Pos(text[i], wordCharacters) < 1) then
         Break;
   end;
end;

var
 words: TStrArray; 
 h, i: Integer;
 tmp: string;

begin
 ClearDebug;
 WriteLn('CountWords: ' + IntToStr(CountWords(TEST_TEXT)));
 words := GetWords(TEST_TEXT);
 tmp := 'GetWords: ';
 h := High(words);
 for i := 0 to h do
   if (i < h) then
     tmp := (tmp + '"' + words[i] + '", ')
   else
     tmp := (tmp + '"' + words[i] + '"');  
 WriteLn(tmp);
end.

 

Adapting the way slacky's regex-based algorithms work.. :P

Share this post


Link to post
Share on other sites

Please sign in to comment

You will be able to leave a comment after signing in



Sign In Now

×