sjesper Posted July 13, 2012 Share Posted July 13, 2012 Return the number of words in the given string [sCAR] function CountWords(S: String): Integer; var Len, i, ii, Index: Integer; S2: String; TSA: TStringArray; begin TSA := Explode(' ', S); for ii := 0 to high(TSA) do begin for i := low(TSA) to high(TSA) do begin Index := Pos(TSA, S); Len := PosEx(' ', S, Index); S2 := Copy(S, Len + 1, 1); if S2 = ' ' then Delete(S, Len, 1); end; end; result := length(Explode(' ', S)); for i := 1 to Length(S) do begin if i = 1 then if not StartsWith(' ', S) then exit; if PosEx(' ', S, i) = i then begin result := result - 1; end else begin break; end; end; end; begin writeln('The number of words: ' + IntToStr(CountWords(' Hello there '))); end.[/sCAR] Quote Link to comment Share on other sites More sharing options...
Daniel Posted September 3, 2012 Share Posted September 3, 2012 Hi sjesper, You could also just do: [scar]function CountWords(const S: String): Integer; begin Result := Length(Explode(#32, Trim(S))); end;[/scar] Quote Link to comment Share on other sites More sharing options...
Janilabo Posted July 11, 2013 Share Posted July 11, 2013 (edited) Ello sjesper & Daniel! Small bump over here, as I wrote some alternative functions for this: function GetWords(str: string): TStrArray; var h, i, r: Integer; begin Result := Explode(' ', Replace(Replace(str, #10, #13), #13, ' ')); h := High(Result); for i := 0 to h do if (Result[i] <> '') then begin Result[r] := Result[i]; Inc(r); end; SetLength(Result, r); end; function GetWordsR(str: string): TStrArray; begin Result := Explode(#32, PregReplace('/\s{2,}/', ' ', Trim(str))); end; function CountWords(str: string): Integer; var h, i: Integer; tmp: TStrArray; begin tmp := Explode(' ', Replace(Replace(str, #10, #13), #13, ' ')); h := High(tmp); Result := (h + 1); for i := 0 to h do if (tmp[i] = '') then Dec(Result); if (h > -1) then SetLength(tmp, 0); end; function CountWordsR(str: string): Integer; begin Result := Length(Explode(#32, PregReplace('/\s{2,}/', ' ', Trim(str)))); end; var text, tmp: string; words: TStrArray; h, i: Integer; begin ClearDebug; text := ' This is a TEST! ' + #13#10 + 'Lets count some words, yay! ' + #13#10 + ' MULTILINE WORKS, TOO.'; WriteLn('CountWords: ' + IntToStr(CountWords(text))); WriteLn('CountWordsR: ' + IntToStr(CountWordsR(text))); tmp := 'GetWords: '; words := GetWords(text); h := High(words); for i := 0 to h do if (i < h) then tmp := (tmp + '"' + (words[i]) + '", ') else tmp := (tmp + '"' + (words[i]) + '"'); WriteLn(tmp); tmp := 'GetWordsR: '; words := GetWordsR(text); h := High(words); for i := 0 to h do if (i < h) then tmp := (tmp + '"' + (words[i]) + '", ') else tmp := (tmp + '"' + (words[i]) + '"'); WriteLn(tmp); end. Not sure if anyone needs em, but I wanted to release em here anyways, so there you go. -Jani Edited July 11, 2013 by Janilabo Tweaked GetWords & CountWords Quote Link to comment Share on other sites More sharing options...
slacky Posted July 11, 2013 Share Posted July 11, 2013 (edited) Hello Janilabo, sjesper and Daniel! I just had to show you guys how this is REALLY done.. (with regular expression): function PregMatchAll(pattern, str:string): TStrArray; var Match: Boolean; Matches: TRegexMatchArray; CurpossE : Integer; begin match := True; while(match = True) do begin match := PregMatchEx(pattern, str, Matches); if match then begin CurpossE := Length(Matches[0].MatchedText) + Matches[0].OffSet str := Right(str, Length(str) - CurpossE); SetLength(Result, Length(Result)+1); Result[High(Result)] := Matches[0].MatchedText; end; end; end; function CountWords(str: string): Integer; var words: TStrArray; begin words := PregMatchAll('/(\w+)/', str); Result := Length(Words); end; var mystr: string; begin mystr := 'Ost World1 Amf#World2,but hello there.Are we friends?'; WriteLn(CountWords(mystr)); end. A nifty thing you might take out of this is a PregMatchAll-function (finds ALL matches of a given string that fits your pattern)! Edited July 11, 2013 by slacky Quote Link to comment Share on other sites More sharing options...
Janilabo Posted July 11, 2013 Share Posted July 11, 2013 I just had to show you guys how this is REALLY done..: function PregMatchAll(pattern, str:string): TStrArray; var Match: Boolean; Matches: TRegexMatchArray; CurpossE : Integer; begin match := True; while(match = True) do begin match := PregMatchEx(pattern, str, Matches); if match then begin CurpossE := Length(Matches[0].MatchedText) + Matches[0].OffSet str := Right(str, Length(str) - CurpossE); SetLength(Result, Length(Result)+1); Result[High(Result)] := Matches[0].MatchedText; end; end; end; function CountWords(str: string): Integer; var words: TStrArray; begin words := PregMatchAll('/(\w+)/im', str); Result := Length(Words); end; var mystr: string; begin mystr := 'Ost World1 Amf#World2,but hello there.Are we friends?'; WriteLn(CountWords(mystr)); end. A nifty thing you might take out of this is a PregMatchAll-function (finds ALL matches of a given string that fits your pattern)! It runs smoothly. Nice job slacky! I LIKE IT! Quote Link to comment Share on other sites More sharing options...
FHannes Posted July 11, 2013 Share Posted July 11, 2013 Technically, regular expressions are more for convenience than performance, a custom algorithm for a simple case like this will probably perform better than bulky regular expressions, while still being fairly easy to construct. But in SCAR of course you have to take into account the performance of the script engine as well, so in that case, regular expressions would indeed be the way to go. Quote Link to comment Share on other sites More sharing options...
Janilabo Posted July 11, 2013 Share Posted July 11, 2013 Hello Janilabo, sjesper and Daniel! I just had to show you guys how this is REALLY done.. (with regular expression): function PregMatchAll(pattern, str:string): TStrArray; var Match: Boolean; Matches: TRegexMatchArray; CurpossE : Integer; begin match := True; while(match = True) do begin match := PregMatchEx(pattern, str, Matches); if match then begin CurpossE := Length(Matches[0].MatchedText) + Matches[0].OffSet str := Right(str, Length(str) - CurpossE); SetLength(Result, Length(Result)+1); Result[High(Result)] := Matches[0].MatchedText; end; end; end; function CountWords(str: string): Integer; var words: TStrArray; begin words := PregMatchAll('/(\w+)/', str); Result := Length(Words); end; var mystr: string; begin mystr := 'Ost World1 Amf#World2,but hello there.Are we friends?'; WriteLn(CountWords(mystr)); end. A nifty thing you might take out of this is a PregMatchAll-function (finds ALL matches of a given string that fits your pattern)! Your algorithms after some tiny changes: function PregMatchAll(pattern, str: string): TStrArray; var matches: TRegexMatchArray; CurpossE: Integer; begin while PregMatchEx(pattern, str, matches) do begin CurpossE := (Length(matches[0].MatchedText) + Matches[0].OffSet); str := Right(str, (Length(str) - CurpossE)); SetLength(Result, (Length(Result) + 1)); Result[High(Result)] := matches[0].MatchedText; end; end; function GetWords(str: string): TStrArray; begin Result := PregMatchAll('/(\w+)/im', str); end; function CountWords(str: string): Integer; begin Result := Length(PregMatchAll('/(\w+)/im', str)); end; var mystr: string; words: TStrArray; h, i: Integer; begin mystr := 'Ost World1 Amf#World2,but hello there.Are we friends?'; WriteLn('CountWords: ' + IntToStr(CountWords(mystr))); words := GetWords(mystr); mystr := 'GetWords: '; h := High(words); for i := 0 to h do if (i < h) then mystr := (mystr + '"' + words[i] + '", ') else mystr := (mystr + '"' + words[i] + '"'); WriteLn(mystr); end. +Example of course.. Works well. Quote Link to comment Share on other sites More sharing options...
slacky Posted July 11, 2013 Share Posted July 11, 2013 Looks a lot smoother with them changes to PregMatchAll! Hope you, or someone will put it in there include, I've often found cases where I needed A PregMatchAll-kind of function.. I've ended up avoding am due to me being to lazy to create a function like that (until now) Quote Link to comment Share on other sites More sharing options...
Janilabo Posted July 11, 2013 Share Posted July 11, 2013 Looks a lot smoother with them changes to PregMatchAll! Hope you, or someone will put it in there include, I've often found cases where I needed A PregMatchAll-kind of function.. I've ended up avoding am due to me being to lazy to create a function like that (until now) I think it's very useful little function.So, I added it to MSSL, included those credits for you man! Quote Link to comment Share on other sites More sharing options...
Janilabo Posted July 16, 2013 Share Posted July 16, 2013 Couple more algorithms to dump here, non-regex ways: const TEST_TEXT = 'Ost World1 Amf#World2,but hello there.Are we friends?'; {==============================================================================] Explanation: Returns the words from text as TStringArray. [==============================================================================} function GetWords(text: string): TStringArray; var l, i, r: Integer; w: string; begin l := Length(text); if (l > 0) then begin w := 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_'; SetLength(Result, l); for i := 1 to l do if (Pos(text[i], w) > 0) then begin Result[r] := text[i]; for i := (i + 1) to l do if (Pos(text[i], w) > 0) then Result[r] := (Result[r] + text[i]) else Break; Inc(r); end; end; SetLength(Result, r); end; {==============================================================================] Explanation: Returns the words from text as TStringArray. Supports custom character set (wordCharacters) [==============================================================================} function GetWordsEx(text, wordCharacters: string): TStringArray; var l, i, r: Integer; begin l := Length(text); if ((l > 0) and (wordCharacters <> '')) then begin SetLength(Result, l); for i := 1 to l do if (Pos(text[i], wordCharacters) > 0) then begin Result[r] := text[i]; for i := (i + 1) to l do if (Pos(text[i], wordCharacters) > 0) then Result[r] := (Result[r] + text[i]) else Break; Inc(r); end; end; SetLength(Result, r); end; {==============================================================================] Explanation: Returns count of words in text. [==============================================================================} function CountWords(text: string): Integer; var l, i: Integer; w: string; begin Result := 0; l := Length(text); if (l > 0) then begin w := 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_'; for i := 1 to l do if (Pos(text[i], w) > 0) then begin Inc(Result); for i := (i + 1) to l do if (Pos(text[i], w) < 1) then Break; end; end; end; {==============================================================================] Explanation: Returns count of words in text. Supports custom character set (wordCharacters) [==============================================================================} function CountWordsEx(text, wordCharacters: string): Integer; var l, i: Integer; begin Result := 0; l := Length(text); if ((l > 0) and (wordCharacters <> '')) then for i := 1 to l do if (Pos(text[i], wordCharacters) > 0) then begin Inc(Result); for i := (i + 1) to l do if (Pos(text[i], wordCharacters) < 1) then Break; end; end; var words: TStrArray; h, i: Integer; tmp: string; begin ClearDebug; WriteLn('CountWords: ' + IntToStr(CountWords(TEST_TEXT))); words := GetWords(TEST_TEXT); tmp := 'GetWords: '; h := High(words); for i := 0 to h do if (i < h) then tmp := (tmp + '"' + words[i] + '", ') else tmp := (tmp + '"' + words[i] + '"'); WriteLn(tmp); end. Adapting the way slacky's regex-based algorithms work.. Quote Link to comment Share on other sites More sharing options...