new f_replace implementation - will move to separate module soon

main
Samantha Wright 2024-08-05 21:18:02 -07:00
parent 81f7a97bd6
commit a37d25cd12
1 changed file with 72 additions and 60 deletions

View File

@ -99,71 +99,83 @@ string f_replace(string message, string dictionary_name) {
echo("[filter] warning: replacement dictionary " + dictionary_name + " does not exist. See 'help replace'"); echo("[filter] warning: replacement dictionary " + dictionary_name + " does not exist. See 'help replace'");
return message; return message;
} }
list EOS = [".", "?", "!"]; string out = "";
list separators = EOS + [" ", ",", "\"", "'", "-", ":", ";", "(", ")", "*", "~", "/", "@", "", "—"]; integer limit = strlen(message);
list tokens = llParseString2List(message, [], separators); // not sure if this would be better as llParseStringKeepNulls() integer sentence_start = TRUE;
list cases; string word;
list ltokens; while(limit >= 0) { // one extra iteration ensures last word is emitted
integer tmax = count(tokens); integer c = llOrd(message, 0);
string cs = llChar(c);
list headwords = jskeys(dictionary); if(c == 0x27 /* ' */ || llToLower(cs) != llToUpper(cs)) {
integer ti = 0; word += cs;
while(ti < tmax) { // echo("word includes " + cs);
string t = gets(tokens, ti);
string lt = llToLower(t);
ltokens += lt;
integer original_case;
if(lt == t) {
original_case = 0; // plain
} else if(t == "I") {
original_case = 3; // unique
} else if(delstring(lt, 0, 0) == delstring(t, 0, 0)) {
original_case = 1; // first-letter
} else if(llToUpper(t) == t) {
original_case = 2; // all-caps
} else { } else {
original_case = 3; // echo("separator " + cs);
integer c_len = strlen(word);
if(c_len > 0) {
while(llOrd(word, 0) == 0x27) { // '
out += "'";
word = delstring(word, 0, 0);
c_len -= 1;
}
string suffix = "";
while(llOrd(word, LAST) == 0x27) { // '
suffix = "'" + suffix;
word = delstring(word, LAST, LAST);
c_len -= 1;
}
if(c_len > 0) {
string replacement = word;
string new;
if((new = getjs(dictionary, [word])) != JSON_INVALID) {
replacement = new; // exact capitalization match
} else if((new = getjs(dictionary, [llToLower(word)])) != JSON_INVALID) {
if(word == llToUpper(word) && !(c_len == 1 && sentence_start) && word != "I") {
//echo(word + " is type 0");
// word was entered in all caps and isn't just a one-letter word at the start of the sentence or "I"
replacement = llToUpper(new);
} else if(word == "I" && !sentence_start) {
// echo(word + " is type 1");
// avoid transferring first-letter capitalization from "I" in the middle of a sentence:
replacement = new;
} else if(substr(word, 0, 0) == llToUpper(substr(word, 0, 0))) {
// echo(word + " is type 2");
// word began with a capital letter (including "I" at the start of a sentence):
replacement = llToUpper(substr(new, 0, 0)) + substr(new, 1, LAST);
} else {
// echo(word + " is type 3");
replacement = new;
}
}
// echo("emitting word " + replacement);
out += replacement + suffix;
/* if(sentence_start)
echo("no longer sentence start after emitting " + replacement + " at position " + (string)i); */
sentence_start = FALSE;
} else {
out += suffix;
}
word = "";
}
if(
((c == 33 || c == 65 || c == 46) && llOrd(message, 1) == 32) // ! ? . followed by space
|| (c == 10)) { // line feed
sentence_start = TRUE;
// echo("Sentence start at position " + (string)i);
}
// echo("appending characters " + cs);
out += cs;
} }
integer di = index(headwords, lt); message = delstring(message, 0, 0);
if(~di) { --limit;
string replacement = getjs(dictionary, [lt]);
if(llToLower(replacement) == replacement) { // only alter case for lower-case replacements
if(original_case == 3) {
string prequel = gets(ltokens, ti - 2);
if(contains(EOS, prequel) || prequel == "") // are we exactly two tokens after the end of a sentence?
original_case = 1;
else
original_case = 0;
}
if(original_case == 1) {
replacement = llToUpper(substr(replacement, 0, 0)) + delstring(replacement, 0, 0);
} else if(original_case == 2) {
replacement = llToUpper(replacement);
}
} else if(original_case == 2) { // actually, let's pass on all-caps so shouting looks right
replacement = llToUpper(replacement);
}
tokens = alter(tokens, [replacement], ti, ti);
}
++ti;
} }
message = concat(tokens, ""); return out;
integer hwi = count(headwords);
while(hwi--) {
string hw = gets(headwords, hwi);
if(~strpos(hw, "'")) {
string replacement = getjs(dictionary, [hw]);
message = replace(message, hw, replacement);
}
}
return message;
} }
string f_slang(string message, string flags) { string f_slang(string message, string flags) {