From 7efeb77a38213daa240522b202f83c62fac7ab4f Mon Sep 17 00:00:00 2001 From: Nad Alaba <37968805+nadalaba@users.noreply.github.com> Date: Fri, 20 Mar 2026 20:55:26 +0300 Subject: [PATCH 1/7] allow multiple orderings of accents in Arabic --- frontend/__tests__/utils/strings.spec.ts | 245 ++++++++++++++++++ frontend/src/ts/input/handlers/insert-text.ts | 22 +- frontend/src/ts/utils/strings.ts | 88 +++++++ 3 files changed, 352 insertions(+), 3 deletions(-) diff --git a/frontend/__tests__/utils/strings.spec.ts b/frontend/__tests__/utils/strings.spec.ts index 8fa02f4c5e81..3bd55a4e0d65 100644 --- a/frontend/__tests__/utils/strings.spec.ts +++ b/frontend/__tests__/utils/strings.spec.ts @@ -587,4 +587,249 @@ describe("string utils", () => { }); }); }); + + describe("Accent pattern rules", () => { + const commonRules = Strings.__testing.ACCENT_PATTERNS; + const languageRules = Object.values( + Strings.__testing.LANGUAGE_ACCENT_PATTERNS, + ); + const allRules: string[][] = [...languageRules.flat(), ...commonRules]; + const allPatterns = allRules.flat(); + + // correct unicode length + const ulen = (s: string) => Array.from(s).length; + + it("each rule has at least 2 patterns", () => { + for (const rule of allRules) { + expect(rule.length).toBeGreaterThanOrEqual(2); + } + }); + + it("each pattern has at least 2 unicode characters", () => { + for (const pattern of allPatterns) { + expect(ulen(pattern)).toBeGreaterThanOrEqual(2); + } + }); + + it("all patterns across common rules are distinct", () => { + const commonPatterns = commonRules.flat(); + const set = new Set(commonPatterns); + expect(set.size).toBe(commonPatterns.length); + }); + + it("all patterns across each language rules are distinct", () => { + for (const lang of languageRules) { + const languagePatterns = lang.flat(); + const set = new Set(languagePatterns); + expect(set.size).toBe(languagePatterns.length); + } + }); + + it("patterns inside each rule have the same unicode length", () => { + for (const rule of 
allRules) { + const lengths = rule.map(ulen); + const first = lengths[0]; + for (const len of lengths) { + expect(len).toBe(first); + } + } + }); + + it("common rules are sorted from longest pattern to shortest", () => { + const patternLengths = commonRules.map((rule) => ulen(rule[0]!)); + for (let i = 1; i < patternLengths.length; i++) { + expect(patternLengths[i]).toBeLessThanOrEqual(patternLengths[i - 1]!); + } + }); + + it("each language rules are sorted from longest pattern to shortest", () => { + for (const lang of languageRules) { + const patternLengths = lang.map((rule) => ulen(rule[0]!)); + for (let i = 1; i < patternLengths.length; i++) { + expect(patternLengths[i]).toBeLessThanOrEqual(patternLengths[i - 1]!); + } + } + }); + }); + + describe("_checkAccentOrderMismatchWithRules", () => { + const rules = [ + ["abc", "acb", "bac", "bca", "cab", "cba"], + ["ab", "ba"], + ]; + const langRules = { testLang: [["bc", "cb"]] }; + const allRules = [...langRules.testLang, ...rules]; + + it("returns null when neither input nor word matche a pattern", () => { + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xyy", + "yzz", + allRules, + ); + expect(result).toBeNull(); + }); + + it("returns null when only the word matches a pattern", () => { + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xzz", + "yab", + allRules, + ); + expect(result).toBeNull(); + }); + + it("returns null when only the input matches a pattern", () => { + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xab", + "yzz", + allRules, + ); + expect(result).toBeNull(); + }); + + it("returns no mismatch when both input and word match the same pattern", () => { + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xab", + "yab", + allRules, + ); + expect(result).toBeNull(); + }); + + it("returns input pattern when input and word match different patterns in the same rule", () => { + const result = 
Strings.__testing._checkAccentOrderMismatchWithRules( + "xab", + "yba", + allRules, + ); + expect(result).toStrictEqual({ inputPattern: "ab", patternStart: 1 }); + }); + + it("returns input pattern if there is a mismatch even if input does not have full pattern", () => { + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xa", + "yba", + allRules, + ); + expect(result).toStrictEqual({ inputPattern: "ab", patternStart: 1 }); + }); + + it("returns no mismatch when word does not have full pattern", () => { + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xab", + "yb", + allRules, + ); + expect(result).toBeNull(); + }); + + it("returns no mismatch when both input and word match the same pattern (longer word)", () => { + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xab", + "yabzzz", + allRules, + ); + expect(result).toBeNull(); + }); + + it("returns input pattern when input and word match different patterns in the same rule (longer word)", () => { + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xab", + "ybazzz", + allRules, + ); + expect(result).toStrictEqual({ inputPattern: "ab", patternStart: 1 }); + }); + + it("prefers rules with longer patterns", () => { + // both rules ["ab", "ba"] and ["abc", "bac"] apply here + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xab", + "ybac", + allRules, + ); + expect(result).toStrictEqual({ inputPattern: "abc", patternStart: 1 }); // the input does not have to have the full pattern + }); + + it("prefers language-specific rules", () => { + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xbc", + "ycba", + allRules, + ); + expect(result).toStrictEqual({ inputPattern: "bc", patternStart: 1 }); // not the longer pattern "bca" + }); + + it("prefers matching with shortest overlap in the same rule", () => { + // There are 2 [input, word] matches: ["ab", "ba"] at position 1 and ["ba", "ab"] at 
position 2 + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xab", + "ybab", + allRules, + ); + expect(result).toStrictEqual({ inputPattern: "ba", patternStart: 2 }); + }); + + it("prefers earlier patterns if there are 2 input pattern matches in the same rule", () => { + // both "cab" and "cba" match input pattern + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xc", + "ybac", + allRules, + ); + expect(result).toStrictEqual({ inputPattern: "cab", patternStart: 1 }); + }); + + // always check patterns in the same position + it("returns null when word's pattern is after input's pattern", () => { + // pattern "ba" exists in word but in a different position from input + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xab", + "yyba", + allRules, + ); + expect(result).toBeNull(); + }); + + // always check patterns in the same position + it("returns null when word's pattern is before input's pattern", () => { + // pattern "ba" exists in word but in a different position from input + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xxab", + "ybay", + allRules, + ); + expect(result).toBeNull(); + }); + + it("returns the pattern that mismatches at the same position", () => { + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xba", + "ybcab", + allRules, + ); + expect(result).toStrictEqual({ inputPattern: "acb", patternStart: 2 }); + }); + }); + + describe("checkAccentOrderMismatch", () => { + it("returns no mismatch when both input and word match the same pattern", () => { + const result = Strings.checkAccentOrderMismatch("حطَّ", "حطَّ"); + expect(result).toBeNull(); + }); + + it("returns input pattern when input and word match different patterns in the same rule", () => { + const result = Strings.checkAccentOrderMismatch("طلُّ", "طلُّ"); + expect(result).toStrictEqual({ inputPattern: "ُّ", patternStart: 2 }); + }); + + it("returns input pattern if there is a 
mismatch even if input does not have full pattern", () => { + const result = Strings.checkAccentOrderMismatch( + "خصوصاً", + "خصوصًا", + "arabic", + ); + expect(result).toStrictEqual({ inputPattern: "اً", patternStart: 4 }); + }); + }); }); diff --git a/frontend/src/ts/input/handlers/insert-text.ts b/frontend/src/ts/input/handlers/insert-text.ts index 4b577eece08c..bc53911e58e1 100644 --- a/frontend/src/ts/input/handlers/insert-text.ts +++ b/frontend/src/ts/input/handlers/insert-text.ts @@ -12,7 +12,11 @@ import { checkIfFailedDueToMinBurst, checkIfFinished, } from "../helpers/fail-or-finish"; -import { areCharactersVisuallyEqual, isSpace } from "../../utils/strings"; +import { + areCharactersVisuallyEqual, + checkAccentOrderMismatch, + isSpace, +} from "../../utils/strings"; import * as TestState from "../../test/test-state"; import * as TestLogic from "../../test/test-logic"; import { @@ -101,16 +105,16 @@ export async function onInsertText(options: OnInsertTextParams): Promise { // input and target word const testInput = TestInput.input.current; - const currentWord = TestWords.words.getCurrent(); // if the character is visually equal, replace it with the target character // this ensures all future equivalence checks work correctly const normalizedData = normalizeDataAndUpdateInputIfNeeded( options.data, testInput, - currentWord, + TestWords.words.getCurrent(), ); const data = normalizedData ?? 
options.data; + const currentWord = TestWords.words.getCurrent(); // start if needed if (!TestState.isActive) { @@ -302,6 +306,18 @@ function normalizeDataAndUpdateInputIfNeeded( replaceInputElementLastValueChar(targetChar); normalizedData = targetChar; } + + const accent = checkAccentOrderMismatch( + testInput + normalizedData, + currentWord, + Config.language, + ); + if (accent !== null) { + TestWords.words.list[TestState.activeWordIndex] = + currentWord.slice(0, accent.patternStart) + + accent.inputPattern + + currentWord.slice(accent.patternStart + accent.inputPattern.length); + } return normalizedData; } diff --git a/frontend/src/ts/utils/strings.ts b/frontend/src/ts/utils/strings.ts index 6d134f881407..5759fae496f6 100644 --- a/frontend/src/ts/utils/strings.ts +++ b/frontend/src/ts/utils/strings.ts @@ -319,6 +319,91 @@ export function areCharactersVisuallyEqual( return false; } +// put rules with longer patterns first +const ACCENT_PATTERNS = [ + ["َّ", "َّ"], + ["ًّ", "ًّ"], + ["ُّ", "ُّ"], + ["ٌّ", "ٌّ"], + ["ِّ", "ِّ"], + ["ٍّ", "ٍّ"], +]; +const LANGUAGE_ACCENT_PATTERNS: Partial> = { + arabic: [ + ["ّاً", "ًّا", "ًّا"], + ["اً", "ًا"], + ], +}; + +export function checkAccentOrderMismatch( + input: string, + currentWord: string, + language?: Language, +): { inputPattern: string; patternStart: number } | null { + const langRules = + language && LANGUAGE_ACCENT_PATTERNS[language] + ? 
LANGUAGE_ACCENT_PATTERNS[language] + : []; + return _checkAccentOrderMismatchWithRules(input, currentWord, [ + ...langRules, + ...ACCENT_PATTERNS, + ]); +} + +function _checkAccentOrderMismatchWithRules( + input: string, + currentWord: string, + accentPatterns: string[][], +): { inputPattern: string; patternStart: number } | null { + const minWordsLength = Math.min(input.length, currentWord.length); + + for (const rule of accentPatterns) { + let inputPattern: string | null = null; + let wordPattern: string | null = null; + let patternStart: number | null = null; + + let mismatch; + const checkMismatch = (): { + inputPattern: string; + patternStart: number; + } | null => { + if ( + inputPattern !== null && + patternStart !== null && + wordPattern !== null && + inputPattern !== wordPattern + ) { + return { inputPattern, patternStart }; + } + return null; + }; + + const patternLength = rule[0]?.length ?? 0; + const minLength = Math.min(patternLength, minWordsLength); + + for (let overlapLen = 1; overlapLen <= minLength; overlapLen++) { + const overlap = input.slice(-overlapLen); + const matchStart = input.length - overlapLen; + const matchEnd = matchStart + patternLength; + const wordSlice = currentWord.slice(matchStart, matchEnd); + + for (const pattern of rule) { + if (pattern.startsWith(overlap)) { + inputPattern = pattern; + patternStart = matchStart; + // same pattern in both, no mismatch + if (wordSlice === pattern) return null; + } else if (wordSlice === pattern) { + wordPattern = pattern; + } + if ((mismatch = checkMismatch())) return mismatch; + } + } + } + + return null; +} + export function toHex(buffer: ArrayBuffer): string { if (Uint8Array.prototype.toHex !== undefined) { return new Uint8Array(buffer).toHex(); @@ -365,4 +450,7 @@ export function isSpace(char: string): boolean { // Export testing utilities for unit tests export const __testing = { hasRTLCharacters, + ACCENT_PATTERNS, + LANGUAGE_ACCENT_PATTERNS, + _checkAccentOrderMismatchWithRules, }; From 
e63f4c8cf2f43a32a7bccf61eaf4173b21aa9aea Mon Sep 17 00:00:00 2001 From: Nad Alaba <37968805+nadalaba@users.noreply.github.com> Date: Fri, 20 Mar 2026 21:35:16 +0300 Subject: [PATCH 2/7] normalizedData can be undefined --- frontend/src/ts/input/handlers/insert-text.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/ts/input/handlers/insert-text.ts b/frontend/src/ts/input/handlers/insert-text.ts index bc53911e58e1..9aacd3a98d54 100644 --- a/frontend/src/ts/input/handlers/insert-text.ts +++ b/frontend/src/ts/input/handlers/insert-text.ts @@ -308,7 +308,7 @@ function normalizeDataAndUpdateInputIfNeeded( } const accent = checkAccentOrderMismatch( - testInput + normalizedData, + testInput + (normalizedData ?? data), currentWord, Config.language, ); From e6425df0e984029975c347ddab76b6eea1f1f3b8 Mon Sep 17 00:00:00 2001 From: Nad Alaba <37968805+nadalaba@users.noreply.github.com> Date: Sat, 21 Mar 2026 20:44:58 +0300 Subject: [PATCH 3/7] prefer longest overlap --- frontend/__tests__/utils/strings.spec.ts | 40 +++++++++++++-------- frontend/src/ts/utils/strings.ts | 44 ++++++++++++------------ monkeytype.code-workspace | 1 + 3 files changed, 48 insertions(+), 37 deletions(-) diff --git a/frontend/__tests__/utils/strings.spec.ts b/frontend/__tests__/utils/strings.spec.ts index 3bd55a4e0d65..e2bcc1f4815d 100644 --- a/frontend/__tests__/utils/strings.spec.ts +++ b/frontend/__tests__/utils/strings.spec.ts @@ -660,7 +660,7 @@ describe("string utils", () => { const langRules = { testLang: [["bc", "cb"]] }; const allRules = [...langRules.testLang, ...rules]; - it("returns null when neither input nor word matche a pattern", () => { + it("returns null when neither input nor word matches a pattern", () => { const result = Strings.__testing._checkAccentOrderMismatchWithRules( "xyy", "yzz", @@ -751,7 +751,7 @@ describe("string utils", () => { expect(result).toStrictEqual({ inputPattern: "abc", patternStart: 1 }); // the input does not have to have the 
full pattern }); - it("prefers language-specific rules", () => { + it("prefers language-specific rules even if longer common rules exist", () => { const result = Strings.__testing._checkAccentOrderMismatchWithRules( "xbc", "ycba", @@ -760,16 +760,6 @@ describe("string utils", () => { expect(result).toStrictEqual({ inputPattern: "bc", patternStart: 1 }); // not the longer pattern "bca" }); - it("prefers matching with shortest overlap in the same rule", () => { - // There are 2 [input, word] matches: ["ab", "ba"] at position 1 and ["ba", "ab"] at position 2 - const result = Strings.__testing._checkAccentOrderMismatchWithRules( - "xab", - "ybab", - allRules, - ); - expect(result).toStrictEqual({ inputPattern: "ba", patternStart: 2 }); - }); - it("prefers earlier patterns if there are 2 input pattern matches in the same rule", () => { // both "cab" and "cba" match input pattern const result = Strings.__testing._checkAccentOrderMismatchWithRules( @@ -780,6 +770,17 @@ describe("string utils", () => { expect(result).toStrictEqual({ inputPattern: "cab", patternStart: 1 }); }); + it("prefers matching with longest overlap in the same rule even if earlier-shorter-overlap-patterns match", () => { + // There are 2 [input, word] matches: ["ba", "ab"] at position 1 and ["ab", "ba"] at position 2 + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xba", + "yaba", + allRules, + ); + // even though an earlier match "ab" exist but "ba" has longer overlap + expect(result).toStrictEqual({ inputPattern: "ba", patternStart: 1 }); + }); + // always check patterns in the same position it("returns null when word's pattern is after input's pattern", () => { // pattern "ba" exists in word but in a different position from input @@ -804,11 +805,20 @@ describe("string utils", () => { it("returns the pattern that mismatches at the same position", () => { const result = Strings.__testing._checkAccentOrderMismatchWithRules( - "xba", - "ybcab", + "xa", + "ybac", + allRules, + 
); + expect(result).toStrictEqual({ inputPattern: "abc", patternStart: 1 }); + }); + + it("returns the pattern that mismatches at the same position", () => { + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xabc", + "yyyabc", allRules, ); - expect(result).toStrictEqual({ inputPattern: "acb", patternStart: 2 }); + expect(result).toStrictEqual({ inputPattern: "cab", patternStart: 3 }); }); }); diff --git a/frontend/src/ts/utils/strings.ts b/frontend/src/ts/utils/strings.ts index 5759fae496f6..d0dfd49ebfd3 100644 --- a/frontend/src/ts/utils/strings.ts +++ b/frontend/src/ts/utils/strings.ts @@ -358,37 +358,37 @@ function _checkAccentOrderMismatchWithRules( const minWordsLength = Math.min(input.length, currentWord.length); for (const rule of accentPatterns) { - let inputPattern: string | null = null; - let wordPattern: string | null = null; - let patternStart: number | null = null; - - let mismatch; - const checkMismatch = (): { - inputPattern: string; - patternStart: number; - } | null => { - if ( - inputPattern !== null && - patternStart !== null && - wordPattern !== null && - inputPattern !== wordPattern - ) { - return { inputPattern, patternStart }; - } - return null; - }; - const patternLength = rule[0]?.length ?? 
0; const minLength = Math.min(patternLength, minWordsLength); - for (let overlapLen = 1; overlapLen <= minLength; overlapLen++) { + for (let overlapLen = minLength; overlapLen >= 1; overlapLen--) { + let inputPattern: string | null = null; + let wordPattern: string | null = null; + let patternStart: number | null = null; + + let mismatch; + const checkMismatch = (): { + inputPattern: string; + patternStart: number; + } | null => { + if ( + inputPattern !== null && + patternStart !== null && + wordPattern !== null && + inputPattern !== wordPattern + ) { + return { inputPattern, patternStart }; + } + return null; + }; + const overlap = input.slice(-overlapLen); const matchStart = input.length - overlapLen; const matchEnd = matchStart + patternLength; const wordSlice = currentWord.slice(matchStart, matchEnd); for (const pattern of rule) { - if (pattern.startsWith(overlap)) { + if (inputPattern === null && pattern.startsWith(overlap)) { inputPattern = pattern; patternStart = matchStart; // same pattern in both, no mismatch diff --git a/monkeytype.code-workspace b/monkeytype.code-workspace index 3ddff0b55d9b..d93430c1a15f 100644 --- a/monkeytype.code-workspace +++ b/monkeytype.code-workspace @@ -68,6 +68,7 @@ "[javascriptreact]": { "editor.defaultFormatter": "oxc.oxc-vscode", }, + "js/ts.tsdk.path": "backend\\node_modules\\typescript\\lib", }, "launch": { From 95a5df6c0040c2e2088d38ed41d20ebadee44f6d Mon Sep 17 00:00:00 2001 From: Nad Alaba <37968805+nadalaba@users.noreply.github.com> Date: Sat, 21 Mar 2026 21:37:31 +0300 Subject: [PATCH 4/7] wording --- frontend/__tests__/utils/strings.spec.ts | 2 +- monkeytype.code-workspace | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/frontend/__tests__/utils/strings.spec.ts b/frontend/__tests__/utils/strings.spec.ts index e2bcc1f4815d..6a61c2085539 100644 --- a/frontend/__tests__/utils/strings.spec.ts +++ b/frontend/__tests__/utils/strings.spec.ts @@ -777,7 +777,7 @@ describe("string utils", () => { "yaba", 
allRules, ); - // even though an earlier match "ab" exist but "ba" has longer overlap + // even though an earlier pattern "ab" exist but "ba" has longer overlap expect(result).toStrictEqual({ inputPattern: "ba", patternStart: 1 }); }); diff --git a/monkeytype.code-workspace b/monkeytype.code-workspace index d93430c1a15f..3ddff0b55d9b 100644 --- a/monkeytype.code-workspace +++ b/monkeytype.code-workspace @@ -68,7 +68,6 @@ "[javascriptreact]": { "editor.defaultFormatter": "oxc.oxc-vscode", }, - "js/ts.tsdk.path": "backend\\node_modules\\typescript\\lib", }, "launch": { From 21286a6e1564ff47729aeb0aea07fb59ff16133c Mon Sep 17 00:00:00 2001 From: Nad Alaba <37968805+nadalaba@users.noreply.github.com> Date: Sun, 22 Mar 2026 01:57:27 +0300 Subject: [PATCH 5/7] add edge cases tests --- frontend/__tests__/utils/strings.spec.ts | 34 +++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/frontend/__tests__/utils/strings.spec.ts b/frontend/__tests__/utils/strings.spec.ts index 6a61c2085539..329432bf40c7 100644 --- a/frontend/__tests__/utils/strings.spec.ts +++ b/frontend/__tests__/utils/strings.spec.ts @@ -654,7 +654,7 @@ describe("string utils", () => { describe("_checkAccentOrderMismatchWithRules", () => { const rules = [ - ["abc", "acb", "bac", "bca", "cab", "cba"], + ["abc", "acb", "bac", "bca", "cab", "cba", "dba", "dbc"], ["ab", "ba"], ]; const langRules = { testLang: [["bc", "cb"]] }; @@ -803,7 +803,8 @@ describe("string utils", () => { expect(result).toBeNull(); }); - it("returns the pattern that mismatches at the same position", () => { + it("returns 1st input pattern match when word matches a pattern after 2 input matches", () => { + // input matches "abc" and "acb" before word matches "bac" but 1st match is returned const result = Strings.__testing._checkAccentOrderMismatchWithRules( "xa", "ybac", @@ -820,6 +821,28 @@ describe("string utils", () => { ); expect(result).toStrictEqual({ inputPattern: "cab", patternStart: 3 }); }); + + 
it("returns null if input and word has the same language specific pattern, even if a longer common-rule mismatch exists", () => { + // there is a longer pattern mismatch ["abc", "dbc"]. However, in a higher priority + // rule (language-specific) input and word have the same pattern ["bc", "bc"] + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xabc", + "ydbc", + allRules, + ); + expect(result).toBeNull(); + }); + + it("respects priority when there are 2 rules: 1 mismatch and 1 with the same pattern", () => { + // the longer pattern rule has a mismatch ["cba", "dba"], so it's returned + // even though input and word have the same pattern in a lower priority rule ["ba", "ba"] + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + "xcba", + "ydba", + allRules, + ); + expect(result).toStrictEqual({ inputPattern: "cba", patternStart: 1 }); + }); }); describe("checkAccentOrderMismatch", () => { @@ -833,7 +856,7 @@ describe("string utils", () => { expect(result).toStrictEqual({ inputPattern: "ُّ", patternStart: 2 }); }); - it("returns input pattern if there is a mismatch even if input does not have full pattern", () => { + it("returns input pattern if there is a mismatch in arabic specific 2 char rule", () => { const result = Strings.checkAccentOrderMismatch( "خصوصاً", "خصوصًا", @@ -841,5 +864,10 @@ describe("string utils", () => { ); expect(result).toStrictEqual({ inputPattern: "اً", patternStart: 4 }); }); + + it("returns input pattern if there is a mismatch in arabic specific 3 char rule", () => { + const result = Strings.checkAccentOrderMismatch("حقّاً", "حقًّا", "arabic"); + expect(result).toStrictEqual({ inputPattern: "ّاً", patternStart: 2 }); + }); }); }); From ed23d6e0e3d6785a7b7519c7bfe6e33db057b089 Mon Sep 17 00:00:00 2001 From: Nad Alaba <37968805+nadalaba@users.noreply.github.com> Date: Sun, 22 Mar 2026 17:48:55 +0300 Subject: [PATCH 6/7] MOAR TESTS --- frontend/__tests__/utils/strings.spec.ts | 17 +++++++++++++++++ 1 
file changed, 17 insertions(+) diff --git a/frontend/__tests__/utils/strings.spec.ts b/frontend/__tests__/utils/strings.spec.ts index 329432bf40c7..6d869c565d85 100644 --- a/frontend/__tests__/utils/strings.spec.ts +++ b/frontend/__tests__/utils/strings.spec.ts @@ -843,6 +843,23 @@ describe("string utils", () => { ); expect(result).toStrictEqual({ inputPattern: "cba", patternStart: 1 }); }); + + it.each([ + { input: "xab", word: "ycba", expected: ["abc", 1] }, + { input: "xab", word: "yyba", expected: null }, + ])( + "returns $expected for input $input and word $word", + ({ input, word, expected }) => { + const result = Strings.__testing._checkAccentOrderMismatchWithRules( + input, + word, + allRules, + ); + expect(result).toStrictEqual( + expected && { inputPattern: expected[0], patternStart: expected[1] }, + ); + }, + ); }); describe("checkAccentOrderMismatch", () => { From 6d5f3a5ed8e358e4ab47002668ae58472a284d05 Mon Sep 17 00:00:00 2001 From: Nad Alaba <37968805+nadalaba@users.noreply.github.com> Date: Sun, 22 Mar 2026 18:34:00 +0300 Subject: [PATCH 7/7] move call, naming, and jsdoc --- frontend/__tests__/utils/strings.spec.ts | 4 +- frontend/src/ts/input/handlers/insert-text.ts | 33 ++++++++------ frontend/src/ts/utils/strings.ts | 44 +++++++++++++------ 3 files changed, 51 insertions(+), 30 deletions(-) diff --git a/frontend/__tests__/utils/strings.spec.ts b/frontend/__tests__/utils/strings.spec.ts index 6d869c565d85..a1ffb3ee5630 100644 --- a/frontend/__tests__/utils/strings.spec.ts +++ b/frontend/__tests__/utils/strings.spec.ts @@ -589,9 +589,9 @@ describe("string utils", () => { }); describe("Accent pattern rules", () => { - const commonRules = Strings.__testing.ACCENT_PATTERNS; + const commonRules = Strings.__testing.ACCENT_RULES; const languageRules = Object.values( - Strings.__testing.LANGUAGE_ACCENT_PATTERNS, + Strings.__testing.LANGUAGE_ACCENT_RULES, ); const allRules: string[][] = [...languageRules.flat(), ...commonRules]; const allPatterns = 
allRules.flat(); diff --git a/frontend/src/ts/input/handlers/insert-text.ts b/frontend/src/ts/input/handlers/insert-text.ts index 9aacd3a98d54..bfb910963a77 100644 --- a/frontend/src/ts/input/handlers/insert-text.ts +++ b/frontend/src/ts/input/handlers/insert-text.ts @@ -105,16 +105,33 @@ export async function onInsertText(options: OnInsertTextParams): Promise { // input and target word const testInput = TestInput.input.current; + let currentWord = TestWords.words.getCurrent(); // if the character is visually equal, replace it with the target character // this ensures all future equivalence checks work correctly const normalizedData = normalizeDataAndUpdateInputIfNeeded( options.data, testInput, - TestWords.words.getCurrent(), + currentWord, ); const data = normalizedData ?? options.data; - const currentWord = TestWords.words.getCurrent(); + + // if the input is committing to a pattern that is different from target word's pattern + // and those patterns are equivalent, replace target word's pattern with input's. + // changing target word here ensures the input is considered correct, + // and actually typed characters are highlighted in `updateWordLetters()`. + const pattern = checkAccentOrderMismatch( + testInput + data, + currentWord, + Config.language, + ); + if (pattern !== null) { + currentWord = + currentWord.slice(0, pattern.patternStart) + + pattern.inputPattern + + currentWord.slice(pattern.patternStart + pattern.inputPattern.length); + TestWords.words.list[TestState.activeWordIndex] = currentWord; + } // start if needed if (!TestState.isActive) { @@ -306,18 +323,6 @@ function normalizeDataAndUpdateInputIfNeeded( replaceInputElementLastValueChar(targetChar); normalizedData = targetChar; } - - const accent = checkAccentOrderMismatch( - testInput + (normalizedData ?? 
data), - currentWord, - Config.language, - ); - if (accent !== null) { - TestWords.words.list[TestState.activeWordIndex] = - currentWord.slice(0, accent.patternStart) + - accent.inputPattern + - currentWord.slice(accent.patternStart + accent.inputPattern.length); - } return normalizedData; } diff --git a/frontend/src/ts/utils/strings.ts b/frontend/src/ts/utils/strings.ts index d0dfd49ebfd3..d45b42bf1735 100644 --- a/frontend/src/ts/utils/strings.ts +++ b/frontend/src/ts/utils/strings.ts @@ -320,7 +320,7 @@ export function areCharactersVisuallyEqual( } // put rules with longer patterns first -const ACCENT_PATTERNS = [ +const ACCENT_RULES = [ ["َّ", "َّ"], ["ًّ", "ًّ"], ["ُّ", "ُّ"], @@ -328,36 +328,52 @@ const ACCENT_PATTERNS = [ ["ِّ", "ِّ"], ["ٍّ", "ٍّ"], ]; -const LANGUAGE_ACCENT_PATTERNS: Partial> = { +const LANGUAGE_ACCENT_RULES: Partial> = { + // rules with longer patterns first arabic: [ ["ّاً", "ًّا", "ًّا"], ["اً", "ًا"], ], }; +/** + * Checks if there is a mismatch in patterns between 2 words: input and target word. + * A mismatch is when those words contain different patterns that are considered + * equivalent according to pre-determined set of rules, at the same position. + * The target word needs to have the full pattern, but the input only + * needs to end with the first part of the pattern. + * The rules have the following priority (from highest to lowest): language-specific + * rules - rules with longest pattern - rules having the longest overlap with input. + * If the input matches 2 patterns within a rule, earliest pattern is returned. 
+ * @param input input word to check if it ends with pattern + * @param targetWord target word to check if it contains pattern + * @param language optional language to check for language-specific rules + * @returns an object containing the input pattern with its start position if there + * is a mismatch, null otherwise (having no equivalent patterns, or the same pattern) + */ export function checkAccentOrderMismatch( input: string, - currentWord: string, + targetWord: string, language?: Language, ): { inputPattern: string; patternStart: number } | null { const langRules = - language && LANGUAGE_ACCENT_PATTERNS[language] - ? LANGUAGE_ACCENT_PATTERNS[language] + language && LANGUAGE_ACCENT_RULES[language] + ? LANGUAGE_ACCENT_RULES[language] : []; - return _checkAccentOrderMismatchWithRules(input, currentWord, [ + return _checkAccentOrderMismatchWithRules(input, targetWord, [ ...langRules, - ...ACCENT_PATTERNS, + ...ACCENT_RULES, ]); } function _checkAccentOrderMismatchWithRules( input: string, - currentWord: string, - accentPatterns: string[][], + targetWord: string, + accentRules: string[][], ): { inputPattern: string; patternStart: number } | null { - const minWordsLength = Math.min(input.length, currentWord.length); + const minWordsLength = Math.min(input.length, targetWord.length); - for (const rule of accentPatterns) { + for (const rule of accentRules) { const patternLength = rule[0]?.length ?? 
0; const minLength = Math.min(patternLength, minWordsLength); @@ -385,7 +401,7 @@ function _checkAccentOrderMismatchWithRules( const overlap = input.slice(-overlapLen); const matchStart = input.length - overlapLen; const matchEnd = matchStart + patternLength; - const wordSlice = currentWord.slice(matchStart, matchEnd); + const wordSlice = targetWord.slice(matchStart, matchEnd); for (const pattern of rule) { if (inputPattern === null && pattern.startsWith(overlap)) { @@ -450,7 +466,7 @@ export function isSpace(char: string): boolean { // Export testing utilities for unit tests export const __testing = { hasRTLCharacters, - ACCENT_PATTERNS, - LANGUAGE_ACCENT_PATTERNS, + ACCENT_RULES, + LANGUAGE_ACCENT_RULES, _checkAccentOrderMismatchWithRules, };