From 931f8650cfa2d562b42f5898cda983062b46ed67 Mon Sep 17 00:00:00 2001
From: Vladimir Pakhomchik
Date: Fri, 15 Aug 2025 19:26:42 +0200
Subject: [PATCH] Enhance trimAndRemoveNonPrintableText to support extended characters (#15919)

Co-authored-by: Jonathan Prusik
---
 .../collect-autofill-content.service.spec.ts  | 48 +++++++++++++++++++
 .../collect-autofill-content.service.ts       |  2 +-
 2 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/apps/browser/src/autofill/services/collect-autofill-content.service.spec.ts b/apps/browser/src/autofill/services/collect-autofill-content.service.spec.ts
index 5f3bc4dfff9..f0aa9c1c440 100644
--- a/apps/browser/src/autofill/services/collect-autofill-content.service.spec.ts
+++ b/apps/browser/src/autofill/services/collect-autofill-content.service.spec.ts
@@ -1757,6 +1757,54 @@ describe("CollectAutofillContentService", () => {
 
       expect(parsedText).toEqual("Hello! This is a test string.");
     });
+
+    it("preserves extended Latin letters like Š and ć", () => {
+      const text = "Šifra ćevapčići korisnika";
+      const result = collectAutofillContentService["trimAndRemoveNonPrintableText"](text);
+      expect(result).toEqual("Šifra ćevapčići korisnika");
+    });
+
+    it("removes zero-width and control characters", () => {
+      const text = "Hello\u200B\u200C\u200D\u2060World\x00\x1F!";
+      const result = collectAutofillContentService["trimAndRemoveNonPrintableText"](text);
+      expect(result).toEqual("Hello World !");
+    });
+
+    it("removes leading and trailing whitespace", () => {
+      const text = " padded text with spaces ";
+      const result = collectAutofillContentService["trimAndRemoveNonPrintableText"](text);
+      expect(result).toEqual("padded text with spaces");
+    });
+
+    it("replaces multiple whitespaces (tabs, newlines, spaces) with one space", () => {
+      const text = "one\t\ntwo \n three\t\tfour";
+      const result = collectAutofillContentService["trimAndRemoveNonPrintableText"](text);
+      expect(result).toEqual("one two three four");
+    });
+
+    it("preserves emoji and symbols", () => {
+      const text = "Text with emoji 🐍🚀 and ©®✓ symbols";
+      const result = collectAutofillContentService["trimAndRemoveNonPrintableText"](text);
+      expect(result).toEqual("Text with emoji 🐍🚀 and ©®✓ symbols");
+    });
+
+    it("handles RTL and LTR marks", () => {
+      const text = "abc\u200F\u202Edеf";
+      const result = collectAutofillContentService["trimAndRemoveNonPrintableText"](text);
+      expect(result).toEqual("abc dеf");
+    });
+
+    it("handles mathematical unicode letters", () => {
+      const text = "Unicode math: 𝒜𝒷𝒸𝒹";
+      const result = collectAutofillContentService["trimAndRemoveNonPrintableText"](text);
+      expect(result).toEqual("Unicode math: 𝒜𝒷𝒸𝒹");
+    });
+
+    it("removes only invisible non-printables, keeps Japanese", () => {
+      const text = "これは\u200Bテストです";
+      const result = collectAutofillContentService["trimAndRemoveNonPrintableText"](text);
+      expect(result).toEqual("これは テストです");
+    });
   });
 
   describe("recursivelyGetTextFromPreviousSiblings", () => {
diff --git a/apps/browser/src/autofill/services/collect-autofill-content.service.ts b/apps/browser/src/autofill/services/collect-autofill-content.service.ts
index 0f9c8993014..c6af9739773 100644
--- a/apps/browser/src/autofill/services/collect-autofill-content.service.ts
+++ b/apps/browser/src/autofill/services/collect-autofill-content.service.ts
@@ -713,7 +713,7 @@ export class CollectAutofillContentService implements CollectAutofillContentServ
    */
   private trimAndRemoveNonPrintableText(textContent: string): string {
     return (textContent || "")
-      .replace(/[^\x20-\x7E]+|\s+/g, " ") // Strip out non-primitive characters and replace multiple spaces with a single space
+      .replace(/[\p{C}\s]+/gu, " ") // Collapse each run of non-printable (Unicode "Other" category) and/or whitespace characters into a single space; one character class so a mixed run such as "\n    " yields one space, not two
       .trim(); // Trim leading and trailing whitespace
   }
 