diff --git a/apps/web/src/app/tools/de-duplicate/duplicate-review-dialog.component.html b/apps/web/src/app/tools/de-duplicate/duplicate-review-dialog.component.html index 2dd9888e4b0..ae1d814e1cd 100644 --- a/apps/web/src/app/tools/de-duplicate/duplicate-review-dialog.component.html +++ b/apps/web/src/app/tools/de-duplicate/duplicate-review-dialog.component.html @@ -3,7 +3,7 @@ {{ totalDuplicateItemCount }} {{ "duplicatesFound" | i18n }}
-
+
-
+
-
{{ i + 1 }}. {{ set.displayKey }}
+
+ {{ i + 1 }}. {{ set.displayKey }} +
{{ set.ciphers.length }} {{ set.ciphers.length === 1 ? ("item" | i18n) : ("items" | i18n) }}
- - - - - {{ "name" | i18n }} - - - {{ "username" | i18n }} - - - {{ "websiteUri" | i18n }} - - - {{ "folder" | i18n }} - - - {{ "organization" | i18n }} - - - - + +
- - - {{ - row.name - }} - - ({{ "itemInTrash" | i18n }}) - - {{ row.login?.username }} - - {{ getCipherUris(row) || ("noValue" | i18n) }} - - {{ row.folderId || ("noValue" | i18n) }} - {{ row.organizationId || ("noValue" | i18n) }} - - +
+
+ {{ row.name }} +
+
+ ({{ "itemInTrash" | i18n }}) +
+
+
+ +
+
+
{{ "username" | i18n }}:
+
+ {{ row.login?.username || ("noValue" | i18n) }} +
+
+
+
{{ "websiteUri" | i18n }}:
+
+ {{ getCipherUris(row) || ("noValue" | i18n) }} +
+
+
+
+
diff --git a/apps/web/src/app/tools/de-duplicate/duplicate-review-dialog.component.ts b/apps/web/src/app/tools/de-duplicate/duplicate-review-dialog.component.ts index 03454a1705f..250c78fcb1f 100644 --- a/apps/web/src/app/tools/de-duplicate/duplicate-review-dialog.component.ts +++ b/apps/web/src/app/tools/de-duplicate/duplicate-review-dialog.component.ts @@ -10,6 +10,7 @@ import { DIALOG_DATA, TableDataSource, TableModule, + CardComponent, } from "@bitwarden/components"; import { I18nPipe } from "@bitwarden/ui-common"; @@ -21,7 +22,15 @@ export interface DuplicateReviewDialogResult { @Component({ selector: "app-duplicate-review-dialog", standalone: true, - imports: [CommonModule, DialogModule, ButtonModule, FormsModule, I18nPipe, TableModule], + imports: [ + CommonModule, + DialogModule, + ButtonModule, + FormsModule, + I18nPipe, + TableModule, + CardComponent, + ], templateUrl: "./duplicate-review-dialog.component.html", changeDetection: ChangeDetectionStrategy.OnPush, }) @@ -140,6 +149,10 @@ export class DuplicateReviewDialogComponent { return set.key; } + trackByCipher(_index: number, c: { id: string }): string { + return c?.id; + } + confirm(): void { const deleteCipherIds = Object.entries(this.selection) .filter(([, selected]) => selected) diff --git a/apps/web/src/app/vault/services/de-duplicate.service.spec.ts b/apps/web/src/app/vault/services/de-duplicate.service.spec.ts index 7620cdf2d75..75462c8ba47 100644 --- a/apps/web/src/app/vault/services/de-duplicate.service.spec.ts +++ b/apps/web/src/app/vault/services/de-duplicate.service.spec.ts @@ -31,10 +31,35 @@ function buildCipher({ describe("DeDuplicateService core duplicate detection", () => { let service: DeDuplicateService; - const findSets = (ciphers: CipherView[]) => - (service as any).findDuplicateSets(ciphers) as { key: string; ciphers: CipherView[] }[]; + const findSetsHostname = (ciphers: CipherView[]) => + (service as any).findDuplicateSets(ciphers, "Hostname") as { + key: string; + ciphers: CipherView[]; + }[]; + const findSetsBase = (ciphers: CipherView[]) => + (service as any).findDuplicateSets(ciphers, "Base") as { + key: string; + ciphers: CipherView[]; + }[]; + const findSetsHost = (ciphers: CipherView[]) => + (service as any).findDuplicateSets(ciphers, "Host") as { + key: string; + ciphers: CipherView[]; + }[]; + const findSetsExact = (ciphers: CipherView[]) => + (service as any).findDuplicateSets(ciphers, "Exact") as { + key: string; + ciphers: CipherView[]; + }[]; + const findSetsDefault = (ciphers: CipherView[]) => + (service as any).findDuplicateSets(ciphers) as { + key: string; + ciphers: CipherView[]; + }[]; // uses service default (Base) const normalize = (s: string) => (service as any).normalizeUri(s) as string; const extract = (c: CipherView) => (service as any).extractUriStrings(c) as string[]; + const keysFor = (uris: string[], strat: string) => + Array.from((service as any).getUriKeysForStrategy(uris, strat)).sort(); beforeEach(() => { jest.clearAllMocks(); @@ -45,7 +70,30 @@ describe("DeDuplicateService core duplicate detection", () => { ); }); - describe("username + URI bucket", () => { + // Default strategy check (Base) + describe("default strategy (Base)", () => { + it("uses Base when not specified: subdomains group by registrable domain", () => { + const c1 = buildCipher({ + id: "1", + name: "A", + username: "u", + uris: ["https://a.example.com"], + }); + const c2 = buildCipher({ + id: "2", + name: "B", + username: "u", + uris: ["https://b.example.com/login"], + }); + const sets = findSetsDefault([c1, c2]); + expect(sets).toHaveLength(1); + expect(sets[0].key).toBe("username+uri: u @ example.com"); + expect(new Set(sets[0].ciphers.map((c) => c.id))).toEqual(new Set(["1", "2"])); + }); + }); + + // Strategy-specific username+URI grouping tests — identical coverage across strategies + describe("username + URI bucket (Hostname strategy)", () => { it("groups items with same username and host (ignores path/query/fragment)", () => { const c1 = buildCipher({ id: "1", @@ -59,7 +107,7 @@ describe("DeDuplicateService core duplicate detection", () => { username: "user@example.com", uris: ["https://example.com/login?foo=1#frag"], }); - const sets = findSets([c1, c2]); + const sets = findSetsHostname([c1, c2]); expect(sets).toHaveLength(1); expect(sets[0].ciphers.map((c) => c.id).sort()).toEqual(["1", "2"]); expect(sets[0].key).toBe("username+uri: user@example.com @ example.com"); @@ -78,7 +126,7 @@ describe("DeDuplicateService core duplicate detection", () => { username: "u", uris: ["https://example.com/b"], }); - const sets = findSets([c1, c2]); + const sets = findSetsHostname([c1, c2]); expect(sets).toHaveLength(1); expect(sets[0].key).toBe("username+uri: u @ example.com"); }); @@ -96,7 +144,7 @@ describe("DeDuplicateService core duplicate detection", () => { username: "u", uris: ["https://example.com/path?x=2"], }); - const sets = findSets([c1, c2]); + const sets = findSetsHostname([c1, c2]); expect(sets).toHaveLength(1); expect(sets[0].key).toBe("username+uri: u @ example.com"); }); @@ -114,7 +162,7 @@ describe("DeDuplicateService core duplicate detection", () => { username: "u", uris: ["https://example.com/path#two"], }); - const sets = findSets([c1, c2]); + const sets = findSetsHostname([c1, c2]); expect(sets).toHaveLength(1); expect(sets[0].key).toBe("username+uri: u @ example.com"); }); @@ -132,7 +180,7 @@ describe("DeDuplicateService core duplicate detection", () => { username: "u", uris: ["https://example.com/login"], }); - const sets = findSets([c1, c2]); + const sets = findSetsHostname([c1, c2]); expect(sets).toHaveLength(1); expect(sets[0].key).toBe("username+uri: u @ example.com"); }); @@ -140,7 +188,7 @@ describe("DeDuplicateService core duplicate detection", () => { it("groups when host case and trailing slash differ", () => { const c1 = buildCipher({ id: "1", name: "A", username: "u", uris: ["HTTPS://EXAMPLE.COM/"] }); const c2 = buildCipher({ id: "2", name: "B", username: "u", uris: ["https://example.com"] }); - const sets = findSets([c1, c2]); + const sets = findSetsHostname([c1, c2]); expect(sets).toHaveLength(1); expect(sets[0].key).toBe("username+uri: u @ example.com"); }); @@ -158,7 +206,7 @@ describe("DeDuplicateService core duplicate detection", () => { username: "u", uris: ["https://b.example.com"], }); - expect(findSets([c1, c2])).toHaveLength(0); + expect(findSetsHostname([c1, c2])).toHaveLength(0); }); it("does NOT group when usernames differ even if host matches", () => { @@ -174,7 +222,7 @@ describe("DeDuplicateService core duplicate detection", () => { username: "bob", uris: ["http://service.test/"], }); - expect(findSets([c1, c2])).toHaveLength(0); + expect(findSetsHostname([c1, c2])).toHaveLength(0); }); it("multiple URIs per cipher can map to multiple hosts, but identical membership collapses to one set", () => { @@ -190,7 +238,7 @@ describe("DeDuplicateService core duplicate detection", () => { username: "u", uris: ["http://A.test/", "http://b.test"], }); - const sets = findSets([c1, c2]); + const sets = findSetsHostname([c1, c2]); expect(sets).toHaveLength(1); const key = sets[0].key; expect(["username+uri: u @ a.test", "username+uri: u @ b.test"]).toContain(key); @@ -206,7 +254,7 @@ describe("DeDuplicateService core duplicate detection", () => { uris: ["example.org/path" + i], }), ); - const sets = findSets(cs); + const sets = findSetsHostname(cs); expect(sets).toHaveLength(1); expect(sets[0].ciphers).toHaveLength(3); expect(new Set(sets[0].ciphers.map((c) => c.id))).toEqual(new Set(["1", "2", "3"])); @@ -223,7 +271,7 @@ describe("DeDuplicateService core duplicate detection", () => { uris: ["good.test"], }); const badNoUris = buildCipher({ id: "4", name: "D", username: "u", uris: [] }); - const sets = findSets([good1, badNoUsername, badWhitespaceUsername, badNoUris]); + const sets = findSetsHostname([good1, badNoUsername, badWhitespaceUsername, badNoUris]); expect(sets).toHaveLength(0); // only one valid item -> no duplicate set }); @@ -239,7 +287,7 @@ describe("DeDuplicateService core duplicate detection", () => { ], }); // Only one cipher overall; previously this could push the same cipher twice into the same bucket. - const sets = findSets([c1]); + const sets = findSetsHostname([c1]); expect(sets).toHaveLength(0); }); @@ -263,7 +311,7 @@ describe("DeDuplicateService core duplicate detection", () => { uris: ["forum.test.domain.org"], }); - const sets = findSets([c1, c2, c3]); + const sets = findSetsHostname([c1, c2, c3]); const setsByKey = new Map(sets.map((s) => [s.key, s])); const forumKey = "username+uri: tester @ forum.test.domain.org"; @@ -279,6 +327,649 @@ describe("DeDuplicateService core duplicate detection", () => { expect(forumSet.ciphers.map((c) => c.id).sort()).toEqual(["1", "3"]); expect(testSet.ciphers.map((c) => c.id).sort()).toEqual(["1", "2"]); }); + + it("groups androidapp variants by package id for same username", () => { + const c1 = buildCipher({ + id: "1", + name: "App1", + username: "u", + uris: ["androidapp://com.pkg/path"], + }); + const c2 = buildCipher({ + id: "2", + name: "App2", + username: "u", + uris: ["androidapp:com.pkg?x=1"], + }); + const sets = findSetsHostname([c1, c2]); + expect(sets).toHaveLength(1); + expect(sets[0].key).toBe("username+uri: u @ com.pkg"); + expect(new Set(sets[0].ciphers.map((c) => c.id))).toEqual(new Set(["1", "2"])); + }); + + it("IPv6 with different ports: still groups (port ignored)", () => { + const c1 = buildCipher({ + id: "1", + name: "V6a", + username: "u", + uris: ["http://[2001:db8::1]:8080/a"], + }); + const c2 = buildCipher({ + id: "2", + name: "V6b", + username: "u", + uris: ["http://[2001:db8::1]:8081/b"], + }); + const sets = findSetsHostname([c1, c2]); + expect(sets).toHaveLength(1); + }); + }); + + describe("username + URI bucket (Base strategy)", () => { + it("groups items with same username by registrable domain (ignores subdomain/path/query/fragment)", () => { + const c1 = buildCipher({ + id: "1", + name: "A", + username: "user@example.com", + uris: ["https://a.example.com/login"], + }); + const c2 = buildCipher({ + id: "2", + name: "B", + username: "user@example.com", + uris: ["https://b.example.com/login?foo=1#frag"], + }); + const sets = findSetsBase([c1, c2]); + expect(sets).toHaveLength(1); + expect(sets[0].ciphers.map((c) => c.id).sort()).toEqual(["1", "2"]); + expect(sets[0].key).toBe("username+uri: user@example.com @ example.com"); + }); + + it("groups when only path differs", () => { + const c1 = buildCipher({ + id: "1", + name: "A", + username: "u", + uris: ["https://a.example.com/a"], + }); + const c2 = buildCipher({ + id: "2", + name: "B", + username: "u", + uris: ["https://a.example.com/b"], + }); + const sets = findSetsBase([c1, c2]); + expect(sets).toHaveLength(1); + expect(sets[0].key).toBe("username+uri: u @ example.com"); + }); + + it("groups when only query differs", () => { + const c1 = buildCipher({ + id: "1", + name: "A", + username: "u", + uris: ["https://a.example.com/path?x=1"], + }); + const c2 = buildCipher({ + id: "2", + name: "B", + username: "u", + uris: ["https://b.example.com/path?x=2"], + }); + const sets = findSetsBase([c1, c2]); + expect(sets).toHaveLength(1); + expect(sets[0].key).toBe("username+uri: u @ example.com"); + }); + + it("groups when only fragment differs", () => { + const c1 = buildCipher({ + id: "1", + name: "A", + username: "u", + uris: ["https://a.example.com/path#one"], + }); + const c2 = buildCipher({ + id: "2", + name: "B", + username: "u", + uris: ["https://b.example.com/path#two"], + }); + const sets = findSetsBase([c1, c2]); + expect(sets).toHaveLength(1); + expect(sets[0].key).toBe("username+uri: u @ example.com"); + }); + + it("groups when only scheme differs (http vs https)", () => { + const c1 = buildCipher({ + id: "1", + name: "A", + username: "u", + uris: ["http://a.example.com/login"], + }); + const c2 = buildCipher({ + id: "2", + name: "B", + username: "u", + uris: ["https://b.example.com/login"], + }); + const sets = findSetsBase([c1, c2]); + expect(sets).toHaveLength(1); + expect(sets[0].key).toBe("username+uri: u @ example.com"); + }); + + it("groups when host case and trailing slash differ", () => { + const c1 = buildCipher({ + id: "1", + name: "A", + username: "u", + uris: ["HTTPS://A.EXAMPLE.COM/"], + }); + const c2 = buildCipher({ + id: "2", + name: "B", + username: "u", + uris: ["https://a.example.com"], + }); + const sets = findSetsBase([c1, c2]); + expect(sets).toHaveLength(1); + expect(sets[0].key).toBe("username+uri: u @ example.com"); + }); + + it("does NOT group when usernames differ even if base matches", () => { + const c1 = buildCipher({ + id: "1", + name: "A", + username: "alice", + uris: ["https://a.example.com"], + }); + const c2 = buildCipher({ + id: "2", + name: "B", + username: "bob", + uris: ["https://b.example.com/"], + }); + expect(findSetsBase([c1, c2])).toHaveLength(0); + }); + + it("multiple URIs per cipher can map to multiple bases, but identical membership collapses to one set", () => { + const c1 = buildCipher({ + id: "1", + name: "A", + username: "u", + uris: ["https://a.example.com", "https://b.example.net/"], + }); + const c2 = buildCipher({ + id: "2", + name: "B", + username: "u", + uris: ["http://A.example.com/", "http://b.example.net"], + }); + const sets = findSetsBase([c1, c2]); + expect(sets).toHaveLength(1); + const key = sets[0].key; + expect(["username+uri: u @ example.com", "username+uri: u @ example.net"]).toContain(key); + expect(sets[0].ciphers.map((c) => c.id).sort()).toEqual(["1", "2"]); + }); + + it("creates a single set when 3+ ciphers share same username+base", () => { + const cs = [1, 2, 3].map((i) => + buildCipher({ + id: String(i), + name: "N" + i, + username: "u", + uris: [ + i === 1 ? "a.example.org/path" : i === 2 ? "b.example.org" : "https://example.org/", + ], + }), + ); + const sets = findSetsBase(cs); + expect(sets).toHaveLength(1); + expect(sets[0].ciphers).toHaveLength(3); + expect(new Set(sets[0].ciphers.map((c) => c.id))).toEqual(new Set(["1", "2", "3"])); + expect(sets[0].key).toBe("username+uri: u @ example.org"); + }); + + it("ignores ciphers without username or without valid URIs", () => { + const good1 = buildCipher({ + id: "1", + name: "A", + username: "u", + uris: ["https://good.example"], + }); + const badNoUsername = buildCipher({ + id: "2", + name: "B", + username: "", + uris: ["good.example"], + }); + const badWhitespaceUsername = buildCipher({ + id: "3", + name: "C", + username: " ", + uris: ["good.example"], + }); + const badNoUris = buildCipher({ id: "4", name: "D", username: "u", uris: [] }); + const sets = findSetsBase([good1, badNoUsername, badWhitespaceUsername, badNoUris]); + expect(sets).toHaveLength(0); // only one valid item -> no duplicate set + }); + + it("does not create a duplicate set when a single cipher has multiple URIs that normalize to the same base", () => { + const c1 = buildCipher({ + id: "1", + name: "!_TEST", + username: "tester", + uris: ["a.example.org", "a.example.org/login", "HTTP://A.EXAMPLE.ORG"], + }); + const sets = findSetsBase([c1]); + expect(sets).toHaveLength(0); + }); + + it("with multiple items, a cipher that lists the same base multiple times appears only once in that base's grouping", () => { + const c1 = buildCipher({ + id: "1", + name: "!_TEST", + username: "tester", + uris: ["a.example.org", "b.a.example.org", "b.a.example.org/login"], + }); + const c2 = buildCipher({ + id: "2", + name: "2_TEST", + username: "tester", + uris: ["example.org"], + }); + const c3 = buildCipher({ + id: "3", + name: "3_TEST", + username: "tester", + uris: ["forum.example.org"], + }); + + const sets = findSetsBase([c1, c2, c3]); + const setsByKey = new Map(sets.map((s) => [s.key, s])); + + const baseKey = "username+uri: tester @ example.org"; + expect(setsByKey.has(baseKey)).toBe(true); + const baseSet = setsByKey.get(baseKey)!; + expect(baseSet.ciphers.map((c) => c.id).sort()).toEqual(["1", "2", "3"]); + }); + + it("groups androidapp variants by package id for same username", () => { + const c1 = buildCipher({ + id: "1", + name: "App1", + username: "u", + uris: ["androidapp://com.pkg/path"], + }); + const c2 = buildCipher({ + id: "2", + name: "App2", + username: "u", + uris: ["androidapp:com.pkg?x=1"], + }); + const sets = findSetsBase([c1, c2]); + expect(sets).toHaveLength(1); + expect(sets[0].key).toBe("username+uri: u @ com.pkg"); + expect(new Set(sets[0].ciphers.map((c) => c.id))).toEqual(new Set(["1", "2"])); + }); + + it("IPv6 with different ports: still groups (port ignored)", () => { + const c1 = buildCipher({ + id: "1", + name: "V6a", + username: "u", + uris: ["http://[2001:db8::1]:8080/a"], + }); + const c2 = buildCipher({ + id: "2", + name: "V6b", + username: "u", + uris: ["http://[2001:db8::1]:8081/b"], + }); + const sets = findSetsBase([c1, c2]); + expect(sets).toHaveLength(1); + }); + + // PSL-aware grouping + it("PSL: subdomains under example.co.uk group to example.co.uk", () => { + const c1 = buildCipher({ + id: "1", + name: "A", + username: "u", + uris: ["https://a.b.example.co.uk/login"], + }); + const c2 = buildCipher({ + id: "2", + name: "B", + username: "u", + uris: ["http://example.co.uk/"], + }); + const sets = findSetsBase([c1, c2]); + expect(sets).toHaveLength(1); + expect(sets[0].key).toBe("username+uri: u @ example.co.uk"); + }); + + it("PSL: different owners on github.io do NOT group (user1 vs user2)", () => { + const c1 = buildCipher({ + id: "1", + name: "Gh1", + username: "u", + uris: ["https://user1.github.io/"], + }); + const c2 = buildCipher({ + id: "2", + name: "Gh2", + username: "u", + uris: ["https://user2.github.io/blog"], + }); + expect(findSetsBase([c1, c2])).toHaveLength(0); + }); + + it("PSL: subdomains under the same user.github.io DO group", () => { + const c1 = buildCipher({ + id: "1", + name: "Gh", + username: "u", + uris: ["https://a.user.github.io/"], + }); + const c2 = buildCipher({ + id: "2", + name: "Gh", + username: "u", + uris: ["https://b.user.github.io/docs"], + }); + const sets = findSetsBase([c1, c2]); + expect(sets).toHaveLength(1); + expect(sets[0].key).toBe("username+uri: u @ user.github.io"); + }); + + it("PSL: different apps on appspot.com do NOT group; same app subdomains DO group", () => { + const a1 = buildCipher({ + id: "1", + name: "AppA", + username: "u", + uris: ["http://foo.appspot.com"], + }); + const a2 = buildCipher({ + id: "2", + name: "AppB", + username: "u", + uris: ["https://bar.appspot.com/login"], + }); + expect(findSetsBase([a1, a2])).toHaveLength(0); + + const s1 = buildCipher({ + id: "3", + name: "AppA", + username: "u", + uris: ["http://a.foo.appspot.com/x"], + }); + const s2 = buildCipher({ + id: "4", + name: "AppA", + username: "u", + uris: ["https://b.foo.appspot.com/y"], + }); + const subSets = findSetsBase([s1, s2]); + expect(subSets).toHaveLength(1); + expect(subSets[0].key).toBe("username+uri: u @ foo.appspot.com"); + }); + }); + + describe("username + URI bucket (Host strategy)", () => { + it("groups when host matches regardless of path/query/fragment/scheme", () => { + const c1 = buildCipher({ + id: "1", + name: "A", + username: "u", + uris: ["http://example.com/a?x=1#f"], + }); + const c2 = buildCipher({ + id: "2", + name: "B", + username: "u", + uris: ["https://example.com/b"], + }); + const sets = findSetsHost([c1, c2]); + expect(sets).toHaveLength(1); + expect(sets[0].key).toBe("username+uri: u @ example.com"); + }); + + it("does NOT group when ports differ", () => { + const c1 = buildCipher({ + id: "1", + name: "A", + username: "u", + uris: ["https://example.com:443/login"], + }); + const c2 = buildCipher({ + id: "2", + name: "B", + username: "u", + uris: ["https://example.com:8443/login"], + }); + const sets = findSetsHost([c1, c2]); + expect(sets).toHaveLength(0); + }); + + it("does NOT group when usernames differ even if host:port matches", () => { + const c1 = buildCipher({ + id: "1", + name: "A", + username: "alice", + uris: ["https://example.com:8080/"], + }); + const c2 = buildCipher({ + id: "2", + name: "B", + username: "bob", + uris: ["http://example.com:8080/"], + }); + expect(findSetsHost([c1, c2])).toHaveLength(0); + }); + + it("single cipher with multiple identical host:port URIs does not produce a set", () => { + const c1 = buildCipher({ + id: "1", + name: "X", + username: "u", + uris: ["http://example.com:8080/x", "HTTP://EXAMPLE.COM:8080/y"], + }); + expect(findSetsHost([c1])).toHaveLength(0); + }); + + it("multiple items: repeated host:port per cipher only included once in that group's membership", () => { + const c1 = buildCipher({ + id: "1", + name: "A", + username: "u", + uris: ["example.com:8080", "http://example.com:8080/x"], + }); + const c2 = buildCipher({ + id: "2", + name: "B", + username: "u", + uris: ["https://example.com:8080/"], + }); + const sets = findSetsHost([c1, c2]); + expect(sets).toHaveLength(1); + expect(sets[0].key).toBe("username+uri: u @ example.com:8080"); + expect(sets[0].ciphers.map((c) => c.id).sort()).toEqual(["1", "2"]); + }); + + it("androidapp variants group by package id", () => { + const c1 = buildCipher({ + id: "1", + name: "App1", + username: "u", + uris: ["androidapp://com.pkg/x"], + }); + const c2 = buildCipher({ + id: "2", + name: "App2", + username: "u", + uris: ["androidapp:com.pkg?y=1"], + }); + const sets = findSetsHost([c1, c2]); + expect(sets).toHaveLength(1); + expect(sets[0].key).toBe("username+uri: u @ com.pkg"); + }); + + it("IPv6 with different ports do NOT group", () => { + const c1 = buildCipher({ + id: "1", + name: "V6a", + username: "u", + uris: ["http://[2001:db8::1]:8080/a"], + }); + const c2 = buildCipher({ + id: "2", + name: "V6b", + username: "u", + uris: ["http://[2001:db8::1]:8081/b"], + }); + expect(findSetsHost([c1, c2])).toHaveLength(0); + }); + }); + + describe("username + URI bucket (Exact strategy)", () => { + it("requires exact normalized URL match (path/query/fragment included)", () => { + const c1 = buildCipher({ + id: "1", + name: "A", + username: "u", + uris: ["HTTPS://EXAMPLE.com/A/B?X=1#Frag"], + }); + const c2 = buildCipher({ + id: "2", + name: "B", + username: "u", + uris: ["https://example.com/a/b?x=1#frag"], + }); + const sets = findSetsExact([c1, c2]); + expect(sets).toHaveLength(1); + expect(sets[0].key).toBe("username+uri: u @ https://example.com/a/b?x=1#frag"); + expect(new Set(sets[0].ciphers.map((c) => c.id))).toEqual(new Set(["1", "2"])); + }); + + it("does NOT group when query differs", () => { + const c1 = buildCipher({ + id: "1", + name: "A", + username: "u", + uris: ["https://ex.com/a?x=1#y"], + }); + const c2 = buildCipher({ + id: "2", + name: "B", + username: "u", + uris: ["https://ex.com/a?x=2#y"], + }); + expect(findSetsExact([c1, c2])).toHaveLength(0); + }); + + it("does NOT group when fragment differs", () => { + const c1 = buildCipher({ + id: "1", + name: "A", + username: "u", + uris: ["https://ex.com/a?x=1#y"], + }); + const c2 = buildCipher({ + id: "2", + name: "B", + username: "u", + uris: ["https://ex.com/a?x=1#z"], + }); + expect(findSetsExact([c1, c2])).toHaveLength(0); + }); + + it("does NOT group when scheme differs", () => { + const c1 = buildCipher({ id: "1", name: "A", username: "u", uris: ["http://example.com/a"] }); + const c2 = buildCipher({ + id: "2", + name: "T", + username: "u", + uris: ["https://example.com/a"], + }); + expect(findSetsExact([c1, c2])).toHaveLength(0); + }); + + it("does NOT group when ports differ", () => { + const c1 = buildCipher({ + id: "1", + name: "A", + username: "u", + uris: ["https://example.com:443/a"], + }); + const c2 = buildCipher({ + id: "2", + name: "Q", + username: "u", + uris: ["https://example.com:8443/a"], + }); + expect(findSetsExact([c1, c2])).toHaveLength(0); + }); + + it("does NOT group when usernames differ even if URL matches", () => { + const c1 = buildCipher({ + id: "1", + name: "A", + username: "alice", + uris: ["https://example.com/a"], + }); + const c2 = buildCipher({ + id: "2", + name: "B", + username: "bob", + uris: ["https://example.com/a"], + }); + expect(findSetsExact([c1, c2])).toHaveLength(0); + }); + + it("single cipher with multiple identical URLs does not produce a set", () => { + const c1 = buildCipher({ + id: "1", + name: "X", + username: "u", + uris: ["HTTPS://EXAMPLE.com/A?X=1#F", "https://example.com/a?x=1#f"], + }); + expect(findSetsExact([c1])).toHaveLength(0); + }); + + it("androidapp variants group by package id (normalized to androidapp:pkg)", () => { + const c1 = buildCipher({ + id: "1", + name: "App1", + username: "u", + uris: ["androidapp://com.pkg/path"], + }); + const c2 = buildCipher({ + id: "2", + name: "App2", + username: "u", + uris: ["androidapp:COM.PKG?x=1"], + }); + const sets = findSetsExact([c1, c2]); + expect(sets).toHaveLength(1); + expect(sets[0].key.endsWith("@ androidapp:com.pkg")).toBe(true); + expect(new Set(sets[0].ciphers.map((c) => c.id))).toEqual(new Set(["1", "2"])); + }); + + it("IPv6 with different ports do NOT group", () => { + const c1 = buildCipher({ + id: "1", + name: "V6a", + username: "u", + uris: ["http://[2001:db8::1]:8080/a#f"], + }); + const c2 = buildCipher({ + id: "2", + name: "V6b", + username: "u", + uris: ["http://[2001:db8::1]:8081/a#f"], + }); + expect(findSetsExact([c1, c2])).toHaveLength(0); + }); }); describe("username + name bucket", () => { @@ -296,7 +987,7 @@ describe("DeDuplicateService core duplicate detection", () => { uris: ["https://two.example"], }); // Hosts differ so no URI duplicate; should produce exactly one name-based set - const sets = findSets([c1, c2]); + const sets = findSetsHostname([c1, c2]); expect(sets).toHaveLength(1); expect(sets[0].key).toBe("username+name: u & Shared Name"); expect(new Set(sets[0].ciphers.map((c) => c.id))).toEqual(new Set(["1", "2"])); @@ -305,7 +996,7 @@ describe("DeDuplicateService core duplicate detection", () => { it("groups when names differ only by case (case-insensitive)", () => { const c1 = buildCipher({ id: "1", name: "Login", username: "u", uris: ["a.example"] }); const c2 = buildCipher({ id: "2", name: "login", username: "u", uris: ["b.example"] }); - const sets = findSets([c1, c2]); + const sets = findSetsHostname([c1, c2]); const nameSet = sets.find((s) => s.key.startsWith("username+name: u &")); expect(nameSet).toBeDefined(); expect(new Set(nameSet!.ciphers.map((c) => c.id))).toEqual(new Set(["1", "2"])); @@ -314,7 +1005,7 @@ describe("DeDuplicateService core duplicate detection", () => { it("groups when names match after trimming outer whitespace", () => { const c1 = buildCipher({ id: "1", name: " Space ", username: "u", uris: ["one.example"] }); const c2 = buildCipher({ id: "2", name: "Space", username: "u", uris: ["two.example"] }); - const sets = findSets([c1, c2]); + const sets = findSetsHostname([c1, c2]); const match = sets.find((s) => s.key === "username+name: u & Space"); expect(match).toBeDefined(); expect(new Set(match!.ciphers.map((c) => c.id))).toEqual(new Set(["1", "2"])); @@ -324,7 +1015,7 @@ describe("DeDuplicateService core duplicate detection", () => { const c1 = buildCipher({ id: "1", name: "My Site", username: "u", uris: ["x.example"] }); const c2 = buildCipher({ id: "2", name: "My\tSite", username: "u", uris: ["y.example"] }); const c3 = buildCipher({ id: "3", name: "My Site", username: "u", uris: ["z.example"] }); - const sets = findSets([c1, c2, c3]); + const sets = findSetsHostname([c1, c2, c3]); const nameSet = sets.find((s) => s.key.startsWith("username+name: u &")); expect(nameSet).toBeDefined(); expect(new Set(nameSet!.ciphers.map((c) => c.id))).toEqual(new Set(["1", "2", "3"])); @@ -333,14 +1024,14 @@ describe("DeDuplicateService core duplicate detection", () => { it("does NOT group when usernames differ only by case (username case sensitive)", () => { const c1 = buildCipher({ id: "1", name: "Exact", username: "User", uris: ["site1.example"] }); const c2 = buildCipher({ id: "2", name: "Exact", username: "user", uris: ["site2.example"] }); - const sets = findSets([c1, c2]); + const sets = findSetsHostname([c1, c2]); expect(sets.find((s) => s.key.startsWith("username+name:"))).toBeUndefined(); }); it("groups when usernames match after trimming outer whitespace", () => { const c1 = buildCipher({ id: "1", name: "Label", username: " user ", uris: ["h1.example"] }); const c2 = buildCipher({ id: "2", name: "Label", username: "user", uris: ["h2.example"] }); - const sets = findSets([c1, c2]); + const sets = findSetsHostname([c1, c2]); const match = sets.find((s) => s.key === "username+name: user & Label"); expect(match).toBeDefined(); expect(new Set(match!.ciphers.map((c) => c.id))).toEqual(new Set(["1", "2"])); @@ -355,7 +1046,7 @@ describe("DeDuplicateService core duplicate detection", () => { uris: ["host" + i + ".example"], }), ); - const sets = findSets(cs); + const sets = findSetsHostname(cs); const nameSet = sets.filter((s) => s.key === "username+name: u & Cluster"); expect(nameSet).toHaveLength(1); expect(nameSet[0].ciphers).toHaveLength(3); @@ -364,7 +1055,7 @@ describe("DeDuplicateService core duplicate detection", () => { it("includes ciphers lacking any URIs in name bucket grouping", () => { const c1 = buildCipher({ id: "1", name: "Shared", username: "u", uris: [] }); const c2 = buildCipher({ id: "2", name: "Shared", username: "u", uris: ["alpha.example"] }); - const sets = findSets([c1, c2]); + const sets = findSetsHostname([c1, c2]); const match = sets.find((s) => s.key === "username+name: u & Shared"); expect(match).toBeDefined(); expect(new Set(match!.ciphers.map((c) => c.id))).toEqual(new Set(["1", "2"])); @@ -374,7 +1065,7 @@ describe("DeDuplicateService core duplicate detection", () => { const c1 = buildCipher({ id: "1", name: "Same", username: "u", uris: ["a.example"] }); const c2 = buildCipher({ id: "2", name: "Same", username: "u", uris: ["A.EXAMPLE/path"] }); const c3 = buildCipher({ id: "3", name: "Same", username: "u", uris: ["other.example"] }); - const sets = findSets([c1, c2, c3]); + const sets = findSetsHostname([c1, c2, c3]); const keys = sets.map((s) => s.key).sort(); expect(keys).toEqual(["username+name: u & Same", "username+uri: u @ a.example"]); const uriSet = sets.find((s) => s.key === "username+uri: u @ a.example")!; @@ -386,7 +1077,7 @@ describe("DeDuplicateService core duplicate detection", () => { it("when URI and name sets have identical cipher IDs, only the URI set is kept", () => { const c1 = buildCipher({ id: "1", name: "Same", username: "u", uris: ["a.example"] }); const c2 = buildCipher({ id: "2", name: "Same", username: "u", uris: ["A.EXAMPLE"] }); - const sets = findSets([c1, c2]); + const sets = findSetsHostname([c1, c2]); // both groupings would include [1,2], but the implementation prefers the URI set expect(sets).toHaveLength(1); expect(sets[0].key).toBe("username+uri: u @ a.example"); @@ -395,7 +1086,7 @@ describe("DeDuplicateService core duplicate detection", () => { it("does not create name bucket if only one cipher present", () => { const c1 = buildCipher({ id: "1", name: "Solo", username: "u", uris: ["solo.example"] }); - expect(findSets([c1])).toHaveLength(0); + expect(findSetsHostname([c1])).toHaveLength(0); }); it("requires non-empty trimmed username and name", () => { @@ -403,7 +1094,7 @@ describe("DeDuplicateService core duplicate detection", () => { const noName = buildCipher({ id: "2", name: "", username: "u", uris: ["y.test"] }); const blankName = buildCipher({ id: "3", name: " ", username: "u", uris: ["z.test"] }); const noUsername = buildCipher({ id: "4", name: "Name", username: "", uris: ["z.test"] }); - expect(findSets([good, noName, blankName, noUsername])).toHaveLength(0); + expect(findSetsHostname([good, noName, blankName, noUsername])).toHaveLength(0); }); }); @@ -521,7 +1212,7 @@ describe("DeDuplicateService core duplicate detection", () => { it("groups items without username by canonicalized name (case/whitespace-insensitive)", () => { const c1 = buildCipher({ id: "1", name: "My App", username: "", uris: [] }); const c2 = buildCipher({ id: "2", name: " my\tapp ", username: "", uris: [] }); - const sets = findSets([c1, c2]); + const sets = findSetsHostname([c1, c2]); expect(sets).toHaveLength(1); expect(sets[0].key).toBe("username+name: & My App"); expect(new Set(sets[0].ciphers.map((c) => c.id))).toEqual(new Set(["1", "2"])); @@ -531,7 +1222,7 @@ describe("DeDuplicateService core duplicate detection", () => { const noUser = buildCipher({ id: "1", name: "Same", username: "", uris: [] }); const u1 = buildCipher({ id: "2", name: "Same", username: "u", uris: [] }); const u2 = buildCipher({ id: "3", name: "Same", username: "u", uris: [] }); - const sets = findSets([noUser, u1, u2]); + const sets = findSetsHostname([noUser, u1, u2]); // Should only produce the username+name set for user "u" const keys = sets.map((s) => s.key).sort(); expect(keys).toEqual(["username+name: u & Same"]); @@ -543,7 +1234,7 @@ describe("DeDuplicateService core duplicate detection", () => { const cs = [1, 2, 3].map((i) => buildCipher({ id: String(i), name: "Cluster", username: "", uris: [] }), ); - const sets = findSets(cs); + const sets = findSetsHostname(cs); expect(sets).toHaveLength(1); expect(sets[0].key).toBe("username+name: & Cluster"); expect(new Set(sets[0].ciphers.map((c) => c.id))).toEqual(new Set(["1", "2", "3"])); @@ -552,8 +1243,141 @@ describe("DeDuplicateService core duplicate detection", () => { it("does not create a name-only set for a single item or blank name", () => { const solo = buildCipher({ id: "1", name: "Solo", username: "", uris: [] }); const blank = buildCipher({ id: "2", name: " ", username: "", uris: [] }); - expect(findSets([solo])).toHaveLength(0); - expect(findSets([blank])).toHaveLength(0); + expect(findSetsHostname([solo])).toHaveLength(0); + expect(findSetsHostname([blank])).toHaveLength(0); + }); + }); + + describe("URI match strategies", () => { + describe("Hostname", () => { + it("normalizes host and ignores port, scheme, path, userinfo", () => { + const keys = keysFor( + ["HTTPS://user:pass@EXAMPLE.COM:443/a", "http://example.com:8443/b?x=1#y", "example.com"], + "Hostname", + ); + expect(keys).toEqual(["example.com"]); + }); + + it("handles IDN via punycode", () => { + const keys = keysFor(["https://münich.example"], "Hostname"); + expect(keys[0]).toMatch(/^xn--mnich-kva\.example$/); + }); + + it("androidapp scheme yields package id across variants", () => { + const keys = keysFor( + [ + "androidapp://com.Example.App/login", + "androidapp:com.example.app?x=1", + " androidapp://COM.EXAMPLE.APP ", + ], + "Hostname", + ); + expect(keys).toEqual(["com.example.app"]); + }); + }); + + describe("Base", () => { + it("returns second-level + TLD for common domains", () => { + const keys = keysFor(["https://a.b.example.org", "http://example.org"], "Base"); + expect(keys).toEqual(["example.org"]); + }); + + it("returns host unchanged for IP and single-label host", () => { + const keys = keysFor(["http://192.168.0.10/x", "localhost:3000"], "Base"); + expect(keys).toEqual(["192.168.0.10", "localhost"]); + }); + + it("androidapp returns package id", () => { + const keys = keysFor(["androidapp:com.pkg"], "Base"); + expect(keys).toEqual(["com.pkg"]); + }); + + // PSL-specific behavior + it("PSL: co.uk groups to example.co.uk across subdomains", () => { + const keys = keysFor(["https://a.b.example.co.uk", "http://example.co.uk"], "Base"); + expect(keys).toEqual(["example.co.uk"]); + }); + + it("PSL: github.io keeps owners separate (user1 vs user2)", () => { + const keys = keysFor(["https://user1.github.io", "https://user2.github.io"], "Base").sort(); + expect(keys).toEqual(["user1.github.io", "user2.github.io"]); + }); + + it("PSL: subdomains under user.github.io collapse to user.github.io", () => { + const keys = keysFor(["https://a.user.github.io", "https://b.user.github.io"], "Base"); + expect(keys).toEqual(["user.github.io"]); + }); + + it("PSL: appspot.com keeps apps separate; subdomains under same app collapse", () => { + const sep = keysFor(["http://foo.appspot.com", "https://bar.appspot.com"], "Base").sort(); + expect(sep).toEqual(["bar.appspot.com", "foo.appspot.com"]); + + const collapse = keysFor(["http://a.foo.appspot.com", "https://b.foo.appspot.com"], "Base"); + expect(collapse).toEqual(["foo.appspot.com"]); + }); + }); + + describe("Host", () => { + it("includes port when present, otherwise host only", () => { + const keys = keysFor(["http://example.com", "http://example.com:8080"], "Host"); + expect(keys).toEqual(["example.com", "example.com:8080"]); + }); + + it("formats IPv6 with port as host:port", () => { + const keys = keysFor(["http://[2001:db8::1]:8080/x"], "Host"); + expect(keys).toEqual(["2001:db8::1:8080"]); + }); + + it("androidapp returns package id (no port)", () => { + const keys = keysFor(["androidapp://com.pkg/x"], "Host"); + expect(keys).toEqual(["com.pkg"]); + }); + }); + + describe("Exact", () => { + it("returns full normalized URL including path/query/fragment, lowercased", () => { + const keys = keysFor(["HTTPS://EXAMPLE.com:8443/A/B?X=1#Frag"], "Exact"); + expect(keys).toEqual(["https://example.com:8443/a/b?x=1#frag"]); + }); + + it("androidapp returns androidapp:package", () => { + const keys = keysFor(["androidapp:COM.PKG/path"], "Exact"); + expect(keys).toEqual(["androidapp:com.pkg"]); + }); + }); + + describe("Fallback URL parsing path for strategies", () => { + let originalURL: any; + beforeEach(() => { + originalURL = (global as any).URL; + (global as any).URL = class FailingURL { + constructor(_s: string) { + throw new Error("forced parse failure"); + } + } as any; + }); + afterEach(() => { + (global as any).URL = originalURL; + }); + + it("Hostname still extracts the host via fallback", () => { + const keys = keysFor( + ["custom+scheme://User:Pass@Sub.Domain.Example.com:8080/x"], + "Hostname", + ); + expect(keys).toEqual(["sub.domain.example.com"]); + }); + + it("Host returns host:port if numeric port; strips non-numeric suffix", () => { + const strat = "Host"; + expect(keysFor(["weird://host:8080/abc"], strat)).toEqual(["host:8080"]); + expect(keysFor(["weird://host:notaport/abc"], strat)).toEqual(["host"]); + }); + + it("Base extracts registrable domain via fallback (PSL)", () => { + const keys = keysFor(["custom+scheme://a.b.example.co.uk/path"], "Base"); + expect(keys).toEqual(["example.co.uk"]); + }); }); }); }); diff --git a/apps/web/src/app/vault/services/de-duplicate.service.ts b/apps/web/src/app/vault/services/de-duplicate.service.ts index 673836f6910..ba403fe6741 100644 --- a/apps/web/src/app/vault/services/de-duplicate.service.ts +++ b/apps/web/src/app/vault/services/de-duplicate.service.ts @@ -1,6 +1,7 @@ import { Injectable } from "@angular/core"; import { firstValueFrom } from "rxjs"; +import { Utils } from "@bitwarden/common/platform/misc/utils"; import { UserId } from "@bitwarden/common/types/guid"; import { CipherService } from "@bitwarden/common/vault/abstractions/cipher.service"; import { CipherView } from "@bitwarden/common/vault/models/view/cipher.view"; @@ -11,7 +12,6 @@ import { DuplicateReviewDialogComponent, DuplicateReviewDialogResult, } from "../../tools/de-duplicate/duplicate-review-dialog.component"; -// Success dialog replaced by callout shown in the de-duplicate component export interface DuplicateOperationResult { setsFound: number; @@ -24,6 +24,19 @@ interface DuplicateSet { ciphers: CipherView[]; } +type UriMatchStrategy = "Base" | "Hostname" | "Host" | "Exact"; + +interface ParsedUri { + original: string; + androidPackage?: string; + scheme?: string; + hostname?: string; + port?: string; + path?: string; + query?: string; + fragment?: string; +} + @Injectable({ providedIn: "root", }) @@ -34,14 +47,28 @@ export class DeDuplicateService { private cipherAuthorizationService: CipherAuthorizationService, ) {} + /** + * Strategies for URI matching/normalization. The current implementation defaults to Base. + * + * Base: matches on second-level and top-level domain only + * Hostname: matches on subdomain, second-level domain, and top-level domain + * Host: matches on the above but also includes port, when available + * Exact: exact match between URI and current browser page + */ + private static readonly DEFAULT_URI_STRATEGY: UriMatchStrategy = "Base"; + /** * Main entry point to find and handle duplicate ciphers for a given user. * @param userId The ID of the current user. * @returns A promise that resolves to the number of duplicate sets found. */ - async findAndHandleDuplicates(userId: UserId): Promise { + async findAndHandleDuplicates( + userId: UserId, + options?: { uriStrategy?: UriMatchStrategy }, + ): Promise { + const uriStrategy = options?.uriStrategy ?? DeDuplicateService.DEFAULT_URI_STRATEGY; const allCiphers = await this.cipherService.getAllDecrypted(userId); - const duplicateSets = this.findDuplicateSets(allCiphers); + const duplicateSets = this.findDuplicateSets(allCiphers, uriStrategy); if (duplicateSets.length > 0) { const { trashed, permanentlyDeleted } = await this.handleDuplicates(duplicateSets, userId); @@ -52,13 +79,17 @@ export class DeDuplicateService { /** * Finds groups of ciphers (clusters) that are considered duplicates. - * A "group" or cluster is defined by ciphers sharing login.username and a normalized login.uri, + * A "group" or cluster is defined by ciphers sharing login.username and a matched login.uri + * (where match behavior is determined by DEFAULT_URI_STRATEGY) * OR a matching login.username and normalized cipher.name, * OR when no username is present, on matching normalized cipher.name only. * @param ciphers A list of all the user's ciphers. * @returns An array of DuplicateSet objects, each representing a group of duplicates. */ - private findDuplicateSets(ciphers: CipherView[]): DuplicateSet[] { + private findDuplicateSets( + ciphers: CipherView[], + uriStrategy: UriMatchStrategy = DeDuplicateService.DEFAULT_URI_STRATEGY, + ): DuplicateSet[] { const uriBuckets = new Map(); const nameBuckets = new Map(); const nameOnlyBuckets = new Map(); // used in edge cases when no useername is present for a login @@ -94,24 +125,18 @@ export class DeDuplicateService { if (username) { const uris = this.extractUriStrings(cipher); if (uris.length > 0) { - // Collect unique normalized hosts to avoid adding the same cipher twice to the same bucket - const hosts = new Set(); - for (const uri of uris) { - const normHost = this.normalizeUri(uri); - if (normHost) { - hosts.add(normHost); - } - } - for (const normHost of hosts) { - const key = `${username}||${normHost}`; + // Collect unique normalized keys to avoid adding the same cipher twice to the same bucket + const keys = this.getUriKeysForStrategy(uris, uriStrategy); + for (const k of keys) { + const key = `${username}||${k}`; let bucket = uriBuckets.get(key); if (!bucket) { bucket = []; uriBuckets.set(key, bucket); } bucket.push(cipher); - const displayKey = `username+uri: ${username} @ ${normHost}`; - ensureSetForBucket(bucket, displayKey); // Create/extend duplicate set when bucket reaches size 2 + const displayKey = `username+uri: ${username} @ ${k}`; + ensureSetForBucket(bucket, displayKey); } } } @@ -131,7 +156,7 @@ export class DeDuplicateService { bucket.push(cipher); const displayName = bucket[0].name?.trim() || ""; const displayKey = `username+name: ${username} & ${displayName}`; - ensureSetForBucket(bucket, displayKey); // Create/extend duplicate set when bucket reaches size 2 + ensureSetForBucket(bucket, displayKey); } else { // match on cipher.name only when username is absent // to prevent false positive duplicates in a situation where a user has multiple accounts on the same site - among others @@ -144,7 +169,7 @@ export class DeDuplicateService { const displayName = bucket[0].name?.trim() || ""; // Reuse existing display format so UI logic extracts the name without introducing new labels const displayKey = `username+name: & ${displayName}`; - ensureSetForBucket(bucket, displayKey); // Create/extend duplicate set when bucket reaches size 2 + ensureSetForBucket(bucket, displayKey); } } } @@ -152,14 +177,14 @@ export class DeDuplicateService { // Collapse groups that contain the exact same cipher IDs // Prefer the stronger username+uri grouping over username+name - const weightedDuplicateSets = new Map(); // used to prioritize username+uri ses + const weightedDuplicateSets = new Map(); const groupingPriority = (key: string): number => (key.startsWith("username+uri:") ? 2 : 1); for (const set of duplicateSets) { const signature = set.ciphers .map((c) => c.id) .sort() - .join("|"); // string representing ciphr IDs in a reproducible way + .join("|"); const existing = weightedDuplicateSets.get(signature); if (!existing || groupingPriority(set.key) > groupingPriority(existing.key)) { weightedDuplicateSets.set(signature, set); @@ -210,67 +235,228 @@ export class DeDuplicateService { } /** - * Extracts the host portion (subdomains.domain.tld OR IPv4 OR IPv6) from an input string. - * Behavior: - * - Prepends "https://" if the string lacks a scheme so standard parsing works. - * - Uses the node's URL parser when available (new URL). That yields punycoded ASCII for IDNs. - * - Falls back to a lightweight regex authority parse if URL parsing fails or isn't available. - * - Strips userinfo, port, enclosing IPv6 brackets, and a trailing dot; lowercases result. - * - Returns "" if a host can't be derived. - * @param raw Input possibly containing a host. - * @returns Host string or empty string. + * Get normalized keys for the given URIs according to a strategy. */ - private normalizeUri(raw: string): string { + private getUriKeysForStrategy(uris: string[], strategy: UriMatchStrategy): Set { + const keys = new Set(); + for (const raw of uris) { + const parsed = this.parseUri(raw); + if (!parsed) { + continue; + } + switch (strategy) { + case "Base": { + const base = this.getBaseUri(parsed); + if (base) { + keys.add(base); + } + break; + } + case "Hostname": { + const hostName = this.getHostname(parsed); + if (hostName) { + keys.add(hostName); + } + break; + } + case "Host": { + const host = this.getHost(parsed); + if (host) { + keys.add(host); + } + break; + } + case "Exact": { + const exact = this.getExactUrlKey(parsed); + if (exact) { + keys.add(exact); + } + break; + } + } + } + return keys; + } + + /** + * Parse a URI/host-like string into components for strategy matching. + * - Supports androidapp:// and androidapp: scheme, normalizing to androidPackage. + * - Adds http:// scheme if missing for URL parsing. + */ + private parseUri(raw: string): ParsedUri | null { if (!raw) { - return ""; + return null; } - let input = raw.trim(); + const input = raw.trim(); if (!input) { - return ""; + return null; } - if (!/^[a-z][a-z0-9+.-]*:\/\//i.test(input)) { - input = "https://" + input; + + // Android package + const m1 = input.match(/^androidapp:\/\/([^/?#]+)(?:[/?#].*)?$/i); + if (m1?.[1]) { + const pkg = m1[1].trim().replace(/\.$/, "").toLowerCase(); + return { original: input, androidPackage: pkg }; + } + const m2 = input.match(/^androidapp:([^\s/?#]+).*$/i); + if (m2?.[1]) { + const pkg = m2[1].trim().replace(/\.$/, "").toLowerCase(); + return { original: input, androidPackage: pkg }; + } + + let toParse = input; + if (!/^[a-z][a-z0-9+.-]*:\/\//i.test(toParse)) { + // Align with convention when lacking scheme + // https://bitwarden.com/help/uri-match-detection/#uri-schemes + toParse = "http://" + toParse; } - // Attempt extraction using node's URL lib try { - const url = new URL(input); - let host = url.hostname || ""; // hostname excludes port already - if (!host) { - return ""; + const url = new URL(toParse); + let hostname = url.hostname || ""; + if (hostname.startsWith("[") && hostname.endsWith("]")) { + hostname = hostname.slice(1, -1); } - // Strip IPv6 brackets - if (host.startsWith("[") && host.endsWith("]")) { - host = host.slice(1, -1); - } - host = host.replace(/\.$/, "").toLowerCase(); - return host; + hostname = hostname.replace(/\.$/, "").toLowerCase(); + const port = url.port || ""; + const scheme = (url.protocol || "").replace(/:$/, "").toLowerCase(); + const path = url.pathname || ""; + const query = url.search || ""; + const fragment = url.hash || ""; + return { + original: input, + scheme, + hostname, + port, + path, + query, + fragment, + }; } catch { - // Fallback: manual authority extraction - const authorityMatch = input.match(/^[a-z][a-z0-9+.-]*:\/\/([^/?#]+)/i); + // Fallback manual authority extraction sufficient for host, hostname, and base strategies + // TODO if exact strategy is being used, the user should be notified URI matching won't be exact + const authorityMatch = toParse.match(/^[a-z][a-z0-9+.-]*:\/\/([^/?#]+)/i); if (!authorityMatch) { - return ""; + // TODO the user should be informed that this URI couldn't be matched + return { original: input }; } let authority = authorityMatch[1]; - // Strip userinfo if present (user:pass@host) const atIndex = authority.lastIndexOf("@"); if (atIndex !== -1) { authority = authority.slice(atIndex + 1); } - // IPv6 brackets - if (authority.startsWith("[") && authority.includes("]")) { - authority = authority.slice(1, authority.indexOf("]")); + let host = authority; + let port = ""; + if (authority.startsWith("[")) { + const end = authority.indexOf("]"); + if (end !== -1) { + host = authority.slice(1, end); + const rest = authority.slice(end + 1); + if (rest.startsWith(":")) { + const p = rest.slice(1); + if (/^[0-9]+$/.test(p)) { + port = p; + } + } + } } else { - // Port (last colon, numeric part) const c = authority.lastIndexOf(":"); - if (c !== -1 && /^[0-9]+$/.test(authority.slice(c + 1))) { - authority = authority.slice(0, c); + if (c !== -1) { + const p = authority.slice(c + 1); + if (/^[0-9]+$/.test(p)) { + host = authority.slice(0, c); + port = p; + } else { + // Non-numeric suffix after colon isn't a valid port -> treat as part of path/opaque + // Strip it from the host for host-based strategies. + host = authority.slice(0, c); + } } } - authority = authority.replace(/\.$/, "").toLowerCase(); - return authority; + host = host.replace(/\.$/, "").toLowerCase(); + return { original: input, hostname: host, port }; } } + private getBaseUri(p: ParsedUri): string { + if (p.androidPackage) { + return p.androidPackage; + } + const host = (p.hostname || "").toLowerCase(); + if (!host) { + return ""; + } + // IP addresses or localhost-like + if (/^\d+\.\d+\.\d+\.\d+$/.test(host) || host.includes(":")) { + return host; // IPv4 or IPv6 literal + } + // Prefer PSL-aware registrable domain via shared Utils (tldts under the hood) + // Try with a normalized URL string first; fall back to heuristic if unavailable + try { + const scheme = p.scheme || "http"; + const urlLike = `${scheme}://${host}`; + const domain = Utils.getDomain(urlLike); + if (domain) { + return domain.toLowerCase(); + } + } catch { + // ignore and fall back + } + const parts = host.split(".").filter(Boolean); + if (parts.length <= 2) { + return host; + } + return parts.slice(-2).join("."); + } + + private getHostname(p: ParsedUri): string { + if (p.androidPackage) { + return p.androidPackage; + } + return (p.hostname || "").toLowerCase(); + } + + private getHost(p: ParsedUri): string { + const host = this.getHostname(p); + if (!host) { + return ""; + } + const port = p.port?.trim(); + return port ? `${host}:${port}` : host; + } + + private getExactUrlKey(p: ParsedUri): string { + if (p.androidPackage) { + return `androidapp:${p.androidPackage}`; + } + const host = p.hostname || ""; + const scheme = p.scheme || "http"; // if absent, we assumed http when parsing + const port = p.port ? `:${p.port}` : ""; + const path = p.path || ""; + const query = p.query || ""; + const fragment = p.fragment || ""; + if (!host) { + return p.original.toLowerCase(); + } + return `${scheme}://${host}${port}${path}${query}${fragment}`.toLowerCase(); + } + + /** + * Backward-compatible helper retained for tests: returns normalized host from a URI-like string. + * - Lowercases + * - Strips userinfo, port, path, query, fragment + * - Supports IPv4, IPv6, androidapp package IDs + */ + private normalizeUri(raw: string): string { + const p = this.parseUri(raw); + if (!p) { + return ""; + } + if (p.androidPackage) { + return p.androidPackage; + } + return (p.hostname || "").toLowerCase(); + } + /** * Handles the user interaction and server-side deletion of identified duplicates. * This method prompts the user, checks permissions, and performs batch deletions.