1
0
mirror of https://github.com/bitwarden/browser synced 2026-02-05 11:13:44 +00:00

WIP: Added UI support for match strategies, refined match logic

This commit is contained in:
John Harrington
2025-08-19 05:18:53 -07:00
parent dff9ddcc0a
commit 6afebe2553
5 changed files with 144 additions and 62 deletions

View File

@@ -12,6 +12,22 @@
</bit-callout>
<div class="tw-flex tw-flex-col tw-gap-4">
<bit-form-field class="tw-max-w-[28rem]">
<bit-label>{{ "uriMatchingStrategy" | i18n }}</bit-label>
<bit-select [items]="strategyOptions" [(ngModel)]="selectedStrategy"></bit-select>
<bit-hint>
{{ "uriMatchingStrategyHelp" | i18n }}
<a
class="tw-ml-1"
href="https://bitwarden.com/help/uri-match-detection/#match-detection-options"
target="_blank"
rel="noopener noreferrer"
>
{{ "learnMoreAboutMatchDetection" | i18n }}
</a>
</bit-hint>
</bit-form-field>
<div class="tw-flex tw-gap-2">
<button
id="de-duplicate__button__find-duplicates"

View File

@@ -1,27 +1,52 @@
import { CommonModule } from "@angular/common";
import { ChangeDetectorRef, Component, Inject } from "@angular/core";
import { FormsModule } from "@angular/forms";
import { firstValueFrom } from "rxjs";
import { AccountService } from "@bitwarden/common/auth/abstractions/account.service";
import { getUserId } from "@bitwarden/common/auth/services/account.service";
import { I18nService } from "@bitwarden/common/platform/abstractions/i18n.service";
import { ButtonModule, CalloutModule } from "@bitwarden/components";
import {
ButtonModule,
CalloutModule,
FormFieldModule,
Option,
SelectModule,
} from "@bitwarden/components";
import { I18nPipe } from "@bitwarden/ui-common";
import { HeaderModule } from "../../layouts/header/header.module";
import { SharedModule } from "../../shared";
import { DeDuplicateService } from "../../vault/services/de-duplicate.service";
import { DeDuplicateService, UriMatchStrategy } from "../../vault/services/de-duplicate.service";
@Component({
selector: "app-de-duplicate",
standalone: true,
imports: [CommonModule, SharedModule, HeaderModule, ButtonModule, CalloutModule, I18nPipe],
imports: [
CommonModule,
SharedModule,
HeaderModule,
ButtonModule,
CalloutModule,
FormFieldModule,
SelectModule,
FormsModule,
I18nPipe,
],
templateUrl: "./de-duplicate.component.html",
})
export class DeDuplicateComponent {
loading = false;
callout: { type: "success" | "warning"; title?: string; message: string } | null = null;
selectedStrategy: UriMatchStrategy = "Base";
strategyOptions: Option<UriMatchStrategy>[] = [
{ label: this.i18nService.t("baseDomain"), value: "Base" },
{ label: this.i18nService.t("hostName"), value: "Hostname" },
{ label: this.i18nService.t("host"), value: "Host" },
{ label: this.i18nService.t("exact"), value: "Exact" },
];
constructor(
@Inject(DeDuplicateService) private deDuplicateService: DeDuplicateService,
private i18nService: I18nService,
@@ -37,7 +62,9 @@ export class DeDuplicateComponent {
try {
const userId = await firstValueFrom(this.accountService.activeAccount$.pipe(getUserId));
const result = await this.deDuplicateService.findAndHandleDuplicates(userId);
const result = await this.deDuplicateService.findAndHandleDuplicates(userId, {
uriStrategy: this.selectedStrategy,
});
if (result.setsFound === 0) {
this.callout = {

View File

@@ -2,7 +2,6 @@ import { CipherView } from "@bitwarden/common/vault/models/view/cipher.view";
import { DeDuplicateService } from "./de-duplicate.service";
// Minimal stubs (we directly exercise private logic; no need for full implementations)
const cipherServiceStub = { getAllDecrypted: jest.fn() };
const dialogServiceStub = { open: jest.fn() };
const cipherAuthorizationServiceStub = {};
@@ -55,8 +54,14 @@ describe("DeDuplicateService core duplicate detection", () => {
(service as any).findDuplicateSets(ciphers) as {
key: string;
ciphers: CipherView[];
}[]; // uses service default (Base)
const normalize = (s: string) => (service as any).normalizeUri(s) as string;
}[];
// Legacy normalizeUri tests now validate Hostname strategy normalization.
// Return the single Hostname key for a given input, or empty string when none.
const normalize = (s: string) => {
const set = (service as any).getUriKeysForStrategy([s], "Hostname") as Set<string>;
const first = Array.from(set)[0];
return first ?? "";
};
const extract = (c: CipherView) => (service as any).extractUriStrings(c) as string[];
const keysFor = (uris: string[], strat: string) =>
Array.from((service as any).getUriKeysForStrategy(uris, strat)).sort();
@@ -70,8 +75,7 @@ describe("DeDuplicateService core duplicate detection", () => {
);
});
// Default strategy check (Base)
describe("default strategy (Base)", () => {
describe("matching strategy default to Base", () => {
it("uses Base when not specified: subdomains group by registrable domain", () => {
const c1 = buildCipher({
id: "1",
@@ -92,7 +96,6 @@ describe("DeDuplicateService core duplicate detection", () => {
});
});
// Strategy-specific username+URI grouping tests — identical coverage across strategies
describe("username + URI bucket (Hostname strategy)", () => {
it("groups items with same username and host (ignores path/query/fragment)", () => {
const c1 = buildCipher({
@@ -272,7 +275,7 @@ describe("DeDuplicateService core duplicate detection", () => {
});
const badNoUris = buildCipher({ id: "4", name: "D", username: "u", uris: [] });
const sets = findSetsHostname([good1, badNoUsername, badWhitespaceUsername, badNoUris]);
expect(sets).toHaveLength(0); // only one valid item -> no duplicate set
expect(sets).toHaveLength(0);
});
it("does not create a duplicate set when a single cipher has multiple URIs that normalize to the same host", () => {
@@ -286,7 +289,6 @@ describe("DeDuplicateService core duplicate detection", () => {
"HTTP://FORUM.TEST.DOMAIN.ORG",
],
});
// Only one cipher overall; previously this could push the same cipher twice into the same bucket.
const sets = findSetsHostname([c1]);
expect(sets).toHaveLength(0);
});
@@ -323,7 +325,6 @@ describe("DeDuplicateService core duplicate detection", () => {
const forumSet = setsByKey.get(forumKey)!;
const testSet = setsByKey.get(testKey)!;
// Each set should contain each cipher at most once
expect(forumSet.ciphers.map((c) => c.id).sort()).toEqual(["1", "3"]);
expect(testSet.ciphers.map((c) => c.id).sort()).toEqual(["1", "2"]);
});
@@ -550,7 +551,7 @@ describe("DeDuplicateService core duplicate detection", () => {
});
const badNoUris = buildCipher({ id: "4", name: "D", username: "u", uris: [] });
const sets = findSetsBase([good1, badNoUsername, badWhitespaceUsername, badNoUris]);
expect(sets).toHaveLength(0); // only one valid item -> no duplicate set
expect(sets).toHaveLength(0);
});
it("does not create a duplicate set when a single cipher has multiple URIs that normalize to the same base", () => {
@@ -629,7 +630,6 @@ describe("DeDuplicateService core duplicate detection", () => {
expect(sets).toHaveLength(1);
});
// PSL-aware grouping
it("PSL: subdomains under example.co.uk group to example.co.uk", () => {
const c1 = buildCipher({
id: "1",
@@ -713,6 +713,40 @@ describe("DeDuplicateService core duplicate detection", () => {
expect(subSets).toHaveLength(1);
expect(subSets[0].key).toBe("username+uri: u @ foo.appspot.com");
});
it("PSL miss: internal/private zones use full host (no last-two-label collapse)", () => {
const c1 = buildCipher({
id: "1",
name: "A",
username: "u",
uris: ["https://a.b.internal/"],
});
const c2 = buildCipher({
id: "2",
name: "B",
username: "u",
uris: ["https://c.b.internal/"],
});
const sets = findSetsBase([c1, c2]);
expect(sets).toHaveLength(0);
});
it("PSL miss: cluster.local style names remain distinct", () => {
const c1 = buildCipher({
id: "1",
name: "G",
username: "u",
uris: ["http://service-a.monitoring.svc.cluster.local/login"],
});
const c2 = buildCipher({
id: "2",
name: "P",
username: "u",
uris: ["http://service-b.monitoring.svc.cluster.local/"],
});
const sets = findSetsBase([c1, c2]);
expect(sets).toHaveLength(0);
});
});
describe("username + URI bucket (Host strategy)", () => {
@@ -986,7 +1020,6 @@ describe("DeDuplicateService core duplicate detection", () => {
username: "u",
uris: ["https://two.example"],
});
// Hosts differ so no URI duplicate; should produce exactly one name-based set
const sets = findSetsHostname([c1, c2]);
expect(sets).toHaveLength(1);
expect(sets[0].key).toBe("username+name: u & Shared Name");
@@ -1078,7 +1111,6 @@ describe("DeDuplicateService core duplicate detection", () => {
const c1 = buildCipher({ id: "1", name: "Same", username: "u", uris: ["a.example"] });
const c2 = buildCipher({ id: "2", name: "Same", username: "u", uris: ["A.EXAMPLE"] });
const sets = findSetsHostname([c1, c2]);
// both groupings would include [1,2], but the implementation prefers the URI set
expect(sets).toHaveLength(1);
expect(sets[0].key).toBe("username+uri: u @ a.example");
expect(new Set(sets[0].ciphers.map((c) => c.id))).toEqual(new Set(["1", "2"]));
@@ -1123,7 +1155,6 @@ describe("DeDuplicateService core duplicate detection", () => {
it("returns empty array when login.uris is not an array", () => {
const c = buildCipher({ id: "1", name: "X", username: "u", uris: [] });
// Force a non-array value
(c as any).login.uris = "not-an-array" as any;
expect(extract(c)).toEqual([]);
});
@@ -1165,9 +1196,8 @@ describe("DeDuplicateService core duplicate detection", () => {
});
it("supports internationalized domains via punycode", () => {
// The URL parser returns punycoded hostname for IDN.
const host = normalize("https://münich.example/secure");
expect(host).toMatch(/^xn--mnich-kva\.example$/); // Punycode of münich.example
expect(host).toMatch(/^xn--mnich-kva\.example$/);
});
describe("fallback regex path (forced URL parse failure)", () => {
@@ -1223,7 +1253,6 @@ describe("DeDuplicateService core duplicate detection", () => {
const u1 = buildCipher({ id: "2", name: "Same", username: "u", uris: [] });
const u2 = buildCipher({ id: "3", name: "Same", username: "u", uris: [] });
const sets = findSetsHostname([noUser, u1, u2]);
// Should only produce the username+name set for user "u"
const keys = sets.map((s) => s.key).sort();
expect(keys).toEqual(["username+name: u & Same"]);
const unameSet = sets[0];
@@ -1292,7 +1321,6 @@ describe("DeDuplicateService core duplicate detection", () => {
expect(keys).toEqual(["com.pkg"]);
});
// PSL-specific behavior
it("PSL: co.uk groups to example.co.uk across subdomains", () => {
const keys = keysFor(["https://a.b.example.co.uk", "http://example.co.uk"], "Base");
expect(keys).toEqual(["example.co.uk"]);

View File

@@ -24,7 +24,7 @@ interface DuplicateSet {
ciphers: CipherView[];
}
type UriMatchStrategy = "Base" | "Hostname" | "Host" | "Exact";
export type UriMatchStrategy = "Base" | "Hostname" | "Host" | "Exact";
interface ParsedUri {
original: string;
@@ -81,8 +81,9 @@ export class DeDuplicateService {
* Finds groups of ciphers (clusters) that are considered duplicates.
* A "group" or cluster is defined by ciphers sharing login.username and a matched login.uri
* (where match behavior is determined by DEFAULT_URI_STRATEGY)
* OR a matching login.username and normalized cipher.name,
* OR when no username is present, on matching normalized cipher.name only.
* OR a exact matching login.username and exact matching normalized cipher.name,
* OR when no username is present, on exact matching normalized cipher.name only
* where cipher name normalization involves stripping internal and external whitespace and lowercasing all characters.
* @param ciphers A list of all the user's ciphers.
* @returns An array of DuplicateSet objects, each representing a group of duplicates.
*/
@@ -94,7 +95,7 @@ export class DeDuplicateService {
const nameBuckets = new Map<string, CipherView[]>();
const nameOnlyBuckets = new Map<string, CipherView[]>(); // used in edge cases when no useername is present for a login
// DuplicateSet will be created to hold duplicate login ciphers as soon as two matching ci\hers appear in a bucket
// DuplicateSet will be created to hold duplicate login ciphers once two matching ci\hers appear in a bucket
const duplicateSets: DuplicateSet[] = [];
// Used to prevent redundant groupings for a given display key ['username+uri', 'username+name']
@@ -158,8 +159,9 @@ export class DeDuplicateService {
const displayKey = `username+name: ${username} & ${displayName}`;
ensureSetForBucket(bucket, displayKey);
} else {
// match on cipher.name only when username is absent
// match on cipher.name only when login.username is absent
// to prevent false positive duplicates in a situation where a user has multiple accounts on the same site - among others
// this logic will apply to all cipher types, not only logins
let bucket = nameOnlyBuckets.get(canonical);
if (!bucket) {
bucket = [];
@@ -377,6 +379,17 @@ export class DeDuplicateService {
}
}
/**
* Compute the "Base" key for a URI (registrable domain when possible).
*
* Order of operations:
* 1) androidapp -> package id.
* 2) IP/IPv6 -> return literal host (no domain logic).
* 3) Use PSL via Utils.getDomain(scheme://host) to resolve registrable domain for public suffixes.
* 4) Guard against over-grouping on likely private TLDs (e.g., .local, .internal, .lan, .corp, .home):
* when PSL returns only two labels but the original host has more labels, prefer the full host.
* 5) If PSL returns nothing (true miss), return the full host (do NOT fall back to last-two-label).
*/
private getBaseUri(p: ParsedUri): string {
if (p.androidPackage) {
return p.androidPackage;
@@ -385,27 +398,30 @@ export class DeDuplicateService {
if (!host) {
return "";
}
// IP addresses or localhost-like
// IP addresses or IPv6 literals: never reduce further
if (/^\d+\.\d+\.\d+\.\d+$/.test(host) || host.includes(":")) {
return host; // IPv4 or IPv6 literal
}
// Prefer PSL-aware registrable domain via shared Utils (tldts under the hood)
// Try with a normalized URL string first; fall back to heuristic if unavailable
try {
const scheme = p.scheme || "http";
const urlLike = `${scheme}://${host}`;
const domain = Utils.getDomain(urlLike);
if (domain) {
return domain.toLowerCase();
const scheme = p.scheme || "http";
const urlLike = `${scheme}://${host}`;
// Try PSL to get the registrable domain for public suffixes.
const domain = Utils.getDomain(urlLike)?.toLowerCase();
if (domain) {
// Avoid over-grouping for likely private TLDs where getDomain effectively collapses to last-two-label.
// Example: grafana.monitoring.svc.cluster.local would otherwise collapse to cluster.local.
const parts = host.split(".").filter(Boolean);
const dparts = domain.split(".").filter(Boolean);
const tld = dparts[dparts.length - 1];
const PRIVATE_SUFFIX_TLDS = new Set(["local", "internal", "lan", "corp", "home"]);
if (dparts.length === 2 && parts.length > 2 && PRIVATE_SUFFIX_TLDS.has(tld)) {
return host;
}
} catch {
// ignore and fall back
return domain;
}
const parts = host.split(".").filter(Boolean);
if (parts.length <= 2) {
return host;
}
return parts.slice(-2).join(".");
// PSL miss (no registrable domain): prefer full host to avoid over-grouping on private/internal DNS.
// Previous behavior used last-two-label; we intentionally avoid that here.
return host;
}
private getHostname(p: ParsedUri): string {
@@ -429,7 +445,7 @@ export class DeDuplicateService {
return `androidapp:${p.androidPackage}`;
}
const host = p.hostname || "";
const scheme = p.scheme || "http"; // if absent, we assumed http when parsing
const scheme = p.scheme || "http"; // Bitwarden convention assumes http when missing
const port = p.port ? `:${p.port}` : "";
const path = p.path || "";
const query = p.query || "";
@@ -440,23 +456,6 @@ export class DeDuplicateService {
return `${scheme}://${host}${port}${path}${query}${fragment}`.toLowerCase();
}
/**
* Backward-compatible helper retained for tests: returns normalized host from a URI-like string.
* - Lowercases
* - Strips userinfo, port, path, query, fragment
* - Supports IPv4, IPv6, androidapp package IDs
*/
private normalizeUri(raw: string): string {
const p = this.parseUri(raw);
if (!p) {
return "";
}
if (p.androidPackage) {
return p.androidPackage;
}
return (p.hostname || "").toLowerCase();
}
/**
* Handles the user interaction and server-side deletion of identified duplicates.
* This method prompts the user, checks permissions, and performs batch deletions.

View File

@@ -515,6 +515,10 @@
"message": "Domain name",
"description": "Domain name. Example: website.com"
},
"hostName" : {
"message": "Host name",
"description": "Host name includes subdomain but excludes port. Example: subdomain.website.com"
},
"host": {
"message": "Host",
"description": "A URL's host value. For example, the host of https://sub.domain.com:443 is 'sub.domain.com:443'."
@@ -11050,9 +11054,17 @@
"description": "Displayed in a header element and bit-callout to convey the purpose of the de-duplication tool."
},
"deDuplicationToolDescription": {
"message": "Login items that have matching usernames and websites will be made available for review and optional deletion. Deleting duplicate items not in the trash will move these items to the trash, and deleting items already in the trash will permanently remove them from your vault.",
"message": "This tool searches your vault and will present any logins with matching usernames, URIs (websites), and names. Duplicate items will be presented for review and optional deletion. Items selected for deletion will be moved to the vault trash; duplicates already in the trash will be permanently deleted.",
"description": "A message explaining what the de-duplication tool will do."
},
"uriMatchingStrategy": {
"message": "URI matching strategy",
"description": "Label for the selector controlling how URIs are compared when finding duplicates."
},
"uriMatchingStrategyHelp": {
"message": "Choose how URIs are compared when finding duplicates.",
"description": "Helper text explaining the URI matching strategy selector."
},
"findDuplicates": {
"message": "Find duplicates",
"description": "Displayed inside of a button to indicate that clicking the button will initiate a search for duplicate login items."