From ad4b01f315e2b53cf73d509a12cb137a86929d32 Mon Sep 17 00:00:00 2001 From: Leslie Tilton <23057410+Banrion@users.noreply.github.com> Date: Tue, 30 Dec 2025 11:06:30 -0600 Subject: [PATCH] [PM-28548] Phishing Blocker support links (#18070) * Change domain terminology to web addresses * Added phishing resource file * Finish renaming and adding runtime configuration for domains vs links setting * Update reference * Add matching functions per resource * correct URL matching logic for links-based detection Problem: The phishing link matcher was failing to detect known phishing URLs due to two issues: 1. Protocol mismatch: Entries in the phishing list use `http://` but users typically visit `https://` versions. The matcher was comparing full URLs including protocol, causing legitimate matches to fail. - List entry: `http://smartdapptradxx.pages.dev` - User visits: `https://smartdapptradxx.pages.dev/` - Result: No match (incorrect) 2. Hostname-only matching would have caused false positives: An earlier attempt to fix #1 included hostname-only comparison, which defeats the purpose of links-based detection. The goal of PM-28548 is precise URL matching to avoid blocking entire domains (like pages.dev, github.io) when only specific paths are malicious. Solution: - Always strip protocol (http:// or https://) from both entry and URL before comparison, treating them as equivalent - Remove hostname-only matching to maintain precision - Keep prefix matching for subpaths, query strings, and fragments --------- Co-authored-by: Alex (cherry picked from commit 800a21d8a3b65369528610ac79517e7a5792e48a) --- .../phishing-detection/phishing-resources.ts | 98 ++++++++++++++++ .../services/phishing-data.service.spec.ts | 62 +++++----- .../services/phishing-data.service.ts | 107 +++++++++--------- .../services/phishing-detection.service.ts | 2 +- 4 files changed, 186 insertions(+), 83 deletions(-) create mode 100644 apps/browser/src/dirt/phishing-detection/phishing-resources.ts diff --git a/apps/browser/src/dirt/phishing-detection/phishing-resources.ts b/apps/browser/src/dirt/phishing-detection/phishing-resources.ts new file mode 100644 index 00000000000..262d6cf833b --- /dev/null +++ b/apps/browser/src/dirt/phishing-detection/phishing-resources.ts @@ -0,0 +1,98 @@ +export type PhishingResource = { + name?: string; + remoteUrl: string; + checksumUrl: string; + todayUrl: string; + /** Matcher used to decide whether a given URL matches an entry from this resource */ + match: (url: URL, entry: string) => boolean; +}; + +export const PhishingResourceType = Object.freeze({ + Domains: "domains", + Links: "links", +} as const); + +export type PhishingResourceType = (typeof PhishingResourceType)[keyof typeof PhishingResourceType]; + +export const PHISHING_RESOURCES: Record = { + [PhishingResourceType.Domains]: [ + { + name: "Phishing.Database Domains", + remoteUrl: + "https://raw.githubusercontent.com/Phishing-Database/Phishing.Database/master/phishing-domains-ACTIVE.txt", + checksumUrl: + "https://raw.githubusercontent.com/Phishing-Database/checksums/refs/heads/master/phishing-domains-ACTIVE.txt.md5", + todayUrl: + "https://raw.githubusercontent.com/Phishing-Database/Phishing.Database/refs/heads/master/phishing-domains-NEW-today.txt", + match: (url: URL, entry: string) => { + if (!entry) { + return false; + } + const candidate = entry.trim().toLowerCase().replace(/\/$/, ""); + // If entry contains a scheme, strip it for comparison + const e = candidate.replace(/^https?:\/\//, ""); + // Compare against hostname or host+path + if (e === url.hostname.toLowerCase()) { + return true; + } + const urlNoProto = url.href + .toLowerCase() + .replace(/https?:\/\//, "") + .replace(/\/$/, ""); + return urlNoProto === e || urlNoProto.startsWith(e + "/"); + }, + }, + ], + [PhishingResourceType.Links]: [ + { + name: "Phishing.Database Links", + remoteUrl: + "https://raw.githubusercontent.com/Phishing-Database/Phishing.Database/master/phishing-links-ACTIVE.txt", + checksumUrl: + "https://raw.githubusercontent.com/Phishing-Database/checksums/refs/heads/master/phishing-links-ACTIVE.txt.md5", + todayUrl: + "https://raw.githubusercontent.com/Phishing-Database/Phishing.Database/refs/heads/master/phishing-links-NEW-today.txt", + match: (url: URL, entry: string) => { + if (!entry) { + return false; + } + // Basic HTML entity decode for common cases (the lists sometimes contain &) + const decodeHtml = (s: string) => s.replace(/&/g, "&"); + + const normalizedEntry = decodeHtml(entry.trim()).toLowerCase().replace(/\/$/, ""); + + // Normalize URL for comparison - always strip protocol for consistent matching + const normalizedUrl = decodeHtml(url.href).toLowerCase().replace(/\/$/, ""); + const urlNoProto = normalizedUrl.replace(/^https?:\/\//, ""); + + // Strip protocol from entry if present (http:// and https:// should be treated as equivalent) + const entryNoProto = normalizedEntry.replace(/^https?:\/\//, ""); + + // Compare full path (without protocol) - exact match + if (urlNoProto === entryNoProto) { + return true; + } + + // Check if URL starts with entry (prefix match for subpaths/query/hash) + // e.g., entry "site.com/phish" matches "site.com/phish/subpage" or "site.com/phish?id=1" + if ( + urlNoProto.startsWith(entryNoProto + "/") || + urlNoProto.startsWith(entryNoProto + "?") || + urlNoProto.startsWith(entryNoProto + "#") + ) { + return true; + } + + return false; + }, + }, + ], +}; + +export function getPhishingResources( + type: PhishingResourceType, + index = 0, +): PhishingResource | undefined { + const list = PHISHING_RESOURCES[type] ?? []; + return list[index]; +} diff --git a/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.spec.ts b/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.spec.ts index 94f3e99f8be..30aa947092d 100644 --- a/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.spec.ts +++ b/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.spec.ts @@ -25,7 +25,7 @@ describe("PhishingDataService", () => { }; let fetchChecksumSpy: jest.SpyInstance; - let fetchDomainsSpy: jest.SpyInstance; + let fetchWebAddressesSpy: jest.SpyInstance; beforeEach(() => { jest.useFakeTimers(); @@ -45,113 +45,113 @@ describe("PhishingDataService", () => { platformUtilsService, ); - fetchChecksumSpy = jest.spyOn(service as any, "fetchPhishingDomainsChecksum"); - fetchDomainsSpy = jest.spyOn(service as any, "fetchPhishingDomains"); + fetchChecksumSpy = jest.spyOn(service as any, "fetchPhishingChecksum"); + fetchWebAddressesSpy = jest.spyOn(service as any, "fetchPhishingWebAddresses"); }); - describe("isPhishingDomains", () => { - it("should detect a phishing domain", async () => { + describe("isPhishingWebAddress", () => { + it("should detect a phishing web address", async () => { setMockState({ - domains: ["phish.com", "badguy.net"], + webAddresses: ["phish.com", "badguy.net"], timestamp: Date.now(), checksum: "abc123", applicationVersion: "1.0.0", }); const url = new URL("http://phish.com"); - const result = await service.isPhishingDomain(url); + const result = await service.isPhishingWebAddress(url); expect(result).toBe(true); }); - it("should not detect a safe domain", async () => { + it("should not detect a safe web address", async () => { setMockState({ - domains: ["phish.com", "badguy.net"], + webAddresses: ["phish.com", "badguy.net"], timestamp: Date.now(), checksum: "abc123", applicationVersion: "1.0.0", }); const url = new URL("http://safe.com"); - const result = await service.isPhishingDomain(url); + const result = await service.isPhishingWebAddress(url); expect(result).toBe(false); }); - it("should match against root domain", async () => { + it("should match against root web address", async () => { setMockState({ - domains: ["phish.com", "badguy.net"], + webAddresses: ["phish.com", "badguy.net"], timestamp: Date.now(), checksum: "abc123", applicationVersion: "1.0.0", }); const url = new URL("http://phish.com/about"); - const result = await service.isPhishingDomain(url); + const result = await service.isPhishingWebAddress(url); expect(result).toBe(true); }); it("should not error on empty state", async () => { setMockState(undefined as any); const url = new URL("http://phish.com/about"); - const result = await service.isPhishingDomain(url); + const result = await service.isPhishingWebAddress(url); expect(result).toBe(false); }); }); - describe("getNextDomains", () => { - it("refetches all domains if applicationVersion has changed", async () => { + describe("getNextWebAddresses", () => { + it("refetches all web addresses if applicationVersion has changed", async () => { const prev: PhishingData = { - domains: ["a.com"], + webAddresses: ["a.com"], timestamp: Date.now() - 60000, checksum: "old", applicationVersion: "1.0.0", }; fetchChecksumSpy.mockResolvedValue("new"); - fetchDomainsSpy.mockResolvedValue(["d.com", "e.com"]); + fetchWebAddressesSpy.mockResolvedValue(["d.com", "e.com"]); platformUtilsService.getApplicationVersion.mockResolvedValue("2.0.0"); - const result = await service.getNextDomains(prev); + const result = await service.getNextWebAddresses(prev); - expect(result!.domains).toEqual(["d.com", "e.com"]); + expect(result!.webAddresses).toEqual(["d.com", "e.com"]); expect(result!.checksum).toBe("new"); expect(result!.applicationVersion).toBe("2.0.0"); }); it("only updates timestamp if checksum matches", async () => { const prev: PhishingData = { - domains: ["a.com"], + webAddresses: ["a.com"], timestamp: Date.now() - 60000, checksum: "abc", applicationVersion: "1.0.0", }; fetchChecksumSpy.mockResolvedValue("abc"); - const result = await service.getNextDomains(prev); - expect(result!.domains).toEqual(prev.domains); + const result = await service.getNextWebAddresses(prev); + expect(result!.webAddresses).toEqual(prev.webAddresses); expect(result!.checksum).toBe("abc"); expect(result!.timestamp).not.toBe(prev.timestamp); }); it("patches daily domains if cache is fresh", async () => { const prev: PhishingData = { - domains: ["a.com"], + webAddresses: ["a.com"], timestamp: Date.now() - 60000, checksum: "old", applicationVersion: "1.0.0", }; fetchChecksumSpy.mockResolvedValue("new"); - fetchDomainsSpy.mockResolvedValue(["b.com", "c.com"]); - const result = await service.getNextDomains(prev); - expect(result!.domains).toEqual(["a.com", "b.com", "c.com"]); + fetchWebAddressesSpy.mockResolvedValue(["b.com", "c.com"]); + const result = await service.getNextWebAddresses(prev); + expect(result!.webAddresses).toEqual(["a.com", "b.com", "c.com"]); expect(result!.checksum).toBe("new"); }); it("fetches all domains if cache is old", async () => { const prev: PhishingData = { - domains: ["a.com"], + webAddresses: ["a.com"], timestamp: Date.now() - 2 * 24 * 60 * 60 * 1000, checksum: "old", applicationVersion: "1.0.0", }; fetchChecksumSpy.mockResolvedValue("new"); - fetchDomainsSpy.mockResolvedValue(["d.com", "e.com"]); - const result = await service.getNextDomains(prev); - expect(result!.domains).toEqual(["d.com", "e.com"]); + fetchWebAddressesSpy.mockResolvedValue(["d.com", "e.com"]); + const result = await service.getNextWebAddresses(prev); + expect(result!.webAddresses).toEqual(["d.com", "e.com"]); expect(result!.checksum).toBe("new"); }); }); diff --git a/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.ts b/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.ts index 6e1bf07c647..21fe74f1873 100644 --- a/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.ts +++ b/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.ts @@ -20,14 +20,16 @@ import { ScheduledTaskNames, TaskSchedulerService } from "@bitwarden/common/plat import { LogService } from "@bitwarden/logging"; import { GlobalStateProvider, KeyDefinition, PHISHING_DETECTION_DISK } from "@bitwarden/state"; +import { getPhishingResources, PhishingResourceType } from "../phishing-resources"; + export type PhishingData = { - domains: string[]; + webAddresses: string[]; timestamp: number; checksum: string; /** * We store the application version to refetch the entire dataset on a new client release. - * This counteracts daily appends updates not removing inactive or false positive domains. + * This counteracts daily appends updates not removing inactive or false positive web addresses. */ applicationVersion: string; }; @@ -37,34 +39,27 @@ export const PHISHING_DOMAINS_KEY = new KeyDefinition( "phishingDomains", { deserializer: (value: PhishingData) => - value ?? { domains: [], timestamp: 0, checksum: "", applicationVersion: "" }, + value ?? { webAddresses: [], timestamp: 0, checksum: "", applicationVersion: "" }, }, ); -/** Coordinates fetching, caching, and patching of known phishing domains */ +/** Coordinates fetching, caching, and patching of known phishing web addresses */ export class PhishingDataService { - private static readonly RemotePhishingDatabaseUrl = - "https://raw.githubusercontent.com/Phishing-Database/Phishing.Database/master/phishing-domains-ACTIVE.txt"; - private static readonly RemotePhishingDatabaseChecksumUrl = - "https://raw.githubusercontent.com/Phishing-Database/checksums/refs/heads/master/phishing-domains-ACTIVE.txt.md5"; - private static readonly RemotePhishingDatabaseTodayUrl = - "https://raw.githubusercontent.com/Phishing-Database/Phishing.Database/refs/heads/master/phishing-domains-NEW-today.txt"; - - private _testDomains = this.getTestDomains(); + private _testWebAddresses = this.getTestWebAddresses(); private _cachedState = this.globalStateProvider.get(PHISHING_DOMAINS_KEY); - private _domains$ = this._cachedState.state$.pipe( + private _webAddresses$ = this._cachedState.state$.pipe( map( (state) => new Set( - (state?.domains?.filter((line) => line.trim().length > 0) ?? []).concat( - this._testDomains, + (state?.webAddresses?.filter((line) => line.trim().length > 0) ?? []).concat( + this._testWebAddresses, "phishing.testcategory.com", // Included for QA to test in prod ), ), ), ); - // How often are new domains added to the remote? + // How often are new web addresses added to the remote? readonly UPDATE_INTERVAL_DURATION = 24 * 60 * 60 * 1000; // 24 hours private _triggerUpdate$ = new Subject(); @@ -75,7 +70,7 @@ export class PhishingDataService { this._cachedState.state$.pipe( first(), // Only take the first value to avoid an infinite loop when updating the cache below switchMap(async (cachedState) => { - const next = await this.getNextDomains(cachedState); + const next = await this.getNextWebAddresses(cachedState); if (next) { await this._cachedState.update(() => next); this.logService.info(`[PhishingDataService] cache updated`); @@ -85,7 +80,7 @@ export class PhishingDataService { count: 3, delay: (err, count) => { this.logService.error( - `[PhishingDataService] Unable to update domains. Attempt ${count}.`, + `[PhishingDataService] Unable to update web addresses. Attempt ${count}.`, err, ); return timer(5 * 60 * 1000); // 5 minutes @@ -97,7 +92,7 @@ export class PhishingDataService { err: unknown /** Eslint actually crashed if you remove this type: https://github.com/cartant/eslint-plugin-rxjs/issues/122 */, ) => { this.logService.error( - "[PhishingDataService] Retries unsuccessful. Unable to update domains.", + "[PhishingDataService] Retries unsuccessful. Unable to update web addresses.", err, ); return EMPTY; @@ -114,6 +109,7 @@ export class PhishingDataService { private globalStateProvider: GlobalStateProvider, private logService: LogService, private platformUtilsService: PlatformUtilsService, + private resourceType: PhishingResourceType = PhishingResourceType.Links, ) { this.taskSchedulerService.registerTaskHandler(ScheduledTaskNames.phishingDomainUpdate, () => { this._triggerUpdate$.next(); @@ -125,22 +121,31 @@ export class PhishingDataService { } /** - * Checks if the given URL is a known phishing domain + * Checks if the given URL is a known phishing web address * * @param url The URL to check - * @returns True if the URL is a known phishing domain, false otherwise + * @returns True if the URL is a known phishing web address, false otherwise */ - async isPhishingDomain(url: URL): Promise { - const domains = await firstValueFrom(this._domains$); - const result = domains.has(url.hostname); - if (result) { - return true; + async isPhishingWebAddress(url: URL): Promise { + // Use domain (hostname) matching for domain resources, and link matching for links resources + const entries = await firstValueFrom(this._webAddresses$); + + const resource = getPhishingResources(this.resourceType); + if (resource && resource.match) { + for (const entry of entries) { + if (resource.match(url, entry)) { + return true; + } + } + return false; } - return false; + + // Default/domain behavior: exact hostname match as a fallback + return entries.has(url.hostname); } - async getNextDomains(prev: PhishingData | null): Promise { - prev = prev ?? { domains: [], timestamp: 0, checksum: "", applicationVersion: "" }; + async getNextWebAddresses(prev: PhishingData | null): Promise { + prev = prev ?? { webAddresses: [], timestamp: 0, checksum: "", applicationVersion: "" }; const timestamp = Date.now(); const prevAge = timestamp - prev.timestamp; this.logService.info(`[PhishingDataService] Cache age: ${prevAge}`); @@ -148,7 +153,7 @@ export class PhishingDataService { const applicationVersion = await this.platformUtilsService.getApplicationVersion(); // If checksum matches, return existing data with new timestamp & version - const remoteChecksum = await this.fetchPhishingDomainsChecksum(); + const remoteChecksum = await this.fetchPhishingChecksum(this.resourceType); if (remoteChecksum && prev.checksum === remoteChecksum) { this.logService.info( `[PhishingDataService] Remote checksum matches local checksum, updating timestamp only.`, @@ -157,66 +162,66 @@ export class PhishingDataService { } // Checksum is different, data needs to be updated. - // Approach 1: Fetch only new domains and append + // Approach 1: Fetch only new web addresses and append const isOneDayOldMax = prevAge <= this.UPDATE_INTERVAL_DURATION; if (isOneDayOldMax && applicationVersion === prev.applicationVersion) { - const dailyDomains: string[] = await this.fetchPhishingDomains( - PhishingDataService.RemotePhishingDatabaseTodayUrl, - ); + const webAddressesTodayUrl = getPhishingResources(this.resourceType)!.todayUrl; + const dailyWebAddresses: string[] = + await this.fetchPhishingWebAddresses(webAddressesTodayUrl); this.logService.info( - `[PhishingDataService] ${dailyDomains.length} new phishing domains added`, + `[PhishingDataService] ${dailyWebAddresses.length} new phishing web addresses added`, ); return { - domains: prev.domains.concat(dailyDomains), + webAddresses: prev.webAddresses.concat(dailyWebAddresses), checksum: remoteChecksum, timestamp, applicationVersion, }; } - // Approach 2: Fetch all domains - const domains = await this.fetchPhishingDomains(PhishingDataService.RemotePhishingDatabaseUrl); + // Approach 2: Fetch all web addresses + const remoteUrl = getPhishingResources(this.resourceType)!.remoteUrl; + const remoteWebAddresses = await this.fetchPhishingWebAddresses(remoteUrl); return { - domains, + webAddresses: remoteWebAddresses, timestamp, checksum: remoteChecksum, applicationVersion, }; } - private async fetchPhishingDomainsChecksum() { - const response = await this.apiService.nativeFetch( - new Request(PhishingDataService.RemotePhishingDatabaseChecksumUrl), - ); + private async fetchPhishingChecksum(type: PhishingResourceType = PhishingResourceType.Domains) { + const checksumUrl = getPhishingResources(type)!.checksumUrl; + const response = await this.apiService.nativeFetch(new Request(checksumUrl)); if (!response.ok) { throw new Error(`[PhishingDataService] Failed to fetch checksum: ${response.status}`); } return response.text(); } - private async fetchPhishingDomains(url: string) { + private async fetchPhishingWebAddresses(url: string) { const response = await this.apiService.nativeFetch(new Request(url)); if (!response.ok) { - throw new Error(`[PhishingDataService] Failed to fetch domains: ${response.status}`); + throw new Error(`[PhishingDataService] Failed to fetch web addresses: ${response.status}`); } return response.text().then((text) => text.split("\n")); } - private getTestDomains() { + private getTestWebAddresses() { const flag = devFlagEnabled("testPhishingUrls"); if (!flag) { return []; } - const domains = devFlagValue("testPhishingUrls") as unknown[]; - if (domains && domains instanceof Array) { + const webAddresses = devFlagValue("testPhishingUrls") as unknown[]; + if (webAddresses && webAddresses instanceof Array) { this.logService.debug( - "[PhishingDetectionService] Dev flag enabled for testing phishing detection. Adding test phishing domains:", - domains, + "[PhishingDetectionService] Dev flag enabled for testing phishing detection. Adding test phishing web addresses:", + webAddresses, ); - return domains as string[]; + return webAddresses as string[]; } return []; } diff --git a/apps/browser/src/dirt/phishing-detection/services/phishing-detection.service.ts b/apps/browser/src/dirt/phishing-detection/services/phishing-detection.service.ts index e04d08559ab..d90e872eef8 100644 --- a/apps/browser/src/dirt/phishing-detection/services/phishing-detection.service.ts +++ b/apps/browser/src/dirt/phishing-detection/services/phishing-detection.service.ts @@ -94,7 +94,7 @@ export class PhishingDetectionService { this._ignoredHostnames.delete(url.hostname); return; } - const isPhishing = await phishingDataService.isPhishingDomain(url); + const isPhishing = await phishingDataService.isPhishingWebAddress(url); if (!isPhishing) { return; }