From cb21aa9ff93e9bd9031c5842e6115b5d3bdb75e3 Mon Sep 17 00:00:00 2001 From: Leslie Tilton <23057410+Banrion@users.noreply.github.com> Date: Mon, 26 Jan 2026 17:05:42 -0600 Subject: [PATCH] [PM-31203] Change Phishing Url Check to use a Cursor Based Search (#18561) * Initial changes to look at phishing indexeddb service and removal of obsolete compression code * Convert background update to rxjs format and trigger via subject. Update test cases * Added addUrls function to use instead of saveUrls so appending daily does not clear all urls * Added debug logs to phishing-indexeddb service * Added a fallback url when downloading phishing url list * Remove obsolete comments * Fix testUrl default, false scenario and test cases * Add default return on isPhishingWebAddress * Added log statement * Change hostname to href in hasUrl check * Save fallback response * Fix matching subpaths in links. Update test cases * Fix meta data updates storing last checked instead of last updated * Update QA phishing url to be normalized * Filter web addresses * Return previous meta to keep subscription alive * Change indexeddb lookup from loading all to cursor search * fix(phishing): improve performance and fix URL matching in phishing detection Problem: The cursor-based search takes ~25 seconds to scan the entire phishing database. For non-phishing URLs (99% of cases), this full scan runs to completion every time. Before these fixes, opening a new tab triggered this sequence: 1. chrome://newtab/ fires a phishing check 2. Sequential concatMap blocks while cursor scans all 500k+ URLs (~25 sec) 3. User pastes actual URL and hits enter 4. That URL's check waits in queue behind the chrome:// check 5. Total delay: ~50+ seconds for a simple "open tab, paste link" workflow Even for legitimate phishing checks, the cursor search could take up to 25 seconds per URL when the fast hasUrl lookup misses due to trailing slash mismatches. Changes: phishing-data.service.ts: - Add protocol filter to early-return for non-http(s) URLs, avoiding expensive IndexedDB operations for chrome://, about:, file:// URLs - Add trailing slash normalization for hasUrl lookup - browsers add trailing slashes but DB entries may not have them, causing O(1) lookups to miss and fall back to O(n) cursor search unnecessarily - Add debug logging for hasUrl checks and timing metrics for cursor-based search to aid performance debugging phishing-detection.service.ts: - Replace concatMap with mergeMap for parallel tab processing - each tab check now runs independently instead of sequentially - Add concurrency limit of 5 to prevent overwhelming IndexedDB while still allowing parallel execution Result: - New tabs are instant (no IndexedDB calls for non-web URLs) - One slow phishing check doesn't block other tabs - Common URL patterns hit the fast O(1) path instead of O(n) cursor scan * performance debug logs * disable custom match because too slow * spec fix --------- Co-authored-by: Alex (cherry picked from commit 60c28dd182eb7cbdd73956eb19509976c1c875b5) --- .../phishing-detection/phishing-resources.ts | 4 + .../services/phishing-data.service.spec.ts | 506 ++++++++-------- .../services/phishing-data.service.ts | 543 ++++++++++-------- .../services/phishing-detection.service.ts | 54 +- .../phishing-indexeddb.service.spec.ts | 83 +++ .../services/phishing-indexeddb.service.ts | 54 ++ 6 files changed, 740 insertions(+), 504 deletions(-) diff --git a/apps/browser/src/dirt/phishing-detection/phishing-resources.ts b/apps/browser/src/dirt/phishing-detection/phishing-resources.ts index 4cd155c8ae3..c8dd083e6a6 100644 --- a/apps/browser/src/dirt/phishing-detection/phishing-resources.ts +++ b/apps/browser/src/dirt/phishing-detection/phishing-resources.ts @@ -5,6 +5,8 @@ export type PhishingResource = { todayUrl: string; /** Matcher used to decide whether a given URL matches an entry from this resource */ match: (url: URL, entry: string) => boolean; + /** Whether to use the custom matcher. If false, only exact hasUrl lookups are used. Default: true */ + useCustomMatcher?: boolean; }; export const PhishingResourceType = Object.freeze({ @@ -50,6 +52,8 @@ export const PHISHING_RESOURCES: Record { if (!entry) { return false; diff --git a/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.spec.ts b/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.spec.ts index c277b8d33f8..2d6c7a5a651 100644 --- a/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.spec.ts +++ b/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.spec.ts @@ -1,4 +1,5 @@ import { MockProxy, mock } from "jest-mock-extended"; +import { firstValueFrom } from "rxjs"; import { ApiService } from "@bitwarden/common/abstractions/api.service"; import { PlatformUtilsService } from "@bitwarden/common/platform/abstractions/platform-utils.service"; @@ -9,66 +10,8 @@ import { import { FakeGlobalStateProvider } from "@bitwarden/common/spec"; import { LogService } from "@bitwarden/logging"; -import { - PhishingDataService, - PHISHING_DOMAINS_META_KEY, - PHISHING_DOMAINS_BLOB_KEY, - PhishingDataMeta, - PhishingDataBlob, -} from "./phishing-data.service"; - -const flushPromises = () => - new Promise((resolve) => jest.requireActual("timers").setImmediate(resolve)); - -// [FIXME] Move mocking and compression helpers to a shared test utils library -// to separate from phishing data service tests. -export const setupPhishingMocks = (mockedResult: string | ArrayBuffer = "mocked-data") => { - // Store original globals - const originals = { - Response: global.Response, - CompressionStream: global.CompressionStream, - DecompressionStream: global.DecompressionStream, - Blob: global.Blob, - atob: global.atob, - btoa: global.btoa, - }; - - // Mock missing or browser-only globals - global.atob = (str) => Buffer.from(str, "base64").toString("binary"); - global.btoa = (str) => Buffer.from(str, "binary").toString("base64"); - - (global as any).CompressionStream = class {}; - (global as any).DecompressionStream = class {}; - - global.Blob = class { - constructor(public parts: any[]) {} - stream() { - return { pipeThrough: () => ({}) }; - } - } as any; - - global.Response = class { - body = { pipeThrough: () => ({}) }; - // Return string for decompression - text() { - return Promise.resolve(typeof mockedResult === "string" ? mockedResult : ""); - } - // Return ArrayBuffer for compression - arrayBuffer() { - if (typeof mockedResult === "string") { - const bytes = new TextEncoder().encode(mockedResult); - return Promise.resolve(bytes.buffer); - } - - return Promise.resolve(mockedResult); - } - } as any; - - // Cleanup function - return () => { - Object.assign(global, originals); - }; -}; +import { PHISHING_DOMAINS_META_KEY, PhishingDataService } from "./phishing-data.service"; +import type { PhishingIndexedDbService } from "./phishing-indexeddb.service"; describe("PhishingDataService", () => { let service: PhishingDataService; @@ -76,33 +19,31 @@ describe("PhishingDataService", () => { let taskSchedulerService: TaskSchedulerService; let logService: MockProxy; let platformUtilsService: MockProxy; + let mockIndexedDbService: MockProxy; const fakeGlobalStateProvider: FakeGlobalStateProvider = new FakeGlobalStateProvider(); - - const setMockMeta = (state: PhishingDataMeta) => { - fakeGlobalStateProvider.getFake(PHISHING_DOMAINS_META_KEY).stateSubject.next(state); - return state; - }; - const setMockBlob = (state: PhishingDataBlob) => { - fakeGlobalStateProvider.getFake(PHISHING_DOMAINS_BLOB_KEY).stateSubject.next(state); - return state; - }; - let fetchChecksumSpy: jest.SpyInstance; - let fetchAndCompressSpy: jest.SpyInstance; - - const mockMeta: PhishingDataMeta = { - checksum: "abc", - timestamp: Date.now(), - applicationVersion: "1.0.0", - }; - const mockBlob = "http://phish.com\nhttps://badguy.net"; - const mockCompressedBlob = - "H4sIAAAAAAAA/8vMTSzJzM9TSE7MLchJLElVyE9TyC9KSS1S0FFIz8hLz0ksSQUAtK7XMSYAAAA="; beforeEach(async () => { - jest.useFakeTimers(); + jest.clearAllMocks(); + + // Mock Request global if not available + if (typeof Request === "undefined") { + (global as any).Request = class { + constructor(public url: string) {} + }; + } + apiService = mock(); logService = mock(); + mockIndexedDbService = mock(); + + // Set default mock behaviors + mockIndexedDbService.hasUrl.mockResolvedValue(false); + mockIndexedDbService.loadAllUrls.mockResolvedValue([]); + mockIndexedDbService.findMatchingUrl.mockResolvedValue(false); + mockIndexedDbService.saveUrls.mockResolvedValue(undefined); + mockIndexedDbService.addUrls.mockResolvedValue(undefined); + mockIndexedDbService.saveUrlsFromStream.mockResolvedValue(undefined); platformUtilsService = mock(); platformUtilsService.getApplicationVersion.mockResolvedValue("1.0.0"); @@ -116,217 +57,308 @@ describe("PhishingDataService", () => { logService, platformUtilsService, ); - fetchChecksumSpy = jest.spyOn(service as any, "fetchPhishingChecksum"); - fetchAndCompressSpy = jest.spyOn(service as any, "fetchAndCompress"); + // Replace the IndexedDB service with our mock + service["indexedDbService"] = mockIndexedDbService; + + fetchChecksumSpy = jest.spyOn(service as any, "fetchPhishingChecksum"); fetchChecksumSpy.mockResolvedValue("new-checksum"); - fetchAndCompressSpy.mockResolvedValue("compressed-blob"); }); describe("initialization", () => { - beforeEach(() => { - jest.spyOn(service as any, "_compressString").mockResolvedValue(mockCompressedBlob); - jest.spyOn(service as any, "_decompressString").mockResolvedValue(mockBlob); + it("should initialize with IndexedDB service", () => { + expect(service["indexedDbService"]).toBeDefined(); }); - it("should perform background update", async () => { - platformUtilsService.getApplicationVersion.mockResolvedValue("1.0.x"); - jest - .spyOn(service as any, "getNextWebAddresses") - .mockResolvedValue({ meta: mockMeta, blob: mockBlob }); - - setMockBlob(mockBlob); - setMockMeta(mockMeta); - - const sub = service.update$.subscribe(); - await flushPromises(); - - const url = new URL("http://phish.com"); - const QAurl = new URL("http://phishing.testcategory.com"); + it("should detect QA test addresses - http protocol", async () => { + const url = new URL("http://phishing.testcategory.com"); expect(await service.isPhishingWebAddress(url)).toBe(true); - expect(await service.isPhishingWebAddress(QAurl)).toBe(true); + // IndexedDB should not be called for test addresses + expect(mockIndexedDbService.hasUrl).not.toHaveBeenCalled(); + }); - sub.unsubscribe(); + it("should detect QA test addresses - https protocol", async () => { + const url = new URL("https://phishing.testcategory.com"); + expect(await service.isPhishingWebAddress(url)).toBe(true); + expect(mockIndexedDbService.hasUrl).not.toHaveBeenCalled(); + }); + + it("should detect QA test addresses - specific subpath /block", async () => { + const url = new URL("https://phishing.testcategory.com/block"); + expect(await service.isPhishingWebAddress(url)).toBe(true); + expect(mockIndexedDbService.hasUrl).not.toHaveBeenCalled(); + }); + + it("should NOT detect QA test addresses - different subpath", async () => { + mockIndexedDbService.hasUrl.mockResolvedValue(false); + mockIndexedDbService.findMatchingUrl.mockResolvedValue(false); + + const url = new URL("https://phishing.testcategory.com/other"); + const result = await service.isPhishingWebAddress(url); + + // This should NOT be detected as a test address since only /block subpath is hardcoded + expect(result).toBe(false); + }); + + it("should detect QA test addresses - root path with trailing slash", async () => { + const url = new URL("https://phishing.testcategory.com/"); + const result = await service.isPhishingWebAddress(url); + + // This SHOULD be detected since URLs are normalized (trailing slash added to root URLs) + expect(result).toBe(true); + expect(mockIndexedDbService.hasUrl).not.toHaveBeenCalled(); }); }); describe("isPhishingWebAddress", () => { - beforeEach(() => { - jest.spyOn(service as any, "_compressString").mockResolvedValue(mockCompressedBlob); - jest.spyOn(service as any, "_decompressString").mockResolvedValue(mockBlob); - }); + it("should detect a phishing web address using quick hasUrl lookup", async () => { + // Mock hasUrl to return true for direct hostname match + mockIndexedDbService.hasUrl.mockResolvedValue(true); - it("should detect a phishing web address", async () => { - service["_webAddressesSet"] = new Set(["phish.com", "badguy.net"]); - - const url = new URL("http://phish.com"); + const url = new URL("http://phish.com/testing-param"); const result = await service.isPhishingWebAddress(url); expect(result).toBe(true); + expect(mockIndexedDbService.hasUrl).toHaveBeenCalledWith("http://phish.com/testing-param"); + // Should not fall back to custom matcher when hasUrl returns true + expect(mockIndexedDbService.findMatchingUrl).not.toHaveBeenCalled(); + }); + + it("should return false when hasUrl returns false (custom matcher disabled)", async () => { + // Mock hasUrl to return false (no direct href match) + mockIndexedDbService.hasUrl.mockResolvedValue(false); + + const url = new URL("http://phish.com/path"); + const result = await service.isPhishingWebAddress(url); + + // Custom matcher is currently disabled (useCustomMatcher: false), so result is false + expect(result).toBe(false); + expect(mockIndexedDbService.hasUrl).toHaveBeenCalledWith("http://phish.com/path"); + // Custom matcher should NOT be called since it's disabled + expect(mockIndexedDbService.findMatchingUrl).not.toHaveBeenCalled(); }); it("should not detect a safe web address", async () => { - service["_webAddressesSet"] = new Set(["phish.com", "badguy.net"]); + // Mock hasUrl to return false + mockIndexedDbService.hasUrl.mockResolvedValue(false); + const url = new URL("http://safe.com"); const result = await service.isPhishingWebAddress(url); + expect(result).toBe(false); + expect(mockIndexedDbService.hasUrl).toHaveBeenCalledWith("http://safe.com/"); + // Custom matcher is disabled, so findMatchingUrl should NOT be called + expect(mockIndexedDbService.findMatchingUrl).not.toHaveBeenCalled(); }); - it("should match against root web address", async () => { - service["_webAddressesSet"] = new Set(["phish.com", "badguy.net"]); - const url = new URL("http://phish.com/about"); + it("should not match against root web address with subpaths (custom matcher disabled)", async () => { + // Mock hasUrl to return false (no direct href match) + mockIndexedDbService.hasUrl.mockResolvedValue(false); + + const url = new URL("http://phish.com/login/page"); const result = await service.isPhishingWebAddress(url); - expect(result).toBe(true); + + expect(result).toBe(false); + expect(mockIndexedDbService.hasUrl).toHaveBeenCalledWith("http://phish.com/login/page"); + // Custom matcher is disabled, so findMatchingUrl should NOT be called + expect(mockIndexedDbService.findMatchingUrl).not.toHaveBeenCalled(); }); - it("should not error on empty state", async () => { - service["_webAddressesSet"] = null; + it("should not match against root web address with different subpaths (custom matcher disabled)", async () => { + // Mock hasUrl to return false (no direct hostname match) + mockIndexedDbService.hasUrl.mockResolvedValue(false); + + const url = new URL("http://phish.com/login/page2"); + const result = await service.isPhishingWebAddress(url); + + expect(result).toBe(false); + expect(mockIndexedDbService.hasUrl).toHaveBeenCalledWith("http://phish.com/login/page2"); + // Custom matcher is disabled, so findMatchingUrl should NOT be called + expect(mockIndexedDbService.findMatchingUrl).not.toHaveBeenCalled(); + }); + + it("should handle IndexedDB errors gracefully", async () => { + // Mock hasUrl to throw error + mockIndexedDbService.hasUrl.mockRejectedValue(new Error("hasUrl error")); + const url = new URL("http://phish.com/about"); const result = await service.isPhishingWebAddress(url); + expect(result).toBe(false); + expect(logService.error).toHaveBeenCalledWith( + "[PhishingDataService] IndexedDB lookup via hasUrl failed", + expect.any(Error), + ); + // Custom matcher is disabled, so no custom matcher error is expected + expect(mockIndexedDbService.findMatchingUrl).not.toHaveBeenCalled(); }); }); - describe("getNextWebAddresses", () => { - beforeEach(() => { - jest.spyOn(service as any, "_compressString").mockResolvedValue(mockCompressedBlob); - jest.spyOn(service as any, "_decompressString").mockResolvedValue(mockBlob); + describe("data updates", () => { + it("should update full dataset via stream", async () => { + // Mock full dataset update + const mockResponse = { + ok: true, + body: {} as ReadableStream, + } as Response; + apiService.nativeFetch.mockResolvedValue(mockResponse); + + await firstValueFrom(service["_updateFullDataSet"]()); + + expect(mockIndexedDbService.saveUrlsFromStream).toHaveBeenCalled(); }); - it("refetches all web addresses if applicationVersion has changed", async () => { - const prev: PhishingDataMeta = { - timestamp: Date.now() - 60000, - checksum: "old", - applicationVersion: "1.0.0", - }; - fetchChecksumSpy.mockResolvedValue("new"); + it("should update daily dataset via addUrls", async () => { + // Mock daily update + const mockResponse = { + ok: true, + text: jest.fn().mockResolvedValue("newphish.com\nanotherbad.net"), + } as unknown as Response; + apiService.nativeFetch.mockResolvedValue(mockResponse); + + await firstValueFrom(service["_updateDailyDataSet"]()); + + expect(mockIndexedDbService.addUrls).toHaveBeenCalledWith(["newphish.com", "anotherbad.net"]); + }); + + it("should get updated meta information", async () => { + fetchChecksumSpy.mockResolvedValue("new-checksum"); platformUtilsService.getApplicationVersion.mockResolvedValue("2.0.0"); - const result = await service.getNextWebAddresses(prev); + const meta = await firstValueFrom(service["_getUpdatedMeta"]()); - expect(result!.blob).toBe("compressed-blob"); - expect(result!.meta!.checksum).toBe("new"); - expect(result!.meta!.applicationVersion).toBe("2.0.0"); - }); - - it("returns null when checksum matches and cache not expired", async () => { - const prev: PhishingDataMeta = { - timestamp: Date.now(), - checksum: "abc", - applicationVersion: "1.0.0", - }; - fetchChecksumSpy.mockResolvedValue("abc"); - const result = await service.getNextWebAddresses(prev); - expect(result).toBeNull(); - }); - - it("patches daily domains when cache is expired and checksum unchanged", async () => { - const prev: PhishingDataMeta = { - timestamp: 0, - checksum: "old", - applicationVersion: "1.0.0", - }; - const dailyLines = ["b.com", "c.com"]; - fetchChecksumSpy.mockResolvedValue("old"); - jest.spyOn(service as any, "fetchText").mockResolvedValue(dailyLines); - - setMockBlob(mockBlob); - - const expectedBlob = - "H4sIAAAAAAAA/8vMTSzJzM9TSE7MLchJLElVyE9TyC9KSS1S0FFIz8hLz0ksSQUAtK7XMSYAAAA="; - const result = await service.getNextWebAddresses(prev); - - expect(result!.blob).toBe(expectedBlob); - expect(result!.meta!.checksum).toBe("old"); - }); - - it("fetches all domains when checksum has changed", async () => { - const prev: PhishingDataMeta = { - timestamp: 0, - checksum: "old", - applicationVersion: "1.0.0", - }; - fetchChecksumSpy.mockResolvedValue("new"); - fetchAndCompressSpy.mockResolvedValue("new-blob"); - const result = await service.getNextWebAddresses(prev); - expect(result!.blob).toBe("new-blob"); - expect(result!.meta!.checksum).toBe("new"); + expect(meta).toBeDefined(); + expect(meta.checksum).toBe("new-checksum"); + expect(meta.applicationVersion).toBe("2.0.0"); + expect(meta.timestamp).toBeDefined(); }); }); - describe("compression helpers", () => { - let restore: () => void; + describe("phishing meta data updates", () => { + it("should not update metadata when no data updates occur", async () => { + // Set up existing metadata + const existingMeta = { + checksum: "existing-checksum", + timestamp: Date.now() - 1000, // 1 second ago (not expired) + applicationVersion: "1.0.0", + }; + await fakeGlobalStateProvider.get(PHISHING_DOMAINS_META_KEY).update(() => existingMeta); - beforeEach(async () => { - restore = setupPhishingMocks("abc"); + // Mock conditions where no update is needed + fetchChecksumSpy.mockResolvedValue("existing-checksum"); // Same checksum + platformUtilsService.getApplicationVersion.mockResolvedValue("1.0.0"); // Same version + const mockResponse = { + ok: true, + body: {} as ReadableStream, + } as Response; + apiService.nativeFetch.mockResolvedValue(mockResponse); + + // Trigger background update + const result = await firstValueFrom(service["_backgroundUpdate"](existingMeta)); + + // Verify metadata was NOT updated (same reference returned) + expect(result).toEqual(existingMeta); + expect(result?.timestamp).toBe(existingMeta.timestamp); + + // Verify no data updates were performed + expect(mockIndexedDbService.saveUrlsFromStream).not.toHaveBeenCalled(); + expect(mockIndexedDbService.addUrls).not.toHaveBeenCalled(); }); - afterEach(() => { - if (restore) { - restore(); - } - delete (Uint8Array as any).fromBase64; - jest.restoreAllMocks(); + it("should update metadata when full dataset update occurs due to checksum change", async () => { + // Set up existing metadata + const existingMeta = { + checksum: "old-checksum", + timestamp: Date.now() - 1000, + applicationVersion: "1.0.0", + }; + await fakeGlobalStateProvider.get(PHISHING_DOMAINS_META_KEY).update(() => existingMeta); + + // Mock conditions for full update + fetchChecksumSpy.mockResolvedValue("new-checksum"); // Different checksum + platformUtilsService.getApplicationVersion.mockResolvedValue("1.0.0"); + const mockResponse = { + ok: true, + body: {} as ReadableStream, + } as Response; + apiService.nativeFetch.mockResolvedValue(mockResponse); + + // Trigger background update + const result = await firstValueFrom(service["_backgroundUpdate"](existingMeta)); + + // Verify metadata WAS updated with new values + expect(result?.checksum).toBe("new-checksum"); + expect(result?.timestamp).toBeGreaterThan(existingMeta.timestamp); + + // Verify full update was performed + expect(mockIndexedDbService.saveUrlsFromStream).toHaveBeenCalled(); + expect(mockIndexedDbService.addUrls).not.toHaveBeenCalled(); // Daily should not run }); - describe("_compressString", () => { - it("compresses a string to base64", async () => { - const out = await service["_compressString"]("abc"); - expect(out).toBe("YWJj"); // base64 for 'abc' - }); + it("should update metadata when full dataset update occurs due to version change", async () => { + // Set up existing metadata + const existingMeta = { + checksum: "same-checksum", + timestamp: Date.now() - 1000, + applicationVersion: "1.0.0", + }; + await fakeGlobalStateProvider.get(PHISHING_DOMAINS_META_KEY).update(() => existingMeta); - it("compresses using fallback on older browsers", async () => { - const input = "abc"; - const expected = btoa(encodeURIComponent(input)); - const out = await service["_compressString"](input); - expect(out).toBe(expected); - }); + // Mock conditions for full update + fetchChecksumSpy.mockResolvedValue("same-checksum"); + platformUtilsService.getApplicationVersion.mockResolvedValue("2.0.0"); // Different version + const mockResponse = { + ok: true, + body: {} as ReadableStream, + } as Response; + apiService.nativeFetch.mockResolvedValue(mockResponse); - it("compresses using btoa on error", async () => { - const input = "abc"; - const expected = btoa(encodeURIComponent(input)); - const out = await service["_compressString"](input); - expect(out).toBe(expected); - }); + // Trigger background update + const result = await firstValueFrom(service["_backgroundUpdate"](existingMeta)); + + // Verify metadata WAS updated + expect(result?.applicationVersion).toBe("2.0.0"); + expect(result?.timestamp).toBeGreaterThan(existingMeta.timestamp); + + // Verify full update was performed + expect(mockIndexedDbService.saveUrlsFromStream).toHaveBeenCalled(); + expect(mockIndexedDbService.addUrls).not.toHaveBeenCalled(); }); - describe("_decompressString", () => { - it("decompresses a string from base64", async () => { - const base64 = btoa("ignored"); - const out = await service["_decompressString"](base64); - expect(out).toBe("abc"); - }); - it("decompresses using fallback on older browsers", async () => { - // Provide a fromBase64 implementation - (Uint8Array as any).fromBase64 = (b64: string) => new Uint8Array([100, 101, 102]); + it("should update metadata when daily update occurs due to cache expiration", async () => { + // Set up existing metadata (expired cache) + const existingMeta = { + checksum: "same-checksum", + timestamp: Date.now() - 25 * 60 * 60 * 1000, // 25 hours ago (expired) + applicationVersion: "1.0.0", + }; + await fakeGlobalStateProvider.get(PHISHING_DOMAINS_META_KEY).update(() => existingMeta); - const out = await service["_decompressString"]("ignored"); - expect(out).toBe("abc"); - }); + // Mock conditions for daily update only + fetchChecksumSpy.mockResolvedValue("same-checksum"); // Same checksum (no full update) + platformUtilsService.getApplicationVersion.mockResolvedValue("1.0.0"); // Same version + const mockFullResponse = { + ok: true, + body: {} as ReadableStream, + } as Response; + const mockDailyResponse = { + ok: true, + text: jest.fn().mockResolvedValue("newdomain.com"), + } as unknown as Response; + apiService.nativeFetch + .mockResolvedValueOnce(mockFullResponse) + .mockResolvedValueOnce(mockDailyResponse); - it("decompresses using atob on error", async () => { - const base64 = btoa(encodeURIComponent("abc")); - const out = await service["_decompressString"](base64); - expect(out).toBe("abc"); - }); - }); - }); + // Trigger background update + const result = await firstValueFrom(service["_backgroundUpdate"](existingMeta)); - describe("_loadBlobToMemory", () => { - it("loads blob into memory set", async () => { - const prevBlob = "ignored-base64"; - fakeGlobalStateProvider.getFake(PHISHING_DOMAINS_BLOB_KEY).stateSubject.next(prevBlob); + // Verify metadata WAS updated + expect(result?.timestamp).toBeGreaterThan(existingMeta.timestamp); + expect(result?.checksum).toBe("same-checksum"); - jest.spyOn(service as any, "_decompressString").mockResolvedValue("phish.com\nbadguy.net"); - - // Trigger the load pipeline and allow async RxJS processing to complete - service["_loadBlobToMemory"](); - await flushPromises(); - - const set = service["_webAddressesSet"] as Set; - expect(set).toBeDefined(); - expect(set.has("phish.com")).toBe(true); - expect(set.has("badguy.net")).toBe(true); + // Verify only daily update was performed + expect(mockIndexedDbService.saveUrlsFromStream).not.toHaveBeenCalled(); + expect(mockIndexedDbService.addUrls).toHaveBeenCalledWith(["newdomain.com"]); }); }); }); diff --git a/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.ts b/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.ts index 7d5f04cc276..c34a94ecced 100644 --- a/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.ts +++ b/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.ts @@ -1,17 +1,25 @@ import { catchError, + concatMap, + defer, EMPTY, + exhaustMap, first, - firstValueFrom, + forkJoin, from, + iif, + map, + Observable, of, + retry, share, takeUntil, startWith, Subject, switchMap, tap, - map, + throwError, + timer, } from "rxjs"; import { devFlagEnabled, devFlagValue } from "@bitwarden/browser/platform/flags"; @@ -23,6 +31,8 @@ import { GlobalStateProvider, KeyDefinition, PHISHING_DETECTION_DISK } from "@bi import { getPhishingResources, PhishingResourceType } from "../phishing-resources"; +import { PhishingIndexedDbService } from "./phishing-indexeddb.service"; + /** * Metadata about the phishing data set */ @@ -73,19 +83,16 @@ export class PhishingDataService { // We are adding the destroy to guard against accidental leaks. private _destroy$ = new Subject(); - private _testWebAddresses = this.getTestWebAddresses().concat("phishing.testcategory.com"); // Included for QA to test in prod + private _testWebAddresses = this.getTestWebAddresses(); private _phishingMetaState = this.globalStateProvider.get(PHISHING_DOMAINS_META_KEY); - private _phishingBlobState = this.globalStateProvider.get(PHISHING_DOMAINS_BLOB_KEY); - // In-memory set loaded from blob for fast lookups without reading large storage repeatedly - private _webAddressesSet: Set | null = null; - // Loading variables for web addresses set - // Triggers a load for _webAddressesSet - private _loadTrigger$ = new Subject(); + private indexedDbService: PhishingIndexedDbService; // How often are new web addresses added to the remote? readonly UPDATE_INTERVAL_DURATION = 24 * 60 * 60 * 1000; // 24 hours + private _backgroundUpdateTrigger$ = new Subject(); + private _triggerUpdate$ = new Subject(); update$ = this._triggerUpdate$.pipe( startWith(undefined), // Always emit once @@ -93,12 +100,8 @@ export class PhishingDataService { this._phishingMetaState.state$.pipe( first(), // Only take the first value to avoid an infinite loop when updating the cache below tap((metaState) => { - // Initial loading of web addresses set if not already loaded - if (!this._webAddressesSet) { - this._loadBlobToMemory(); - } - // Perform any updates in the background if needed - void this._backgroundUpdate(metaState); + // Perform any updates in the background + this._backgroundUpdateTrigger$.next(metaState); }), catchError((err: unknown) => { this.logService.error("[PhishingDataService] Background update failed to start.", err); @@ -106,7 +109,6 @@ export class PhishingDataService { }), ), ), - // Stop emitting when dispose() is called takeUntil(this._destroy$), share(), ); @@ -120,6 +122,7 @@ export class PhishingDataService { private resourceType: PhishingResourceType = PhishingResourceType.Links, ) { this.logService.debug("[PhishingDataService] Initializing service..."); + this.indexedDbService = new PhishingIndexedDbService(this.logService); this.taskSchedulerService.registerTaskHandler(ScheduledTaskNames.phishingDomainUpdate, () => { this._triggerUpdate$.next(); }); @@ -127,18 +130,20 @@ export class PhishingDataService { ScheduledTaskNames.phishingDomainUpdate, this.UPDATE_INTERVAL_DURATION, ); - this._setupLoadPipeline(); + this._backgroundUpdateTrigger$ + .pipe( + exhaustMap((currentMeta) => { + return this._backgroundUpdate(currentMeta); + }), + takeUntil(this._destroy$), + ) + .subscribe(); } dispose(): void { // Signal all pipelines to stop and unsubscribe stored subscriptions this._destroy$.next(); this._destroy$.complete(); - - // Clear web addresses set from memory - if (this._webAddressesSet !== null) { - this._webAddressesSet = null; - } } /** @@ -148,105 +153,120 @@ export class PhishingDataService { * @returns True if the URL is a known phishing web address, false otherwise */ async isPhishingWebAddress(url: URL): Promise { - if (!this._webAddressesSet) { - this.logService.debug("[PhishingDataService] Set not loaded; skipping check"); + this.logService.debug("[PhishingDataService] isPhishingWebAddress called for: " + url.href); + + // Skip non-http(s) protocols - phishing database only contains web URLs + // This prevents expensive fallback checks for chrome://, about:, file://, etc. + if (url.protocol !== "http:" && url.protocol !== "https:") { + this.logService.debug("[PhishingDataService] Skipping non-http(s) protocol: " + url.protocol); return false; } - const set = this._webAddressesSet!; + // Quick check for QA/dev test addresses + if (this._testWebAddresses.includes(url.href)) { + this.logService.info("[PhishingDataService] Found test web address: " + url.href); + return true; + } + const resource = getPhishingResources(this.resourceType); - // Custom matcher per resource - if (resource && resource?.match) { - for (const entry of set) { - if (resource.match(url, entry)) { - return true; - } + try { + // Quick lookup: check direct presence of href in IndexedDB + // Also check without trailing slash since browsers add it but DB entries may not have it + const urlHref = url.href; + const urlWithoutTrailingSlash = urlHref.endsWith("/") ? urlHref.slice(0, -1) : null; + + this.logService.debug("[PhishingDataService] Checking hasUrl on this string: " + urlHref); + let hasUrl = await this.indexedDbService.hasUrl(urlHref); + + // If not found and URL has trailing slash, try without it + if (!hasUrl && urlWithoutTrailingSlash) { + this.logService.debug( + "[PhishingDataService] Checking hasUrl without trailing slash: " + + urlWithoutTrailingSlash, + ); + hasUrl = await this.indexedDbService.hasUrl(urlWithoutTrailingSlash); } - return false; + + if (hasUrl) { + this.logService.info( + "[PhishingDataService] Found phishing web address through direct lookup: " + urlHref, + ); + return true; + } + } catch (err) { + this.logService.error("[PhishingDataService] IndexedDB lookup via hasUrl failed", err); } - // Default set-based lookup - return set.has(url.hostname); - } - - async getNextWebAddresses( - previous: PhishingDataMeta | null, - ): Promise | null> { - const prevMeta = previous ?? { timestamp: 0, checksum: "", applicationVersion: "" }; - const now = Date.now(); - - // Updates to check - const applicationVersion = await this.platformUtilsService.getApplicationVersion(); - const remoteChecksum = await this.fetchPhishingChecksum(this.resourceType); - - // Logic checks - const appVersionChanged = applicationVersion !== prevMeta.applicationVersion; - const masterChecksumChanged = remoteChecksum !== prevMeta.checksum; - - // Check for full updated - if (masterChecksumChanged || appVersionChanged) { - this.logService.info("[PhishingDataService] Checksum or version changed; Fetching ALL."); - const remoteUrl = getPhishingResources(this.resourceType)!.remoteUrl; - const blob = await this.fetchAndCompress(remoteUrl); - return { - blob, - meta: { checksum: remoteChecksum, timestamp: now, applicationVersion }, - }; - } - - // Check for daily file - const isCacheExpired = now - prevMeta.timestamp > this.UPDATE_INTERVAL_DURATION; - - if (isCacheExpired) { - this.logService.info("[PhishingDataService] Daily cache expired; Fetching TODAY's"); - const url = getPhishingResources(this.resourceType)!.todayUrl; - const newLines = await this.fetchText(url); - const prevBlob = (await firstValueFrom(this._phishingBlobState.state$)) ?? ""; - const oldText = prevBlob ? await this._decompressString(prevBlob) : ""; - - // Join the new lines to the existing list - const combined = (oldText ? oldText + "\n" : "") + newLines.join("\n"); - - return { - blob: await this._compressString(combined), - meta: { - checksum: remoteChecksum, - timestamp: now, // Reset the timestamp - applicationVersion, - }, - }; - } - - return null; + // If a custom matcher is provided and enabled, use cursor-based search. + // This avoids loading all URLs into memory and allows early exit on first match. + // Can be disabled via useCustomMatcher: false for performance reasons. + if (resource && resource.match && resource.useCustomMatcher !== false) { + try { + this.logService.debug( + "[PhishingDataService] Starting cursor-based search for: " + url.href, + ); + const startTime = performance.now(); + + const found = await this.indexedDbService.findMatchingUrl((entry) => + resource.match(url, entry), + ); + + const endTime = performance.now(); + const duration = (endTime - startTime).toFixed(2); + this.logService.debug( + `[PhishingDataService] Cursor-based search completed in ${duration}ms for: ${url.href} (found: ${found})`, + ); + + if (found) { + this.logService.info( + "[PhishingDataService] Found phishing web address through custom matcher: " + url.href, + ); + } else { + this.logService.debug( + "[PhishingDataService] No match found, returning false for: " + url.href, + ); + } + return found; + } catch (err) { + this.logService.error("[PhishingDataService] Error running custom matcher", err); + this.logService.debug( + "[PhishingDataService] Returning false due to error for: " + url.href, + ); + return false; + } + } + this.logService.debug( + "[PhishingDataService] No custom matcher, returning false for: " + url.href, + ); + return false; } + // [FIXME] Pull fetches into api service private async fetchPhishingChecksum(type: PhishingResourceType = PhishingResourceType.Domains) { const checksumUrl = getPhishingResources(type)!.checksumUrl; - const response = await this.apiService.nativeFetch(new Request(checksumUrl)); - if (!response.ok) { - throw new Error(`[PhishingDataService] Failed to fetch checksum: ${response.status}`); - } - return response.text(); - } - private async fetchAndCompress(url: string): Promise { - const response = await this.apiService.nativeFetch(new Request(url)); - if (!response.ok) { - throw new Error("Fetch failed"); - } + this.logService.debug(`[PhishingDataService] Fetching checksum from: ${checksumUrl}`); - const downloadStream = response.body!; - // Pipe through CompressionStream while it's downloading - const compressedStream = downloadStream.pipeThrough(new CompressionStream("gzip")); - // Convert to ArrayBuffer - const buffer = await new Response(compressedStream).arrayBuffer(); - const bytes = new Uint8Array(buffer); + try { + const response = await this.apiService.nativeFetch(new Request(checksumUrl)); + if (!response.ok) { + throw new Error( + `[PhishingDataService] Failed to fetch checksum: ${response.status} ${response.statusText}`, + ); + } - // Return as Base64 for storage - return (bytes as any).toBase64 ? (bytes as any).toBase64() : this._uint8ToBase64Fallback(bytes); + return await response.text(); + } catch (error) { + this.logService.error( + `[PhishingDataService] Checksum fetch failed from ${checksumUrl}`, + error, + ); + throw error; + } } - private async fetchText(url: string) { + // [FIXME] Pull fetches into api service + private async fetchToday(url: string) { const response = await this.apiService.nativeFetch(new Request(url)); if (!response.ok) { @@ -258,171 +278,196 @@ export class PhishingDataService { private getTestWebAddresses() { const flag = devFlagEnabled("testPhishingUrls"); + // Normalize URLs by converting to URL object and back to ensure consistent format (e.g., trailing slashes) + const testWebAddresses: string[] = [ + new URL("http://phishing.testcategory.com").href, + new URL("https://phishing.testcategory.com").href, + new URL("https://phishing.testcategory.com/block").href, + ]; if (!flag) { - return []; + return testWebAddresses; } const webAddresses = devFlagValue("testPhishingUrls") as unknown[]; if (webAddresses && webAddresses instanceof Array) { this.logService.debug( - "[PhishingDetectionService] Dev flag enabled for testing phishing detection. Adding test phishing web addresses:", + "[PhishingDataService] Dev flag enabled for testing phishing detection. Adding test phishing web addresses:", webAddresses, ); - return webAddresses as string[]; + // Normalize dev flag URLs as well, filtering out invalid ones + const normalizedDevAddresses = (webAddresses as string[]) + .filter((addr) => { + try { + new URL(addr); + return true; + } catch { + this.logService.warning( + `[PhishingDataService] Invalid test URL in dev flag, skipping: ${addr}`, + ); + return false; + } + }) + .map((addr) => new URL(addr).href); + return testWebAddresses.concat(normalizedDevAddresses); } - return []; + return testWebAddresses; } - // Runs the update flow in the background and retries up to 3 times on failure - private async _backgroundUpdate(previous: PhishingDataMeta | null): Promise { - this.logService.info(`[PhishingDataService] Update web addresses triggered...`); - const phishingMeta: PhishingDataMeta = previous ?? { - timestamp: 0, - checksum: "", - applicationVersion: "", - }; - // Start time for logging performance of update - const startTime = Date.now(); - const maxAttempts = 3; - const delayMs = 5 * 60 * 1000; // 5 minutes + private _getUpdatedMeta(): Observable { + return defer(() => { + const now = Date.now(); - for (let attempt = 1; attempt <= maxAttempts; attempt++) { - try { - const next = await this.getNextWebAddresses(phishingMeta); - if (!next) { - return; // No update needed - } + return forkJoin({ + applicationVersion: from(this.platformUtilsService.getApplicationVersion()), + remoteChecksum: from(this.fetchPhishingChecksum(this.resourceType)), + }).pipe( + map(({ applicationVersion, remoteChecksum }) => { + return { + checksum: remoteChecksum, + timestamp: now, + applicationVersion, + }; + }), + ); + }); + } - if (next.meta) { - await this._phishingMetaState.update(() => next!.meta!); - } - if (next.blob) { - await this._phishingBlobState.update(() => next!.blob!); - this._loadBlobToMemory(); - } + // Streams the full phishing data set and saves it to IndexedDB + private _updateFullDataSet() { + const resource = getPhishingResources(this.resourceType); - // Performance logging - const elapsed = Date.now() - startTime; - this.logService.info(`[PhishingDataService] Phishing data cache updated in ${elapsed}ms`); - } catch (err) { - this.logService.error( - `[PhishingDataService] Unable to update web addresses. Attempt ${attempt}.`, - err, - ); - if (attempt < maxAttempts) { - await new Promise((res) => setTimeout(res, delayMs)); - } else { - const elapsed = Date.now() - startTime; - this.logService.error( - `[PhishingDataService] Retries unsuccessful after ${elapsed}ms. Unable to update web addresses.`, - err, + if (!resource?.remoteUrl) { + return throwError(() => new Error("Invalid resource URL")); + } + + this.logService.info(`[PhishingDataService] Starting FULL update using ${resource.remoteUrl}`); + return from(this.apiService.nativeFetch(new Request(resource.remoteUrl))).pipe( + switchMap((response) => { + if (!response.ok || !response.body) { + return throwError( + () => + new Error( + `[PhishingDataService] Full fetch failed: ${response.status}, ${response.statusText}`, + ), ); } - } - } + + return from(this.indexedDbService.saveUrlsFromStream(response.body)); + }), + catchError((err: unknown) => { + this.logService.error( + `[PhishingDataService] Full dataset update failed using primary source ${err}`, + ); + this.logService.warning( + `[PhishingDataService] Falling back to: ${resource.fallbackUrl} (Note: Fallback data may be less up-to-date)`, + ); + // Try fallback URL + return from(this.apiService.nativeFetch(new Request(resource.fallbackUrl))).pipe( + switchMap((fallbackResponse) => { + if (!fallbackResponse.ok || !fallbackResponse.body) { + return throwError( + () => + new Error( + `[PhishingDataService] Fallback fetch failed: ${fallbackResponse.status}, ${fallbackResponse.statusText}`, + ), + ); + } + + return from(this.indexedDbService.saveUrlsFromStream(fallbackResponse.body)); + }), + catchError((fallbackError: unknown) => { + this.logService.error(`[PhishingDataService] Fallback source failed`); + return throwError(() => fallbackError); + }), + ); + }), + ); } - // Sets up the load pipeline to load the blob into memory when triggered - private _setupLoadPipeline(): void { - this._loadTrigger$ - .pipe( - switchMap(() => - this._phishingBlobState.state$.pipe( - first(), - switchMap((blobBase64) => { - if (!blobBase64) { - return of(undefined); - } - // Note: _decompressString wraps a promise that cannot be aborted - // If performance improvements are needed, consider migrating to a cancellable approach - return from(this._decompressString(blobBase64)).pipe( - map((text) => { - const lines = text.split(/\r?\n/); - const newWebAddressesSet = new Set(lines); - this._testWebAddresses.forEach((a) => newWebAddressesSet.add(a)); - this._webAddressesSet = new Set(newWebAddressesSet); - this.logService.info( - `[PhishingDataService] loaded ${this._webAddressesSet.size} addresses into memory from blob`, - ); - }), + private _updateDailyDataSet() { + this.logService.info("[PhishingDataService] Starting DAILY update..."); + + const todayUrl = getPhishingResources(this.resourceType)?.todayUrl; + if (!todayUrl) { + return throwError(() => new Error("Today URL missing")); + } + + return from(this.fetchToday(todayUrl)).pipe( + switchMap((lines) => from(this.indexedDbService.addUrls(lines))), + ); + } + + private _backgroundUpdate( + previous: PhishingDataMeta | null, + ): Observable { + // Use defer to restart timer if retry is activated + return defer(() => { + const startTime = Date.now(); + this.logService.info(`[PhishingDataService] Update triggered...`); + + // Get updated meta info + return this._getUpdatedMeta().pipe( + // Update full data set if application version or checksum changed + concatMap((newMeta) => + iif( + () => { + const appVersionChanged = newMeta.applicationVersion !== previous?.applicationVersion; + const checksumChanged = newMeta.checksum !== previous?.checksum; + + this.logService.info( + `[PhishingDataService] Checking if full update is needed: appVersionChanged=${appVersionChanged}, checksumChanged=${checksumChanged}`, ); - }), - catchError((err: unknown) => { - this.logService.error("[PhishingDataService] Failed to load blob into memory", err); - return of(undefined); - }), + return appVersionChanged || checksumChanged; + }, + this._updateFullDataSet().pipe(map(() => ({ meta: newMeta, updated: true }))), + of({ meta: newMeta, updated: false }), ), ), - catchError((err: unknown) => { - this.logService.error("[PhishingDataService] Load pipeline failed", err); - return of(undefined); + // Update daily data set if last update was more than UPDATE_INTERVAL_DURATION ago + concatMap((result) => + iif( + () => { + const isCacheExpired = + Date.now() - (previous?.timestamp ?? 0) > this.UPDATE_INTERVAL_DURATION; + return isCacheExpired; + }, + this._updateDailyDataSet().pipe(map(() => ({ meta: result.meta, updated: true }))), + of(result), + ), + ), + concatMap((result) => { + if (!result.updated) { + this.logService.debug(`[PhishingDataService] No update needed, metadata unchanged`); + return of(previous); + } + + this.logService.debug(`[PhishingDataService] Updated phishing meta data:`, result.meta); + return from(this._phishingMetaState.update(() => result.meta)).pipe( + tap(() => { + const elapsed = Date.now() - startTime; + this.logService.info(`[PhishingDataService] Updated data set in ${elapsed}ms`); + }), + ); }), - takeUntil(this._destroy$), - share(), - ) - .subscribe(); - } - - // [FIXME] Move compression helpers to a shared utils library - // to separate from phishing data service. - // ------------------------- Blob and Compression Handling ------------------------- - private async _compressString(input: string): Promise { - try { - const stream = new Blob([input]).stream().pipeThrough(new CompressionStream("gzip")); - - const compressedBuffer = await new Response(stream).arrayBuffer(); - const bytes = new Uint8Array(compressedBuffer); - - // Modern browsers support direct toBase64 conversion - // For older support, use fallback - return (bytes as any).toBase64 - ? (bytes as any).toBase64() - : this._uint8ToBase64Fallback(bytes); - } catch (err) { - this.logService.error("[PhishingDataService] Compression failed", err); - return btoa(encodeURIComponent(input)); - } - } - - private async _decompressString(base64: string): Promise { - try { - // Modern browsers support direct toBase64 conversion - // For older support, use fallback - const bytes = (Uint8Array as any).fromBase64 - ? (Uint8Array as any).fromBase64(base64) - : this._base64ToUint8Fallback(base64); - if (bytes == null) { - throw new Error("Base64 decoding resulted in null"); - } - const byteResponse = new Response(bytes); - if (!byteResponse.body) { - throw new Error("Response body is null"); - } - const stream = byteResponse.body.pipeThrough(new DecompressionStream("gzip")); - const streamResponse = new Response(stream); - return await streamResponse.text(); - } catch (err) { - this.logService.error("[PhishingDataService] Decompression failed", err); - return decodeURIComponent(atob(base64)); - } - } - - // Trigger a load of the blob into memory - private _loadBlobToMemory(): void { - this._loadTrigger$.next(); - } - private _uint8ToBase64Fallback(bytes: Uint8Array): string { - const CHUNK_SIZE = 0x8000; // 32KB chunks - let binary = ""; - for (let i = 0; i < bytes.length; i += CHUNK_SIZE) { - const chunk = bytes.subarray(i, i + CHUNK_SIZE); - binary += String.fromCharCode.apply(null, chunk as any); - } - return btoa(binary); - } - - private _base64ToUint8Fallback(base64: string): Uint8Array { - const binary = atob(base64); - return Uint8Array.from(binary, (c) => c.charCodeAt(0)); + retry({ + count: 2, // Total 3 attempts (initial + 2 retries) + delay: (error, retryCount) => { + this.logService.error( + `[PhishingDataService] Attempt ${retryCount} failed. Retrying in 5m...`, + error, + ); + return timer(5 * 60 * 1000); // Wait 5 mins before next attempt + }, + }), + catchError((err: unknown) => { + const elapsed = Date.now() - startTime; + this.logService.error( + `[PhishingDataService] Retries unsuccessful after ${elapsed}ms.`, + err, + ); + return of(previous); + }), + ); + }); } } diff --git a/apps/browser/src/dirt/phishing-detection/services/phishing-detection.service.ts b/apps/browser/src/dirt/phishing-detection/services/phishing-detection.service.ts index 815007e1d4c..6ca5bad8942 100644 --- a/apps/browser/src/dirt/phishing-detection/services/phishing-detection.service.ts +++ b/apps/browser/src/dirt/phishing-detection/services/phishing-detection.service.ts @@ -1,10 +1,10 @@ import { - concatMap, distinctUntilChanged, EMPTY, filter, map, merge, + mergeMap, Subject, switchMap, tap, @@ -43,6 +43,7 @@ export class PhishingDetectionService { private static _tabUpdated$ = new Subject(); private static _ignoredHostnames = new Set(); private static _didInit = false; + private static _activeSearchCount = 0; static initialize( logService: LogService, @@ -63,7 +64,7 @@ export class PhishingDetectionService { tap((message) => logService.debug(`[PhishingDetectionService] user selected continue for ${message.url}`), ), - concatMap(async (message) => { + mergeMap(async (message) => { const url = new URL(message.url); this._ignoredHostnames.add(url.hostname); await BrowserApi.navigateTabToUrl(message.tabId, url); @@ -88,23 +89,40 @@ export class PhishingDetectionService { prev.ignored === curr.ignored, ), tap((event) => logService.debug(`[PhishingDetectionService] processing event:`, event)), - concatMap(async ({ tabId, url, ignored }) => { - if (ignored) { - // The next time this host is visited, block again - this._ignoredHostnames.delete(url.hostname); - return; - } - const isPhishing = await phishingDataService.isPhishingWebAddress(url); - if (!isPhishing) { - return; - } - - const phishingWarningPage = new URL( - BrowserApi.getRuntimeURL("popup/index.html#/security/phishing-warning") + - `?phishingUrl=${url.toString()}`, + // Use mergeMap for parallel processing - each tab check runs independently + // Concurrency limit of 5 prevents overwhelming IndexedDB + mergeMap(async ({ tabId, url, ignored }) => { + this._activeSearchCount++; + const searchId = `${tabId}-${Date.now()}`; + logService.debug( + `[PhishingDetectionService] Search STARTED [${searchId}] for ${url.href} (active: ${this._activeSearchCount}/5)`, ); - await BrowserApi.navigateTabToUrl(tabId, phishingWarningPage); - }), + const startTime = performance.now(); + + try { + if (ignored) { + // The next time this host is visited, block again + this._ignoredHostnames.delete(url.hostname); + return; + } + const isPhishing = await phishingDataService.isPhishingWebAddress(url); + if (!isPhishing) { + return; + } + + const phishingWarningPage = new URL( + BrowserApi.getRuntimeURL("popup/index.html#/security/phishing-warning") + + `?phishingUrl=${url.toString()}`, + ); + await BrowserApi.navigateTabToUrl(tabId, phishingWarningPage); + } finally { + this._activeSearchCount--; + const duration = (performance.now() - startTime).toFixed(2); + logService.debug( + `[PhishingDetectionService] Search FINISHED [${searchId}] for ${url.href} in ${duration}ms (active: ${this._activeSearchCount}/5)`, + ); + } + }, 5), ); const onCancelCommand$ = messageListener diff --git a/apps/browser/src/dirt/phishing-detection/services/phishing-indexeddb.service.spec.ts b/apps/browser/src/dirt/phishing-detection/services/phishing-indexeddb.service.spec.ts index 75bd634b1fc..05aeced16fc 100644 --- a/apps/browser/src/dirt/phishing-detection/services/phishing-indexeddb.service.spec.ts +++ b/apps/browser/src/dirt/phishing-detection/services/phishing-indexeddb.service.spec.ts @@ -355,6 +355,89 @@ describe("PhishingIndexedDbService", () => { }); }); + describe("findMatchingUrl", () => { + it("returns true when matcher finds a match", async () => { + mockStore.set("https://example.com", { url: "https://example.com" }); + mockStore.set("https://phishing.net", { url: "https://phishing.net" }); + mockStore.set("https://test.org", { url: "https://test.org" }); + + const matcher = (url: string) => url.includes("phishing"); + const result = await service.findMatchingUrl(matcher); + + expect(result).toBe(true); + expect(mockDb.transaction).toHaveBeenCalledWith("phishing-urls", "readonly"); + expect(mockObjectStore.openCursor).toHaveBeenCalled(); + }); + + it("returns false when no URLs match", async () => { + mockStore.set("https://example.com", { url: "https://example.com" }); + mockStore.set("https://test.org", { url: "https://test.org" }); + + const matcher = (url: string) => url.includes("notfound"); + const result = await service.findMatchingUrl(matcher); + + expect(result).toBe(false); + }); + + it("returns false when store is empty", async () => { + const matcher = (url: string) => url.includes("anything"); + const result = await service.findMatchingUrl(matcher); + + expect(result).toBe(false); + }); + + it("exits early on first match without iterating all records", async () => { + mockStore.set("https://match1.com", { url: "https://match1.com" }); + mockStore.set("https://match2.com", { url: "https://match2.com" }); + mockStore.set("https://match3.com", { url: "https://match3.com" }); + + const matcherCallCount = jest + .fn() + .mockImplementation((url: string) => url.includes("match2")); + await service.findMatchingUrl(matcherCallCount); + + // Matcher should be called for match1.com and match2.com, but NOT match3.com + // because it exits early on first match + expect(matcherCallCount).toHaveBeenCalledWith("https://match1.com"); + expect(matcherCallCount).toHaveBeenCalledWith("https://match2.com"); + expect(matcherCallCount).not.toHaveBeenCalledWith("https://match3.com"); + expect(matcherCallCount).toHaveBeenCalledTimes(2); + }); + + it("supports complex matcher logic", async () => { + mockStore.set("https://example.com/path", { url: "https://example.com/path" }); + mockStore.set("https://test.org", { url: "https://test.org" }); + mockStore.set("https://phishing.net/login", { url: "https://phishing.net/login" }); + + const matcher = (url: string) => { + return url.includes("phishing") && url.includes("login"); + }; + const result = await service.findMatchingUrl(matcher); + + expect(result).toBe(true); + }); + + it("returns false on error", async () => { + const error = new Error("IndexedDB error"); + mockOpenRequest.error = error; + (global.indexedDB.open as jest.Mock).mockImplementation(() => { + setTimeout(() => { + mockOpenRequest.onerror?.(); + }, 0); + return mockOpenRequest; + }); + + const matcher = (url: string) => url.includes("test"); + const result = await service.findMatchingUrl(matcher); + + expect(result).toBe(false); + expect(logService.error).toHaveBeenCalledWith( + "[PhishingIndexedDbService] Cursor search failed", + expect.any(Error), + ); + }); + }); + describe("database initialization", () => { it("creates object store with keyPath on upgrade", async () => { mockDb.objectStoreNames.contains.mockReturnValue(false); diff --git a/apps/browser/src/dirt/phishing-detection/services/phishing-indexeddb.service.ts b/apps/browser/src/dirt/phishing-detection/services/phishing-indexeddb.service.ts index 099839a38d9..b94b4047faf 100644 --- a/apps/browser/src/dirt/phishing-detection/services/phishing-indexeddb.service.ts +++ b/apps/browser/src/dirt/phishing-detection/services/phishing-indexeddb.service.ts @@ -165,6 +165,60 @@ export class PhishingIndexedDbService { }); } + /** + * Checks if any URL in the database matches the given matcher function. + * Uses a cursor to iterate through records without loading all into memory. + * Returns immediately on first match for optimal performance. + * + * @param matcher - Function that tests each URL and returns true if it matches + * @returns `true` if any URL matches, `false` if none match or on error + */ + async findMatchingUrl(matcher: (url: string) => boolean): Promise { + this.logService.debug("[PhishingIndexedDbService] Searching for matching URL with cursor..."); + + let db: IDBDatabase | null = null; + try { + db = await this.openDatabase(); + return await this.cursorSearch(db, matcher); + } catch (error) { + this.logService.error("[PhishingIndexedDbService] Cursor search failed", error); + return false; + } finally { + db?.close(); + } + } + + /** + * Performs cursor-based search through all URLs. + * Tests each URL with the matcher without accumulating records in memory. + */ + private cursorSearch(db: IDBDatabase, matcher: (url: string) => boolean): Promise { + return new Promise((resolve, reject) => { + const req = db + .transaction(this.STORE_NAME, "readonly") + .objectStore(this.STORE_NAME) + .openCursor(); + req.onerror = () => reject(req.error); + req.onsuccess = (e) => { + const cursor = (e.target as IDBRequest).result; + if (cursor) { + const url = (cursor.value as PhishingUrlRecord).url; + // Test the URL immediately without accumulating in memory + if (matcher(url)) { + // Found a match + resolve(true); + return; + } + // No match, continue to next record + cursor.continue(); + } else { + // Reached end of records without finding a match + resolve(false); + } + }; + }); + } + /** * Saves phishing URLs directly from a stream. * Processes data incrementally to minimize memory usage.