diff --git a/apps/browser/src/dirt/phishing-detection/phishing-resources.ts b/apps/browser/src/dirt/phishing-detection/phishing-resources.ts index 4cd155c8ae3..c8dd083e6a6 100644 --- a/apps/browser/src/dirt/phishing-detection/phishing-resources.ts +++ b/apps/browser/src/dirt/phishing-detection/phishing-resources.ts @@ -5,6 +5,8 @@ export type PhishingResource = { todayUrl: string; /** Matcher used to decide whether a given URL matches an entry from this resource */ match: (url: URL, entry: string) => boolean; + /** Whether to use the custom matcher. If false, only exact hasUrl lookups are used. Default: true */ + useCustomMatcher?: boolean; }; export const PhishingResourceType = Object.freeze({ @@ -50,6 +52,8 @@ export const PHISHING_RESOURCES: Record { if (!entry) { return false; diff --git a/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.spec.ts b/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.spec.ts index c277b8d33f8..2d6c7a5a651 100644 --- a/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.spec.ts +++ b/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.spec.ts @@ -1,4 +1,5 @@ import { MockProxy, mock } from "jest-mock-extended"; +import { firstValueFrom } from "rxjs"; import { ApiService } from "@bitwarden/common/abstractions/api.service"; import { PlatformUtilsService } from "@bitwarden/common/platform/abstractions/platform-utils.service"; @@ -9,66 +10,8 @@ import { import { FakeGlobalStateProvider } from "@bitwarden/common/spec"; import { LogService } from "@bitwarden/logging"; -import { - PhishingDataService, - PHISHING_DOMAINS_META_KEY, - PHISHING_DOMAINS_BLOB_KEY, - PhishingDataMeta, - PhishingDataBlob, -} from "./phishing-data.service"; - -const flushPromises = () => - new Promise((resolve) => jest.requireActual("timers").setImmediate(resolve)); - -// [FIXME] Move mocking and compression helpers to a shared test utils library -// to separate from phishing data service tests. -export const setupPhishingMocks = (mockedResult: string | ArrayBuffer = "mocked-data") => { - // Store original globals - const originals = { - Response: global.Response, - CompressionStream: global.CompressionStream, - DecompressionStream: global.DecompressionStream, - Blob: global.Blob, - atob: global.atob, - btoa: global.btoa, - }; - - // Mock missing or browser-only globals - global.atob = (str) => Buffer.from(str, "base64").toString("binary"); - global.btoa = (str) => Buffer.from(str, "binary").toString("base64"); - - (global as any).CompressionStream = class {}; - (global as any).DecompressionStream = class {}; - - global.Blob = class { - constructor(public parts: any[]) {} - stream() { - return { pipeThrough: () => ({}) }; - } - } as any; - - global.Response = class { - body = { pipeThrough: () => ({}) }; - // Return string for decompression - text() { - return Promise.resolve(typeof mockedResult === "string" ? mockedResult : ""); - } - // Return ArrayBuffer for compression - arrayBuffer() { - if (typeof mockedResult === "string") { - const bytes = new TextEncoder().encode(mockedResult); - return Promise.resolve(bytes.buffer); - } - - return Promise.resolve(mockedResult); - } - } as any; - - // Cleanup function - return () => { - Object.assign(global, originals); - }; -}; +import { PHISHING_DOMAINS_META_KEY, PhishingDataService } from "./phishing-data.service"; +import type { PhishingIndexedDbService } from "./phishing-indexeddb.service"; describe("PhishingDataService", () => { let service: PhishingDataService; @@ -76,33 +19,31 @@ describe("PhishingDataService", () => { let taskSchedulerService: TaskSchedulerService; let logService: MockProxy; let platformUtilsService: MockProxy; + let mockIndexedDbService: MockProxy; const fakeGlobalStateProvider: FakeGlobalStateProvider = new FakeGlobalStateProvider(); - - const setMockMeta = (state: PhishingDataMeta) => { - fakeGlobalStateProvider.getFake(PHISHING_DOMAINS_META_KEY).stateSubject.next(state); - return state; - }; - const setMockBlob = (state: PhishingDataBlob) => { - fakeGlobalStateProvider.getFake(PHISHING_DOMAINS_BLOB_KEY).stateSubject.next(state); - return state; - }; - let fetchChecksumSpy: jest.SpyInstance; - let fetchAndCompressSpy: jest.SpyInstance; - - const mockMeta: PhishingDataMeta = { - checksum: "abc", - timestamp: Date.now(), - applicationVersion: "1.0.0", - }; - const mockBlob = "http://phish.com\nhttps://badguy.net"; - const mockCompressedBlob = - "H4sIAAAAAAAA/8vMTSzJzM9TSE7MLchJLElVyE9TyC9KSS1S0FFIz8hLz0ksSQUAtK7XMSYAAAA="; beforeEach(async () => { - jest.useFakeTimers(); + jest.clearAllMocks(); + + // Mock Request global if not available + if (typeof Request === "undefined") { + (global as any).Request = class { + constructor(public url: string) {} + }; + } + apiService = mock(); logService = mock(); + mockIndexedDbService = mock(); + + // Set default mock behaviors + mockIndexedDbService.hasUrl.mockResolvedValue(false); + mockIndexedDbService.loadAllUrls.mockResolvedValue([]); + mockIndexedDbService.findMatchingUrl.mockResolvedValue(false); + mockIndexedDbService.saveUrls.mockResolvedValue(undefined); + mockIndexedDbService.addUrls.mockResolvedValue(undefined); + mockIndexedDbService.saveUrlsFromStream.mockResolvedValue(undefined); platformUtilsService = mock(); platformUtilsService.getApplicationVersion.mockResolvedValue("1.0.0"); @@ -116,217 +57,308 @@ describe("PhishingDataService", () => { logService, platformUtilsService, ); - fetchChecksumSpy = jest.spyOn(service as any, "fetchPhishingChecksum"); - fetchAndCompressSpy = jest.spyOn(service as any, "fetchAndCompress"); + // Replace the IndexedDB service with our mock + service["indexedDbService"] = mockIndexedDbService; + + fetchChecksumSpy = jest.spyOn(service as any, "fetchPhishingChecksum"); fetchChecksumSpy.mockResolvedValue("new-checksum"); - fetchAndCompressSpy.mockResolvedValue("compressed-blob"); }); describe("initialization", () => { - beforeEach(() => { - jest.spyOn(service as any, "_compressString").mockResolvedValue(mockCompressedBlob); - jest.spyOn(service as any, "_decompressString").mockResolvedValue(mockBlob); + it("should initialize with IndexedDB service", () => { + expect(service["indexedDbService"]).toBeDefined(); }); - it("should perform background update", async () => { - platformUtilsService.getApplicationVersion.mockResolvedValue("1.0.x"); - jest - .spyOn(service as any, "getNextWebAddresses") - .mockResolvedValue({ meta: mockMeta, blob: mockBlob }); - - setMockBlob(mockBlob); - setMockMeta(mockMeta); - - const sub = service.update$.subscribe(); - await flushPromises(); - - const url = new URL("http://phish.com"); - const QAurl = new URL("http://phishing.testcategory.com"); + it("should detect QA test addresses - http protocol", async () => { + const url = new URL("http://phishing.testcategory.com"); expect(await service.isPhishingWebAddress(url)).toBe(true); - expect(await service.isPhishingWebAddress(QAurl)).toBe(true); + // IndexedDB should not be called for test addresses + expect(mockIndexedDbService.hasUrl).not.toHaveBeenCalled(); + }); - sub.unsubscribe(); + it("should detect QA test addresses - https protocol", async () => { + const url = new URL("https://phishing.testcategory.com"); + expect(await service.isPhishingWebAddress(url)).toBe(true); + expect(mockIndexedDbService.hasUrl).not.toHaveBeenCalled(); + }); + + it("should detect QA test addresses - specific subpath /block", async () => { + const url = new URL("https://phishing.testcategory.com/block"); + expect(await service.isPhishingWebAddress(url)).toBe(true); + expect(mockIndexedDbService.hasUrl).not.toHaveBeenCalled(); + }); + + it("should NOT detect QA test addresses - different subpath", async () => { + mockIndexedDbService.hasUrl.mockResolvedValue(false); + mockIndexedDbService.findMatchingUrl.mockResolvedValue(false); + + const url = new URL("https://phishing.testcategory.com/other"); + const result = await service.isPhishingWebAddress(url); + + // This should NOT be detected as a test address since only /block subpath is hardcoded + expect(result).toBe(false); + }); + + it("should detect QA test addresses - root path with trailing slash", async () => { + const url = new URL("https://phishing.testcategory.com/"); + const result = await service.isPhishingWebAddress(url); + + // This SHOULD be detected since URLs are normalized (trailing slash added to root URLs) + expect(result).toBe(true); + expect(mockIndexedDbService.hasUrl).not.toHaveBeenCalled(); }); }); describe("isPhishingWebAddress", () => { - beforeEach(() => { - jest.spyOn(service as any, "_compressString").mockResolvedValue(mockCompressedBlob); - jest.spyOn(service as any, "_decompressString").mockResolvedValue(mockBlob); - }); + it("should detect a phishing web address using quick hasUrl lookup", async () => { + // Mock hasUrl to return true for direct hostname match + mockIndexedDbService.hasUrl.mockResolvedValue(true); - it("should detect a phishing web address", async () => { - service["_webAddressesSet"] = new Set(["phish.com", "badguy.net"]); - - const url = new URL("http://phish.com"); + const url = new URL("http://phish.com/testing-param"); const result = await service.isPhishingWebAddress(url); expect(result).toBe(true); + expect(mockIndexedDbService.hasUrl).toHaveBeenCalledWith("http://phish.com/testing-param"); + // Should not fall back to custom matcher when hasUrl returns true + expect(mockIndexedDbService.findMatchingUrl).not.toHaveBeenCalled(); + }); + + it("should return false when hasUrl returns false (custom matcher disabled)", async () => { + // Mock hasUrl to return false (no direct href match) + mockIndexedDbService.hasUrl.mockResolvedValue(false); + + const url = new URL("http://phish.com/path"); + const result = await service.isPhishingWebAddress(url); + + // Custom matcher is currently disabled (useCustomMatcher: false), so result is false + expect(result).toBe(false); + expect(mockIndexedDbService.hasUrl).toHaveBeenCalledWith("http://phish.com/path"); + // Custom matcher should NOT be called since it's disabled + expect(mockIndexedDbService.findMatchingUrl).not.toHaveBeenCalled(); }); it("should not detect a safe web address", async () => { - service["_webAddressesSet"] = new Set(["phish.com", "badguy.net"]); + // Mock hasUrl to return false + mockIndexedDbService.hasUrl.mockResolvedValue(false); + const url = new URL("http://safe.com"); const result = await service.isPhishingWebAddress(url); + expect(result).toBe(false); + expect(mockIndexedDbService.hasUrl).toHaveBeenCalledWith("http://safe.com/"); + // Custom matcher is disabled, so findMatchingUrl should NOT be called + expect(mockIndexedDbService.findMatchingUrl).not.toHaveBeenCalled(); }); - it("should match against root web address", async () => { - service["_webAddressesSet"] = new Set(["phish.com", "badguy.net"]); - const url = new URL("http://phish.com/about"); + it("should not match against root web address with subpaths (custom matcher disabled)", async () => { + // Mock hasUrl to return false (no direct href match) + mockIndexedDbService.hasUrl.mockResolvedValue(false); + + const url = new URL("http://phish.com/login/page"); const result = await service.isPhishingWebAddress(url); - expect(result).toBe(true); + + expect(result).toBe(false); + expect(mockIndexedDbService.hasUrl).toHaveBeenCalledWith("http://phish.com/login/page"); + // Custom matcher is disabled, so findMatchingUrl should NOT be called + expect(mockIndexedDbService.findMatchingUrl).not.toHaveBeenCalled(); }); - it("should not error on empty state", async () => { - service["_webAddressesSet"] = null; + it("should not match against root web address with different subpaths (custom matcher disabled)", async () => { + // Mock hasUrl to return false (no direct hostname match) + mockIndexedDbService.hasUrl.mockResolvedValue(false); + + const url = new URL("http://phish.com/login/page2"); + const result = await service.isPhishingWebAddress(url); + + expect(result).toBe(false); + expect(mockIndexedDbService.hasUrl).toHaveBeenCalledWith("http://phish.com/login/page2"); + // Custom matcher is disabled, so findMatchingUrl should NOT be called + expect(mockIndexedDbService.findMatchingUrl).not.toHaveBeenCalled(); + }); + + it("should handle IndexedDB errors gracefully", async () => { + // Mock hasUrl to throw error + mockIndexedDbService.hasUrl.mockRejectedValue(new Error("hasUrl error")); + const url = new URL("http://phish.com/about"); const result = await service.isPhishingWebAddress(url); + expect(result).toBe(false); + expect(logService.error).toHaveBeenCalledWith( + "[PhishingDataService] IndexedDB lookup via hasUrl failed", + expect.any(Error), + ); + // Custom matcher is disabled, so no custom matcher error is expected + expect(mockIndexedDbService.findMatchingUrl).not.toHaveBeenCalled(); }); }); - describe("getNextWebAddresses", () => { - beforeEach(() => { - jest.spyOn(service as any, "_compressString").mockResolvedValue(mockCompressedBlob); - jest.spyOn(service as any, "_decompressString").mockResolvedValue(mockBlob); + describe("data updates", () => { + it("should update full dataset via stream", async () => { + // Mock full dataset update + const mockResponse = { + ok: true, + body: {} as ReadableStream, + } as Response; + apiService.nativeFetch.mockResolvedValue(mockResponse); + + await firstValueFrom(service["_updateFullDataSet"]()); + + expect(mockIndexedDbService.saveUrlsFromStream).toHaveBeenCalled(); }); - it("refetches all web addresses if applicationVersion has changed", async () => { - const prev: PhishingDataMeta = { - timestamp: Date.now() - 60000, - checksum: "old", - applicationVersion: "1.0.0", - }; - fetchChecksumSpy.mockResolvedValue("new"); + it("should update daily dataset via addUrls", async () => { + // Mock daily update + const mockResponse = { + ok: true, + text: jest.fn().mockResolvedValue("newphish.com\nanotherbad.net"), + } as unknown as Response; + apiService.nativeFetch.mockResolvedValue(mockResponse); + + await firstValueFrom(service["_updateDailyDataSet"]()); + + expect(mockIndexedDbService.addUrls).toHaveBeenCalledWith(["newphish.com", "anotherbad.net"]); + }); + + it("should get updated meta information", async () => { + fetchChecksumSpy.mockResolvedValue("new-checksum"); platformUtilsService.getApplicationVersion.mockResolvedValue("2.0.0"); - const result = await service.getNextWebAddresses(prev); + const meta = await firstValueFrom(service["_getUpdatedMeta"]()); - expect(result!.blob).toBe("compressed-blob"); - expect(result!.meta!.checksum).toBe("new"); - expect(result!.meta!.applicationVersion).toBe("2.0.0"); - }); - - it("returns null when checksum matches and cache not expired", async () => { - const prev: PhishingDataMeta = { - timestamp: Date.now(), - checksum: "abc", - applicationVersion: "1.0.0", - }; - fetchChecksumSpy.mockResolvedValue("abc"); - const result = await service.getNextWebAddresses(prev); - expect(result).toBeNull(); - }); - - it("patches daily domains when cache is expired and checksum unchanged", async () => { - const prev: PhishingDataMeta = { - timestamp: 0, - checksum: "old", - applicationVersion: "1.0.0", - }; - const dailyLines = ["b.com", "c.com"]; - fetchChecksumSpy.mockResolvedValue("old"); - jest.spyOn(service as any, "fetchText").mockResolvedValue(dailyLines); - - setMockBlob(mockBlob); - - const expectedBlob = - "H4sIAAAAAAAA/8vMTSzJzM9TSE7MLchJLElVyE9TyC9KSS1S0FFIz8hLz0ksSQUAtK7XMSYAAAA="; - const result = await service.getNextWebAddresses(prev); - - expect(result!.blob).toBe(expectedBlob); - expect(result!.meta!.checksum).toBe("old"); - }); - - it("fetches all domains when checksum has changed", async () => { - const prev: PhishingDataMeta = { - timestamp: 0, - checksum: "old", - applicationVersion: "1.0.0", - }; - fetchChecksumSpy.mockResolvedValue("new"); - fetchAndCompressSpy.mockResolvedValue("new-blob"); - const result = await service.getNextWebAddresses(prev); - expect(result!.blob).toBe("new-blob"); - expect(result!.meta!.checksum).toBe("new"); + expect(meta).toBeDefined(); + expect(meta.checksum).toBe("new-checksum"); + expect(meta.applicationVersion).toBe("2.0.0"); + expect(meta.timestamp).toBeDefined(); }); }); - describe("compression helpers", () => { - let restore: () => void; + describe("phishing meta data updates", () => { + it("should not update metadata when no data updates occur", async () => { + // Set up existing metadata + const existingMeta = { + checksum: "existing-checksum", + timestamp: Date.now() - 1000, // 1 second ago (not expired) + applicationVersion: "1.0.0", + }; + await fakeGlobalStateProvider.get(PHISHING_DOMAINS_META_KEY).update(() => existingMeta); - beforeEach(async () => { - restore = setupPhishingMocks("abc"); + // Mock conditions where no update is needed + fetchChecksumSpy.mockResolvedValue("existing-checksum"); // Same checksum + platformUtilsService.getApplicationVersion.mockResolvedValue("1.0.0"); // Same version + const mockResponse = { + ok: true, + body: {} as ReadableStream, + } as Response; + apiService.nativeFetch.mockResolvedValue(mockResponse); + + // Trigger background update + const result = await firstValueFrom(service["_backgroundUpdate"](existingMeta)); + + // Verify metadata was NOT updated (same reference returned) + expect(result).toEqual(existingMeta); + expect(result?.timestamp).toBe(existingMeta.timestamp); + + // Verify no data updates were performed + expect(mockIndexedDbService.saveUrlsFromStream).not.toHaveBeenCalled(); + expect(mockIndexedDbService.addUrls).not.toHaveBeenCalled(); }); - afterEach(() => { - if (restore) { - restore(); - } - delete (Uint8Array as any).fromBase64; - jest.restoreAllMocks(); + it("should update metadata when full dataset update occurs due to checksum change", async () => { + // Set up existing metadata + const existingMeta = { + checksum: "old-checksum", + timestamp: Date.now() - 1000, + applicationVersion: "1.0.0", + }; + await fakeGlobalStateProvider.get(PHISHING_DOMAINS_META_KEY).update(() => existingMeta); + + // Mock conditions for full update + fetchChecksumSpy.mockResolvedValue("new-checksum"); // Different checksum + platformUtilsService.getApplicationVersion.mockResolvedValue("1.0.0"); + const mockResponse = { + ok: true, + body: {} as ReadableStream, + } as Response; + apiService.nativeFetch.mockResolvedValue(mockResponse); + + // Trigger background update + const result = await firstValueFrom(service["_backgroundUpdate"](existingMeta)); + + // Verify metadata WAS updated with new values + expect(result?.checksum).toBe("new-checksum"); + expect(result?.timestamp).toBeGreaterThan(existingMeta.timestamp); + + // Verify full update was performed + expect(mockIndexedDbService.saveUrlsFromStream).toHaveBeenCalled(); + expect(mockIndexedDbService.addUrls).not.toHaveBeenCalled(); // Daily should not run }); - describe("_compressString", () => { - it("compresses a string to base64", async () => { - const out = await service["_compressString"]("abc"); - expect(out).toBe("YWJj"); // base64 for 'abc' - }); + it("should update metadata when full dataset update occurs due to version change", async () => { + // Set up existing metadata + const existingMeta = { + checksum: "same-checksum", + timestamp: Date.now() - 1000, + applicationVersion: "1.0.0", + }; + await fakeGlobalStateProvider.get(PHISHING_DOMAINS_META_KEY).update(() => existingMeta); - it("compresses using fallback on older browsers", async () => { - const input = "abc"; - const expected = btoa(encodeURIComponent(input)); - const out = await service["_compressString"](input); - expect(out).toBe(expected); - }); + // Mock conditions for full update + fetchChecksumSpy.mockResolvedValue("same-checksum"); + platformUtilsService.getApplicationVersion.mockResolvedValue("2.0.0"); // Different version + const mockResponse = { + ok: true, + body: {} as ReadableStream, + } as Response; + apiService.nativeFetch.mockResolvedValue(mockResponse); - it("compresses using btoa on error", async () => { - const input = "abc"; - const expected = btoa(encodeURIComponent(input)); - const out = await service["_compressString"](input); - expect(out).toBe(expected); - }); + // Trigger background update + const result = await firstValueFrom(service["_backgroundUpdate"](existingMeta)); + + // Verify metadata WAS updated + expect(result?.applicationVersion).toBe("2.0.0"); + expect(result?.timestamp).toBeGreaterThan(existingMeta.timestamp); + + // Verify full update was performed + expect(mockIndexedDbService.saveUrlsFromStream).toHaveBeenCalled(); + expect(mockIndexedDbService.addUrls).not.toHaveBeenCalled(); }); - describe("_decompressString", () => { - it("decompresses a string from base64", async () => { - const base64 = btoa("ignored"); - const out = await service["_decompressString"](base64); - expect(out).toBe("abc"); - }); - it("decompresses using fallback on older browsers", async () => { - // Provide a fromBase64 implementation - (Uint8Array as any).fromBase64 = (b64: string) => new Uint8Array([100, 101, 102]); + it("should update metadata when daily update occurs due to cache expiration", async () => { + // Set up existing metadata (expired cache) + const existingMeta = { + checksum: "same-checksum", + timestamp: Date.now() - 25 * 60 * 60 * 1000, // 25 hours ago (expired) + applicationVersion: "1.0.0", + }; + await fakeGlobalStateProvider.get(PHISHING_DOMAINS_META_KEY).update(() => existingMeta); - const out = await service["_decompressString"]("ignored"); - expect(out).toBe("abc"); - }); + // Mock conditions for daily update only + fetchChecksumSpy.mockResolvedValue("same-checksum"); // Same checksum (no full update) + platformUtilsService.getApplicationVersion.mockResolvedValue("1.0.0"); // Same version + const mockFullResponse = { + ok: true, + body: {} as ReadableStream, + } as Response; + const mockDailyResponse = { + ok: true, + text: jest.fn().mockResolvedValue("newdomain.com"), + } as unknown as Response; + apiService.nativeFetch + .mockResolvedValueOnce(mockFullResponse) + .mockResolvedValueOnce(mockDailyResponse); - it("decompresses using atob on error", async () => { - const base64 = btoa(encodeURIComponent("abc")); - const out = await service["_decompressString"](base64); - expect(out).toBe("abc"); - }); - }); - }); + // Trigger background update + const result = await firstValueFrom(service["_backgroundUpdate"](existingMeta)); - describe("_loadBlobToMemory", () => { - it("loads blob into memory set", async () => { - const prevBlob = "ignored-base64"; - fakeGlobalStateProvider.getFake(PHISHING_DOMAINS_BLOB_KEY).stateSubject.next(prevBlob); + // Verify metadata WAS updated + expect(result?.timestamp).toBeGreaterThan(existingMeta.timestamp); + expect(result?.checksum).toBe("same-checksum"); - jest.spyOn(service as any, "_decompressString").mockResolvedValue("phish.com\nbadguy.net"); - - // Trigger the load pipeline and allow async RxJS processing to complete - service["_loadBlobToMemory"](); - await flushPromises(); - - const set = service["_webAddressesSet"] as Set; - expect(set).toBeDefined(); - expect(set.has("phish.com")).toBe(true); - expect(set.has("badguy.net")).toBe(true); + // Verify only daily update was performed + expect(mockIndexedDbService.saveUrlsFromStream).not.toHaveBeenCalled(); + expect(mockIndexedDbService.addUrls).toHaveBeenCalledWith(["newdomain.com"]); }); }); }); diff --git a/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.ts b/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.ts index 7d5f04cc276..c34a94ecced 100644 --- a/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.ts +++ b/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.ts @@ -1,17 +1,25 @@ import { catchError, + concatMap, + defer, EMPTY, + exhaustMap, first, - firstValueFrom, + forkJoin, from, + iif, + map, + Observable, of, + retry, share, takeUntil, startWith, Subject, switchMap, tap, - map, + throwError, + timer, } from "rxjs"; import { devFlagEnabled, devFlagValue } from "@bitwarden/browser/platform/flags"; @@ -23,6 +31,8 @@ import { GlobalStateProvider, KeyDefinition, PHISHING_DETECTION_DISK } from "@bi import { getPhishingResources, PhishingResourceType } from "../phishing-resources"; +import { PhishingIndexedDbService } from "./phishing-indexeddb.service"; + /** * Metadata about the phishing data set */ @@ -73,19 +83,16 @@ export class PhishingDataService { // We are adding the destroy to guard against accidental leaks. private _destroy$ = new Subject(); - private _testWebAddresses = this.getTestWebAddresses().concat("phishing.testcategory.com"); // Included for QA to test in prod + private _testWebAddresses = this.getTestWebAddresses(); private _phishingMetaState = this.globalStateProvider.get(PHISHING_DOMAINS_META_KEY); - private _phishingBlobState = this.globalStateProvider.get(PHISHING_DOMAINS_BLOB_KEY); - // In-memory set loaded from blob for fast lookups without reading large storage repeatedly - private _webAddressesSet: Set | null = null; - // Loading variables for web addresses set - // Triggers a load for _webAddressesSet - private _loadTrigger$ = new Subject(); + private indexedDbService: PhishingIndexedDbService; // How often are new web addresses added to the remote? readonly UPDATE_INTERVAL_DURATION = 24 * 60 * 60 * 1000; // 24 hours + private _backgroundUpdateTrigger$ = new Subject(); + private _triggerUpdate$ = new Subject(); update$ = this._triggerUpdate$.pipe( startWith(undefined), // Always emit once @@ -93,12 +100,8 @@ export class PhishingDataService { this._phishingMetaState.state$.pipe( first(), // Only take the first value to avoid an infinite loop when updating the cache below tap((metaState) => { - // Initial loading of web addresses set if not already loaded - if (!this._webAddressesSet) { - this._loadBlobToMemory(); - } - // Perform any updates in the background if needed - void this._backgroundUpdate(metaState); + // Perform any updates in the background + this._backgroundUpdateTrigger$.next(metaState); }), catchError((err: unknown) => { this.logService.error("[PhishingDataService] Background update failed to start.", err); @@ -106,7 +109,6 @@ export class PhishingDataService { }), ), ), - // Stop emitting when dispose() is called takeUntil(this._destroy$), share(), ); @@ -120,6 +122,7 @@ export class PhishingDataService { private resourceType: PhishingResourceType = PhishingResourceType.Links, ) { this.logService.debug("[PhishingDataService] Initializing service..."); + this.indexedDbService = new PhishingIndexedDbService(this.logService); this.taskSchedulerService.registerTaskHandler(ScheduledTaskNames.phishingDomainUpdate, () => { this._triggerUpdate$.next(); }); @@ -127,18 +130,20 @@ export class PhishingDataService { ScheduledTaskNames.phishingDomainUpdate, this.UPDATE_INTERVAL_DURATION, ); - this._setupLoadPipeline(); + this._backgroundUpdateTrigger$ + .pipe( + exhaustMap((currentMeta) => { + return this._backgroundUpdate(currentMeta); + }), + takeUntil(this._destroy$), + ) + .subscribe(); } dispose(): void { // Signal all pipelines to stop and unsubscribe stored subscriptions this._destroy$.next(); this._destroy$.complete(); - - // Clear web addresses set from memory - if (this._webAddressesSet !== null) { - this._webAddressesSet = null; - } } /** @@ -148,105 +153,120 @@ export class PhishingDataService { * @returns True if the URL is a known phishing web address, false otherwise */ async isPhishingWebAddress(url: URL): Promise { - if (!this._webAddressesSet) { - this.logService.debug("[PhishingDataService] Set not loaded; skipping check"); + this.logService.debug("[PhishingDataService] isPhishingWebAddress called for: " + url.href); + + // Skip non-http(s) protocols - phishing database only contains web URLs + // This prevents expensive fallback checks for chrome://, about:, file://, etc. + if (url.protocol !== "http:" && url.protocol !== "https:") { + this.logService.debug("[PhishingDataService] Skipping non-http(s) protocol: " + url.protocol); return false; } - const set = this._webAddressesSet!; + // Quick check for QA/dev test addresses + if (this._testWebAddresses.includes(url.href)) { + this.logService.info("[PhishingDataService] Found test web address: " + url.href); + return true; + } + const resource = getPhishingResources(this.resourceType); - // Custom matcher per resource - if (resource && resource?.match) { - for (const entry of set) { - if (resource.match(url, entry)) { - return true; - } + try { + // Quick lookup: check direct presence of href in IndexedDB + // Also check without trailing slash since browsers add it but DB entries may not have it + const urlHref = url.href; + const urlWithoutTrailingSlash = urlHref.endsWith("/") ? urlHref.slice(0, -1) : null; + + this.logService.debug("[PhishingDataService] Checking hasUrl on this string: " + urlHref); + let hasUrl = await this.indexedDbService.hasUrl(urlHref); + + // If not found and URL has trailing slash, try without it + if (!hasUrl && urlWithoutTrailingSlash) { + this.logService.debug( + "[PhishingDataService] Checking hasUrl without trailing slash: " + + urlWithoutTrailingSlash, + ); + hasUrl = await this.indexedDbService.hasUrl(urlWithoutTrailingSlash); } - return false; + + if (hasUrl) { + this.logService.info( + "[PhishingDataService] Found phishing web address through direct lookup: " + urlHref, + ); + return true; + } + } catch (err) { + this.logService.error("[PhishingDataService] IndexedDB lookup via hasUrl failed", err); } - // Default set-based lookup - return set.has(url.hostname); - } - - async getNextWebAddresses( - previous: PhishingDataMeta | null, - ): Promise | null> { - const prevMeta = previous ?? { timestamp: 0, checksum: "", applicationVersion: "" }; - const now = Date.now(); - - // Updates to check - const applicationVersion = await this.platformUtilsService.getApplicationVersion(); - const remoteChecksum = await this.fetchPhishingChecksum(this.resourceType); - - // Logic checks - const appVersionChanged = applicationVersion !== prevMeta.applicationVersion; - const masterChecksumChanged = remoteChecksum !== prevMeta.checksum; - - // Check for full updated - if (masterChecksumChanged || appVersionChanged) { - this.logService.info("[PhishingDataService] Checksum or version changed; Fetching ALL."); - const remoteUrl = getPhishingResources(this.resourceType)!.remoteUrl; - const blob = await this.fetchAndCompress(remoteUrl); - return { - blob, - meta: { checksum: remoteChecksum, timestamp: now, applicationVersion }, - }; - } - - // Check for daily file - const isCacheExpired = now - prevMeta.timestamp > this.UPDATE_INTERVAL_DURATION; - - if (isCacheExpired) { - this.logService.info("[PhishingDataService] Daily cache expired; Fetching TODAY's"); - const url = getPhishingResources(this.resourceType)!.todayUrl; - const newLines = await this.fetchText(url); - const prevBlob = (await firstValueFrom(this._phishingBlobState.state$)) ?? ""; - const oldText = prevBlob ? await this._decompressString(prevBlob) : ""; - - // Join the new lines to the existing list - const combined = (oldText ? oldText + "\n" : "") + newLines.join("\n"); - - return { - blob: await this._compressString(combined), - meta: { - checksum: remoteChecksum, - timestamp: now, // Reset the timestamp - applicationVersion, - }, - }; - } - - return null; + // If a custom matcher is provided and enabled, use cursor-based search. + // This avoids loading all URLs into memory and allows early exit on first match. + // Can be disabled via useCustomMatcher: false for performance reasons. + if (resource && resource.match && resource.useCustomMatcher !== false) { + try { + this.logService.debug( + "[PhishingDataService] Starting cursor-based search for: " + url.href, + ); + const startTime = performance.now(); + + const found = await this.indexedDbService.findMatchingUrl((entry) => + resource.match(url, entry), + ); + + const endTime = performance.now(); + const duration = (endTime - startTime).toFixed(2); + this.logService.debug( + `[PhishingDataService] Cursor-based search completed in ${duration}ms for: ${url.href} (found: ${found})`, + ); + + if (found) { + this.logService.info( + "[PhishingDataService] Found phishing web address through custom matcher: " + url.href, + ); + } else { + this.logService.debug( + "[PhishingDataService] No match found, returning false for: " + url.href, + ); + } + return found; + } catch (err) { + this.logService.error("[PhishingDataService] Error running custom matcher", err); + this.logService.debug( + "[PhishingDataService] Returning false due to error for: " + url.href, + ); + return false; + } + } + this.logService.debug( + "[PhishingDataService] No custom matcher, returning false for: " + url.href, + ); + return false; } + // [FIXME] Pull fetches into api service private async fetchPhishingChecksum(type: PhishingResourceType = PhishingResourceType.Domains) { const checksumUrl = getPhishingResources(type)!.checksumUrl; - const response = await this.apiService.nativeFetch(new Request(checksumUrl)); - if (!response.ok) { - throw new Error(`[PhishingDataService] Failed to fetch checksum: ${response.status}`); - } - return response.text(); - } - private async fetchAndCompress(url: string): Promise { - const response = await this.apiService.nativeFetch(new Request(url)); - if (!response.ok) { - throw new Error("Fetch failed"); - } + this.logService.debug(`[PhishingDataService] Fetching checksum from: ${checksumUrl}`); - const downloadStream = response.body!; - // Pipe through CompressionStream while it's downloading - const compressedStream = downloadStream.pipeThrough(new CompressionStream("gzip")); - // Convert to ArrayBuffer - const buffer = await new Response(compressedStream).arrayBuffer(); - const bytes = new Uint8Array(buffer); + try { + const response = await this.apiService.nativeFetch(new Request(checksumUrl)); + if (!response.ok) { + throw new Error( + `[PhishingDataService] Failed to fetch checksum: ${response.status} ${response.statusText}`, + ); + } - // Return as Base64 for storage - return (bytes as any).toBase64 ? (bytes as any).toBase64() : this._uint8ToBase64Fallback(bytes); + return await response.text(); + } catch (error) { + this.logService.error( + `[PhishingDataService] Checksum fetch failed from ${checksumUrl}`, + error, + ); + throw error; + } } - private async fetchText(url: string) { + // [FIXME] Pull fetches into api service + private async fetchToday(url: string) { const response = await this.apiService.nativeFetch(new Request(url)); if (!response.ok) { @@ -258,171 +278,196 @@ export class PhishingDataService { private getTestWebAddresses() { const flag = devFlagEnabled("testPhishingUrls"); + // Normalize URLs by converting to URL object and back to ensure consistent format (e.g., trailing slashes) + const testWebAddresses: string[] = [ + new URL("http://phishing.testcategory.com").href, + new URL("https://phishing.testcategory.com").href, + new URL("https://phishing.testcategory.com/block").href, + ]; if (!flag) { - return []; + return testWebAddresses; } const webAddresses = devFlagValue("testPhishingUrls") as unknown[]; if (webAddresses && webAddresses instanceof Array) { this.logService.debug( - "[PhishingDetectionService] Dev flag enabled for testing phishing detection. Adding test phishing web addresses:", + "[PhishingDataService] Dev flag enabled for testing phishing detection. Adding test phishing web addresses:", webAddresses, ); - return webAddresses as string[]; + // Normalize dev flag URLs as well, filtering out invalid ones + const normalizedDevAddresses = (webAddresses as string[]) + .filter((addr) => { + try { + new URL(addr); + return true; + } catch { + this.logService.warning( + `[PhishingDataService] Invalid test URL in dev flag, skipping: ${addr}`, + ); + return false; + } + }) + .map((addr) => new URL(addr).href); + return testWebAddresses.concat(normalizedDevAddresses); } - return []; + return testWebAddresses; } - // Runs the update flow in the background and retries up to 3 times on failure - private async _backgroundUpdate(previous: PhishingDataMeta | null): Promise { - this.logService.info(`[PhishingDataService] Update web addresses triggered...`); - const phishingMeta: PhishingDataMeta = previous ?? { - timestamp: 0, - checksum: "", - applicationVersion: "", - }; - // Start time for logging performance of update - const startTime = Date.now(); - const maxAttempts = 3; - const delayMs = 5 * 60 * 1000; // 5 minutes + private _getUpdatedMeta(): Observable { + return defer(() => { + const now = Date.now(); - for (let attempt = 1; attempt <= maxAttempts; attempt++) { - try { - const next = await this.getNextWebAddresses(phishingMeta); - if (!next) { - return; // No update needed - } + return forkJoin({ + applicationVersion: from(this.platformUtilsService.getApplicationVersion()), + remoteChecksum: from(this.fetchPhishingChecksum(this.resourceType)), + }).pipe( + map(({ applicationVersion, remoteChecksum }) => { + return { + checksum: remoteChecksum, + timestamp: now, + applicationVersion, + }; + }), + ); + }); + } - if (next.meta) { - await this._phishingMetaState.update(() => next!.meta!); - } - if (next.blob) { - await this._phishingBlobState.update(() => next!.blob!); - this._loadBlobToMemory(); - } + // Streams the full phishing data set and saves it to IndexedDB + private _updateFullDataSet() { + const resource = getPhishingResources(this.resourceType); - // Performance logging - const elapsed = Date.now() - startTime; - this.logService.info(`[PhishingDataService] Phishing data cache updated in ${elapsed}ms`); - } catch (err) { - this.logService.error( - `[PhishingDataService] Unable to update web addresses. Attempt ${attempt}.`, - err, - ); - if (attempt < maxAttempts) { - await new Promise((res) => setTimeout(res, delayMs)); - } else { - const elapsed = Date.now() - startTime; - this.logService.error( - `[PhishingDataService] Retries unsuccessful after ${elapsed}ms. Unable to update web addresses.`, - err, + if (!resource?.remoteUrl) { + return throwError(() => new Error("Invalid resource URL")); + } + + this.logService.info(`[PhishingDataService] Starting FULL update using ${resource.remoteUrl}`); + return from(this.apiService.nativeFetch(new Request(resource.remoteUrl))).pipe( + switchMap((response) => { + if (!response.ok || !response.body) { + return throwError( + () => + new Error( + `[PhishingDataService] Full fetch failed: ${response.status}, ${response.statusText}`, + ), ); } - } - } + + return from(this.indexedDbService.saveUrlsFromStream(response.body)); + }), + catchError((err: unknown) => { + this.logService.error( + `[PhishingDataService] Full dataset update failed using primary source ${err}`, + ); + this.logService.warning( + `[PhishingDataService] Falling back to: ${resource.fallbackUrl} (Note: Fallback data may be less up-to-date)`, + ); + // Try fallback URL + return from(this.apiService.nativeFetch(new Request(resource.fallbackUrl))).pipe( + switchMap((fallbackResponse) => { + if (!fallbackResponse.ok || !fallbackResponse.body) { + return throwError( + () => + new Error( + `[PhishingDataService] Fallback fetch failed: ${fallbackResponse.status}, ${fallbackResponse.statusText}`, + ), + ); + } + + return from(this.indexedDbService.saveUrlsFromStream(fallbackResponse.body)); + }), + catchError((fallbackError: unknown) => { + this.logService.error(`[PhishingDataService] Fallback source failed`); + return throwError(() => fallbackError); + }), + ); + }), + ); } - // Sets up the load pipeline to load the blob into memory when triggered - private _setupLoadPipeline(): void { - this._loadTrigger$ - .pipe( - switchMap(() => - this._phishingBlobState.state$.pipe( - first(), - switchMap((blobBase64) => { - if (!blobBase64) { - return of(undefined); - } - // Note: _decompressString wraps a promise that cannot be aborted - // If performance improvements are needed, consider migrating to a cancellable approach - return from(this._decompressString(blobBase64)).pipe( - map((text) => { - const lines = text.split(/\r?\n/); - const newWebAddressesSet = new Set(lines); - this._testWebAddresses.forEach((a) => newWebAddressesSet.add(a)); - this._webAddressesSet = new Set(newWebAddressesSet); - this.logService.info( - `[PhishingDataService] loaded ${this._webAddressesSet.size} addresses into memory from blob`, - ); - }), + private _updateDailyDataSet() { + this.logService.info("[PhishingDataService] Starting DAILY update..."); + + const todayUrl = getPhishingResources(this.resourceType)?.todayUrl; + if (!todayUrl) { + return throwError(() => new Error("Today URL missing")); + } + + return from(this.fetchToday(todayUrl)).pipe( + switchMap((lines) => from(this.indexedDbService.addUrls(lines))), + ); + } + + private _backgroundUpdate( + previous: PhishingDataMeta | null, + ): Observable { + // Use defer to restart timer if retry is activated + return defer(() => { + const startTime = Date.now(); + this.logService.info(`[PhishingDataService] Update triggered...`); + + // Get updated meta info + return this._getUpdatedMeta().pipe( + // Update full data set if application version or checksum changed + concatMap((newMeta) => + iif( + () => { + const appVersionChanged = newMeta.applicationVersion !== previous?.applicationVersion; + const checksumChanged = newMeta.checksum !== previous?.checksum; + + this.logService.info( + `[PhishingDataService] Checking if full update is needed: appVersionChanged=${appVersionChanged}, checksumChanged=${checksumChanged}`, ); - }), - catchError((err: unknown) => { - this.logService.error("[PhishingDataService] Failed to load blob into memory", err); - return of(undefined); - }), + return appVersionChanged || checksumChanged; + }, + this._updateFullDataSet().pipe(map(() => ({ meta: newMeta, updated: true }))), + of({ meta: newMeta, updated: false }), ), ), - catchError((err: unknown) => { - this.logService.error("[PhishingDataService] Load pipeline failed", err); - return of(undefined); + // Update daily data set if last update was more than UPDATE_INTERVAL_DURATION ago + concatMap((result) => + iif( + () => { + const isCacheExpired = + Date.now() - (previous?.timestamp ?? 0) > this.UPDATE_INTERVAL_DURATION; + return isCacheExpired; + }, + this._updateDailyDataSet().pipe(map(() => ({ meta: result.meta, updated: true }))), + of(result), + ), + ), + concatMap((result) => { + if (!result.updated) { + this.logService.debug(`[PhishingDataService] No update needed, metadata unchanged`); + return of(previous); + } + + this.logService.debug(`[PhishingDataService] Updated phishing meta data:`, result.meta); + return from(this._phishingMetaState.update(() => result.meta)).pipe( + tap(() => { + const elapsed = Date.now() - startTime; + this.logService.info(`[PhishingDataService] Updated data set in ${elapsed}ms`); + }), + ); }), - takeUntil(this._destroy$), - share(), - ) - .subscribe(); - } - - // [FIXME] Move compression helpers to a shared utils library - // to separate from phishing data service. - // ------------------------- Blob and Compression Handling ------------------------- - private async _compressString(input: string): Promise { - try { - const stream = new Blob([input]).stream().pipeThrough(new CompressionStream("gzip")); - - const compressedBuffer = await new Response(stream).arrayBuffer(); - const bytes = new Uint8Array(compressedBuffer); - - // Modern browsers support direct toBase64 conversion - // For older support, use fallback - return (bytes as any).toBase64 - ? (bytes as any).toBase64() - : this._uint8ToBase64Fallback(bytes); - } catch (err) { - this.logService.error("[PhishingDataService] Compression failed", err); - return btoa(encodeURIComponent(input)); - } - } - - private async _decompressString(base64: string): Promise { - try { - // Modern browsers support direct toBase64 conversion - // For older support, use fallback - const bytes = (Uint8Array as any).fromBase64 - ? (Uint8Array as any).fromBase64(base64) - : this._base64ToUint8Fallback(base64); - if (bytes == null) { - throw new Error("Base64 decoding resulted in null"); - } - const byteResponse = new Response(bytes); - if (!byteResponse.body) { - throw new Error("Response body is null"); - } - const stream = byteResponse.body.pipeThrough(new DecompressionStream("gzip")); - const streamResponse = new Response(stream); - return await streamResponse.text(); - } catch (err) { - this.logService.error("[PhishingDataService] Decompression failed", err); - return decodeURIComponent(atob(base64)); - } - } - - // Trigger a load of the blob into memory - private _loadBlobToMemory(): void { - this._loadTrigger$.next(); - } - private _uint8ToBase64Fallback(bytes: Uint8Array): string { - const CHUNK_SIZE = 0x8000; // 32KB chunks - let binary = ""; - for (let i = 0; i < bytes.length; i += CHUNK_SIZE) { - const chunk = bytes.subarray(i, i + CHUNK_SIZE); - binary += String.fromCharCode.apply(null, chunk as any); - } - return btoa(binary); - } - - private _base64ToUint8Fallback(base64: string): Uint8Array { - const binary = atob(base64); - return Uint8Array.from(binary, (c) => c.charCodeAt(0)); + retry({ + count: 2, // Total 3 attempts (initial + 2 retries) + delay: (error, retryCount) => { + this.logService.error( + `[PhishingDataService] Attempt ${retryCount} failed. Retrying in 5m...`, + error, + ); + return timer(5 * 60 * 1000); // Wait 5 mins before next attempt + }, + }), + catchError((err: unknown) => { + const elapsed = Date.now() - startTime; + this.logService.error( + `[PhishingDataService] Retries unsuccessful after ${elapsed}ms.`, + err, + ); + return of(previous); + }), + ); + }); } } diff --git a/apps/browser/src/dirt/phishing-detection/services/phishing-detection.service.ts b/apps/browser/src/dirt/phishing-detection/services/phishing-detection.service.ts index 815007e1d4c..6ca5bad8942 100644 --- a/apps/browser/src/dirt/phishing-detection/services/phishing-detection.service.ts +++ b/apps/browser/src/dirt/phishing-detection/services/phishing-detection.service.ts @@ -1,10 +1,10 @@ import { - concatMap, distinctUntilChanged, EMPTY, filter, map, merge, + mergeMap, Subject, switchMap, tap, @@ -43,6 +43,7 @@ export class PhishingDetectionService { private static _tabUpdated$ = new Subject(); private static _ignoredHostnames = new Set(); private static _didInit = false; + private static _activeSearchCount = 0; static initialize( logService: LogService, @@ -63,7 +64,7 @@ export class PhishingDetectionService { tap((message) => logService.debug(`[PhishingDetectionService] user selected continue for ${message.url}`), ), - concatMap(async (message) => { + mergeMap(async (message) => { const url = new URL(message.url); this._ignoredHostnames.add(url.hostname); await BrowserApi.navigateTabToUrl(message.tabId, url); @@ -88,23 +89,40 @@ export class PhishingDetectionService { prev.ignored === curr.ignored, ), tap((event) => logService.debug(`[PhishingDetectionService] processing event:`, event)), - concatMap(async ({ tabId, url, ignored }) => { - if (ignored) { - // The next time this host is visited, block again - this._ignoredHostnames.delete(url.hostname); - return; - } - const isPhishing = await phishingDataService.isPhishingWebAddress(url); - if (!isPhishing) { - return; - } - - const phishingWarningPage = new URL( - BrowserApi.getRuntimeURL("popup/index.html#/security/phishing-warning") + - `?phishingUrl=${url.toString()}`, + // Use mergeMap for parallel processing - each tab check runs independently + // Concurrency limit of 5 prevents overwhelming IndexedDB + mergeMap(async ({ tabId, url, ignored }) => { + this._activeSearchCount++; + const searchId = `${tabId}-${Date.now()}`; + logService.debug( + `[PhishingDetectionService] Search STARTED [${searchId}] for ${url.href} (active: ${this._activeSearchCount}/5)`, ); - await BrowserApi.navigateTabToUrl(tabId, phishingWarningPage); - }), + const startTime = performance.now(); + + try { + if (ignored) { + // The next time this host is visited, block again + this._ignoredHostnames.delete(url.hostname); + return; + } + const isPhishing = await phishingDataService.isPhishingWebAddress(url); + if (!isPhishing) { + return; + } + + const phishingWarningPage = new URL( + BrowserApi.getRuntimeURL("popup/index.html#/security/phishing-warning") + + `?phishingUrl=${url.toString()}`, + ); + await BrowserApi.navigateTabToUrl(tabId, phishingWarningPage); + } finally { + this._activeSearchCount--; + const duration = (performance.now() - startTime).toFixed(2); + logService.debug( + `[PhishingDetectionService] Search FINISHED [${searchId}] for ${url.href} in ${duration}ms (active: ${this._activeSearchCount}/5)`, + ); + } + }, 5), ); const onCancelCommand$ = messageListener diff --git a/apps/browser/src/dirt/phishing-detection/services/phishing-indexeddb.service.spec.ts b/apps/browser/src/dirt/phishing-detection/services/phishing-indexeddb.service.spec.ts index 75bd634b1fc..05aeced16fc 100644 --- a/apps/browser/src/dirt/phishing-detection/services/phishing-indexeddb.service.spec.ts +++ b/apps/browser/src/dirt/phishing-detection/services/phishing-indexeddb.service.spec.ts @@ -355,6 +355,89 @@ describe("PhishingIndexedDbService", () => { }); }); + describe("findMatchingUrl", () => { + it("returns true when matcher finds a match", async () => { + mockStore.set("https://example.com", { url: "https://example.com" }); + mockStore.set("https://phishing.net", { url: "https://phishing.net" }); + mockStore.set("https://test.org", { url: "https://test.org" }); + + const matcher = (url: string) => url.includes("phishing"); + const result = await service.findMatchingUrl(matcher); + + expect(result).toBe(true); + expect(mockDb.transaction).toHaveBeenCalledWith("phishing-urls", "readonly"); + expect(mockObjectStore.openCursor).toHaveBeenCalled(); + }); + + it("returns false when no URLs match", async () => { + mockStore.set("https://example.com", { url: "https://example.com" }); + mockStore.set("https://test.org", { url: "https://test.org" }); + + const matcher = (url: string) => url.includes("notfound"); + const result = await service.findMatchingUrl(matcher); + + expect(result).toBe(false); + }); + + it("returns false when store is empty", async () => { + const matcher = (url: string) => url.includes("anything"); + const result = await service.findMatchingUrl(matcher); + + expect(result).toBe(false); + }); + + it("exits early on first match without iterating all records", async () => { + mockStore.set("https://match1.com", { url: "https://match1.com" }); + mockStore.set("https://match2.com", { url: "https://match2.com" }); + mockStore.set("https://match3.com", { url: "https://match3.com" }); + + const matcherCallCount = jest + .fn() + .mockImplementation((url: string) => url.includes("match2")); + await service.findMatchingUrl(matcherCallCount); + + // Matcher should be called for match1.com and match2.com, but NOT match3.com + // because it exits early on first match + expect(matcherCallCount).toHaveBeenCalledWith("https://match1.com"); + expect(matcherCallCount).toHaveBeenCalledWith("https://match2.com"); + expect(matcherCallCount).not.toHaveBeenCalledWith("https://match3.com"); + expect(matcherCallCount).toHaveBeenCalledTimes(2); + }); + + it("supports complex matcher logic", async () => { + mockStore.set("https://example.com/path", { url: "https://example.com/path" }); + mockStore.set("https://test.org", { url: "https://test.org" }); + mockStore.set("https://phishing.net/login", { url: "https://phishing.net/login" }); + + const matcher = (url: string) => { + return url.includes("phishing") && url.includes("login"); + }; + const result = await service.findMatchingUrl(matcher); + + expect(result).toBe(true); + }); + + it("returns false on error", async () => { + const error = new Error("IndexedDB error"); + mockOpenRequest.error = error; + (global.indexedDB.open as jest.Mock).mockImplementation(() => { + setTimeout(() => { + mockOpenRequest.onerror?.(); + }, 0); + return mockOpenRequest; + }); + + const matcher = (url: string) => url.includes("test"); + const result = await service.findMatchingUrl(matcher); + + expect(result).toBe(false); + expect(logService.error).toHaveBeenCalledWith( + "[PhishingIndexedDbService] Cursor search failed", + expect.any(Error), + ); + }); + }); + describe("database initialization", () => { it("creates object store with keyPath on upgrade", async () => { mockDb.objectStoreNames.contains.mockReturnValue(false); diff --git a/apps/browser/src/dirt/phishing-detection/services/phishing-indexeddb.service.ts b/apps/browser/src/dirt/phishing-detection/services/phishing-indexeddb.service.ts index 099839a38d9..b94b4047faf 100644 --- a/apps/browser/src/dirt/phishing-detection/services/phishing-indexeddb.service.ts +++ b/apps/browser/src/dirt/phishing-detection/services/phishing-indexeddb.service.ts @@ -165,6 +165,60 @@ export class PhishingIndexedDbService { }); } + /** + * Checks if any URL in the database matches the given matcher function. + * Uses a cursor to iterate through records without loading all into memory. + * Returns immediately on first match for optimal performance. + * + * @param matcher - Function that tests each URL and returns true if it matches + * @returns `true` if any URL matches, `false` if none match or on error + */ + async findMatchingUrl(matcher: (url: string) => boolean): Promise { + this.logService.debug("[PhishingIndexedDbService] Searching for matching URL with cursor..."); + + let db: IDBDatabase | null = null; + try { + db = await this.openDatabase(); + return await this.cursorSearch(db, matcher); + } catch (error) { + this.logService.error("[PhishingIndexedDbService] Cursor search failed", error); + return false; + } finally { + db?.close(); + } + } + + /** + * Performs cursor-based search through all URLs. + * Tests each URL with the matcher without accumulating records in memory. + */ + private cursorSearch(db: IDBDatabase, matcher: (url: string) => boolean): Promise { + return new Promise((resolve, reject) => { + const req = db + .transaction(this.STORE_NAME, "readonly") + .objectStore(this.STORE_NAME) + .openCursor(); + req.onerror = () => reject(req.error); + req.onsuccess = (e) => { + const cursor = (e.target as IDBRequest).result; + if (cursor) { + const url = (cursor.value as PhishingUrlRecord).url; + // Test the URL immediately without accumulating in memory + if (matcher(url)) { + // Found a match + resolve(true); + return; + } + // No match, continue to next record + cursor.continue(); + } else { + // Reached end of records without finding a match + resolve(false); + } + }; + }); + } + /** * Saves phishing URLs directly from a stream. * Processes data incrementally to minimize memory usage.