diff --git a/apps/browser/src/dirt/phishing-detection/phishing-resources.ts b/apps/browser/src/dirt/phishing-detection/phishing-resources.ts index 4cd155c8ae3..88068987dd7 100644 --- a/apps/browser/src/dirt/phishing-detection/phishing-resources.ts +++ b/apps/browser/src/dirt/phishing-detection/phishing-resources.ts @@ -1,6 +1,8 @@ export type PhishingResource = { name?: string; remoteUrl: string; + /** Fallback URL to use if remoteUrl fails (e.g., due to SSL interception/cert issues) */ + fallbackUrl: string; checksumUrl: string; todayUrl: string; /** Matcher used to decide whether a given URL matches an entry from this resource */ @@ -19,6 +21,8 @@ export const PHISHING_RESOURCES: Record - new Promise((resolve) => jest.requireActual("timers").setImmediate(resolve)); - -// [FIXME] Move mocking and compression helpers to a shared test utils library -// to separate from phishing data service tests. -export const setupPhishingMocks = (mockedResult: string | ArrayBuffer = "mocked-data") => { - // Store original globals - const originals = { - Response: global.Response, - CompressionStream: global.CompressionStream, - DecompressionStream: global.DecompressionStream, - Blob: global.Blob, - atob: global.atob, - btoa: global.btoa, - }; - - // Mock missing or browser-only globals - global.atob = (str) => Buffer.from(str, "base64").toString("binary"); - global.btoa = (str) => Buffer.from(str, "binary").toString("base64"); - - (global as any).CompressionStream = class {}; - (global as any).DecompressionStream = class {}; - - global.Blob = class { - constructor(public parts: any[]) {} - stream() { - return { pipeThrough: () => ({}) }; - } - } as any; - - global.Response = class { - body = { pipeThrough: () => ({}) }; - // Return string for decompression - text() { - return Promise.resolve(typeof mockedResult === "string" ? mockedResult : ""); - } - // Return ArrayBuffer for compression - arrayBuffer() { - if (typeof mockedResult === "string") { - const bytes = new TextEncoder().encode(mockedResult); - return Promise.resolve(bytes.buffer); - } - - return Promise.resolve(mockedResult); - } - } as any; - - // Cleanup function - return () => { - Object.assign(global, originals); - }; -}; +import { PHISHING_DOMAINS_META_KEY, PhishingDataService } from "./phishing-data.service"; +import type { PhishingIndexedDbService } from "./phishing-indexeddb.service"; describe("PhishingDataService", () => { let service: PhishingDataService; @@ -76,33 +19,30 @@ describe("PhishingDataService", () => { let taskSchedulerService: TaskSchedulerService; let logService: MockProxy; let platformUtilsService: MockProxy; + let mockIndexedDbService: MockProxy; const fakeGlobalStateProvider: FakeGlobalStateProvider = new FakeGlobalStateProvider(); - - const setMockMeta = (state: PhishingDataMeta) => { - fakeGlobalStateProvider.getFake(PHISHING_DOMAINS_META_KEY).stateSubject.next(state); - return state; - }; - const setMockBlob = (state: PhishingDataBlob) => { - fakeGlobalStateProvider.getFake(PHISHING_DOMAINS_BLOB_KEY).stateSubject.next(state); - return state; - }; - let fetchChecksumSpy: jest.SpyInstance; - let fetchAndCompressSpy: jest.SpyInstance; - - const mockMeta: PhishingDataMeta = { - checksum: "abc", - timestamp: Date.now(), - applicationVersion: "1.0.0", - }; - const mockBlob = "http://phish.com\nhttps://badguy.net"; - const mockCompressedBlob = - "H4sIAAAAAAAA/8vMTSzJzM9TSE7MLchJLElVyE9TyC9KSS1S0FFIz8hLz0ksSQUAtK7XMSYAAAA="; beforeEach(async () => { - jest.useFakeTimers(); + jest.clearAllMocks(); + + // Mock Request global if not available + if (typeof Request === "undefined") { + (global as any).Request = class { + constructor(public url: string) {} + }; + } + apiService = mock(); logService = mock(); + mockIndexedDbService = mock(); + + // Set default mock behaviors + mockIndexedDbService.hasUrl.mockResolvedValue(false); + mockIndexedDbService.loadAllUrls.mockResolvedValue([]); + mockIndexedDbService.saveUrls.mockResolvedValue(undefined); + mockIndexedDbService.addUrls.mockResolvedValue(undefined); + mockIndexedDbService.saveUrlsFromStream.mockResolvedValue(undefined); platformUtilsService = mock(); platformUtilsService.getApplicationVersion.mockResolvedValue("1.0.0"); @@ -116,217 +56,315 @@ describe("PhishingDataService", () => { logService, platformUtilsService, ); - fetchChecksumSpy = jest.spyOn(service as any, "fetchPhishingChecksum"); - fetchAndCompressSpy = jest.spyOn(service as any, "fetchAndCompress"); + // Replace the IndexedDB service with our mock + service["indexedDbService"] = mockIndexedDbService; + + fetchChecksumSpy = jest.spyOn(service as any, "fetchPhishingChecksum"); fetchChecksumSpy.mockResolvedValue("new-checksum"); - fetchAndCompressSpy.mockResolvedValue("compressed-blob"); }); describe("initialization", () => { - beforeEach(() => { - jest.spyOn(service as any, "_compressString").mockResolvedValue(mockCompressedBlob); - jest.spyOn(service as any, "_decompressString").mockResolvedValue(mockBlob); + it("should initialize with IndexedDB service", () => { + expect(service["indexedDbService"]).toBeDefined(); }); - it("should perform background update", async () => { - platformUtilsService.getApplicationVersion.mockResolvedValue("1.0.x"); - jest - .spyOn(service as any, "getNextWebAddresses") - .mockResolvedValue({ meta: mockMeta, blob: mockBlob }); - - setMockBlob(mockBlob); - setMockMeta(mockMeta); - - const sub = service.update$.subscribe(); - await flushPromises(); - - const url = new URL("http://phish.com"); - const QAurl = new URL("http://phishing.testcategory.com"); + it("should detect QA test addresses - http protocol", async () => { + const url = new URL("http://phishing.testcategory.com"); expect(await service.isPhishingWebAddress(url)).toBe(true); - expect(await service.isPhishingWebAddress(QAurl)).toBe(true); + // IndexedDB should not be called for test addresses + expect(mockIndexedDbService.hasUrl).not.toHaveBeenCalled(); + }); - sub.unsubscribe(); + it("should detect QA test addresses - https protocol", async () => { + const url = new URL("https://phishing.testcategory.com"); + expect(await service.isPhishingWebAddress(url)).toBe(true); + expect(mockIndexedDbService.hasUrl).not.toHaveBeenCalled(); + }); + + it("should detect QA test addresses - specific subpath /block", async () => { + const url = new URL("https://phishing.testcategory.com/block"); + expect(await service.isPhishingWebAddress(url)).toBe(true); + expect(mockIndexedDbService.hasUrl).not.toHaveBeenCalled(); + }); + + it("should NOT detect QA test addresses - different subpath", async () => { + mockIndexedDbService.hasUrl.mockResolvedValue(false); + mockIndexedDbService.loadAllUrls.mockResolvedValue([]); + + const url = new URL("https://phishing.testcategory.com/other"); + const result = await service.isPhishingWebAddress(url); + + // This should NOT be detected as a test address since only /block subpath is hardcoded + expect(result).toBe(false); + }); + + it("should detect QA test addresses - root path with trailing slash", async () => { + const url = new URL("https://phishing.testcategory.com/"); + const result = await service.isPhishingWebAddress(url); + + // This SHOULD be detected since URLs are normalized (trailing slash added to root URLs) + expect(result).toBe(true); + expect(mockIndexedDbService.hasUrl).not.toHaveBeenCalled(); }); }); describe("isPhishingWebAddress", () => { - beforeEach(() => { - jest.spyOn(service as any, "_compressString").mockResolvedValue(mockCompressedBlob); - jest.spyOn(service as any, "_decompressString").mockResolvedValue(mockBlob); - }); + it("should detect a phishing web address using quick hasUrl lookup", async () => { + // Mock hasUrl to return true for direct hostname match + mockIndexedDbService.hasUrl.mockResolvedValue(true); - it("should detect a phishing web address", async () => { - service["_webAddressesSet"] = new Set(["phish.com", "badguy.net"]); - - const url = new URL("http://phish.com"); + const url = new URL("http://phish.com/testing-param"); const result = await service.isPhishingWebAddress(url); expect(result).toBe(true); + expect(mockIndexedDbService.hasUrl).toHaveBeenCalledWith("http://phish.com/testing-param"); + // Should not fall back to custom matcher when hasUrl returns true + expect(mockIndexedDbService.loadAllUrls).not.toHaveBeenCalled(); + }); + + it("should fall back to custom matcher when hasUrl returns false", async () => { + // Mock hasUrl to return false (no direct href match) + mockIndexedDbService.hasUrl.mockResolvedValue(false); + // Mock loadAllUrls to return phishing URLs for custom matcher + mockIndexedDbService.loadAllUrls.mockResolvedValue(["http://phish.com/path"]); + + const url = new URL("http://phish.com/path"); + const result = await service.isPhishingWebAddress(url); + + expect(result).toBe(true); + expect(mockIndexedDbService.hasUrl).toHaveBeenCalledWith("http://phish.com/path"); + expect(mockIndexedDbService.loadAllUrls).toHaveBeenCalled(); }); it("should not detect a safe web address", async () => { - service["_webAddressesSet"] = new Set(["phish.com", "badguy.net"]); + // Mock hasUrl to return false + mockIndexedDbService.hasUrl.mockResolvedValue(false); + // Mock loadAllUrls to return phishing URLs that don't match + mockIndexedDbService.loadAllUrls.mockResolvedValue(["http://phish.com", "http://badguy.net"]); + const url = new URL("http://safe.com"); const result = await service.isPhishingWebAddress(url); + expect(result).toBe(false); + expect(mockIndexedDbService.hasUrl).toHaveBeenCalledWith("http://safe.com/"); + expect(mockIndexedDbService.loadAllUrls).toHaveBeenCalled(); }); - it("should match against root web address", async () => { - service["_webAddressesSet"] = new Set(["phish.com", "badguy.net"]); - const url = new URL("http://phish.com/about"); + it("should not match against root web address with subpaths using custom matcher", async () => { + // Mock hasUrl to return false (no direct href match) + mockIndexedDbService.hasUrl.mockResolvedValue(false); + // Mock loadAllUrls to return entry that matches with subpath + mockIndexedDbService.loadAllUrls.mockResolvedValue(["http://phish.com/login"]); + + const url = new URL("http://phish.com/login/page"); const result = await service.isPhishingWebAddress(url); - expect(result).toBe(true); + + expect(result).toBe(false); + expect(mockIndexedDbService.hasUrl).toHaveBeenCalledWith("http://phish.com/login/page"); + expect(mockIndexedDbService.loadAllUrls).toHaveBeenCalled(); }); - it("should not error on empty state", async () => { - service["_webAddressesSet"] = null; + it("should not match against root web address with different subpaths using custom matcher", async () => { + // Mock hasUrl to return false (no direct hostname match) + mockIndexedDbService.hasUrl.mockResolvedValue(false); + // Mock loadAllUrls to return entry that matches with subpath + mockIndexedDbService.loadAllUrls.mockResolvedValue(["http://phish.com/login/page1"]); + + const url = new URL("http://phish.com/login/page2"); + const result = await service.isPhishingWebAddress(url); + + expect(result).toBe(false); + expect(mockIndexedDbService.hasUrl).toHaveBeenCalledWith("http://phish.com/login/page2"); + expect(mockIndexedDbService.loadAllUrls).toHaveBeenCalled(); + }); + + it("should handle IndexedDB errors gracefully", async () => { + // Mock hasUrl to throw error + mockIndexedDbService.hasUrl.mockRejectedValue(new Error("hasUrl error")); + // Mock loadAllUrls to also throw error + mockIndexedDbService.loadAllUrls.mockRejectedValue(new Error("IndexedDB error")); + const url = new URL("http://phish.com/about"); const result = await service.isPhishingWebAddress(url); + expect(result).toBe(false); + expect(logService.error).toHaveBeenCalledWith( + "[PhishingDataService] IndexedDB lookup via hasUrl failed", + expect.any(Error), + ); + expect(logService.error).toHaveBeenCalledWith( + "[PhishingDataService] Error running custom matcher", + expect.any(Error), + ); }); }); - describe("getNextWebAddresses", () => { - beforeEach(() => { - jest.spyOn(service as any, "_compressString").mockResolvedValue(mockCompressedBlob); - jest.spyOn(service as any, "_decompressString").mockResolvedValue(mockBlob); + describe("data updates", () => { + it("should update full dataset via stream", async () => { + // Mock full dataset update + const mockResponse = { + ok: true, + body: {} as ReadableStream, + } as Response; + apiService.nativeFetch.mockResolvedValue(mockResponse); + + await firstValueFrom(service["_updateFullDataSet"]()); + + expect(mockIndexedDbService.saveUrlsFromStream).toHaveBeenCalled(); }); - it("refetches all web addresses if applicationVersion has changed", async () => { - const prev: PhishingDataMeta = { - timestamp: Date.now() - 60000, - checksum: "old", - applicationVersion: "1.0.0", - }; - fetchChecksumSpy.mockResolvedValue("new"); + it("should update daily dataset via addUrls", async () => { + // Mock daily update + const mockResponse = { + ok: true, + text: jest.fn().mockResolvedValue("newphish.com\nanotherbad.net"), + } as unknown as Response; + apiService.nativeFetch.mockResolvedValue(mockResponse); + + await firstValueFrom(service["_updateDailyDataSet"]()); + + expect(mockIndexedDbService.addUrls).toHaveBeenCalledWith(["newphish.com", "anotherbad.net"]); + }); + + it("should get updated meta information", async () => { + fetchChecksumSpy.mockResolvedValue("new-checksum"); platformUtilsService.getApplicationVersion.mockResolvedValue("2.0.0"); - const result = await service.getNextWebAddresses(prev); + const meta = await firstValueFrom(service["_getUpdatedMeta"]()); - expect(result!.blob).toBe("compressed-blob"); - expect(result!.meta!.checksum).toBe("new"); - expect(result!.meta!.applicationVersion).toBe("2.0.0"); - }); - - it("returns null when checksum matches and cache not expired", async () => { - const prev: PhishingDataMeta = { - timestamp: Date.now(), - checksum: "abc", - applicationVersion: "1.0.0", - }; - fetchChecksumSpy.mockResolvedValue("abc"); - const result = await service.getNextWebAddresses(prev); - expect(result).toBeNull(); - }); - - it("patches daily domains when cache is expired and checksum unchanged", async () => { - const prev: PhishingDataMeta = { - timestamp: 0, - checksum: "old", - applicationVersion: "1.0.0", - }; - const dailyLines = ["b.com", "c.com"]; - fetchChecksumSpy.mockResolvedValue("old"); - jest.spyOn(service as any, "fetchText").mockResolvedValue(dailyLines); - - setMockBlob(mockBlob); - - const expectedBlob = - "H4sIAAAAAAAA/8vMTSzJzM9TSE7MLchJLElVyE9TyC9KSS1S0FFIz8hLz0ksSQUAtK7XMSYAAAA="; - const result = await service.getNextWebAddresses(prev); - - expect(result!.blob).toBe(expectedBlob); - expect(result!.meta!.checksum).toBe("old"); - }); - - it("fetches all domains when checksum has changed", async () => { - const prev: PhishingDataMeta = { - timestamp: 0, - checksum: "old", - applicationVersion: "1.0.0", - }; - fetchChecksumSpy.mockResolvedValue("new"); - fetchAndCompressSpy.mockResolvedValue("new-blob"); - const result = await service.getNextWebAddresses(prev); - expect(result!.blob).toBe("new-blob"); - expect(result!.meta!.checksum).toBe("new"); + expect(meta).toBeDefined(); + expect(meta.checksum).toBe("new-checksum"); + expect(meta.applicationVersion).toBe("2.0.0"); + expect(meta.timestamp).toBeDefined(); }); }); - describe("compression helpers", () => { - let restore: () => void; + describe("phishing meta data updates", () => { + it("should not update metadata when no data updates occur", async () => { + // Set up existing metadata + const existingMeta = { + checksum: "existing-checksum", + timestamp: Date.now() - 1000, // 1 second ago (not expired) + applicationVersion: "1.0.0", + }; + await fakeGlobalStateProvider.get(PHISHING_DOMAINS_META_KEY).update(() => existingMeta); - beforeEach(async () => { - restore = setupPhishingMocks("abc"); + // Mock conditions where no update is needed + fetchChecksumSpy.mockResolvedValue("existing-checksum"); // Same checksum + platformUtilsService.getApplicationVersion.mockResolvedValue("1.0.0"); // Same version + const mockResponse = { + ok: true, + body: {} as ReadableStream, + } as Response; + apiService.nativeFetch.mockResolvedValue(mockResponse); + + // Trigger background update + const result = await firstValueFrom(service["_backgroundUpdate"](existingMeta)); + + // Verify metadata was NOT updated (same reference returned) + expect(result).toEqual(existingMeta); + expect(result?.timestamp).toBe(existingMeta.timestamp); + + // Verify no data updates were performed + expect(mockIndexedDbService.saveUrlsFromStream).not.toHaveBeenCalled(); + expect(mockIndexedDbService.addUrls).not.toHaveBeenCalled(); }); - afterEach(() => { - if (restore) { - restore(); - } - delete (Uint8Array as any).fromBase64; - jest.restoreAllMocks(); + it("should update metadata when full dataset update occurs due to checksum change", async () => { + // Set up existing metadata + const existingMeta = { + checksum: "old-checksum", + timestamp: Date.now() - 1000, + applicationVersion: "1.0.0", + }; + await fakeGlobalStateProvider.get(PHISHING_DOMAINS_META_KEY).update(() => existingMeta); + + // Mock conditions for full update + fetchChecksumSpy.mockResolvedValue("new-checksum"); // Different checksum + platformUtilsService.getApplicationVersion.mockResolvedValue("1.0.0"); + const mockResponse = { + ok: true, + body: {} as ReadableStream, + } as Response; + apiService.nativeFetch.mockResolvedValue(mockResponse); + + // Trigger background update + const result = await firstValueFrom(service["_backgroundUpdate"](existingMeta)); + + // Verify metadata WAS updated with new values + expect(result?.checksum).toBe("new-checksum"); + expect(result?.timestamp).toBeGreaterThan(existingMeta.timestamp); + + // Verify full update was performed + expect(mockIndexedDbService.saveUrlsFromStream).toHaveBeenCalled(); + expect(mockIndexedDbService.addUrls).not.toHaveBeenCalled(); // Daily should not run }); - describe("_compressString", () => { - it("compresses a string to base64", async () => { - const out = await service["_compressString"]("abc"); - expect(out).toBe("YWJj"); // base64 for 'abc' - }); + it("should update metadata when full dataset update occurs due to version change", async () => { + // Set up existing metadata + const existingMeta = { + checksum: "same-checksum", + timestamp: Date.now() - 1000, + applicationVersion: "1.0.0", + }; + await fakeGlobalStateProvider.get(PHISHING_DOMAINS_META_KEY).update(() => existingMeta); - it("compresses using fallback on older browsers", async () => { - const input = "abc"; - const expected = btoa(encodeURIComponent(input)); - const out = await service["_compressString"](input); - expect(out).toBe(expected); - }); + // Mock conditions for full update + fetchChecksumSpy.mockResolvedValue("same-checksum"); + platformUtilsService.getApplicationVersion.mockResolvedValue("2.0.0"); // Different version + const mockResponse = { + ok: true, + body: {} as ReadableStream, + } as Response; + apiService.nativeFetch.mockResolvedValue(mockResponse); - it("compresses using btoa on error", async () => { - const input = "abc"; - const expected = btoa(encodeURIComponent(input)); - const out = await service["_compressString"](input); - expect(out).toBe(expected); - }); + // Trigger background update + const result = await firstValueFrom(service["_backgroundUpdate"](existingMeta)); + + // Verify metadata WAS updated + expect(result?.applicationVersion).toBe("2.0.0"); + expect(result?.timestamp).toBeGreaterThan(existingMeta.timestamp); + + // Verify full update was performed + expect(mockIndexedDbService.saveUrlsFromStream).toHaveBeenCalled(); + expect(mockIndexedDbService.addUrls).not.toHaveBeenCalled(); }); - describe("_decompressString", () => { - it("decompresses a string from base64", async () => { - const base64 = btoa("ignored"); - const out = await service["_decompressString"](base64); - expect(out).toBe("abc"); - }); - it("decompresses using fallback on older browsers", async () => { - // Provide a fromBase64 implementation - (Uint8Array as any).fromBase64 = (b64: string) => new Uint8Array([100, 101, 102]); + it("should update metadata when daily update occurs due to cache expiration", async () => { + // Set up existing metadata (expired cache) + const existingMeta = { + checksum: "same-checksum", + timestamp: Date.now() - 25 * 60 * 60 * 1000, // 25 hours ago (expired) + applicationVersion: "1.0.0", + }; + await fakeGlobalStateProvider.get(PHISHING_DOMAINS_META_KEY).update(() => existingMeta); - const out = await service["_decompressString"]("ignored"); - expect(out).toBe("abc"); - }); + // Mock conditions for daily update only + fetchChecksumSpy.mockResolvedValue("same-checksum"); // Same checksum (no full update) + platformUtilsService.getApplicationVersion.mockResolvedValue("1.0.0"); // Same version + const mockFullResponse = { + ok: true, + body: {} as ReadableStream, + } as Response; + const mockDailyResponse = { + ok: true, + text: jest.fn().mockResolvedValue("newdomain.com"), + } as unknown as Response; + apiService.nativeFetch + .mockResolvedValueOnce(mockFullResponse) + .mockResolvedValueOnce(mockDailyResponse); - it("decompresses using atob on error", async () => { - const base64 = btoa(encodeURIComponent("abc")); - const out = await service["_decompressString"](base64); - expect(out).toBe("abc"); - }); - }); - }); + // Trigger background update + const result = await firstValueFrom(service["_backgroundUpdate"](existingMeta)); - describe("_loadBlobToMemory", () => { - it("loads blob into memory set", async () => { - const prevBlob = "ignored-base64"; - fakeGlobalStateProvider.getFake(PHISHING_DOMAINS_BLOB_KEY).stateSubject.next(prevBlob); + // Verify metadata WAS updated + expect(result?.timestamp).toBeGreaterThan(existingMeta.timestamp); + expect(result?.checksum).toBe("same-checksum"); - jest.spyOn(service as any, "_decompressString").mockResolvedValue("phish.com\nbadguy.net"); - - // Trigger the load pipeline and allow async RxJS processing to complete - service["_loadBlobToMemory"](); - await flushPromises(); - - const set = service["_webAddressesSet"] as Set; - expect(set).toBeDefined(); - expect(set.has("phish.com")).toBe(true); - expect(set.has("badguy.net")).toBe(true); + // Verify only daily update was performed + expect(mockIndexedDbService.saveUrlsFromStream).not.toHaveBeenCalled(); + expect(mockIndexedDbService.addUrls).toHaveBeenCalledWith(["newdomain.com"]); }); }); }); diff --git a/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.ts b/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.ts index 7d5f04cc276..10268fa7f93 100644 --- a/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.ts +++ b/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.ts @@ -1,17 +1,25 @@ import { catchError, + concatMap, + defer, EMPTY, + exhaustMap, first, - firstValueFrom, + forkJoin, from, + iif, + map, + Observable, of, + retry, share, takeUntil, startWith, Subject, switchMap, tap, - map, + throwError, + timer, } from "rxjs"; import { devFlagEnabled, devFlagValue } from "@bitwarden/browser/platform/flags"; @@ -23,6 +31,8 @@ import { GlobalStateProvider, KeyDefinition, PHISHING_DETECTION_DISK } from "@bi import { getPhishingResources, PhishingResourceType } from "../phishing-resources"; +import { PhishingIndexedDbService } from "./phishing-indexeddb.service"; + /** * Metadata about the phishing data set */ @@ -73,19 +83,16 @@ export class PhishingDataService { // We are adding the destroy to guard against accidental leaks. private _destroy$ = new Subject(); - private _testWebAddresses = this.getTestWebAddresses().concat("phishing.testcategory.com"); // Included for QA to test in prod + private _testWebAddresses = this.getTestWebAddresses(); private _phishingMetaState = this.globalStateProvider.get(PHISHING_DOMAINS_META_KEY); - private _phishingBlobState = this.globalStateProvider.get(PHISHING_DOMAINS_BLOB_KEY); - // In-memory set loaded from blob for fast lookups without reading large storage repeatedly - private _webAddressesSet: Set | null = null; - // Loading variables for web addresses set - // Triggers a load for _webAddressesSet - private _loadTrigger$ = new Subject(); + private indexedDbService: PhishingIndexedDbService; // How often are new web addresses added to the remote? readonly UPDATE_INTERVAL_DURATION = 24 * 60 * 60 * 1000; // 24 hours + private _backgroundUpdateTrigger$ = new Subject(); + private _triggerUpdate$ = new Subject(); update$ = this._triggerUpdate$.pipe( startWith(undefined), // Always emit once @@ -93,12 +100,8 @@ export class PhishingDataService { this._phishingMetaState.state$.pipe( first(), // Only take the first value to avoid an infinite loop when updating the cache below tap((metaState) => { - // Initial loading of web addresses set if not already loaded - if (!this._webAddressesSet) { - this._loadBlobToMemory(); - } - // Perform any updates in the background if needed - void this._backgroundUpdate(metaState); + // Perform any updates in the background + this._backgroundUpdateTrigger$.next(metaState); }), catchError((err: unknown) => { this.logService.error("[PhishingDataService] Background update failed to start.", err); @@ -106,7 +109,6 @@ export class PhishingDataService { }), ), ), - // Stop emitting when dispose() is called takeUntil(this._destroy$), share(), ); @@ -120,6 +122,7 @@ export class PhishingDataService { private resourceType: PhishingResourceType = PhishingResourceType.Links, ) { this.logService.debug("[PhishingDataService] Initializing service..."); + this.indexedDbService = new PhishingIndexedDbService(this.logService); this.taskSchedulerService.registerTaskHandler(ScheduledTaskNames.phishingDomainUpdate, () => { this._triggerUpdate$.next(); }); @@ -127,18 +130,20 @@ export class PhishingDataService { ScheduledTaskNames.phishingDomainUpdate, this.UPDATE_INTERVAL_DURATION, ); - this._setupLoadPipeline(); + this._backgroundUpdateTrigger$ + .pipe( + exhaustMap((currentMeta) => { + return this._backgroundUpdate(currentMeta); + }), + takeUntil(this._destroy$), + ) + .subscribe(); } dispose(): void { // Signal all pipelines to stop and unsubscribe stored subscriptions this._destroy$.next(); this._destroy$.complete(); - - // Clear web addresses set from memory - if (this._webAddressesSet !== null) { - this._webAddressesSet = null; - } } /** @@ -148,105 +153,65 @@ export class PhishingDataService { * @returns True if the URL is a known phishing web address, false otherwise */ async isPhishingWebAddress(url: URL): Promise { - if (!this._webAddressesSet) { - this.logService.debug("[PhishingDataService] Set not loaded; skipping check"); - return false; + // Quick check for QA/dev test addresses + if (this._testWebAddresses.includes(url.href)) { + return true; } - const set = this._webAddressesSet!; const resource = getPhishingResources(this.resourceType); - // Custom matcher per resource - if (resource && resource?.match) { - for (const entry of set) { - if (resource.match(url, entry)) { - return true; + try { + // Quick lookup: check direct presence of href in IndexedDB + const hasUrl = await this.indexedDbService.hasUrl(url.href); + if (hasUrl) { + return true; + } + } catch (err) { + this.logService.error("[PhishingDataService] IndexedDB lookup via hasUrl failed", err); + } + + // If a custom matcher is provided, iterate stored entries and apply the matcher. + if (resource && resource.match) { + try { + const entries = await this.indexedDbService.loadAllUrls(); + for (const entry of entries) { + if (resource.match(url, entry)) { + return true; + } } + } catch (err) { + this.logService.error("[PhishingDataService] Error running custom matcher", err); } return false; } - - // Default set-based lookup - return set.has(url.hostname); - } - - async getNextWebAddresses( - previous: PhishingDataMeta | null, - ): Promise | null> { - const prevMeta = previous ?? { timestamp: 0, checksum: "", applicationVersion: "" }; - const now = Date.now(); - - // Updates to check - const applicationVersion = await this.platformUtilsService.getApplicationVersion(); - const remoteChecksum = await this.fetchPhishingChecksum(this.resourceType); - - // Logic checks - const appVersionChanged = applicationVersion !== prevMeta.applicationVersion; - const masterChecksumChanged = remoteChecksum !== prevMeta.checksum; - - // Check for full updated - if (masterChecksumChanged || appVersionChanged) { - this.logService.info("[PhishingDataService] Checksum or version changed; Fetching ALL."); - const remoteUrl = getPhishingResources(this.resourceType)!.remoteUrl; - const blob = await this.fetchAndCompress(remoteUrl); - return { - blob, - meta: { checksum: remoteChecksum, timestamp: now, applicationVersion }, - }; - } - - // Check for daily file - const isCacheExpired = now - prevMeta.timestamp > this.UPDATE_INTERVAL_DURATION; - - if (isCacheExpired) { - this.logService.info("[PhishingDataService] Daily cache expired; Fetching TODAY's"); - const url = getPhishingResources(this.resourceType)!.todayUrl; - const newLines = await this.fetchText(url); - const prevBlob = (await firstValueFrom(this._phishingBlobState.state$)) ?? ""; - const oldText = prevBlob ? await this._decompressString(prevBlob) : ""; - - // Join the new lines to the existing list - const combined = (oldText ? oldText + "\n" : "") + newLines.join("\n"); - - return { - blob: await this._compressString(combined), - meta: { - checksum: remoteChecksum, - timestamp: now, // Reset the timestamp - applicationVersion, - }, - }; - } - - return null; + return false; } + // [FIXME] Pull fetches into api service private async fetchPhishingChecksum(type: PhishingResourceType = PhishingResourceType.Domains) { const checksumUrl = getPhishingResources(type)!.checksumUrl; - const response = await this.apiService.nativeFetch(new Request(checksumUrl)); - if (!response.ok) { - throw new Error(`[PhishingDataService] Failed to fetch checksum: ${response.status}`); - } - return response.text(); - } - private async fetchAndCompress(url: string): Promise { - const response = await this.apiService.nativeFetch(new Request(url)); - if (!response.ok) { - throw new Error("Fetch failed"); - } + this.logService.debug(`[PhishingDataService] Fetching checksum from: ${checksumUrl}`); - const downloadStream = response.body!; - // Pipe through CompressionStream while it's downloading - const compressedStream = downloadStream.pipeThrough(new CompressionStream("gzip")); - // Convert to ArrayBuffer - const buffer = await new Response(compressedStream).arrayBuffer(); - const bytes = new Uint8Array(buffer); + try { + const response = await this.apiService.nativeFetch(new Request(checksumUrl)); + if (!response.ok) { + throw new Error( + `[PhishingDataService] Failed to fetch checksum: ${response.status} ${response.statusText}`, + ); + } - // Return as Base64 for storage - return (bytes as any).toBase64 ? (bytes as any).toBase64() : this._uint8ToBase64Fallback(bytes); + return await response.text(); + } catch (error) { + this.logService.error( + `[PhishingDataService] Checksum fetch failed from ${checksumUrl}`, + error, + ); + throw error; + } } - private async fetchText(url: string) { + // [FIXME] Pull fetches into api service + private async fetchToday(url: string) { const response = await this.apiService.nativeFetch(new Request(url)); if (!response.ok) { @@ -258,171 +223,196 @@ export class PhishingDataService { private getTestWebAddresses() { const flag = devFlagEnabled("testPhishingUrls"); + // Normalize URLs by converting to URL object and back to ensure consistent format (e.g., trailing slashes) + const testWebAddresses: string[] = [ + new URL("http://phishing.testcategory.com").href, + new URL("https://phishing.testcategory.com").href, + new URL("https://phishing.testcategory.com/block").href, + ]; if (!flag) { - return []; + return testWebAddresses; } const webAddresses = devFlagValue("testPhishingUrls") as unknown[]; if (webAddresses && webAddresses instanceof Array) { this.logService.debug( - "[PhishingDetectionService] Dev flag enabled for testing phishing detection. Adding test phishing web addresses:", + "[PhishingDataService] Dev flag enabled for testing phishing detection. Adding test phishing web addresses:", webAddresses, ); - return webAddresses as string[]; + // Normalize dev flag URLs as well, filtering out invalid ones + const normalizedDevAddresses = (webAddresses as string[]) + .filter((addr) => { + try { + new URL(addr); + return true; + } catch { + this.logService.warning( + `[PhishingDataService] Invalid test URL in dev flag, skipping: ${addr}`, + ); + return false; + } + }) + .map((addr) => new URL(addr).href); + return testWebAddresses.concat(normalizedDevAddresses); } - return []; + return testWebAddresses; } - // Runs the update flow in the background and retries up to 3 times on failure - private async _backgroundUpdate(previous: PhishingDataMeta | null): Promise { - this.logService.info(`[PhishingDataService] Update web addresses triggered...`); - const phishingMeta: PhishingDataMeta = previous ?? { - timestamp: 0, - checksum: "", - applicationVersion: "", - }; - // Start time for logging performance of update - const startTime = Date.now(); - const maxAttempts = 3; - const delayMs = 5 * 60 * 1000; // 5 minutes + private _getUpdatedMeta(): Observable { + return defer(() => { + const now = Date.now(); - for (let attempt = 1; attempt <= maxAttempts; attempt++) { - try { - const next = await this.getNextWebAddresses(phishingMeta); - if (!next) { - return; // No update needed - } + return forkJoin({ + applicationVersion: from(this.platformUtilsService.getApplicationVersion()), + remoteChecksum: from(this.fetchPhishingChecksum(this.resourceType)), + }).pipe( + map(({ applicationVersion, remoteChecksum }) => { + return { + checksum: remoteChecksum, + timestamp: now, + applicationVersion, + }; + }), + ); + }); + } - if (next.meta) { - await this._phishingMetaState.update(() => next!.meta!); - } - if (next.blob) { - await this._phishingBlobState.update(() => next!.blob!); - this._loadBlobToMemory(); - } + // Streams the full phishing data set and saves it to IndexedDB + private _updateFullDataSet() { + const resource = getPhishingResources(this.resourceType); - // Performance logging - const elapsed = Date.now() - startTime; - this.logService.info(`[PhishingDataService] Phishing data cache updated in ${elapsed}ms`); - } catch (err) { - this.logService.error( - `[PhishingDataService] Unable to update web addresses. Attempt ${attempt}.`, - err, - ); - if (attempt < maxAttempts) { - await new Promise((res) => setTimeout(res, delayMs)); - } else { - const elapsed = Date.now() - startTime; - this.logService.error( - `[PhishingDataService] Retries unsuccessful after ${elapsed}ms. Unable to update web addresses.`, - err, + if (!resource?.remoteUrl) { + return throwError(() => new Error("Invalid resource URL")); + } + + this.logService.info(`[PhishingDataService] Starting FULL update using ${resource.remoteUrl}`); + return from(this.apiService.nativeFetch(new Request(resource.remoteUrl))).pipe( + switchMap((response) => { + if (!response.ok || !response.body) { + return throwError( + () => + new Error( + `[PhishingDataService] Full fetch failed: ${response.status}, ${response.statusText}`, + ), ); } - } - } + + return from(this.indexedDbService.saveUrlsFromStream(response.body)); + }), + catchError((err: unknown) => { + this.logService.error( + `[PhishingDataService] Full dataset update failed using primary source ${err}`, + ); + this.logService.warning( + `[PhishingDataService] Falling back to: ${resource.fallbackUrl} (Note: Fallback data may be less up-to-date)`, + ); + // Try fallback URL + return from(this.apiService.nativeFetch(new Request(resource.fallbackUrl))).pipe( + switchMap((fallbackResponse) => { + if (!fallbackResponse.ok || !fallbackResponse.body) { + return throwError( + () => + new Error( + `[PhishingDataService] Fallback fetch failed: ${fallbackResponse.status}, ${fallbackResponse.statusText}`, + ), + ); + } + + return from(this.indexedDbService.saveUrlsFromStream(fallbackResponse.body)); + }), + catchError((fallbackError: unknown) => { + this.logService.error(`[PhishingDataService] Fallback source failed`); + return throwError(() => fallbackError); + }), + ); + }), + ); } - // Sets up the load pipeline to load the blob into memory when triggered - private _setupLoadPipeline(): void { - this._loadTrigger$ - .pipe( - switchMap(() => - this._phishingBlobState.state$.pipe( - first(), - switchMap((blobBase64) => { - if (!blobBase64) { - return of(undefined); - } - // Note: _decompressString wraps a promise that cannot be aborted - // If performance improvements are needed, consider migrating to a cancellable approach - return from(this._decompressString(blobBase64)).pipe( - map((text) => { - const lines = text.split(/\r?\n/); - const newWebAddressesSet = new Set(lines); - this._testWebAddresses.forEach((a) => newWebAddressesSet.add(a)); - this._webAddressesSet = new Set(newWebAddressesSet); - this.logService.info( - `[PhishingDataService] loaded ${this._webAddressesSet.size} addresses into memory from blob`, - ); - }), + private _updateDailyDataSet() { + this.logService.info("[PhishingDataService] Starting DAILY update..."); + + const todayUrl = getPhishingResources(this.resourceType)?.todayUrl; + if (!todayUrl) { + return throwError(() => new Error("Today URL missing")); + } + + return from(this.fetchToday(todayUrl)).pipe( + switchMap((lines) => from(this.indexedDbService.addUrls(lines))), + ); + } + + private _backgroundUpdate( + previous: PhishingDataMeta | null, + ): Observable { + // Use defer to restart timer if retry is activated + return defer(() => { + const startTime = Date.now(); + this.logService.info(`[PhishingDataService] Update triggered...`); + + // Get updated meta info + return this._getUpdatedMeta().pipe( + // Update full data set if application version or checksum changed + concatMap((newMeta) => + iif( + () => { + const appVersionChanged = newMeta.applicationVersion !== previous?.applicationVersion; + const checksumChanged = newMeta.checksum !== previous?.checksum; + + this.logService.info( + `[PhishingDataService] Checking if full update is needed: appVersionChanged=${appVersionChanged}, checksumChanged=${checksumChanged}`, ); - }), - catchError((err: unknown) => { - this.logService.error("[PhishingDataService] Failed to load blob into memory", err); - return of(undefined); - }), + return appVersionChanged || checksumChanged; + }, + this._updateFullDataSet().pipe(map(() => ({ meta: newMeta, updated: true }))), + of({ meta: newMeta, updated: false }), ), ), - catchError((err: unknown) => { - this.logService.error("[PhishingDataService] Load pipeline failed", err); - return of(undefined); + // Update daily data set if last update was more than UPDATE_INTERVAL_DURATION ago + concatMap((result) => + iif( + () => { + const isCacheExpired = + Date.now() - (previous?.timestamp ?? 0) > this.UPDATE_INTERVAL_DURATION; + return isCacheExpired; + }, + this._updateDailyDataSet().pipe(map(() => ({ meta: result.meta, updated: true }))), + of(result), + ), + ), + concatMap((result) => { + if (!result.updated) { + this.logService.debug(`[PhishingDataService] No update needed, metadata unchanged`); + return of(previous); + } + + this.logService.debug(`[PhishingDataService] Updated phishing meta data:`, result.meta); + return from(this._phishingMetaState.update(() => result.meta)).pipe( + tap(() => { + const elapsed = Date.now() - startTime; + this.logService.info(`[PhishingDataService] Updated data set in ${elapsed}ms`); + }), + ); }), - takeUntil(this._destroy$), - share(), - ) - .subscribe(); - } - - // [FIXME] Move compression helpers to a shared utils library - // to separate from phishing data service. - // ------------------------- Blob and Compression Handling ------------------------- - private async _compressString(input: string): Promise { - try { - const stream = new Blob([input]).stream().pipeThrough(new CompressionStream("gzip")); - - const compressedBuffer = await new Response(stream).arrayBuffer(); - const bytes = new Uint8Array(compressedBuffer); - - // Modern browsers support direct toBase64 conversion - // For older support, use fallback - return (bytes as any).toBase64 - ? (bytes as any).toBase64() - : this._uint8ToBase64Fallback(bytes); - } catch (err) { - this.logService.error("[PhishingDataService] Compression failed", err); - return btoa(encodeURIComponent(input)); - } - } - - private async _decompressString(base64: string): Promise { - try { - // Modern browsers support direct toBase64 conversion - // For older support, use fallback - const bytes = (Uint8Array as any).fromBase64 - ? (Uint8Array as any).fromBase64(base64) - : this._base64ToUint8Fallback(base64); - if (bytes == null) { - throw new Error("Base64 decoding resulted in null"); - } - const byteResponse = new Response(bytes); - if (!byteResponse.body) { - throw new Error("Response body is null"); - } - const stream = byteResponse.body.pipeThrough(new DecompressionStream("gzip")); - const streamResponse = new Response(stream); - return await streamResponse.text(); - } catch (err) { - this.logService.error("[PhishingDataService] Decompression failed", err); - return decodeURIComponent(atob(base64)); - } - } - - // Trigger a load of the blob into memory - private _loadBlobToMemory(): void { - this._loadTrigger$.next(); - } - private _uint8ToBase64Fallback(bytes: Uint8Array): string { - const CHUNK_SIZE = 0x8000; // 32KB chunks - let binary = ""; - for (let i = 0; i < bytes.length; i += CHUNK_SIZE) { - const chunk = bytes.subarray(i, i + CHUNK_SIZE); - binary += String.fromCharCode.apply(null, chunk as any); - } - return btoa(binary); - } - - private _base64ToUint8Fallback(base64: string): Uint8Array { - const binary = atob(base64); - return Uint8Array.from(binary, (c) => c.charCodeAt(0)); + retry({ + count: 2, // Total 3 attempts (initial + 2 retries) + delay: (error, retryCount) => { + this.logService.error( + `[PhishingDataService] Attempt ${retryCount} failed. Retrying in 5m...`, + error, + ); + return timer(5 * 60 * 1000); // Wait 5 mins before next attempt + }, + }), + catchError((err: unknown) => { + const elapsed = Date.now() - startTime; + this.logService.error( + `[PhishingDataService] Retries unsuccessful after ${elapsed}ms.`, + err, + ); + return of(previous); + }), + ); + }); } } diff --git a/apps/browser/src/dirt/phishing-detection/services/phishing-indexeddb.service.spec.ts b/apps/browser/src/dirt/phishing-detection/services/phishing-indexeddb.service.spec.ts index 75bd634b1fc..99e101cc199 100644 --- a/apps/browser/src/dirt/phishing-detection/services/phishing-indexeddb.service.spec.ts +++ b/apps/browser/src/dirt/phishing-detection/services/phishing-indexeddb.service.spec.ts @@ -215,6 +215,86 @@ describe("PhishingIndexedDbService", () => { }); }); + describe("addUrls", () => { + it("appends URLs to IndexedDB without clearing", async () => { + // Pre-populate store with existing data + mockStore.set("https://existing.com", { url: "https://existing.com" }); + + const urls = ["https://phishing.com", "https://malware.net"]; + const result = await service.addUrls(urls); + + expect(result).toBe(true); + expect(mockDb.transaction).toHaveBeenCalledWith("phishing-urls", "readwrite"); + expect(mockObjectStore.clear).not.toHaveBeenCalled(); + expect(mockObjectStore.put).toHaveBeenCalledTimes(2); + // Existing data should still be present + expect(mockStore.has("https://existing.com")).toBe(true); + expect(mockStore.size).toBe(3); + expect(mockDb.close).toHaveBeenCalled(); + }); + + it("handles empty array without clearing", async () => { + mockStore.set("https://existing.com", { url: "https://existing.com" }); + + const result = await service.addUrls([]); + + expect(result).toBe(true); + expect(mockObjectStore.clear).not.toHaveBeenCalled(); + expect(mockStore.has("https://existing.com")).toBe(true); + }); + + it("trims whitespace from URLs", async () => { + const urls = [" https://example.com ", "\nhttps://test.org\n"]; + + await service.addUrls(urls); + + expect(mockObjectStore.put).toHaveBeenCalledWith({ url: "https://example.com" }); + expect(mockObjectStore.put).toHaveBeenCalledWith({ url: "https://test.org" }); + }); + + it("skips empty lines", async () => { + const urls = ["https://example.com", "", " ", "https://test.org"]; + + await service.addUrls(urls); + + expect(mockObjectStore.put).toHaveBeenCalledTimes(2); + }); + + it("handles duplicate URLs via upsert", async () => { + mockStore.set("https://example.com", { url: "https://example.com" }); + + const urls = [ + "https://example.com", // Already exists + "https://test.org", + ]; + + const result = await service.addUrls(urls); + + expect(result).toBe(true); + expect(mockObjectStore.put).toHaveBeenCalledTimes(2); + expect(mockStore.size).toBe(2); + }); + + it("logs error and returns false on failure", async () => { + const error = new Error("IndexedDB error"); + mockOpenRequest.error = error; + (global.indexedDB.open as jest.Mock).mockImplementation(() => { + setTimeout(() => { + mockOpenRequest.onerror?.(); + }, 0); + return mockOpenRequest; + }); + + const result = await service.addUrls(["https://test.com"]); + + expect(result).toBe(false); + expect(logService.error).toHaveBeenCalledWith( + "[PhishingIndexedDbService] Add failed", + expect.any(Error), + ); + }); + }); + describe("hasUrl", () => { it("returns true for existing URL", async () => { mockStore.set("https://example.com", { url: "https://example.com" }); diff --git a/apps/browser/src/dirt/phishing-detection/services/phishing-indexeddb.service.ts b/apps/browser/src/dirt/phishing-detection/services/phishing-indexeddb.service.ts index 099839a38d9..fe0f10da221 100644 --- a/apps/browser/src/dirt/phishing-detection/services/phishing-indexeddb.service.ts +++ b/apps/browser/src/dirt/phishing-detection/services/phishing-indexeddb.service.ts @@ -53,6 +53,9 @@ export class PhishingIndexedDbService { * @returns `true` if save succeeded, `false` on error */ async saveUrls(urls: string[]): Promise { + this.logService.debug( + `[PhishingIndexedDbService] Clearing and saving ${urls.length} to the store...`, + ); let db: IDBDatabase | null = null; try { db = await this.openDatabase(); @@ -67,6 +70,29 @@ export class PhishingIndexedDbService { } } + /** + * Adds an array of phishing URLs to IndexedDB. + * Appends to existing data without clearing. + * + * @param urls - Array of phishing URLs to add + * @returns `true` if add succeeded, `false` on error + */ + async addUrls(urls: string[]): Promise { + this.logService.debug(`[PhishingIndexedDbService] Adding ${urls.length} to the store...`); + + let db: IDBDatabase | null = null; + try { + db = await this.openDatabase(); + await this.saveChunked(db, urls); + return true; + } catch (error) { + this.logService.error("[PhishingIndexedDbService] Add failed", error); + return false; + } finally { + db?.close(); + } + } + /** * Saves URLs in chunks to prevent transaction timeouts and UI freezes. */ @@ -100,6 +126,8 @@ export class PhishingIndexedDbService { * @returns `true` if URL exists, `false` if not found or on error */ async hasUrl(url: string): Promise { + this.logService.debug(`[PhishingIndexedDbService] Checking if store contains ${url}...`); + let db: IDBDatabase | null = null; try { db = await this.openDatabase(); @@ -130,6 +158,8 @@ export class PhishingIndexedDbService { * @returns Array of all stored URLs, or empty array on error */ async loadAllUrls(): Promise { + this.logService.debug("[PhishingIndexedDbService] Loading all urls from store..."); + let db: IDBDatabase | null = null; try { db = await this.openDatabase(); @@ -173,11 +203,16 @@ export class PhishingIndexedDbService { * @returns `true` if save succeeded, `false` on error */ async saveUrlsFromStream(stream: ReadableStream): Promise { + this.logService.debug("[PhishingIndexedDbService] Saving urls to the store from stream..."); + let db: IDBDatabase | null = null; try { db = await this.openDatabase(); await this.clearStore(db); await this.processStream(db, stream); + this.logService.info( + "[PhishingIndexedDbService] Finished saving urls to the store from stream.", + ); return true; } catch (error) { this.logService.error("[PhishingIndexedDbService] Stream save failed", error);