From 417dfdd305d3099bfdc23d5ba172d98ce789e95a Mon Sep 17 00:00:00 2001 From: Leslie Tilton <23057410+Banrion@users.noreply.github.com> Date: Thu, 15 Jan 2026 12:36:03 -0600 Subject: [PATCH] [PM-30319][PM-30685] Separate and compress phishing data storage (#18337) * Add logs for debugging in phishing-detection-settings.service * Update phishing data service to separate web addresses from meta data for performant writes. Store compressed string instead of array * Cleanup * Updated test cases * Cleanup comments * Fix fallback encoding/decoding mismatch * Fix type checking --- .../browser/src/background/main.background.ts | 1 + .../services/phishing-data.service.spec.ts | 291 +++++++++++++---- .../services/phishing-data.service.ts | 292 +++++++++++++----- .../src/popup/services/services.module.ts | 1 + ...hishing-detection-settings.service.spec.ts | 4 + .../phishing-detection-settings.service.ts | 31 +- 6 files changed, 478 insertions(+), 142 deletions(-) diff --git a/apps/browser/src/background/main.background.ts b/apps/browser/src/background/main.background.ts index b9b41943b04..9d551ec2622 100644 --- a/apps/browser/src/background/main.background.ts +++ b/apps/browser/src/background/main.background.ts @@ -1510,6 +1510,7 @@ export default class MainBackground { this.accountService, this.billingAccountProfileStateService, this.configService, + this.logService, this.organizationService, this.platformUtilsService, this.stateProvider, diff --git a/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.spec.ts b/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.spec.ts index 30aa947092d..746f5a1f8f7 100644 --- a/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.spec.ts +++ b/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.spec.ts @@ -9,7 +9,66 @@ import { import { FakeGlobalStateProvider } from "@bitwarden/common/spec"; import { LogService } from "@bitwarden/logging"; -import { 
PhishingDataService, PhishingData, PHISHING_DOMAINS_KEY } from "./phishing-data.service"; +import { + PhishingDataService, + PHISHING_DOMAINS_META_KEY, + PHISHING_DOMAINS_BLOB_KEY, + PhishingDataMeta, + PhishingDataBlob, +} from "./phishing-data.service"; + +const flushPromises = () => + new Promise((resolve) => jest.requireActual("timers").setImmediate(resolve)); + +// [FIXME] Move mocking and compression helpers to a shared test utils library +// to separate from phishing data service tests. +export const setupPhishingMocks = (mockedResult: string | ArrayBuffer = "mocked-data") => { + // Store original globals + const originals = { + Response: global.Response, + CompressionStream: global.CompressionStream, + DecompressionStream: global.DecompressionStream, + Blob: global.Blob, + atob: global.atob, + btoa: global.btoa, + }; + + // Mock missing or browser-only globals + global.atob = (str) => Buffer.from(str, "base64").toString("binary"); + global.btoa = (str) => Buffer.from(str, "binary").toString("base64"); + + (global as any).CompressionStream = class {}; + (global as any).DecompressionStream = class {}; + + global.Blob = class { + constructor(public parts: any[]) {} + stream() { + return { pipeThrough: () => ({}) }; + } + } as any; + + global.Response = class { + body = { pipeThrough: () => ({}) }; + // Return string for decompression + text() { + return Promise.resolve(typeof mockedResult === "string" ? 
mockedResult : ""); + } + // Return ArrayBuffer for compression + arrayBuffer() { + if (typeof mockedResult === "string") { + const bytes = new TextEncoder().encode(mockedResult); + return Promise.resolve(bytes.buffer); + } + + return Promise.resolve(mockedResult); + } + } as any; + + // Cleanup function + return () => { + Object.assign(global, originals); + }; +}; describe("PhishingDataService", () => { let service: PhishingDataService; @@ -17,17 +76,30 @@ describe("PhishingDataService", () => { let taskSchedulerService: TaskSchedulerService; let logService: MockProxy; let platformUtilsService: MockProxy; - const stateProvider: FakeGlobalStateProvider = new FakeGlobalStateProvider(); + const fakeGlobalStateProvider: FakeGlobalStateProvider = new FakeGlobalStateProvider(); - const setMockState = (state: PhishingData) => { - stateProvider.getFake(PHISHING_DOMAINS_KEY).stateSubject.next(state); + const setMockMeta = (state: PhishingDataMeta) => { + fakeGlobalStateProvider.getFake(PHISHING_DOMAINS_META_KEY).stateSubject.next(state); + return state; + }; + const setMockBlob = (state: PhishingDataBlob) => { + fakeGlobalStateProvider.getFake(PHISHING_DOMAINS_BLOB_KEY).stateSubject.next(state); return state; }; let fetchChecksumSpy: jest.SpyInstance; - let fetchWebAddressesSpy: jest.SpyInstance; + let fetchAndCompressSpy: jest.SpyInstance; - beforeEach(() => { + const mockMeta: PhishingDataMeta = { + checksum: "abc", + timestamp: Date.now(), + applicationVersion: "1.0.0", + }; + const mockBlob = "http://phish.com\nhttps://badguy.net"; + const mockCompressedBlob = + "H4sIAAAAAAAA/8vMTSzJzM9TSE7MLchJLElVyE9TyC9KSS1S0FFIz8hLz0ksSQUAtK7XMSYAAAA="; + + beforeEach(async () => { jest.useFakeTimers(); apiService = mock(); logService = mock(); @@ -40,54 +112,75 @@ describe("PhishingDataService", () => { service = new PhishingDataService( apiService, taskSchedulerService, - stateProvider, + fakeGlobalStateProvider, logService, platformUtilsService, ); - fetchChecksumSpy = 
jest.spyOn(service as any, "fetchPhishingChecksum"); - fetchWebAddressesSpy = jest.spyOn(service as any, "fetchPhishingWebAddresses"); + fetchAndCompressSpy = jest.spyOn(service as any, "fetchAndCompress"); + + fetchChecksumSpy.mockResolvedValue("new-checksum"); + fetchAndCompressSpy.mockResolvedValue("compressed-blob"); + }); + + describe("initialization", () => { + beforeEach(() => { + jest.spyOn(service as any, "_compressString").mockResolvedValue(mockCompressedBlob); + jest.spyOn(service as any, "_decompressString").mockResolvedValue(mockBlob); + }); + + it("should perform background update", async () => { + platformUtilsService.getApplicationVersion.mockResolvedValue("1.0.x"); + jest + .spyOn(service as any, "getNextWebAddresses") + .mockResolvedValue({ meta: mockMeta, blob: mockBlob }); + + setMockBlob(mockBlob); + setMockMeta(mockMeta); + + const sub = service.update$.subscribe(); + await flushPromises(); + + const url = new URL("http://phish.com"); + const QAurl = new URL("http://phishing.testcategory.com"); + expect(await service.isPhishingWebAddress(url)).toBe(true); + expect(await service.isPhishingWebAddress(QAurl)).toBe(true); + + sub.unsubscribe(); + }); }); describe("isPhishingWebAddress", () => { + beforeEach(() => { + jest.spyOn(service as any, "_compressString").mockResolvedValue(mockCompressedBlob); + jest.spyOn(service as any, "_decompressString").mockResolvedValue(mockBlob); + }); + it("should detect a phishing web address", async () => { - setMockState({ - webAddresses: ["phish.com", "badguy.net"], - timestamp: Date.now(), - checksum: "abc123", - applicationVersion: "1.0.0", - }); + service["_webAddressesSet"] = new Set(["phish.com", "badguy.net"]); + const url = new URL("http://phish.com"); const result = await service.isPhishingWebAddress(url); + expect(result).toBe(true); }); it("should not detect a safe web address", async () => { - setMockState({ - webAddresses: ["phish.com", "badguy.net"], - timestamp: Date.now(), - checksum: "abc123", - 
applicationVersion: "1.0.0", - }); + service["_webAddressesSet"] = new Set(["phish.com", "badguy.net"]); const url = new URL("http://safe.com"); const result = await service.isPhishingWebAddress(url); expect(result).toBe(false); }); it("should match against root web address", async () => { - setMockState({ - webAddresses: ["phish.com", "badguy.net"], - timestamp: Date.now(), - checksum: "abc123", - applicationVersion: "1.0.0", - }); + service["_webAddressesSet"] = new Set(["phish.com", "badguy.net"]); const url = new URL("http://phish.com/about"); const result = await service.isPhishingWebAddress(url); expect(result).toBe(true); }); it("should not error on empty state", async () => { - setMockState(undefined as any); + service["_webAddressesSet"] = null; const url = new URL("http://phish.com/about"); const result = await service.isPhishingWebAddress(url); expect(result).toBe(false); @@ -95,64 +188,142 @@ describe("PhishingDataService", () => { }); describe("getNextWebAddresses", () => { + beforeEach(() => { + jest.spyOn(service as any, "_compressString").mockResolvedValue(mockCompressedBlob); + jest.spyOn(service as any, "_decompressString").mockResolvedValue(mockBlob); + }); + it("refetches all web addresses if applicationVersion has changed", async () => { - const prev: PhishingData = { - webAddresses: ["a.com"], + const prev: PhishingDataMeta = { timestamp: Date.now() - 60000, checksum: "old", applicationVersion: "1.0.0", }; fetchChecksumSpy.mockResolvedValue("new"); - fetchWebAddressesSpy.mockResolvedValue(["d.com", "e.com"]); platformUtilsService.getApplicationVersion.mockResolvedValue("2.0.0"); const result = await service.getNextWebAddresses(prev); - expect(result!.webAddresses).toEqual(["d.com", "e.com"]); - expect(result!.checksum).toBe("new"); - expect(result!.applicationVersion).toBe("2.0.0"); + expect(result!.blob).toBe("compressed-blob"); + expect(result!.meta!.checksum).toBe("new"); + expect(result!.meta!.applicationVersion).toBe("2.0.0"); }); - 
it("only updates timestamp if checksum matches", async () => { - const prev: PhishingData = { - webAddresses: ["a.com"], - timestamp: Date.now() - 60000, + it("returns null when checksum matches and cache not expired", async () => { + const prev: PhishingDataMeta = { + timestamp: Date.now(), checksum: "abc", applicationVersion: "1.0.0", }; fetchChecksumSpy.mockResolvedValue("abc"); const result = await service.getNextWebAddresses(prev); - expect(result!.webAddresses).toEqual(prev.webAddresses); - expect(result!.checksum).toBe("abc"); - expect(result!.timestamp).not.toBe(prev.timestamp); + expect(result).toBeNull(); }); - it("patches daily domains if cache is fresh", async () => { - const prev: PhishingData = { - webAddresses: ["a.com"], - timestamp: Date.now() - 60000, + it("patches daily domains when cache is expired and checksum unchanged", async () => { + const prev: PhishingDataMeta = { + timestamp: 0, + checksum: "old", + applicationVersion: "1.0.0", + }; + const dailyLines = ["b.com", "c.com"]; + fetchChecksumSpy.mockResolvedValue("old"); + jest.spyOn(service as any, "fetchText").mockResolvedValue(dailyLines); + + setMockBlob(mockBlob); + + const expectedBlob = + "H4sIAAAAAAAA/8vMTSzJzM9TSE7MLchJLElVyE9TyC9KSS1S0FFIz8hLz0ksSQUAtK7XMSYAAAA="; + const result = await service.getNextWebAddresses(prev); + + expect(result!.blob).toBe(expectedBlob); + expect(result!.meta!.checksum).toBe("old"); + }); + + it("fetches all domains when checksum has changed", async () => { + const prev: PhishingDataMeta = { + timestamp: 0, checksum: "old", applicationVersion: "1.0.0", }; fetchChecksumSpy.mockResolvedValue("new"); - fetchWebAddressesSpy.mockResolvedValue(["b.com", "c.com"]); + fetchAndCompressSpy.mockResolvedValue("new-blob"); const result = await service.getNextWebAddresses(prev); - expect(result!.webAddresses).toEqual(["a.com", "b.com", "c.com"]); - expect(result!.checksum).toBe("new"); + expect(result!.blob).toBe("new-blob"); + 
expect(result!.meta!.checksum).toBe("new"); + }); + }); + + describe("compression helpers", () => { + let restore: () => void; + + beforeEach(async () => { + restore = setupPhishingMocks("abc"); }); - it("fetches all domains if cache is old", async () => { - const prev: PhishingData = { - webAddresses: ["a.com"], - timestamp: Date.now() - 2 * 24 * 60 * 60 * 1000, - checksum: "old", - applicationVersion: "1.0.0", - }; - fetchChecksumSpy.mockResolvedValue("new"); - fetchWebAddressesSpy.mockResolvedValue(["d.com", "e.com"]); - const result = await service.getNextWebAddresses(prev); - expect(result!.webAddresses).toEqual(["d.com", "e.com"]); - expect(result!.checksum).toBe("new"); + afterEach(() => { + if (restore) { + restore(); + } + delete (Uint8Array as any).fromBase64; + jest.restoreAllMocks(); + }); + + describe("_compressString", () => { + it("compresses a string to base64", async () => { + const out = await service["_compressString"]("abc"); + expect(out).toBe("YWJj"); // base64 for 'abc' + }); + + it("compresses using fallback on older browsers", async () => { + const input = "abc"; + const expected = btoa(encodeURIComponent(input)); + const out = await service["_compressString"](input); + expect(out).toBe(expected); + }); + + it("compresses using btoa on error", async () => { + const input = "abc"; + const expected = btoa(encodeURIComponent(input)); + const out = await service["_compressString"](input); + expect(out).toBe(expected); + }); + }); + describe("_decompressString", () => { + it("decompresses a string from base64", async () => { + const base64 = btoa("ignored"); + const out = await service["_decompressString"](base64); + expect(out).toBe("abc"); + }); + + it("decompresses using fallback on older browsers", async () => { + // Provide a fromBase64 implementation + (Uint8Array as any).fromBase64 = (b64: string) => new Uint8Array([100, 101, 102]); + + const out = await service["_decompressString"]("ignored"); + expect(out).toBe("abc"); + }); + + 
it("decompresses using atob on error", async () => { + const base64 = btoa(encodeURIComponent("abc")); + const out = await service["_decompressString"](base64); + expect(out).toBe("abc"); + }); + }); + }); + + describe("_loadBlobToMemory", () => { + it("loads blob into memory set", async () => { + const prevBlob = "ignored-base64"; + fakeGlobalStateProvider.getFake(PHISHING_DOMAINS_BLOB_KEY).stateSubject.next(prevBlob); + + jest.spyOn(service as any, "_decompressString").mockResolvedValue("phish.com\nbadguy.net"); + + await service["_loadBlobToMemory"](); + const set = service["_webAddressesSet"] as Set; + expect(set).toBeDefined(); + expect(set.has("phish.com")).toBe(true); + expect(set.has("badguy.net")).toBe(true); }); }); }); diff --git a/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.ts b/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.ts index 4bc31f8ea60..85e91b06a6b 100644 --- a/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.ts +++ b/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.ts @@ -3,7 +3,6 @@ import { EMPTY, first, firstValueFrom, - map, share, startWith, Subject, @@ -20,11 +19,14 @@ import { GlobalStateProvider, KeyDefinition, PHISHING_DETECTION_DISK } from "@bi import { getPhishingResources, PhishingResourceType } from "../phishing-resources"; -export type PhishingData = { - webAddresses: string[]; - timestamp: number; +/** + * Metadata about the phishing data set + */ +export type PhishingDataMeta = { + /** The last known checksum of the phishing data set */ checksum: string; - + /** The last time the data set was updated */ + timestamp: number; /** * We store the application version to refetch the entire dataset on a new client release. * This counteracts daily appends updates not removing inactive or false positive web addresses. 
@@ -32,30 +34,42 @@ export type PhishingData = { applicationVersion: string; }; -export const PHISHING_DOMAINS_KEY = new KeyDefinition( +/** + * The phishing data blob is a string representation of the phishing web addresses + */ +export type PhishingDataBlob = string; +export type PhishingData = { meta: PhishingDataMeta; blob: PhishingDataBlob }; + +export const PHISHING_DOMAINS_META_KEY = new KeyDefinition( PHISHING_DETECTION_DISK, - "phishingDomains", + "phishingDomainsMeta", { - deserializer: (value: PhishingData) => - value ?? { webAddresses: [], timestamp: 0, checksum: "", applicationVersion: "" }, + deserializer: (value: PhishingDataMeta) => { + return { + checksum: value?.checksum ?? "", + timestamp: value?.timestamp ?? 0, + applicationVersion: value?.applicationVersion ?? "", + }; + }, + }, +); + +export const PHISHING_DOMAINS_BLOB_KEY = new KeyDefinition( + PHISHING_DETECTION_DISK, + "phishingDomainsBlob", + { + deserializer: (value: string) => value ?? "", }, ); /** Coordinates fetching, caching, and patching of known phishing web addresses */ export class PhishingDataService { - private _testWebAddresses = this.getTestWebAddresses(); - private _cachedState = this.globalStateProvider.get(PHISHING_DOMAINS_KEY); - private _webAddresses$ = this._cachedState.state$.pipe( - map( - (state) => - new Set( - (state?.webAddresses?.filter((line) => line.trim().length > 0) ?? 
[]).concat( - this._testWebAddresses, - "phishing.testcategory.com", // Included for QA to test in prod - ), - ), - ), - ); + private _testWebAddresses = this.getTestWebAddresses().concat("phishing.testcategory.com"); // Included for QA to test in prod + private _phishingMetaState = this.globalStateProvider.get(PHISHING_DOMAINS_META_KEY); + private _phishingBlobState = this.globalStateProvider.get(PHISHING_DOMAINS_BLOB_KEY); + + // In-memory set loaded from blob for fast lookups without reading large storage repeatedly + private _webAddressesSet: Set | null = null; // How often are new web addresses added to the remote? readonly UPDATE_INTERVAL_DURATION = 24 * 60 * 60 * 1000; // 24 hours @@ -64,10 +78,11 @@ export class PhishingDataService { update$ = this._triggerUpdate$.pipe( startWith(undefined), // Always emit once switchMap(() => - this._cachedState.state$.pipe( + this._phishingMetaState.state$.pipe( first(), // Only take the first value to avoid an infinite loop when updating the cache below - tap((cachedState) => { - void this._backgroundUpdate(cachedState); + tap((metaState) => { + // Perform any updates in the background if needed + void this._backgroundUpdate(metaState); }), catchError((err: unknown) => { this.logService.error("[PhishingDataService] Background update failed to start.", err); @@ -86,6 +101,7 @@ export class PhishingDataService { private platformUtilsService: PlatformUtilsService, private resourceType: PhishingResourceType = PhishingResourceType.Links, ) { + this.logService.debug("[PhishingDataService] Initializing service..."); this.taskSchedulerService.registerTaskHandler(ScheduledTaskNames.phishingDomainUpdate, () => { this._triggerUpdate$.next(); }); @@ -93,6 +109,7 @@ export class PhishingDataService { ScheduledTaskNames.phishingDomainUpdate, this.UPDATE_INTERVAL_DURATION, ); + void this._loadBlobToMemory(); } /** @@ -102,12 +119,17 @@ export class PhishingDataService { * @returns True if the URL is a known phishing web address, false 
otherwise */ async isPhishingWebAddress(url: URL): Promise { - // Use domain (hostname) matching for domain resources, and link matching for links resources - const entries = await firstValueFrom(this._webAddresses$); + if (!this._webAddressesSet) { + this.logService.debug("[PhishingDataService] Set not loaded; skipping check"); + return false; + } + const set = this._webAddressesSet!; const resource = getPhishingResources(this.resourceType); - if (resource && resource.match) { - for (const entry of entries) { + + // Custom matcher per resource + if (resource && resource?.match) { + for (const entry of set) { if (resource.match(url, entry)) { return true; } @@ -115,54 +137,59 @@ export class PhishingDataService { return false; } - // Default/domain behavior: exact hostname match as a fallback - return entries.has(url.hostname); + // Default set-based lookup + return set.has(url.hostname); } - async getNextWebAddresses(prev: PhishingData | null): Promise { - prev = prev ?? { webAddresses: [], timestamp: 0, checksum: "", applicationVersion: "" }; - const timestamp = Date.now(); - const prevAge = timestamp - prev.timestamp; - this.logService.info(`[PhishingDataService] Cache age: ${prevAge}`); + async getNextWebAddresses( + previous: PhishingDataMeta | null, + ): Promise | null> { + const prevMeta = previous ?? { timestamp: 0, checksum: "", applicationVersion: "" }; + const now = Date.now(); + // Updates to check const applicationVersion = await this.platformUtilsService.getApplicationVersion(); - - // If checksum matches, return existing data with new timestamp & version const remoteChecksum = await this.fetchPhishingChecksum(this.resourceType); - if (remoteChecksum && prev.checksum === remoteChecksum) { - this.logService.info( - `[PhishingDataService] Remote checksum matches local checksum, updating timestamp only.`, - ); - return { ...prev, timestamp, applicationVersion }; - } - // Checksum is different, data needs to be updated. 
- // Approach 1: Fetch only new web addresses and append - const isOneDayOldMax = prevAge <= this.UPDATE_INTERVAL_DURATION; - if (isOneDayOldMax && applicationVersion === prev.applicationVersion) { - const webAddressesTodayUrl = getPhishingResources(this.resourceType)!.todayUrl; - const dailyWebAddresses: string[] = - await this.fetchPhishingWebAddresses(webAddressesTodayUrl); - this.logService.info( - `[PhishingDataService] ${dailyWebAddresses.length} new phishing web addresses added`, - ); + // Logic checks + const appVersionChanged = applicationVersion !== prevMeta.applicationVersion; + const masterChecksumChanged = remoteChecksum !== prevMeta.checksum; + + // Check for full update + if (masterChecksumChanged || appVersionChanged) { + this.logService.info("[PhishingDataService] Checksum or version changed; Fetching ALL."); + const remoteUrl = getPhishingResources(this.resourceType)!.remoteUrl; + const blob = await this.fetchAndCompress(remoteUrl); return { - webAddresses: prev.webAddresses.concat(dailyWebAddresses), - checksum: remoteChecksum, - timestamp, - applicationVersion, + blob, + meta: { checksum: remoteChecksum, timestamp: now, applicationVersion }, }; } - // Approach 2: Fetch all web addresses - const remoteUrl = getPhishingResources(this.resourceType)!.remoteUrl; - const remoteWebAddresses = await this.fetchPhishingWebAddresses(remoteUrl); - return { - webAddresses: remoteWebAddresses, - timestamp, - checksum: remoteChecksum, - applicationVersion, - }; + // Check for daily file + const isCacheExpired = now - prevMeta.timestamp > this.UPDATE_INTERVAL_DURATION; + + if (isCacheExpired) { + this.logService.info("[PhishingDataService] Daily cache expired; Fetching TODAY's"); + const url = getPhishingResources(this.resourceType)!.todayUrl; + const newLines = await this.fetchText(url); + const prevBlob = (await firstValueFrom(this._phishingBlobState.state$)) ?? ""; + const oldText = prevBlob ? 
await this._decompressString(prevBlob) : ""; + + // Join the new lines to the existing list + const combined = (oldText ? oldText + "\n" : "") + newLines.join("\n"); + + return { + blob: await this._compressString(combined), + meta: { + checksum: remoteChecksum, + timestamp: now, // Reset the timestamp + applicationVersion, + }, + }; + } + + return null; } private async fetchPhishingChecksum(type: PhishingResourceType = PhishingResourceType.Domains) { @@ -173,8 +200,24 @@ export class PhishingDataService { } return response.text(); } + private async fetchAndCompress(url: string): Promise { + const response = await this.apiService.nativeFetch(new Request(url)); + if (!response.ok) { + throw new Error("Fetch failed"); + } - private async fetchPhishingWebAddresses(url: string) { + const downloadStream = response.body!; + // Pipe through CompressionStream while it's downloading + const compressedStream = downloadStream.pipeThrough(new CompressionStream("gzip")); + // Convert to ArrayBuffer + const buffer = await new Response(compressedStream).arrayBuffer(); + const bytes = new Uint8Array(buffer); + + // Return as Base64 for storage + return (bytes as any).toBase64 ? (bytes as any).toBase64() : this._uint8ToBase64Fallback(bytes); + } + + private async fetchText(url: string) { const response = await this.apiService.nativeFetch(new Request(url)); if (!response.ok) { @@ -202,10 +245,9 @@ export class PhishingDataService { } // Runs the update flow in the background and retries up to 3 times on failure - private async _backgroundUpdate(prev: PhishingData | null): Promise { - this.logService.info(`[PhishingDataService] Update triggered...`); - const phishingData = prev ?? { - webAddresses: [], + private async _backgroundUpdate(previous: PhishingDataMeta | null): Promise { + this.logService.info(`[PhishingDataService] Update web addresses triggered...`); + const phishingMeta: PhishingDataMeta = previous ?? 
{ timestamp: 0, checksum: "", applicationVersion: "", @@ -217,15 +259,22 @@ export class PhishingDataService { for (let attempt = 1; attempt <= maxAttempts; attempt++) { try { - const next = await this.getNextWebAddresses(phishingData); - if (next) { - await this._cachedState.update(() => next); - - // Performance logging - const elapsed = Date.now() - startTime; - this.logService.info(`[PhishingDataService] cache updated in ${elapsed}ms`); + const next = await this.getNextWebAddresses(phishingMeta); + if (!next) { + return; // No update needed } - return; + + if (next.meta) { + await this._phishingMetaState.update(() => next!.meta!); + } + if (next.blob) { + await this._phishingBlobState.update(() => next!.blob!); + await this._loadBlobToMemory(); + } + + // Performance logging + const elapsed = Date.now() - startTime; + this.logService.info(`[PhishingDataService] Phishing data cache updated in ${elapsed}ms`); } catch (err) { this.logService.error( `[PhishingDataService] Unable to update web addresses. Attempt ${attempt}.`, @@ -243,4 +292,87 @@ export class PhishingDataService { } } } + + // [FIXME] Move compression helpers to a shared utils library + // to separate from phishing data service. + // ------------------------- Blob and Compression Handling ------------------------- + private async _compressString(input: string): Promise { + try { + const stream = new Blob([input]).stream().pipeThrough(new CompressionStream("gzip")); + + const compressedBuffer = await new Response(stream).arrayBuffer(); + const bytes = new Uint8Array(compressedBuffer); + + // Modern browsers support direct toBase64 conversion + // For older support, use fallback + return (bytes as any).toBase64 + ? 
(bytes as any).toBase64() + : this._uint8ToBase64Fallback(bytes); + } catch (err) { + this.logService.error("[PhishingDataService] Compression failed", err); + return btoa(encodeURIComponent(input)); + } + } + + private async _decompressString(base64: string): Promise { + try { + // Modern browsers support direct fromBase64 conversion + // For older support, use fallback + const bytes = (Uint8Array as any).fromBase64 + ? (Uint8Array as any).fromBase64(base64) + : this._base64ToUint8Fallback(base64); + if (bytes == null) { + throw new Error("Base64 decoding resulted in null"); + } + const byteResponse = new Response(bytes); + if (!byteResponse.body) { + throw new Error("Response body is null"); + } + const stream = byteResponse.body.pipeThrough(new DecompressionStream("gzip")); + const streamResponse = new Response(stream); + return await streamResponse.text(); + } catch (err) { + this.logService.error("[PhishingDataService] Decompression failed", err); + return decodeURIComponent(atob(base64)); + } + } + + // Try to load compressed newline blob into an in-memory Set for fast lookups + private async _loadBlobToMemory(): Promise { + this.logService.debug("[PhishingDataService] Loading data blob into memory..."); + try { + const blobBase64 = await firstValueFrom(this._phishingBlobState.state$); + if (!blobBase64) { + return; + } + + const text = await this._decompressString(blobBase64); + // Split into lines + const lines = text.split(/\r?\n/); + const newWebAddressesSet = new Set(lines); + + // Add test addresses + this._testWebAddresses.forEach((a) => newWebAddressesSet.add(a)); + this._webAddressesSet = new Set(newWebAddressesSet); + this.logService.info( + `[PhishingDataService] loaded ${this._webAddressesSet.size} addresses into memory from blob`, + ); + } catch (err) { + this.logService.error("[PhishingDataService] Failed to load blob into memory", err); + } + } + private _uint8ToBase64Fallback(bytes: Uint8Array): string { + const CHUNK_SIZE = 0x8000; // 32KB 
chunks + let binary = ""; + for (let i = 0; i < bytes.length; i += CHUNK_SIZE) { + const chunk = bytes.subarray(i, i + CHUNK_SIZE); + binary += String.fromCharCode.apply(null, chunk as any); + } + return btoa(binary); + } + + private _base64ToUint8Fallback(base64: string): Uint8Array { + const binary = atob(base64); + return Uint8Array.from(binary, (c) => c.charCodeAt(0)); + } } diff --git a/apps/browser/src/popup/services/services.module.ts b/apps/browser/src/popup/services/services.module.ts index c462e798a42..06a021085ea 100644 --- a/apps/browser/src/popup/services/services.module.ts +++ b/apps/browser/src/popup/services/services.module.ts @@ -537,6 +537,7 @@ const safeProviders: SafeProvider[] = [ AccountService, BillingAccountProfileStateService, ConfigService, + LogService, OrganizationService, PlatformUtilsService, StateProvider, diff --git a/libs/common/src/dirt/services/phishing-detection/phishing-detection-settings.service.spec.ts b/libs/common/src/dirt/services/phishing-detection/phishing-detection-settings.service.spec.ts index e6363b490cb..077d28f5954 100644 --- a/libs/common/src/dirt/services/phishing-detection/phishing-detection-settings.service.spec.ts +++ b/libs/common/src/dirt/services/phishing-detection/phishing-detection-settings.service.spec.ts @@ -8,6 +8,7 @@ import { BillingAccountProfileStateService } from "@bitwarden/common/billing/abs import { ProductTierType } from "@bitwarden/common/billing/enums"; import { ConfigService } from "@bitwarden/common/platform/abstractions/config/config.service"; import { PlatformUtilsService } from "@bitwarden/common/platform/abstractions/platform-utils.service"; +import { LogService } from "@bitwarden/logging"; import { FakeAccountService, FakeStateProvider, mockAccountServiceWith } from "../../../../spec"; import { UserId } from "../../../types/guid"; @@ -54,6 +55,8 @@ describe("PhishingDetectionSettingsService", () => { usePhishingBlocker: true, }); + const mockLogService = mock(); + const mockUserId = 
"mock-user-id" as UserId; const account = mock({ id: mockUserId }); const accountService: FakeAccountService = mockAccountServiceWith(mockUserId); @@ -85,6 +88,7 @@ describe("PhishingDetectionSettingsService", () => { mockAccountService, mockBillingService, mockConfigService, + mockLogService, mockOrganizationService, mockPlatformService, stateProvider, diff --git a/libs/common/src/dirt/services/phishing-detection/phishing-detection-settings.service.ts b/libs/common/src/dirt/services/phishing-detection/phishing-detection-settings.service.ts index e30592b2f68..91ae7c6227e 100644 --- a/libs/common/src/dirt/services/phishing-detection/phishing-detection-settings.service.ts +++ b/libs/common/src/dirt/services/phishing-detection/phishing-detection-settings.service.ts @@ -1,5 +1,5 @@ import { combineLatest, Observable, of, switchMap } from "rxjs"; -import { catchError, distinctUntilChanged, map, shareReplay } from "rxjs/operators"; +import { catchError, distinctUntilChanged, map, shareReplay, tap } from "rxjs/operators"; import { OrganizationService } from "@bitwarden/common/admin-console/abstractions/organization/organization.service.abstraction"; import { Organization } from "@bitwarden/common/admin-console/models/domain/organization"; @@ -9,6 +9,7 @@ import { ProductTierType } from "@bitwarden/common/billing/enums"; import { FeatureFlag } from "@bitwarden/common/enums/feature-flag.enum"; import { ConfigService } from "@bitwarden/common/platform/abstractions/config/config.service"; import { PlatformUtilsService } from "@bitwarden/common/platform/abstractions/platform-utils.service"; +import { LogService } from "@bitwarden/logging"; import { UserId } from "@bitwarden/user-core"; import { PHISHING_DETECTION_DISK, StateProvider, UserKeyDefinition } from "../../../platform/state"; @@ -32,27 +33,47 @@ export class PhishingDetectionSettingsService implements PhishingDetectionSettin private accountService: AccountService, private billingService: 
BillingAccountProfileStateService, private configService: ConfigService, + private logService: LogService, private organizationService: OrganizationService, private platformService: PlatformUtilsService, private stateProvider: StateProvider, ) { + this.logService.debug(`[PhishingDetectionSettingsService] Initializing service...`); this.available$ = this.buildAvailablePipeline$().pipe( distinctUntilChanged(), + tap((available) => + this.logService.debug( + `[PhishingDetectionSettingsService] Phishing detection available: ${available}`, + ), + ), shareReplay({ bufferSize: 1, refCount: true }), ); this.enabled$ = this.buildEnabledPipeline$().pipe( distinctUntilChanged(), + tap((enabled) => + this.logService.debug( + `[PhishingDetectionSettingsService] Phishing detection enabled: ${{ enabled }}`, + ), + ), shareReplay({ bufferSize: 1, refCount: true }), ); this.on$ = combineLatest([this.available$, this.enabled$]).pipe( map(([available, enabled]) => available && enabled), distinctUntilChanged(), - shareReplay({ bufferSize: 1, refCount: true }), + tap((on) => + this.logService.debug( + `[PhishingDetectionSettingsService] Phishing detection is on: ${{ on }}`, + ), + ), + shareReplay({ bufferSize: 1, refCount: false }), ); } async setEnabled(userId: UserId, enabled: boolean): Promise { + this.logService.debug( + `[PhishingDetectionSettingsService] Setting phishing detection enabled: ${{ enabled, userId }}`, + ); await this.stateProvider.getUser(userId, ENABLE_PHISHING_DETECTION).update(() => enabled); } @@ -64,6 +85,9 @@ export class PhishingDetectionSettingsService implements PhishingDetectionSettin private buildAvailablePipeline$(): Observable { // Phishing detection is unavailable on Safari due to platform limitations. 
if (this.platformService.isSafari()) { + this.logService.warning( + `[PhishingDetectionSettingsService] Phishing detection is unavailable on Safari due to platform limitations`, + ); return of(false); } @@ -97,6 +121,9 @@ export class PhishingDetectionSettingsService implements PhishingDetectionSettin if (!account) { return of(false); } + this.logService.debug( + `[PhishingDetectionSettingsService] Refreshing phishing detection enabled state`, + ); return this.stateProvider.getUserState$(ENABLE_PHISHING_DETECTION, account.id); }), map((enabled) => enabled ?? true),