diff --git a/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.spec.ts b/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.spec.ts index e3fbe74d9de..1fd1c7b4e05 100644 --- a/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.spec.ts +++ b/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.spec.ts @@ -9,7 +9,12 @@ import { import { FakeGlobalStateProvider } from "@bitwarden/common/spec"; import { LogService } from "@bitwarden/logging"; -import { PhishingDataService, PhishingData, PHISHING_DOMAINS_KEY } from "./phishing-data.service"; +import { + PhishingDataService, + PhishingData, + PhishingMetadata, + PHISHING_DATA_KEY, +} from "./phishing-data.service"; describe("PhishingDataService", () => { let service: PhishingDataService; @@ -19,9 +24,8 @@ describe("PhishingDataService", () => { let platformUtilsService: MockProxy; const stateProvider: FakeGlobalStateProvider = new FakeGlobalStateProvider(); - const setMockState = (state: PhishingData) => { - stateProvider.getFake(PHISHING_DOMAINS_KEY).stateSubject.next(state); - return state; + const setMockDataState = (state: PhishingData) => { + stateProvider.getFake(PHISHING_DATA_KEY).stateSubject.next(state); }; let fetchChecksumSpy: jest.SpyInstance; @@ -51,43 +55,28 @@ describe("PhishingDataService", () => { describe("isPhishingWebAddress", () => { it("should detect a phishing web address", async () => { - setMockState({ - webAddresses: ["phish.com", "badguy.net"], - timestamp: Date.now(), - checksum: "abc123", - applicationVersion: "1.0.0", - }); + setMockDataState({ webAddresses: ["phish.com", "badguy.net"] }); const url = new URL("http://phish.com"); const result = await service.isPhishingWebAddress(url); expect(result).toBe(true); }); it("should not detect a safe web address", async () => { - setMockState({ - webAddresses: ["phish.com", "badguy.net"], - timestamp: Date.now(), - checksum: "abc123", - applicationVersion: "1.0.0", - }); + setMockDataState({ webAddresses: ["phish.com", "badguy.net"] }); const url = new URL("http://safe.com"); const result = await service.isPhishingWebAddress(url); expect(result).toBe(false); }); it("should match against root web address", async () => { - setMockState({ - webAddresses: ["phish.com", "badguy.net"], - timestamp: Date.now(), - checksum: "abc123", - applicationVersion: "1.0.0", - }); + setMockDataState({ webAddresses: ["phish.com", "badguy.net"] }); const url = new URL("http://phish.com/about"); const result = await service.isPhishingWebAddress(url); expect(result).toBe(true); }); it("should not error on empty state", async () => { - setMockState(undefined as any); + setMockDataState(undefined as any); const url = new URL("http://phish.com/about"); const result = await service.isPhishingWebAddress(url); expect(result).toBe(false); @@ -96,8 +85,8 @@ describe("PhishingDataService", () => { describe("getNextWebAddresses", () => { it("refetches all web addresses if applicationVersion has changed", async () => { - const prev: PhishingData = { - webAddresses: ["a.com"], + const prevData: PhishingData = { webAddresses: ["a.com"] }; + const prevMetadata: PhishingMetadata = { timestamp: Date.now() - 60000, checksum: "old", applicationVersion: "1.0.0", @@ -106,51 +95,75 @@ describe("PhishingDataService", () => { fetchWebAddressesSpy.mockResolvedValue(["d.com", "e.com"]); platformUtilsService.getApplicationVersion.mockResolvedValue("2.0.0"); - const result = await service.getNextWebAddresses(prev); + const result = await service.getNextWebAddresses(prevData, prevMetadata); - expect(result!.webAddresses).toEqual(["d.com", "e.com"]); - expect(result!.checksum).toBe("new"); - expect(result!.applicationVersion).toBe("2.0.0"); + expect(result.webAddresses).toEqual(["d.com", "e.com"]); + expect(result.metadata.checksum).toBe("new"); + expect(result.metadata.applicationVersion).toBe("2.0.0"); }); - it("returns null if checksum matches to skip state update", async () => { - const prev: PhishingData = { - webAddresses: ["a.com"], + it("keeps current checksum if remote checksum is empty", async () => { + const prevData: PhishingData = { webAddresses: ["a.com"] }; + const prevMetadata: PhishingMetadata = { + timestamp: Date.now() - 60000, + checksum: "existing-checksum", + applicationVersion: "1.0.0", + }; + fetchChecksumSpy.mockResolvedValue(""); + + const result = await service.getNextWebAddresses(prevData, prevMetadata); + + expect(result.webAddresses).toBeUndefined(); + expect(result.metadata.checksum).toBe("existing-checksum"); + expect(result.metadata.timestamp).not.toBe(prevMetadata.timestamp); + }); + + it("only returns metadata if checksum matches (no webAddresses)", async () => { + const prevData: PhishingData = { webAddresses: ["a.com"] }; + const prevMetadata: PhishingMetadata = { timestamp: Date.now() - 60000, checksum: "abc", applicationVersion: "1.0.0", }; fetchChecksumSpy.mockResolvedValue("abc"); - const result = await service.getNextWebAddresses(prev); - expect(result).toBeNull(); + + const result = await service.getNextWebAddresses(prevData, prevMetadata); + + expect(result.webAddresses).toBeUndefined(); + expect(result.metadata.checksum).toBe("abc"); + expect(result.metadata.timestamp).not.toBe(prevMetadata.timestamp); }); it("patches daily domains if cache is fresh", async () => { - const prev: PhishingData = { - webAddresses: ["a.com"], + const prevData: PhishingData = { webAddresses: ["a.com"] }; + const prevMetadata: PhishingMetadata = { timestamp: Date.now() - 60000, checksum: "old", applicationVersion: "1.0.0", }; fetchChecksumSpy.mockResolvedValue("new"); fetchWebAddressesSpy.mockResolvedValue(["b.com", "c.com"]); - const result = await service.getNextWebAddresses(prev); - expect(result!.webAddresses).toEqual(["a.com", "b.com", "c.com"]); - expect(result!.checksum).toBe("new"); + + const result = await service.getNextWebAddresses(prevData, prevMetadata); + + expect(result.webAddresses).toEqual(["a.com", "b.com", "c.com"]); + expect(result.metadata.checksum).toBe("new"); }); it("fetches all domains if cache is old", async () => { - const prev: PhishingData = { - webAddresses: ["a.com"], + const prevData: PhishingData = { webAddresses: ["a.com"] }; + const prevMetadata: PhishingMetadata = { timestamp: Date.now() - 2 * 24 * 60 * 60 * 1000, checksum: "old", applicationVersion: "1.0.0", }; fetchChecksumSpy.mockResolvedValue("new"); fetchWebAddressesSpy.mockResolvedValue(["d.com", "e.com"]); - const result = await service.getNextWebAddresses(prev); - expect(result!.webAddresses).toEqual(["d.com", "e.com"]); - expect(result!.checksum).toBe("new"); + + const result = await service.getNextWebAddresses(prevData, prevMetadata); + + expect(result.webAddresses).toEqual(["d.com", "e.com"]); + expect(result.metadata.checksum).toBe("new"); }); }); }); diff --git a/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.ts b/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.ts index a3058ce3d3a..ecbb1d0b73c 100644 --- a/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.ts +++ b/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.ts @@ -1,5 +1,6 @@ import { catchError, + combineLatest, EMPTY, first, firstValueFrom, @@ -20,11 +21,15 @@ import { GlobalStateProvider, KeyDefinition, PHISHING_DETECTION_DISK } from "@bi import { getPhishingResources, PhishingResourceType } from "../phishing-resources"; +/** Stores the actual phishing web addresses (expensive to update - triggers Set rebuild) */ export type PhishingData = { webAddresses: string[]; +}; + +/** Stores metadata about the cache (cheap to update - no Set rebuild) */ +export type PhishingMetadata = { timestamp: number; checksum: string; - /** * We store the application version to refetch the entire dataset on a new client release. * This counteracts daily appends updates not removing inactive or false positive web addresses. @@ -32,20 +37,33 @@ export type PhishingData = { applicationVersion: string; }; -export const PHISHING_DOMAINS_KEY = new KeyDefinition( +export const PHISHING_DATA_KEY = new KeyDefinition( PHISHING_DETECTION_DISK, - "phishingDomains", + "phishingData", { - deserializer: (value: PhishingData) => - value ?? { webAddresses: [], timestamp: 0, checksum: "", applicationVersion: "" }, + deserializer: (value: PhishingData) => value ?? { webAddresses: [] }, + }, +); + +export const PHISHING_METADATA_KEY = new KeyDefinition( + PHISHING_DETECTION_DISK, + "phishingMetadata", + { + deserializer: (value: PhishingMetadata) => + value ?? { timestamp: 0, checksum: "", applicationVersion: "" }, }, ); /** Coordinates fetching, caching, and patching of known phishing web addresses */ export class PhishingDataService { private _testWebAddresses = this.getTestWebAddresses(); - private _cachedState = this.globalStateProvider.get(PHISHING_DOMAINS_KEY); - private _webAddresses$ = this._cachedState.state$.pipe( + + // Split state: data (expensive) and metadata (cheap) + private _dataState = this.globalStateProvider.get(PHISHING_DATA_KEY); + private _metadataState = this.globalStateProvider.get(PHISHING_METADATA_KEY); + + // Only subscribes to data state - metadata updates won't trigger Set rebuild + private _webAddresses$ = this._dataState.state$.pipe( map( (state) => new Set( @@ -64,10 +82,10 @@ export class PhishingDataService { update$ = this._triggerUpdate$.pipe( startWith(undefined), // Always emit once switchMap(() => - this._cachedState.state$.pipe( + combineLatest([this._dataState.state$, this._metadataState.state$]).pipe( first(), // Only take the first value to avoid an infinite loop when updating the cache below - tap((cachedState) => { - void this._backgroundUpdate(cachedState); + tap(([data, metadata]) => { + void this._backgroundUpdate(data, metadata); }), catchError((err: unknown) => { this.logService.error("[PhishingDataService] Background update failed to start.", err); @@ -119,25 +137,41 @@ export class PhishingDataService { return entries.has(url.hostname); } - async getNextWebAddresses(prev: PhishingData | null): Promise { - prev = prev ?? { webAddresses: [], timestamp: 0, checksum: "", applicationVersion: "" }; + async getNextWebAddresses( + prevData: PhishingData | null, + prevMetadata: PhishingMetadata | null, + ): Promise<{ metadata: PhishingMetadata; webAddresses?: string[] }> { + const data = prevData ?? { webAddresses: [] }; + const metadata = prevMetadata ?? { timestamp: 0, checksum: "", applicationVersion: "" }; + const timestamp = Date.now(); - const prevAge = timestamp - prev.timestamp; + const prevAge = timestamp - metadata.timestamp; this.logService.info(`[PhishingDataService] Cache age: ${prevAge}`); const applicationVersion = await this.platformUtilsService.getApplicationVersion(); - - // If checksum matches, skip state write to avoid expensive Set rebuild const remoteChecksum = await this.fetchPhishingChecksum(this.resourceType); - if (remoteChecksum && prev.checksum === remoteChecksum) { - this.logService.info(`[PhishingDataService] Checksum match, skipping update.`); - return null; + + // If checksum fetch failed/empty, keep current checksum and skip data update + if (!remoteChecksum) { + this.logService.info(`[PhishingDataService] Checksum fetch returned empty, skipping update.`); + return { + metadata: { timestamp, checksum: metadata.checksum, applicationVersion }, + }; } + + // If checksum matches, only update metadata (no data change needed) + if (metadata.checksum === remoteChecksum) { + this.logService.info(`[PhishingDataService] Checksum match, updating metadata only.`); + return { + metadata: { timestamp, checksum: remoteChecksum, applicationVersion }, + }; + } + // Checksum is different, data needs to be updated. // Approach 1: Fetch only new web addresses and append const isOneDayOldMax = prevAge <= this.UPDATE_INTERVAL_DURATION; - if (isOneDayOldMax && applicationVersion === prev.applicationVersion) { + if (isOneDayOldMax && applicationVersion === metadata.applicationVersion) { const webAddressesTodayUrl = getPhishingResources(this.resourceType)!.todayUrl; const dailyWebAddresses: string[] = await this.fetchPhishingWebAddresses(webAddressesTodayUrl); @@ -145,10 +179,8 @@ export class PhishingDataService { `[PhishingDataService] ${dailyWebAddresses.length} new phishing web addresses added`, ); return { - webAddresses: prev.webAddresses.concat(dailyWebAddresses), - checksum: remoteChecksum, - timestamp, - applicationVersion, + metadata: { timestamp, checksum: remoteChecksum, applicationVersion }, + webAddresses: data.webAddresses.concat(dailyWebAddresses), }; } @@ -156,10 +188,8 @@ export class PhishingDataService { const remoteUrl = getPhishingResources(this.resourceType)!.remoteUrl; const remoteWebAddresses = await this.fetchPhishingWebAddresses(remoteUrl); return { + metadata: { timestamp, checksum: remoteChecksum, applicationVersion }, webAddresses: remoteWebAddresses, - timestamp, - checksum: remoteChecksum, - applicationVersion, }; } @@ -200,14 +230,11 @@ export class PhishingDataService { } // Runs the update flow in the background and retries up to 3 times on failure - private async _backgroundUpdate(prev: PhishingData | null): Promise { + private async _backgroundUpdate( + prevData: PhishingData | null, + prevMetadata: PhishingMetadata | null, + ): Promise { this.logService.info(`[PhishingDataService] Update triggered...`); - const phishingData = prev ?? { - webAddresses: [], - timestamp: 0, - checksum: "", - applicationVersion: "", - }; // Start time for logging performance of update const startTime = Date.now(); const maxAttempts = 3; @@ -215,9 +242,14 @@ export class PhishingDataService { for (let attempt = 1; attempt <= maxAttempts; attempt++) { try { - const next = await this.getNextWebAddresses(phishingData); - if (next) { - await this._cachedState.update(() => next); + const result = await this.getNextWebAddresses(prevData, prevMetadata); + + // Always update metadata (cheap - no Set rebuild) + await this._metadataState.update(() => result.metadata); + + // Only update data if webAddresses changed (expensive - triggers Set rebuild) + if (result.webAddresses) { + await this._dataState.update(() => ({ webAddresses: result.webAddresses! })); // Performance logging const elapsed = Date.now() - startTime;