mirror of
https://github.com/bitwarden/browser
synced 2026-01-30 16:23:53 +00:00
Split phishing cache into data and metadata states
Separates webAddresses from metadata to allow cheap timestamp/checksum updates without triggering expensive Set rebuilds.
This commit is contained in:
@@ -9,7 +9,12 @@ import {
|
||||
import { FakeGlobalStateProvider } from "@bitwarden/common/spec";
|
||||
import { LogService } from "@bitwarden/logging";
|
||||
|
||||
import { PhishingDataService, PhishingData, PHISHING_DOMAINS_KEY } from "./phishing-data.service";
|
||||
import {
|
||||
PhishingDataService,
|
||||
PhishingData,
|
||||
PhishingMetadata,
|
||||
PHISHING_DATA_KEY,
|
||||
} from "./phishing-data.service";
|
||||
|
||||
describe("PhishingDataService", () => {
|
||||
let service: PhishingDataService;
|
||||
@@ -19,9 +24,8 @@ describe("PhishingDataService", () => {
|
||||
let platformUtilsService: MockProxy<PlatformUtilsService>;
|
||||
const stateProvider: FakeGlobalStateProvider = new FakeGlobalStateProvider();
|
||||
|
||||
const setMockState = (state: PhishingData) => {
|
||||
stateProvider.getFake(PHISHING_DOMAINS_KEY).stateSubject.next(state);
|
||||
return state;
|
||||
const setMockDataState = (state: PhishingData) => {
|
||||
stateProvider.getFake(PHISHING_DATA_KEY).stateSubject.next(state);
|
||||
};
|
||||
|
||||
let fetchChecksumSpy: jest.SpyInstance;
|
||||
@@ -51,43 +55,28 @@ describe("PhishingDataService", () => {
|
||||
|
||||
describe("isPhishingWebAddress", () => {
|
||||
it("should detect a phishing web address", async () => {
|
||||
setMockState({
|
||||
webAddresses: ["phish.com", "badguy.net"],
|
||||
timestamp: Date.now(),
|
||||
checksum: "abc123",
|
||||
applicationVersion: "1.0.0",
|
||||
});
|
||||
setMockDataState({ webAddresses: ["phish.com", "badguy.net"] });
|
||||
const url = new URL("http://phish.com");
|
||||
const result = await service.isPhishingWebAddress(url);
|
||||
expect(result).toBe(true);
|
||||
});
|
||||
|
||||
it("should not detect a safe web address", async () => {
|
||||
setMockState({
|
||||
webAddresses: ["phish.com", "badguy.net"],
|
||||
timestamp: Date.now(),
|
||||
checksum: "abc123",
|
||||
applicationVersion: "1.0.0",
|
||||
});
|
||||
setMockDataState({ webAddresses: ["phish.com", "badguy.net"] });
|
||||
const url = new URL("http://safe.com");
|
||||
const result = await service.isPhishingWebAddress(url);
|
||||
expect(result).toBe(false);
|
||||
});
|
||||
|
||||
it("should match against root web address", async () => {
|
||||
setMockState({
|
||||
webAddresses: ["phish.com", "badguy.net"],
|
||||
timestamp: Date.now(),
|
||||
checksum: "abc123",
|
||||
applicationVersion: "1.0.0",
|
||||
});
|
||||
setMockDataState({ webAddresses: ["phish.com", "badguy.net"] });
|
||||
const url = new URL("http://phish.com/about");
|
||||
const result = await service.isPhishingWebAddress(url);
|
||||
expect(result).toBe(true);
|
||||
});
|
||||
|
||||
it("should not error on empty state", async () => {
|
||||
setMockState(undefined as any);
|
||||
setMockDataState(undefined as any);
|
||||
const url = new URL("http://phish.com/about");
|
||||
const result = await service.isPhishingWebAddress(url);
|
||||
expect(result).toBe(false);
|
||||
@@ -96,8 +85,8 @@ describe("PhishingDataService", () => {
|
||||
|
||||
describe("getNextWebAddresses", () => {
|
||||
it("refetches all web addresses if applicationVersion has changed", async () => {
|
||||
const prev: PhishingData = {
|
||||
webAddresses: ["a.com"],
|
||||
const prevData: PhishingData = { webAddresses: ["a.com"] };
|
||||
const prevMetadata: PhishingMetadata = {
|
||||
timestamp: Date.now() - 60000,
|
||||
checksum: "old",
|
||||
applicationVersion: "1.0.0",
|
||||
@@ -106,51 +95,75 @@ describe("PhishingDataService", () => {
|
||||
fetchWebAddressesSpy.mockResolvedValue(["d.com", "e.com"]);
|
||||
platformUtilsService.getApplicationVersion.mockResolvedValue("2.0.0");
|
||||
|
||||
const result = await service.getNextWebAddresses(prev);
|
||||
const result = await service.getNextWebAddresses(prevData, prevMetadata);
|
||||
|
||||
expect(result!.webAddresses).toEqual(["d.com", "e.com"]);
|
||||
expect(result!.checksum).toBe("new");
|
||||
expect(result!.applicationVersion).toBe("2.0.0");
|
||||
expect(result.webAddresses).toEqual(["d.com", "e.com"]);
|
||||
expect(result.metadata.checksum).toBe("new");
|
||||
expect(result.metadata.applicationVersion).toBe("2.0.0");
|
||||
});
|
||||
|
||||
it("returns null if checksum matches to skip state update", async () => {
|
||||
const prev: PhishingData = {
|
||||
webAddresses: ["a.com"],
|
||||
it("keeps current checksum if remote checksum is empty", async () => {
|
||||
const prevData: PhishingData = { webAddresses: ["a.com"] };
|
||||
const prevMetadata: PhishingMetadata = {
|
||||
timestamp: Date.now() - 60000,
|
||||
checksum: "existing-checksum",
|
||||
applicationVersion: "1.0.0",
|
||||
};
|
||||
fetchChecksumSpy.mockResolvedValue("");
|
||||
|
||||
const result = await service.getNextWebAddresses(prevData, prevMetadata);
|
||||
|
||||
expect(result.webAddresses).toBeUndefined();
|
||||
expect(result.metadata.checksum).toBe("existing-checksum");
|
||||
expect(result.metadata.timestamp).not.toBe(prevMetadata.timestamp);
|
||||
});
|
||||
|
||||
it("only returns metadata if checksum matches (no webAddresses)", async () => {
|
||||
const prevData: PhishingData = { webAddresses: ["a.com"] };
|
||||
const prevMetadata: PhishingMetadata = {
|
||||
timestamp: Date.now() - 60000,
|
||||
checksum: "abc",
|
||||
applicationVersion: "1.0.0",
|
||||
};
|
||||
fetchChecksumSpy.mockResolvedValue("abc");
|
||||
const result = await service.getNextWebAddresses(prev);
|
||||
expect(result).toBeNull();
|
||||
|
||||
const result = await service.getNextWebAddresses(prevData, prevMetadata);
|
||||
|
||||
expect(result.webAddresses).toBeUndefined();
|
||||
expect(result.metadata.checksum).toBe("abc");
|
||||
expect(result.metadata.timestamp).not.toBe(prevMetadata.timestamp);
|
||||
});
|
||||
|
||||
it("patches daily domains if cache is fresh", async () => {
|
||||
const prev: PhishingData = {
|
||||
webAddresses: ["a.com"],
|
||||
const prevData: PhishingData = { webAddresses: ["a.com"] };
|
||||
const prevMetadata: PhishingMetadata = {
|
||||
timestamp: Date.now() - 60000,
|
||||
checksum: "old",
|
||||
applicationVersion: "1.0.0",
|
||||
};
|
||||
fetchChecksumSpy.mockResolvedValue("new");
|
||||
fetchWebAddressesSpy.mockResolvedValue(["b.com", "c.com"]);
|
||||
const result = await service.getNextWebAddresses(prev);
|
||||
expect(result!.webAddresses).toEqual(["a.com", "b.com", "c.com"]);
|
||||
expect(result!.checksum).toBe("new");
|
||||
|
||||
const result = await service.getNextWebAddresses(prevData, prevMetadata);
|
||||
|
||||
expect(result.webAddresses).toEqual(["a.com", "b.com", "c.com"]);
|
||||
expect(result.metadata.checksum).toBe("new");
|
||||
});
|
||||
|
||||
it("fetches all domains if cache is old", async () => {
|
||||
const prev: PhishingData = {
|
||||
webAddresses: ["a.com"],
|
||||
const prevData: PhishingData = { webAddresses: ["a.com"] };
|
||||
const prevMetadata: PhishingMetadata = {
|
||||
timestamp: Date.now() - 2 * 24 * 60 * 60 * 1000,
|
||||
checksum: "old",
|
||||
applicationVersion: "1.0.0",
|
||||
};
|
||||
fetchChecksumSpy.mockResolvedValue("new");
|
||||
fetchWebAddressesSpy.mockResolvedValue(["d.com", "e.com"]);
|
||||
const result = await service.getNextWebAddresses(prev);
|
||||
expect(result!.webAddresses).toEqual(["d.com", "e.com"]);
|
||||
expect(result!.checksum).toBe("new");
|
||||
|
||||
const result = await service.getNextWebAddresses(prevData, prevMetadata);
|
||||
|
||||
expect(result.webAddresses).toEqual(["d.com", "e.com"]);
|
||||
expect(result.metadata.checksum).toBe("new");
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import {
|
||||
catchError,
|
||||
combineLatest,
|
||||
EMPTY,
|
||||
first,
|
||||
firstValueFrom,
|
||||
@@ -20,11 +21,15 @@ import { GlobalStateProvider, KeyDefinition, PHISHING_DETECTION_DISK } from "@bi
|
||||
|
||||
import { getPhishingResources, PhishingResourceType } from "../phishing-resources";
|
||||
|
||||
/** Stores the actual phishing web addresses (expensive to update - triggers Set rebuild) */
|
||||
export type PhishingData = {
|
||||
webAddresses: string[];
|
||||
};
|
||||
|
||||
/** Stores metadata about the cache (cheap to update - no Set rebuild) */
|
||||
export type PhishingMetadata = {
|
||||
timestamp: number;
|
||||
checksum: string;
|
||||
|
||||
/**
|
||||
* We store the application version to refetch the entire dataset on a new client release.
|
||||
* This counteracts daily appends updates not removing inactive or false positive web addresses.
|
||||
@@ -32,20 +37,33 @@ export type PhishingData = {
|
||||
applicationVersion: string;
|
||||
};
|
||||
|
||||
export const PHISHING_DOMAINS_KEY = new KeyDefinition<PhishingData>(
|
||||
export const PHISHING_DATA_KEY = new KeyDefinition<PhishingData>(
|
||||
PHISHING_DETECTION_DISK,
|
||||
"phishingDomains",
|
||||
"phishingData",
|
||||
{
|
||||
deserializer: (value: PhishingData) =>
|
||||
value ?? { webAddresses: [], timestamp: 0, checksum: "", applicationVersion: "" },
|
||||
deserializer: (value: PhishingData) => value ?? { webAddresses: [] },
|
||||
},
|
||||
);
|
||||
|
||||
export const PHISHING_METADATA_KEY = new KeyDefinition<PhishingMetadata>(
|
||||
PHISHING_DETECTION_DISK,
|
||||
"phishingMetadata",
|
||||
{
|
||||
deserializer: (value: PhishingMetadata) =>
|
||||
value ?? { timestamp: 0, checksum: "", applicationVersion: "" },
|
||||
},
|
||||
);
|
||||
|
||||
/** Coordinates fetching, caching, and patching of known phishing web addresses */
|
||||
export class PhishingDataService {
|
||||
private _testWebAddresses = this.getTestWebAddresses();
|
||||
private _cachedState = this.globalStateProvider.get(PHISHING_DOMAINS_KEY);
|
||||
private _webAddresses$ = this._cachedState.state$.pipe(
|
||||
|
||||
// Split state: data (expensive) and metadata (cheap)
|
||||
private _dataState = this.globalStateProvider.get(PHISHING_DATA_KEY);
|
||||
private _metadataState = this.globalStateProvider.get(PHISHING_METADATA_KEY);
|
||||
|
||||
// Only subscribes to data state - metadata updates won't trigger Set rebuild
|
||||
private _webAddresses$ = this._dataState.state$.pipe(
|
||||
map(
|
||||
(state) =>
|
||||
new Set(
|
||||
@@ -64,10 +82,10 @@ export class PhishingDataService {
|
||||
update$ = this._triggerUpdate$.pipe(
|
||||
startWith(undefined), // Always emit once
|
||||
switchMap(() =>
|
||||
this._cachedState.state$.pipe(
|
||||
combineLatest([this._dataState.state$, this._metadataState.state$]).pipe(
|
||||
first(), // Only take the first value to avoid an infinite loop when updating the cache below
|
||||
tap((cachedState) => {
|
||||
void this._backgroundUpdate(cachedState);
|
||||
tap(([data, metadata]) => {
|
||||
void this._backgroundUpdate(data, metadata);
|
||||
}),
|
||||
catchError((err: unknown) => {
|
||||
this.logService.error("[PhishingDataService] Background update failed to start.", err);
|
||||
@@ -119,25 +137,41 @@ export class PhishingDataService {
|
||||
return entries.has(url.hostname);
|
||||
}
|
||||
|
||||
async getNextWebAddresses(prev: PhishingData | null): Promise<PhishingData | null> {
|
||||
prev = prev ?? { webAddresses: [], timestamp: 0, checksum: "", applicationVersion: "" };
|
||||
async getNextWebAddresses(
|
||||
prevData: PhishingData | null,
|
||||
prevMetadata: PhishingMetadata | null,
|
||||
): Promise<{ metadata: PhishingMetadata; webAddresses?: string[] }> {
|
||||
const data = prevData ?? { webAddresses: [] };
|
||||
const metadata = prevMetadata ?? { timestamp: 0, checksum: "", applicationVersion: "" };
|
||||
|
||||
const timestamp = Date.now();
|
||||
const prevAge = timestamp - prev.timestamp;
|
||||
const prevAge = timestamp - metadata.timestamp;
|
||||
this.logService.info(`[PhishingDataService] Cache age: ${prevAge}`);
|
||||
|
||||
const applicationVersion = await this.platformUtilsService.getApplicationVersion();
|
||||
|
||||
// If checksum matches, skip state write to avoid expensive Set rebuild
|
||||
const remoteChecksum = await this.fetchPhishingChecksum(this.resourceType);
|
||||
if (remoteChecksum && prev.checksum === remoteChecksum) {
|
||||
this.logService.info(`[PhishingDataService] Checksum match, skipping update.`);
|
||||
return null;
|
||||
|
||||
// If checksum fetch failed/empty, keep current checksum and skip data update
|
||||
if (!remoteChecksum) {
|
||||
this.logService.info(`[PhishingDataService] Checksum fetch returned empty, skipping update.`);
|
||||
return {
|
||||
metadata: { timestamp, checksum: metadata.checksum, applicationVersion },
|
||||
};
|
||||
}
|
||||
|
||||
// If checksum matches, only update metadata (no data change needed)
|
||||
if (metadata.checksum === remoteChecksum) {
|
||||
this.logService.info(`[PhishingDataService] Checksum match, updating metadata only.`);
|
||||
return {
|
||||
metadata: { timestamp, checksum: remoteChecksum, applicationVersion },
|
||||
};
|
||||
}
|
||||
|
||||
// Checksum is different, data needs to be updated.
|
||||
|
||||
// Approach 1: Fetch only new web addresses and append
|
||||
const isOneDayOldMax = prevAge <= this.UPDATE_INTERVAL_DURATION;
|
||||
if (isOneDayOldMax && applicationVersion === prev.applicationVersion) {
|
||||
if (isOneDayOldMax && applicationVersion === metadata.applicationVersion) {
|
||||
const webAddressesTodayUrl = getPhishingResources(this.resourceType)!.todayUrl;
|
||||
const dailyWebAddresses: string[] =
|
||||
await this.fetchPhishingWebAddresses(webAddressesTodayUrl);
|
||||
@@ -145,10 +179,8 @@ export class PhishingDataService {
|
||||
`[PhishingDataService] ${dailyWebAddresses.length} new phishing web addresses added`,
|
||||
);
|
||||
return {
|
||||
webAddresses: prev.webAddresses.concat(dailyWebAddresses),
|
||||
checksum: remoteChecksum,
|
||||
timestamp,
|
||||
applicationVersion,
|
||||
metadata: { timestamp, checksum: remoteChecksum, applicationVersion },
|
||||
webAddresses: data.webAddresses.concat(dailyWebAddresses),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -156,10 +188,8 @@ export class PhishingDataService {
|
||||
const remoteUrl = getPhishingResources(this.resourceType)!.remoteUrl;
|
||||
const remoteWebAddresses = await this.fetchPhishingWebAddresses(remoteUrl);
|
||||
return {
|
||||
metadata: { timestamp, checksum: remoteChecksum, applicationVersion },
|
||||
webAddresses: remoteWebAddresses,
|
||||
timestamp,
|
||||
checksum: remoteChecksum,
|
||||
applicationVersion,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -200,14 +230,11 @@ export class PhishingDataService {
|
||||
}
|
||||
|
||||
// Runs the update flow in the background and retries up to 3 times on failure
|
||||
private async _backgroundUpdate(prev: PhishingData | null): Promise<void> {
|
||||
private async _backgroundUpdate(
|
||||
prevData: PhishingData | null,
|
||||
prevMetadata: PhishingMetadata | null,
|
||||
): Promise<void> {
|
||||
this.logService.info(`[PhishingDataService] Update triggered...`);
|
||||
const phishingData = prev ?? {
|
||||
webAddresses: [],
|
||||
timestamp: 0,
|
||||
checksum: "",
|
||||
applicationVersion: "",
|
||||
};
|
||||
// Start time for logging performance of update
|
||||
const startTime = Date.now();
|
||||
const maxAttempts = 3;
|
||||
@@ -215,9 +242,14 @@ export class PhishingDataService {
|
||||
|
||||
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
|
||||
try {
|
||||
const next = await this.getNextWebAddresses(phishingData);
|
||||
if (next) {
|
||||
await this._cachedState.update(() => next);
|
||||
const result = await this.getNextWebAddresses(prevData, prevMetadata);
|
||||
|
||||
// Always update metadata (cheap - no Set rebuild)
|
||||
await this._metadataState.update(() => result.metadata);
|
||||
|
||||
// Only update data if webAddresses changed (expensive - triggers Set rebuild)
|
||||
if (result.webAddresses) {
|
||||
await this._dataState.update(() => ({ webAddresses: result.webAddresses! }));
|
||||
|
||||
// Performance logging
|
||||
const elapsed = Date.now() - startTime;
|
||||
|
||||
Reference in New Issue
Block a user