1
0
mirror of https://github.com/bitwarden/browser synced 2026-02-06 11:43:51 +00:00

[PM-28548] Phishing Blocker support links (#18070)

* Change domain terminology to web addresses

* Added phishing resource file

* Finish renaming and adding runtime configuration for domains vs links setting

* Update reference

* Add matching functions per resource

* correct URL matching logic for links-based detection

Problem:
The phishing link matcher was failing to detect known phishing URLs due to
two issues:

1. Protocol mismatch: Entries in the phishing list use `http://` but users
   typically visit `https://` versions. The matcher was comparing full URLs
   including protocol, causing legitimate matches to fail.
   - List entry: `http://smartdapptradxx.pages.dev`
   - User visits: `https://smartdapptradxx.pages.dev/`
   - Result: No match (incorrect)

2. Hostname-only matching would have caused false positives: An earlier
   attempt to fix the protocol mismatch (item 1 above) also compared
   hostnames alone, which defeats the purpose of links-based detection.
   The goal of PM-28548 is precise URL matching, so that entire domains
   (like pages.dev, github.io) are not blocked when only specific paths
   are malicious.

Solution:
- Always strip protocol (http:// or https://) from both entry and URL
  before comparison, treating them as equivalent
- Remove hostname-only matching to maintain precision
- Keep prefix matching for subpaths, query strings, and fragments

---------

Co-authored-by: Alex <adewitt@bitwarden.com>
(cherry picked from commit 800a21d8a3)
This commit is contained in:
Leslie Tilton
2025-12-30 11:06:30 -06:00
committed by Alex
parent 3d6448ba68
commit ad4b01f315
4 changed files with 186 additions and 83 deletions

View File

@@ -0,0 +1,98 @@
/**
 * Describes one remote phishing list and how a visited URL is compared
 * against the entries of that list.
 */
export type PhishingResource = {
  /** Optional human-readable label for the list. */
  name?: string;
  /** Location of the complete list (the `*-ACTIVE.txt` file). */
  remoteUrl: string;
  /** Location of the checksum file published for the complete list. */
  checksumUrl: string;
  /** Location of the list holding only today's additions (the `*-NEW-today.txt` file). */
  todayUrl: string;
  /** Matcher used to decide whether a given URL matches an entry from this resource */
  match: (url: URL, entry: string) => boolean;
};

/** The kinds of phishing lists that can be selected. */
export const PhishingResourceType = Object.freeze({
  Domains: "domains",
  Links: "links",
} as const);

/** Union of the string values of {@link PhishingResourceType}. */
export type PhishingResourceType = (typeof PhishingResourceType)[keyof typeof PhishingResourceType];

/** Leading `http://` or `https://`; the two schemes are treated as equivalent when matching. */
const HTTP_SCHEME_PREFIX = /^https?:\/\//;

/** A single `/` at the very end of a string. */
const TRAILING_SLASH = /\/$/;

/** The `&amp;` HTML entity, which list entries sometimes contain in place of `&`. */
const HTML_ENCODED_AMPERSAND = /&amp;/g;

/**
 * Reduces an entry or an href to the form used for comparison: lower-cased,
 * without one trailing slash and without a leading http(s) scheme.
 */
function toComparisonKey(value: string): string {
  return value.toLowerCase().replace(TRAILING_SLASH, "").replace(HTTP_SCHEME_PREFIX, "");
}

/** Basic HTML entity decode for the one entity handled here (`&amp;`). */
function decodeHtmlAmpersands(value: string): string {
  return value.replace(HTML_ENCODED_AMPERSAND, "&");
}

/**
 * Domain-list matcher.
 *
 * An entry matches when it equals the URL's hostname, or when it equals the
 * scheme-less URL, or when the scheme-less URL continues with `/` right after it.
 *
 * @param url The URL being visited.
 * @param entry One line from the domains list.
 * @returns True when the entry identifies the URL.
 */
function matchDomainEntry(url: URL, entry: string): boolean {
  if (!entry) {
    return false;
  }

  const entryKey = toComparisonKey(entry.trim());
  // An entry that normalises to nothing (whitespace, a lone "/") must never match:
  // URLs such as file:///... have an empty hostname and would otherwise all be flagged.
  if (!entryKey) {
    return false;
  }

  if (entryKey === url.hostname.toLowerCase()) {
    return true;
  }

  // The scheme is only stripped from the start of the href, so an "http://"
  // embedded later in the URL (e.g. inside a query string) is left intact.
  const urlKey = toComparisonKey(url.href);
  return urlKey === entryKey || urlKey.startsWith(entryKey + "/");
}

/**
 * Link-list matcher.
 *
 * Compares the full URL (scheme removed, `&amp;` decoded) against the entry.
 * There is deliberately no hostname-only comparison: an entry matches on
 * equality, or as a prefix followed by `/`, `?` or `#`,
 * e.g. entry "site.com/phish" matches "site.com/phish/subpage" or "site.com/phish?id=1".
 *
 * @param url The URL being visited.
 * @param entry One line from the links list.
 * @returns True when the entry identifies the URL.
 */
function matchLinkEntry(url: URL, entry: string): boolean {
  if (!entry) {
    return false;
  }

  const entryKey = toComparisonKey(decodeHtmlAmpersands(entry.trim()));
  // Ignore entries that normalise to an empty string instead of relying on
  // the prefix checks below happening not to match.
  if (!entryKey) {
    return false;
  }

  const urlKey = toComparisonKey(decodeHtmlAmpersands(url.href));
  if (urlKey === entryKey) {
    return true;
  }

  return (
    urlKey.startsWith(entryKey + "/") ||
    urlKey.startsWith(entryKey + "?") ||
    urlKey.startsWith(entryKey + "#")
  );
}

/** The configured remote lists, grouped by resource type. */
export const PHISHING_RESOURCES: Record<PhishingResourceType, PhishingResource[]> = {
  [PhishingResourceType.Domains]: [
    {
      name: "Phishing.Database Domains",
      remoteUrl:
        "https://raw.githubusercontent.com/Phishing-Database/Phishing.Database/master/phishing-domains-ACTIVE.txt",
      checksumUrl:
        "https://raw.githubusercontent.com/Phishing-Database/checksums/refs/heads/master/phishing-domains-ACTIVE.txt.md5",
      todayUrl:
        "https://raw.githubusercontent.com/Phishing-Database/Phishing.Database/refs/heads/master/phishing-domains-NEW-today.txt",
      match: matchDomainEntry,
    },
  ],
  [PhishingResourceType.Links]: [
    {
      name: "Phishing.Database Links",
      remoteUrl:
        "https://raw.githubusercontent.com/Phishing-Database/Phishing.Database/master/phishing-links-ACTIVE.txt",
      checksumUrl:
        "https://raw.githubusercontent.com/Phishing-Database/checksums/refs/heads/master/phishing-links-ACTIVE.txt.md5",
      todayUrl:
        "https://raw.githubusercontent.com/Phishing-Database/Phishing.Database/refs/heads/master/phishing-links-NEW-today.txt",
      match: matchLinkEntry,
    },
  ],
};
/**
 * Looks up one configured phishing resource.
 *
 * @param type Which group of resources to read from.
 * @param index Position within that group; defaults to the first resource.
 * @returns The resource at that position, or `undefined` when the group is
 *   missing or has nothing at `index`.
 */
export function getPhishingResources(
  type: PhishingResourceType,
  index = 0,
): PhishingResource | undefined {
  return PHISHING_RESOURCES[type]?.[index];
}

View File

@@ -25,7 +25,7 @@ describe("PhishingDataService", () => {
};
let fetchChecksumSpy: jest.SpyInstance;
let fetchDomainsSpy: jest.SpyInstance;
let fetchWebAddressesSpy: jest.SpyInstance;
beforeEach(() => {
jest.useFakeTimers();
@@ -45,113 +45,113 @@ describe("PhishingDataService", () => {
platformUtilsService,
);
fetchChecksumSpy = jest.spyOn(service as any, "fetchPhishingDomainsChecksum");
fetchDomainsSpy = jest.spyOn(service as any, "fetchPhishingDomains");
fetchChecksumSpy = jest.spyOn(service as any, "fetchPhishingChecksum");
fetchWebAddressesSpy = jest.spyOn(service as any, "fetchPhishingWebAddresses");
});
describe("isPhishingDomains", () => {
it("should detect a phishing domain", async () => {
describe("isPhishingWebAddress", () => {
it("should detect a phishing web address", async () => {
setMockState({
domains: ["phish.com", "badguy.net"],
webAddresses: ["phish.com", "badguy.net"],
timestamp: Date.now(),
checksum: "abc123",
applicationVersion: "1.0.0",
});
const url = new URL("http://phish.com");
const result = await service.isPhishingDomain(url);
const result = await service.isPhishingWebAddress(url);
expect(result).toBe(true);
});
it("should not detect a safe domain", async () => {
it("should not detect a safe web address", async () => {
setMockState({
domains: ["phish.com", "badguy.net"],
webAddresses: ["phish.com", "badguy.net"],
timestamp: Date.now(),
checksum: "abc123",
applicationVersion: "1.0.0",
});
const url = new URL("http://safe.com");
const result = await service.isPhishingDomain(url);
const result = await service.isPhishingWebAddress(url);
expect(result).toBe(false);
});
it("should match against root domain", async () => {
it("should match against root web address", async () => {
setMockState({
domains: ["phish.com", "badguy.net"],
webAddresses: ["phish.com", "badguy.net"],
timestamp: Date.now(),
checksum: "abc123",
applicationVersion: "1.0.0",
});
const url = new URL("http://phish.com/about");
const result = await service.isPhishingDomain(url);
const result = await service.isPhishingWebAddress(url);
expect(result).toBe(true);
});
it("should not error on empty state", async () => {
setMockState(undefined as any);
const url = new URL("http://phish.com/about");
const result = await service.isPhishingDomain(url);
const result = await service.isPhishingWebAddress(url);
expect(result).toBe(false);
});
});
describe("getNextDomains", () => {
it("refetches all domains if applicationVersion has changed", async () => {
describe("getNextWebAddresses", () => {
it("refetches all web addresses if applicationVersion has changed", async () => {
const prev: PhishingData = {
domains: ["a.com"],
webAddresses: ["a.com"],
timestamp: Date.now() - 60000,
checksum: "old",
applicationVersion: "1.0.0",
};
fetchChecksumSpy.mockResolvedValue("new");
fetchDomainsSpy.mockResolvedValue(["d.com", "e.com"]);
fetchWebAddressesSpy.mockResolvedValue(["d.com", "e.com"]);
platformUtilsService.getApplicationVersion.mockResolvedValue("2.0.0");
const result = await service.getNextDomains(prev);
const result = await service.getNextWebAddresses(prev);
expect(result!.domains).toEqual(["d.com", "e.com"]);
expect(result!.webAddresses).toEqual(["d.com", "e.com"]);
expect(result!.checksum).toBe("new");
expect(result!.applicationVersion).toBe("2.0.0");
});
it("only updates timestamp if checksum matches", async () => {
const prev: PhishingData = {
domains: ["a.com"],
webAddresses: ["a.com"],
timestamp: Date.now() - 60000,
checksum: "abc",
applicationVersion: "1.0.0",
};
fetchChecksumSpy.mockResolvedValue("abc");
const result = await service.getNextDomains(prev);
expect(result!.domains).toEqual(prev.domains);
const result = await service.getNextWebAddresses(prev);
expect(result!.webAddresses).toEqual(prev.webAddresses);
expect(result!.checksum).toBe("abc");
expect(result!.timestamp).not.toBe(prev.timestamp);
});
it("patches daily domains if cache is fresh", async () => {
const prev: PhishingData = {
domains: ["a.com"],
webAddresses: ["a.com"],
timestamp: Date.now() - 60000,
checksum: "old",
applicationVersion: "1.0.0",
};
fetchChecksumSpy.mockResolvedValue("new");
fetchDomainsSpy.mockResolvedValue(["b.com", "c.com"]);
const result = await service.getNextDomains(prev);
expect(result!.domains).toEqual(["a.com", "b.com", "c.com"]);
fetchWebAddressesSpy.mockResolvedValue(["b.com", "c.com"]);
const result = await service.getNextWebAddresses(prev);
expect(result!.webAddresses).toEqual(["a.com", "b.com", "c.com"]);
expect(result!.checksum).toBe("new");
});
it("fetches all domains if cache is old", async () => {
const prev: PhishingData = {
domains: ["a.com"],
webAddresses: ["a.com"],
timestamp: Date.now() - 2 * 24 * 60 * 60 * 1000,
checksum: "old",
applicationVersion: "1.0.0",
};
fetchChecksumSpy.mockResolvedValue("new");
fetchDomainsSpy.mockResolvedValue(["d.com", "e.com"]);
const result = await service.getNextDomains(prev);
expect(result!.domains).toEqual(["d.com", "e.com"]);
fetchWebAddressesSpy.mockResolvedValue(["d.com", "e.com"]);
const result = await service.getNextWebAddresses(prev);
expect(result!.webAddresses).toEqual(["d.com", "e.com"]);
expect(result!.checksum).toBe("new");
});
});

View File

@@ -20,14 +20,16 @@ import { ScheduledTaskNames, TaskSchedulerService } from "@bitwarden/common/plat
import { LogService } from "@bitwarden/logging";
import { GlobalStateProvider, KeyDefinition, PHISHING_DETECTION_DISK } from "@bitwarden/state";
import { getPhishingResources, PhishingResourceType } from "../phishing-resources";
export type PhishingData = {
domains: string[];
webAddresses: string[];
timestamp: number;
checksum: string;
/**
* We store the application version to refetch the entire dataset on a new client release.
* This counteracts daily appends updates not removing inactive or false positive domains.
* This counteracts daily appends updates not removing inactive or false positive web addresses.
*/
applicationVersion: string;
};
@@ -37,34 +39,27 @@ export const PHISHING_DOMAINS_KEY = new KeyDefinition<PhishingData>(
"phishingDomains",
{
deserializer: (value: PhishingData) =>
value ?? { domains: [], timestamp: 0, checksum: "", applicationVersion: "" },
value ?? { webAddresses: [], timestamp: 0, checksum: "", applicationVersion: "" },
},
);
/** Coordinates fetching, caching, and patching of known phishing domains */
/** Coordinates fetching, caching, and patching of known phishing web addresses */
export class PhishingDataService {
private static readonly RemotePhishingDatabaseUrl =
"https://raw.githubusercontent.com/Phishing-Database/Phishing.Database/master/phishing-domains-ACTIVE.txt";
private static readonly RemotePhishingDatabaseChecksumUrl =
"https://raw.githubusercontent.com/Phishing-Database/checksums/refs/heads/master/phishing-domains-ACTIVE.txt.md5";
private static readonly RemotePhishingDatabaseTodayUrl =
"https://raw.githubusercontent.com/Phishing-Database/Phishing.Database/refs/heads/master/phishing-domains-NEW-today.txt";
private _testDomains = this.getTestDomains();
private _testWebAddresses = this.getTestWebAddresses();
private _cachedState = this.globalStateProvider.get(PHISHING_DOMAINS_KEY);
private _domains$ = this._cachedState.state$.pipe(
private _webAddresses$ = this._cachedState.state$.pipe(
map(
(state) =>
new Set(
(state?.domains?.filter((line) => line.trim().length > 0) ?? []).concat(
this._testDomains,
(state?.webAddresses?.filter((line) => line.trim().length > 0) ?? []).concat(
this._testWebAddresses,
"phishing.testcategory.com", // Included for QA to test in prod
),
),
),
);
// How often are new domains added to the remote?
// How often are new web addresses added to the remote?
readonly UPDATE_INTERVAL_DURATION = 24 * 60 * 60 * 1000; // 24 hours
private _triggerUpdate$ = new Subject<void>();
@@ -75,7 +70,7 @@ export class PhishingDataService {
this._cachedState.state$.pipe(
first(), // Only take the first value to avoid an infinite loop when updating the cache below
switchMap(async (cachedState) => {
const next = await this.getNextDomains(cachedState);
const next = await this.getNextWebAddresses(cachedState);
if (next) {
await this._cachedState.update(() => next);
this.logService.info(`[PhishingDataService] cache updated`);
@@ -85,7 +80,7 @@ export class PhishingDataService {
count: 3,
delay: (err, count) => {
this.logService.error(
`[PhishingDataService] Unable to update domains. Attempt ${count}.`,
`[PhishingDataService] Unable to update web addresses. Attempt ${count}.`,
err,
);
return timer(5 * 60 * 1000); // 5 minutes
@@ -97,7 +92,7 @@ export class PhishingDataService {
err: unknown /** Eslint actually crashed if you remove this type: https://github.com/cartant/eslint-plugin-rxjs/issues/122 */,
) => {
this.logService.error(
"[PhishingDataService] Retries unsuccessful. Unable to update domains.",
"[PhishingDataService] Retries unsuccessful. Unable to update web addresses.",
err,
);
return EMPTY;
@@ -114,6 +109,7 @@ export class PhishingDataService {
private globalStateProvider: GlobalStateProvider,
private logService: LogService,
private platformUtilsService: PlatformUtilsService,
private resourceType: PhishingResourceType = PhishingResourceType.Links,
) {
this.taskSchedulerService.registerTaskHandler(ScheduledTaskNames.phishingDomainUpdate, () => {
this._triggerUpdate$.next();
@@ -125,22 +121,31 @@ export class PhishingDataService {
}
/**
* Checks if the given URL is a known phishing domain
* Checks if the given URL is a known phishing web address
*
* @param url The URL to check
* @returns True if the URL is a known phishing domain, false otherwise
* @returns True if the URL is a known phishing web address, false otherwise
*/
async isPhishingDomain(url: URL): Promise<boolean> {
const domains = await firstValueFrom(this._domains$);
const result = domains.has(url.hostname);
if (result) {
return true;
async isPhishingWebAddress(url: URL): Promise<boolean> {
// Use domain (hostname) matching for domain resources, and link matching for links resources
const entries = await firstValueFrom(this._webAddresses$);
const resource = getPhishingResources(this.resourceType);
if (resource && resource.match) {
for (const entry of entries) {
if (resource.match(url, entry)) {
return true;
}
}
return false;
}
return false;
// Default/domain behavior: exact hostname match as a fallback
return entries.has(url.hostname);
}
async getNextDomains(prev: PhishingData | null): Promise<PhishingData | null> {
prev = prev ?? { domains: [], timestamp: 0, checksum: "", applicationVersion: "" };
async getNextWebAddresses(prev: PhishingData | null): Promise<PhishingData | null> {
prev = prev ?? { webAddresses: [], timestamp: 0, checksum: "", applicationVersion: "" };
const timestamp = Date.now();
const prevAge = timestamp - prev.timestamp;
this.logService.info(`[PhishingDataService] Cache age: ${prevAge}`);
@@ -148,7 +153,7 @@ export class PhishingDataService {
const applicationVersion = await this.platformUtilsService.getApplicationVersion();
// If checksum matches, return existing data with new timestamp & version
const remoteChecksum = await this.fetchPhishingDomainsChecksum();
const remoteChecksum = await this.fetchPhishingChecksum(this.resourceType);
if (remoteChecksum && prev.checksum === remoteChecksum) {
this.logService.info(
`[PhishingDataService] Remote checksum matches local checksum, updating timestamp only.`,
@@ -157,66 +162,66 @@ export class PhishingDataService {
}
// Checksum is different, data needs to be updated.
// Approach 1: Fetch only new domains and append
// Approach 1: Fetch only new web addresses and append
const isOneDayOldMax = prevAge <= this.UPDATE_INTERVAL_DURATION;
if (isOneDayOldMax && applicationVersion === prev.applicationVersion) {
const dailyDomains: string[] = await this.fetchPhishingDomains(
PhishingDataService.RemotePhishingDatabaseTodayUrl,
);
const webAddressesTodayUrl = getPhishingResources(this.resourceType)!.todayUrl;
const dailyWebAddresses: string[] =
await this.fetchPhishingWebAddresses(webAddressesTodayUrl);
this.logService.info(
`[PhishingDataService] ${dailyDomains.length} new phishing domains added`,
`[PhishingDataService] ${dailyWebAddresses.length} new phishing web addresses added`,
);
return {
domains: prev.domains.concat(dailyDomains),
webAddresses: prev.webAddresses.concat(dailyWebAddresses),
checksum: remoteChecksum,
timestamp,
applicationVersion,
};
}
// Approach 2: Fetch all domains
const domains = await this.fetchPhishingDomains(PhishingDataService.RemotePhishingDatabaseUrl);
// Approach 2: Fetch all web addresses
const remoteUrl = getPhishingResources(this.resourceType)!.remoteUrl;
const remoteWebAddresses = await this.fetchPhishingWebAddresses(remoteUrl);
return {
domains,
webAddresses: remoteWebAddresses,
timestamp,
checksum: remoteChecksum,
applicationVersion,
};
}
private async fetchPhishingDomainsChecksum() {
const response = await this.apiService.nativeFetch(
new Request(PhishingDataService.RemotePhishingDatabaseChecksumUrl),
);
private async fetchPhishingChecksum(type: PhishingResourceType = PhishingResourceType.Domains) {
const checksumUrl = getPhishingResources(type)!.checksumUrl;
const response = await this.apiService.nativeFetch(new Request(checksumUrl));
if (!response.ok) {
throw new Error(`[PhishingDataService] Failed to fetch checksum: ${response.status}`);
}
return response.text();
}
private async fetchPhishingDomains(url: string) {
private async fetchPhishingWebAddresses(url: string) {
const response = await this.apiService.nativeFetch(new Request(url));
if (!response.ok) {
throw new Error(`[PhishingDataService] Failed to fetch domains: ${response.status}`);
throw new Error(`[PhishingDataService] Failed to fetch web addresses: ${response.status}`);
}
return response.text().then((text) => text.split("\n"));
}
private getTestDomains() {
private getTestWebAddresses() {
const flag = devFlagEnabled("testPhishingUrls");
if (!flag) {
return [];
}
const domains = devFlagValue("testPhishingUrls") as unknown[];
if (domains && domains instanceof Array) {
const webAddresses = devFlagValue("testPhishingUrls") as unknown[];
if (webAddresses && webAddresses instanceof Array) {
this.logService.debug(
"[PhishingDetectionService] Dev flag enabled for testing phishing detection. Adding test phishing domains:",
domains,
"[PhishingDetectionService] Dev flag enabled for testing phishing detection. Adding test phishing web addresses:",
webAddresses,
);
return domains as string[];
return webAddresses as string[];
}
return [];
}

View File

@@ -94,7 +94,7 @@ export class PhishingDetectionService {
this._ignoredHostnames.delete(url.hostname);
return;
}
const isPhishing = await phishingDataService.isPhishingDomain(url);
const isPhishing = await phishingDataService.isPhishingWebAddress(url);
if (!isPhishing) {
return;
}