diff --git a/apps/browser/src/dirt/phishing-detection/phishing-resources.ts b/apps/browser/src/dirt/phishing-detection/phishing-resources.ts index 88068987dd7..6595104207a 100644 --- a/apps/browser/src/dirt/phishing-detection/phishing-resources.ts +++ b/apps/browser/src/dirt/phishing-detection/phishing-resources.ts @@ -7,6 +7,8 @@ export type PhishingResource = { todayUrl: string; /** Matcher used to decide whether a given URL matches an entry from this resource */ match: (url: URL, entry: string) => boolean; + /** Whether to use the custom matcher. If false, only exact hasUrl lookups are used. Default: true */ + useCustomMatcher?: boolean; }; export const PhishingResourceType = Object.freeze({ @@ -56,6 +58,8 @@ export const PHISHING_RESOURCES: Record { if (!entry) { return false; diff --git a/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.spec.ts b/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.spec.ts index d633c0612f5..2d6c7a5a651 100644 --- a/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.spec.ts +++ b/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.spec.ts @@ -40,6 +40,7 @@ describe("PhishingDataService", () => { // Set default mock behaviors mockIndexedDbService.hasUrl.mockResolvedValue(false); mockIndexedDbService.loadAllUrls.mockResolvedValue([]); + mockIndexedDbService.findMatchingUrl.mockResolvedValue(false); mockIndexedDbService.saveUrls.mockResolvedValue(undefined); mockIndexedDbService.addUrls.mockResolvedValue(undefined); mockIndexedDbService.saveUrlsFromStream.mockResolvedValue(undefined); @@ -90,7 +91,7 @@ describe("PhishingDataService", () => { it("should NOT detect QA test addresses - different subpath", async () => { mockIndexedDbService.hasUrl.mockResolvedValue(false); - mockIndexedDbService.loadAllUrls.mockResolvedValue([]); + mockIndexedDbService.findMatchingUrl.mockResolvedValue(false); const url = new URL("https://phishing.testcategory.com/other"); const result = await service.isPhishingWebAddress(url); @@ -120,70 +121,65 @@ describe("PhishingDataService", () => { expect(result).toBe(true); expect(mockIndexedDbService.hasUrl).toHaveBeenCalledWith("http://phish.com/testing-param"); // Should not fall back to custom matcher when hasUrl returns true - expect(mockIndexedDbService.loadAllUrls).not.toHaveBeenCalled(); + expect(mockIndexedDbService.findMatchingUrl).not.toHaveBeenCalled(); }); - it("should fall back to custom matcher when hasUrl returns false", async () => { + it("should return false when hasUrl returns false (custom matcher disabled)", async () => { // Mock hasUrl to return false (no direct href match) mockIndexedDbService.hasUrl.mockResolvedValue(false); - // Mock loadAllUrls to return phishing URLs for custom matcher - mockIndexedDbService.loadAllUrls.mockResolvedValue(["http://phish.com/path"]); const url = new URL("http://phish.com/path"); const result = await service.isPhishingWebAddress(url); - expect(result).toBe(true); + // Custom matcher is currently disabled (useCustomMatcher: false), so result is false + expect(result).toBe(false); expect(mockIndexedDbService.hasUrl).toHaveBeenCalledWith("http://phish.com/path"); - expect(mockIndexedDbService.loadAllUrls).toHaveBeenCalled(); + // Custom matcher should NOT be called since it's disabled + expect(mockIndexedDbService.findMatchingUrl).not.toHaveBeenCalled(); }); it("should not detect a safe web address", async () => { // Mock hasUrl to return false mockIndexedDbService.hasUrl.mockResolvedValue(false); - // Mock loadAllUrls to return phishing URLs that don't match - mockIndexedDbService.loadAllUrls.mockResolvedValue(["http://phish.com", "http://badguy.net"]); const url = new URL("http://safe.com"); const result = await service.isPhishingWebAddress(url); expect(result).toBe(false); expect(mockIndexedDbService.hasUrl).toHaveBeenCalledWith("http://safe.com/"); - expect(mockIndexedDbService.loadAllUrls).toHaveBeenCalled(); + // Custom matcher is disabled, so findMatchingUrl should NOT be called + expect(mockIndexedDbService.findMatchingUrl).not.toHaveBeenCalled(); }); - it("should not match against root web address with subpaths using custom matcher", async () => { + it("should not match against root web address with subpaths (custom matcher disabled)", async () => { // Mock hasUrl to return false (no direct href match) mockIndexedDbService.hasUrl.mockResolvedValue(false); - // Mock loadAllUrls to return entry that matches with subpath - mockIndexedDbService.loadAllUrls.mockResolvedValue(["http://phish.com/login"]); const url = new URL("http://phish.com/login/page"); const result = await service.isPhishingWebAddress(url); expect(result).toBe(false); expect(mockIndexedDbService.hasUrl).toHaveBeenCalledWith("http://phish.com/login/page"); - expect(mockIndexedDbService.loadAllUrls).toHaveBeenCalled(); + // Custom matcher is disabled, so findMatchingUrl should NOT be called + expect(mockIndexedDbService.findMatchingUrl).not.toHaveBeenCalled(); }); - it("should not match against root web address with different subpaths using custom matcher", async () => { + it("should not match against root web address with different subpaths (custom matcher disabled)", async () => { // Mock hasUrl to return false (no direct hostname match) mockIndexedDbService.hasUrl.mockResolvedValue(false); - // Mock loadAllUrls to return entry that matches with subpath - mockIndexedDbService.loadAllUrls.mockResolvedValue(["http://phish.com/login/page1"]); const url = new URL("http://phish.com/login/page2"); const result = await service.isPhishingWebAddress(url); expect(result).toBe(false); expect(mockIndexedDbService.hasUrl).toHaveBeenCalledWith("http://phish.com/login/page2"); - expect(mockIndexedDbService.loadAllUrls).toHaveBeenCalled(); + // Custom matcher is disabled, so findMatchingUrl should NOT be called + expect(mockIndexedDbService.findMatchingUrl).not.toHaveBeenCalled(); }); it("should handle IndexedDB errors gracefully", async () => { // Mock hasUrl to throw error mockIndexedDbService.hasUrl.mockRejectedValue(new Error("hasUrl error")); - // Mock loadAllUrls to also throw error - mockIndexedDbService.loadAllUrls.mockRejectedValue(new Error("IndexedDB error")); const url = new URL("http://phish.com/about"); const result = await service.isPhishingWebAddress(url); @@ -193,10 +189,8 @@ describe("PhishingDataService", () => { "[PhishingDataService] IndexedDB lookup via hasUrl failed", expect.any(Error), ); - expect(logService.error).toHaveBeenCalledWith( - "[PhishingDataService] Error running custom matcher", - expect.any(Error), - ); + // Custom matcher is disabled, so no custom matcher error is expected + expect(mockIndexedDbService.findMatchingUrl).not.toHaveBeenCalled(); }); }); diff --git a/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.ts b/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.ts index 10268fa7f93..c34a94ecced 100644 --- a/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.ts +++ b/apps/browser/src/dirt/phishing-detection/services/phishing-data.service.ts @@ -153,8 +153,18 @@ export class PhishingDataService { * @returns True if the URL is a known phishing web address, false otherwise */ async isPhishingWebAddress(url: URL): Promise { + this.logService.debug("[PhishingDataService] isPhishingWebAddress called for: " + url.href); + + // Skip non-http(s) protocols - phishing database only contains web URLs + // This prevents expensive fallback checks for chrome://, about:, file://, etc. + if (url.protocol !== "http:" && url.protocol !== "https:") { + this.logService.debug("[PhishingDataService] Skipping non-http(s) protocol: " + url.protocol); + return false; + } + // Quick check for QA/dev test addresses if (this._testWebAddresses.includes(url.href)) { + this.logService.info("[PhishingDataService] Found test web address: " + url.href); return true; } @@ -162,28 +172,73 @@ export class PhishingDataService { try { // Quick lookup: check direct presence of href in IndexedDB - const hasUrl = await this.indexedDbService.hasUrl(url.href); + // Also check without trailing slash since browsers add it but DB entries may not have it + const urlHref = url.href; + const urlWithoutTrailingSlash = urlHref.endsWith("/") ? urlHref.slice(0, -1) : null; + + this.logService.debug("[PhishingDataService] Checking hasUrl on this string: " + urlHref); + let hasUrl = await this.indexedDbService.hasUrl(urlHref); + + // If not found and URL has trailing slash, try without it + if (!hasUrl && urlWithoutTrailingSlash) { + this.logService.debug( + "[PhishingDataService] Checking hasUrl without trailing slash: " + + urlWithoutTrailingSlash, + ); + hasUrl = await this.indexedDbService.hasUrl(urlWithoutTrailingSlash); + } + if (hasUrl) { + this.logService.info( + "[PhishingDataService] Found phishing web address through direct lookup: " + urlHref, + ); return true; } } catch (err) { this.logService.error("[PhishingDataService] IndexedDB lookup via hasUrl failed", err); } - // If a custom matcher is provided, iterate stored entries and apply the matcher. - if (resource && resource.match) { + // If a custom matcher is provided and enabled, use cursor-based search. + // This avoids loading all URLs into memory and allows early exit on first match. + // Can be disabled via useCustomMatcher: false for performance reasons. + if (resource && resource.match && resource.useCustomMatcher !== false) { try { - const entries = await this.indexedDbService.loadAllUrls(); - for (const entry of entries) { - if (resource.match(url, entry)) { - return true; - } + this.logService.debug( + "[PhishingDataService] Starting cursor-based search for: " + url.href, + ); + const startTime = performance.now(); + + const found = await this.indexedDbService.findMatchingUrl((entry) => + resource.match(url, entry), + ); + + const endTime = performance.now(); + const duration = (endTime - startTime).toFixed(2); + this.logService.debug( + `[PhishingDataService] Cursor-based search completed in ${duration}ms for: ${url.href} (found: ${found})`, + ); + + if (found) { + this.logService.info( + "[PhishingDataService] Found phishing web address through custom matcher: " + url.href, + ); + } else { + this.logService.debug( + "[PhishingDataService] No match found, returning false for: " + url.href, + ); } + return found; } catch (err) { this.logService.error("[PhishingDataService] Error running custom matcher", err); + this.logService.debug( + "[PhishingDataService] Returning false due to error for: " + url.href, + ); + return false; } - return false; } + this.logService.debug( + "[PhishingDataService] No custom matcher, returning false for: " + url.href, + ); return false; } diff --git a/apps/browser/src/dirt/phishing-detection/services/phishing-detection.service.ts b/apps/browser/src/dirt/phishing-detection/services/phishing-detection.service.ts index 815007e1d4c..6ca5bad8942 100644 --- a/apps/browser/src/dirt/phishing-detection/services/phishing-detection.service.ts +++ b/apps/browser/src/dirt/phishing-detection/services/phishing-detection.service.ts @@ -1,10 +1,10 @@ import { - concatMap, distinctUntilChanged, EMPTY, filter, map, merge, + mergeMap, Subject, switchMap, tap, @@ -43,6 +43,7 @@ export class PhishingDetectionService { private static _tabUpdated$ = new Subject(); private static _ignoredHostnames = new Set(); private static _didInit = false; + private static _activeSearchCount = 0; static initialize( logService: LogService, @@ -63,7 +64,7 @@ export class PhishingDetectionService { tap((message) => logService.debug(`[PhishingDetectionService] user selected continue for ${message.url}`), ), - concatMap(async (message) => { + mergeMap(async (message) => { const url = new URL(message.url); this._ignoredHostnames.add(url.hostname); await BrowserApi.navigateTabToUrl(message.tabId, url); @@ -88,23 +89,40 @@ export class PhishingDetectionService { prev.ignored === curr.ignored, ), tap((event) => logService.debug(`[PhishingDetectionService] processing event:`, event)), - concatMap(async ({ tabId, url, ignored }) => { - if (ignored) { - // The next time this host is visited, block again - this._ignoredHostnames.delete(url.hostname); - return; - } - const isPhishing = await phishingDataService.isPhishingWebAddress(url); - if (!isPhishing) { - return; - } - - const phishingWarningPage = new URL( - BrowserApi.getRuntimeURL("popup/index.html#/security/phishing-warning") + - `?phishingUrl=${url.toString()}`, + // Use mergeMap for parallel processing - each tab check runs independently + // Concurrency limit of 5 prevents overwhelming IndexedDB + mergeMap(async ({ tabId, url, ignored }) => { + this._activeSearchCount++; + const searchId = `${tabId}-${Date.now()}`; + logService.debug( + `[PhishingDetectionService] Search STARTED [${searchId}] for ${url.href} (active: ${this._activeSearchCount}/5)`, ); - await BrowserApi.navigateTabToUrl(tabId, phishingWarningPage); - }), + const startTime = performance.now(); + + try { + if (ignored) { + // The next time this host is visited, block again + this._ignoredHostnames.delete(url.hostname); + return; + } + const isPhishing = await phishingDataService.isPhishingWebAddress(url); + if (!isPhishing) { + return; + } + + const phishingWarningPage = new URL( + BrowserApi.getRuntimeURL("popup/index.html#/security/phishing-warning") + + `?phishingUrl=${url.toString()}`, + ); + await BrowserApi.navigateTabToUrl(tabId, phishingWarningPage); + } finally { + this._activeSearchCount--; + const duration = (performance.now() - startTime).toFixed(2); + logService.debug( + `[PhishingDetectionService] Search FINISHED [${searchId}] for ${url.href} in ${duration}ms (active: ${this._activeSearchCount}/5)`, + ); + } + }, 5), ); const onCancelCommand$ = messageListener diff --git a/apps/browser/src/dirt/phishing-detection/services/phishing-indexeddb.service.spec.ts b/apps/browser/src/dirt/phishing-detection/services/phishing-indexeddb.service.spec.ts index 99e101cc199..98835a5b366 100644 --- a/apps/browser/src/dirt/phishing-detection/services/phishing-indexeddb.service.spec.ts +++ b/apps/browser/src/dirt/phishing-detection/services/phishing-indexeddb.service.spec.ts @@ -435,6 +435,89 @@ describe("PhishingIndexedDbService", () => { }); }); + describe("findMatchingUrl", () => { + it("returns true when matcher finds a match", async () => { + mockStore.set("https://example.com", { url: "https://example.com" }); + mockStore.set("https://phishing.net", { url: "https://phishing.net" }); + mockStore.set("https://test.org", { url: "https://test.org" }); + + const matcher = (url: string) => url.includes("phishing"); + const result = await service.findMatchingUrl(matcher); + + expect(result).toBe(true); + expect(mockDb.transaction).toHaveBeenCalledWith("phishing-urls", "readonly"); + expect(mockObjectStore.openCursor).toHaveBeenCalled(); + }); + + it("returns false when no URLs match", async () => { + mockStore.set("https://example.com", { url: "https://example.com" }); + mockStore.set("https://test.org", { url: "https://test.org" }); + + const matcher = (url: string) => url.includes("notfound"); + const result = await service.findMatchingUrl(matcher); + + expect(result).toBe(false); + }); + + it("returns false when store is empty", async () => { + const matcher = (url: string) => url.includes("anything"); + const result = await service.findMatchingUrl(matcher); + + expect(result).toBe(false); + }); + + it("exits early on first match without iterating all records", async () => { + mockStore.set("https://match1.com", { url: "https://match1.com" }); + mockStore.set("https://match2.com", { url: "https://match2.com" }); + mockStore.set("https://match3.com", { url: "https://match3.com" }); + + const matcherCallCount = jest + .fn() + .mockImplementation((url: string) => url.includes("match2")); + await service.findMatchingUrl(matcherCallCount); + + // Matcher should be called for match1.com and match2.com, but NOT match3.com + // because it exits early on first match + expect(matcherCallCount).toHaveBeenCalledWith("https://match1.com"); + expect(matcherCallCount).toHaveBeenCalledWith("https://match2.com"); + expect(matcherCallCount).not.toHaveBeenCalledWith("https://match3.com"); + expect(matcherCallCount).toHaveBeenCalledTimes(2); + }); + + it("supports complex matcher logic", async () => { + mockStore.set("https://example.com/path", { url: "https://example.com/path" }); + mockStore.set("https://test.org", { url: "https://test.org" }); + mockStore.set("https://phishing.net/login", { url: "https://phishing.net/login" }); + + const matcher = (url: string) => { + return url.includes("phishing") && url.includes("login"); + }; + const result = await service.findMatchingUrl(matcher); + + expect(result).toBe(true); + }); + + it("returns false on error", async () => { + const error = new Error("IndexedDB error"); + mockOpenRequest.error = error; + (global.indexedDB.open as jest.Mock).mockImplementation(() => { + setTimeout(() => { + mockOpenRequest.onerror?.(); + }, 0); + return mockOpenRequest; + }); + + const matcher = (url: string) => url.includes("test"); + const result = await service.findMatchingUrl(matcher); + + expect(result).toBe(false); + expect(logService.error).toHaveBeenCalledWith( + "[PhishingIndexedDbService] Cursor search failed", + expect.any(Error), + ); + }); + }); + describe("database initialization", () => { it("creates object store with keyPath on upgrade", async () => { mockDb.objectStoreNames.contains.mockReturnValue(false); diff --git a/apps/browser/src/dirt/phishing-detection/services/phishing-indexeddb.service.ts b/apps/browser/src/dirt/phishing-detection/services/phishing-indexeddb.service.ts index fe0f10da221..ea4b7987607 100644 --- a/apps/browser/src/dirt/phishing-detection/services/phishing-indexeddb.service.ts +++ b/apps/browser/src/dirt/phishing-detection/services/phishing-indexeddb.service.ts @@ -195,6 +195,60 @@ export class PhishingIndexedDbService { }); } + /** + * Checks if any URL in the database matches the given matcher function. + * Uses a cursor to iterate through records without loading all into memory. + * Returns immediately on first match for optimal performance. + * + * @param matcher - Function that tests each URL and returns true if it matches + * @returns `true` if any URL matches, `false` if none match or on error + */ + async findMatchingUrl(matcher: (url: string) => boolean): Promise { + this.logService.debug("[PhishingIndexedDbService] Searching for matching URL with cursor..."); + + let db: IDBDatabase | null = null; + try { + db = await this.openDatabase(); + return await this.cursorSearch(db, matcher); + } catch (error) { + this.logService.error("[PhishingIndexedDbService] Cursor search failed", error); + return false; + } finally { + db?.close(); + } + } + + /** + * Performs cursor-based search through all URLs. + * Tests each URL with the matcher without accumulating records in memory. + */ + private cursorSearch(db: IDBDatabase, matcher: (url: string) => boolean): Promise { + return new Promise((resolve, reject) => { + const req = db + .transaction(this.STORE_NAME, "readonly") + .objectStore(this.STORE_NAME) + .openCursor(); + req.onerror = () => reject(req.error); + req.onsuccess = (e) => { + const cursor = (e.target as IDBRequest).result; + if (cursor) { + const url = (cursor.value as PhishingUrlRecord).url; + // Test the URL immediately without accumulating in memory + if (matcher(url)) { + // Found a match + resolve(true); + return; + } + // No match, continue to next record + cursor.continue(); + } else { + // Reached end of records without finding a match + resolve(false); + } + }; + }); + } + /** * Saves phishing URLs directly from a stream. * Processes data incrementally to minimize memory usage.