1750 lines
50 KiB
JavaScript
1750 lines
50 KiB
JavaScript
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
/**
|
|
* Functionality related to categorizing SERPs.
|
|
*/
|
|
|
|
import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs";
|
|
|
|
const lazy = {};
|
|
|
|
ChromeUtils.defineESModuleGetters(lazy, {
|
|
ExperimentAPI: "resource://nimbus/ExperimentAPI.sys.mjs",
|
|
NimbusFeatures: "resource://nimbus/ExperimentAPI.sys.mjs",
|
|
Region: "resource://gre/modules/Region.sys.mjs",
|
|
RemoteSettings: "resource://services-settings/remote-settings.sys.mjs",
|
|
SearchUtils: "resource://gre/modules/SearchUtils.sys.mjs",
|
|
Sqlite: "resource://gre/modules/Sqlite.sys.mjs",
|
|
});
|
|
|
|
ChromeUtils.defineLazyGetter(lazy, "gCryptoHash", () => {
|
|
return Cc["@mozilla.org/security/hash;1"].createInstance(Ci.nsICryptoHash);
|
|
});
|
|
|
|
const CATEGORIZATION_PREF =
|
|
"browser.search.serpEventTelemetryCategorization.enabled";
|
|
const CATEGORIZATION_REGION_PREF =
|
|
"browser.search.serpEventTelemetryCategorization.regionEnabled";
|
|
|
|
XPCOMUtils.defineLazyPreferenceGetter(
|
|
lazy,
|
|
"serpEventTelemetryCategorization",
|
|
CATEGORIZATION_PREF,
|
|
false,
|
|
(aPreference, previousValue, newValue) => {
|
|
if (newValue) {
|
|
SERPCategorization.init();
|
|
} else {
|
|
SERPCategorization.uninit({ deleteMap: true });
|
|
}
|
|
}
|
|
);
|
|
|
|
ChromeUtils.defineLazyGetter(lazy, "logConsole", () => {
|
|
return console.createInstance({
|
|
prefix: "SearchTelemetry",
|
|
maxLogLevel: lazy.SearchUtils.loggingEnabled ? "Debug" : "Warn",
|
|
});
|
|
});
|
|
|
|
XPCOMUtils.defineLazyPreferenceGetter(
|
|
lazy,
|
|
"activityLimit",
|
|
"telemetry.fog.test.activity_limit",
|
|
120
|
|
);
|
|
|
|
export const TELEMETRY_CATEGORIZATION_KEY = "search-categorization";
|
|
export const TELEMETRY_CATEGORIZATION_DOWNLOAD_SETTINGS = {
|
|
// Units are in milliseconds.
|
|
base: 3600000,
|
|
minAdjust: 60000,
|
|
maxAdjust: 600000,
|
|
maxTriesPerSession: 2,
|
|
};
|
|
|
|
export const CATEGORIZATION_SETTINGS = {
|
|
STORE_SCHEMA: 1,
|
|
STORE_FILE: "domain_to_categories.sqlite",
|
|
STORE_NAME: "domain_to_categories",
|
|
MAX_DOMAINS_TO_CATEGORIZE: 10,
|
|
MINIMUM_SCORE: 0,
|
|
STARTING_RANK: 2,
|
|
IDLE_TIMEOUT_SECONDS: 60 * 60,
|
|
WAKE_TIMEOUT_MS: 60 * 60 * 1000,
|
|
PING_SUBMISSION_THRESHOLD: 10,
|
|
HAS_MATCHING_REGION: "SearchTelemetry:HasMatchingRegion",
|
|
INCONCLUSIVE: 0,
|
|
};
|
|
|
|
/**
|
|
* @typedef {object} CategorizationResult
|
|
* @property {string} organic_category
|
|
* The category for the organic result.
|
|
* @property {number} organic_num_domains
|
|
* The number of domains examined to determine the organic category result.
|
|
* @property {number} organic_num_inconclusive
|
|
* The number of inconclusive domains when determining the organic result.
|
|
* @property {number} organic_num_unknown
|
|
* The number of unknown domains when determining the organic result.
|
|
* @property {string} sponsored_category
|
|
* The category for the organic result.
|
|
* @property {number} sponsored_num_domains
|
|
* The number of domains examined to determine the sponsored category.
|
|
* @property {number} sponsored_num_inconclusive
|
|
* The number of inconclusive domains when determining the sponsored category.
|
|
* @property {number} sponsored_num_unknown
|
|
* The category for the sponsored result.
|
|
* @property {string} mappings_version
|
|
* The category mapping version used to determine the categories.
|
|
*/
|
|
|
|
/**
|
|
* @typedef {object} CategorizationExtraParams
|
|
* @property {number} num_ads_clicked
|
|
* The total number of ads clicked on a SERP.
|
|
* @property {number} num_ads_hidden
|
|
* The total number of ads hidden from the user when categorization occured.
|
|
* @property {number} num_ads_loaded
|
|
* The total number of ads loaded when categorization occured.
|
|
* @property {number} num_ads_visible
|
|
* The total number of ads visible to the user when categorization occured.
|
|
*/
|
|
|
|
/* eslint-disable jsdoc/valid-types */
|
|
/**
|
|
* @typedef {CategorizationResult & CategorizationExtraParams} RecordCategorizationParameters
|
|
*/
|
|
/* eslint-enable jsdoc/valid-types */
|
|
|
|
/**
|
|
* Categorizes SERPs.
|
|
*/
|
|
class Categorizer {
|
|
async init() {
|
|
if (this.enabled) {
|
|
lazy.logConsole.debug("Initialize SERP categorizer.");
|
|
await SERPDomainToCategoriesMap.init();
|
|
SERPCategorizationEventScheduler.init();
|
|
SERPCategorizationRecorder.init();
|
|
}
|
|
}
|
|
|
|
async uninit({ deleteMap = false } = {}) {
|
|
lazy.logConsole.debug("Uninit SERP categorizer.");
|
|
await SERPDomainToCategoriesMap.uninit(deleteMap);
|
|
SERPCategorizationEventScheduler.uninit();
|
|
SERPCategorizationRecorder.uninit();
|
|
}
|
|
|
|
get enabled() {
|
|
return lazy.serpEventTelemetryCategorization;
|
|
}
|
|
|
|
/**
|
|
* Categorizes domains extracted from SERPs. Note that we don't process
|
|
* domains if the domain-to-categories map is empty (if the client couldn't
|
|
* download Remote Settings attachments, for example).
|
|
*
|
|
* @param {Set} nonAdDomains
|
|
* Domains from organic results extracted from the page.
|
|
* @param {Set} adDomains
|
|
* Domains from ad results extracted from the page.
|
|
* @returns {CategorizationResult | null}
|
|
* The final categorization result. Returns null if the map was empty.
|
|
*/
|
|
async maybeCategorizeSERP(nonAdDomains, adDomains) {
|
|
// Per DS, if the map was empty (e.g. because of a technical issue
|
|
// downloading the data), we shouldn't report telemetry.
|
|
// Thus, there is no point attempting to categorize the SERP.
|
|
if (SERPDomainToCategoriesMap.empty) {
|
|
SERPCategorizationRecorder.recordMissingImpressionTelemetry();
|
|
return null;
|
|
}
|
|
let resultsToReport = {};
|
|
|
|
let results = await this.applyCategorizationLogic(nonAdDomains);
|
|
resultsToReport.organic_category = results.category;
|
|
resultsToReport.organic_num_domains = results.num_domains;
|
|
resultsToReport.organic_num_unknown = results.num_unknown;
|
|
resultsToReport.organic_num_inconclusive = results.num_inconclusive;
|
|
|
|
results = await this.applyCategorizationLogic(adDomains);
|
|
resultsToReport.sponsored_category = results.category;
|
|
resultsToReport.sponsored_num_domains = results.num_domains;
|
|
resultsToReport.sponsored_num_unknown = results.num_unknown;
|
|
resultsToReport.sponsored_num_inconclusive = results.num_inconclusive;
|
|
|
|
resultsToReport.mappings_version = SERPDomainToCategoriesMap.version;
|
|
|
|
return resultsToReport;
|
|
}
|
|
|
|
/**
|
|
* Applies the logic for reducing extracted domains to a single category for
|
|
* the SERP.
|
|
*
|
|
* @param {Set} domains
|
|
* The domains extracted from the page.
|
|
* @returns {object} resultsToReport
|
|
* The final categorization results. Keys are: "category", "num_domains",
|
|
* "num_unknown" and "num_inconclusive".
|
|
*/
|
|
async applyCategorizationLogic(domains) {
|
|
let domainInfo = {};
|
|
let domainsCount = 0;
|
|
let unknownsCount = 0;
|
|
let inconclusivesCount = 0;
|
|
|
|
for (let domain of domains) {
|
|
domainsCount++;
|
|
|
|
let categoryCandidates = await SERPDomainToCategoriesMap.get(domain);
|
|
|
|
if (!categoryCandidates.length) {
|
|
unknownsCount++;
|
|
continue;
|
|
}
|
|
|
|
// Inconclusive domains do not have more than one category candidate.
|
|
if (
|
|
categoryCandidates[0].category == CATEGORIZATION_SETTINGS.INCONCLUSIVE
|
|
) {
|
|
inconclusivesCount++;
|
|
continue;
|
|
}
|
|
|
|
domainInfo[domain] = categoryCandidates;
|
|
}
|
|
|
|
let finalCategory;
|
|
let topCategories = [];
|
|
// Determine if all domains were unknown or inconclusive.
|
|
if (unknownsCount + inconclusivesCount == domainsCount) {
|
|
finalCategory = CATEGORIZATION_SETTINGS.INCONCLUSIVE;
|
|
} else {
|
|
let maxScore = CATEGORIZATION_SETTINGS.MINIMUM_SCORE;
|
|
let rank = CATEGORIZATION_SETTINGS.STARTING_RANK;
|
|
for (let categoryCandidates of Object.values(domainInfo)) {
|
|
for (let { category, score } of categoryCandidates) {
|
|
let adjustedScore = score / Math.log2(rank);
|
|
if (adjustedScore > maxScore) {
|
|
maxScore = adjustedScore;
|
|
topCategories = [category];
|
|
} else if (adjustedScore == maxScore) {
|
|
topCategories.push(Number(category));
|
|
}
|
|
rank++;
|
|
}
|
|
}
|
|
finalCategory =
|
|
topCategories.length > 1
|
|
? this.#chooseRandomlyFrom(topCategories)
|
|
: topCategories[0];
|
|
}
|
|
|
|
return {
|
|
category: finalCategory,
|
|
num_domains: domainsCount,
|
|
num_unknown: unknownsCount,
|
|
num_inconclusive: inconclusivesCount,
|
|
};
|
|
}
|
|
|
|
#chooseRandomlyFrom(categories) {
|
|
let randIdx = Math.floor(Math.random() * categories.length);
|
|
return categories[randIdx];
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Contains outstanding categorizations of browser objects that have yet to be
|
|
* scheduled to be reported into a Glean event.
|
|
* They are kept here until one of the conditions are met:
|
|
* 1. The browser that was tracked is no longer being tracked.
|
|
* 2. A user has been idle for IDLE_TIMEOUT_SECONDS
|
|
* 3. The user has awoken their computer and the time elapsed from the last
|
|
* categorization event exceeds WAKE_TIMEOUT_MS.
|
|
*/
|
|
class CategorizationEventScheduler {
|
|
/**
|
|
* A WeakMap containing browser objects mapped to a callback.
|
|
*
|
|
* @type {WeakMap | null}
|
|
*/
|
|
#browserToCallbackMap = null;
|
|
|
|
/**
|
|
* An instance of user idle service. Cached for testing purposes.
|
|
*
|
|
* @type {nsIUserIdleService | null}
|
|
*/
|
|
#idleService = null;
|
|
|
|
/**
|
|
* Whether it has been initialized.
|
|
*
|
|
* @type {boolean}
|
|
*/
|
|
#init = false;
|
|
|
|
/**
|
|
* The last Date.now() of a callback insertion.
|
|
*
|
|
* @type {number | null}
|
|
*/
|
|
#mostRecentMs = null;
|
|
|
|
init() {
|
|
if (this.#init) {
|
|
return;
|
|
}
|
|
|
|
lazy.logConsole.debug("Initializing categorization event scheduler.");
|
|
|
|
this.#browserToCallbackMap = new WeakMap();
|
|
|
|
// In tests, we simulate idleness as it is more reliable and easier than
|
|
// trying to replicate idleness. The way to do is so it by creating
|
|
// an mock idle service and having the component subscribe to it. If we
|
|
// used a lazy instantiation of idle service, the test could only ever be
|
|
// subscribed to the real one.
|
|
this.#idleService = Cc["@mozilla.org/widget/useridleservice;1"].getService(
|
|
Ci.nsIUserIdleService
|
|
);
|
|
|
|
this.#idleService.addIdleObserver(
|
|
this,
|
|
CATEGORIZATION_SETTINGS.IDLE_TIMEOUT_SECONDS
|
|
);
|
|
|
|
Services.obs.addObserver(this, "quit-application");
|
|
Services.obs.addObserver(this, "wake_notification");
|
|
|
|
this.#init = true;
|
|
}
|
|
|
|
uninit() {
|
|
if (!this.#init) {
|
|
return;
|
|
}
|
|
|
|
this.#browserToCallbackMap = null;
|
|
|
|
lazy.logConsole.debug("Un-initializing categorization event scheduler.");
|
|
this.#idleService.removeIdleObserver(
|
|
this,
|
|
CATEGORIZATION_SETTINGS.IDLE_TIMEOUT_SECONDS
|
|
);
|
|
|
|
Services.obs.removeObserver(this, "quit-application");
|
|
Services.obs.removeObserver(this, "wake_notification");
|
|
|
|
this.#idleService = null;
|
|
this.#init = false;
|
|
}
|
|
|
|
observe(subject, topic) {
|
|
switch (topic) {
|
|
case "idle":
|
|
lazy.logConsole.debug("Triggering all callbacks due to idle.");
|
|
this.#sendAllCallbacks();
|
|
break;
|
|
case "quit-application":
|
|
this.uninit();
|
|
break;
|
|
case "wake_notification":
|
|
if (
|
|
this.#mostRecentMs &&
|
|
Date.now() - this.#mostRecentMs >=
|
|
CATEGORIZATION_SETTINGS.WAKE_TIMEOUT_MS
|
|
) {
|
|
lazy.logConsole.debug(
|
|
"Triggering all callbacks due to a wake notification."
|
|
);
|
|
this.#sendAllCallbacks();
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
addCallback(browser, callback) {
|
|
lazy.logConsole.debug("Adding callback to queue.");
|
|
this.#mostRecentMs = Date.now();
|
|
this.#browserToCallbackMap?.set(browser, callback);
|
|
}
|
|
|
|
sendCallback(browser) {
|
|
let callback = this.#browserToCallbackMap?.get(browser);
|
|
if (callback) {
|
|
lazy.logConsole.debug("Triggering callback.");
|
|
callback();
|
|
Services.obs.notifyObservers(
|
|
null,
|
|
"recorded-single-categorization-event"
|
|
);
|
|
this.#browserToCallbackMap.delete(browser);
|
|
}
|
|
}
|
|
|
|
#sendAllCallbacks() {
|
|
let browsers = ChromeUtils.nondeterministicGetWeakMapKeys(
|
|
this.#browserToCallbackMap
|
|
);
|
|
if (browsers) {
|
|
lazy.logConsole.debug("Triggering all callbacks.");
|
|
for (let browser of browsers) {
|
|
this.sendCallback(browser);
|
|
}
|
|
}
|
|
this.#mostRecentMs = null;
|
|
Services.obs.notifyObservers(null, "recorded-all-categorization-events");
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Handles reporting SERP categorization telemetry to Glean.
|
|
*/
|
|
class CategorizationRecorder {
|
|
#init = false;
|
|
|
|
// The number of SERP categorizations that have been recorded but not yet
|
|
// reported in a Glean ping.
|
|
#serpCategorizationsCount = 0;
|
|
|
|
// When the user started interacting with the SERP.
|
|
#userInteractionStartTime = null;
|
|
|
|
async init() {
|
|
if (this.#init) {
|
|
return;
|
|
}
|
|
|
|
Services.obs.addObserver(this, "user-interaction-active");
|
|
Services.obs.addObserver(this, "user-interaction-inactive");
|
|
this.#init = true;
|
|
this.#serpCategorizationsCount = Services.prefs.getIntPref(
|
|
"browser.search.serpMetricsRecordedCounter",
|
|
0
|
|
);
|
|
Services.prefs.setIntPref("browser.search.serpMetricsRecordedCounter", 0);
|
|
this.submitPing("startup");
|
|
Services.obs.notifyObservers(null, "categorization-recorder-init");
|
|
}
|
|
|
|
uninit() {
|
|
if (this.#init) {
|
|
Services.obs.removeObserver(this, "user-interaction-active");
|
|
Services.obs.removeObserver(this, "user-interaction-inactive");
|
|
Services.prefs.setIntPref(
|
|
"browser.search.serpMetricsRecordedCounter",
|
|
this.#serpCategorizationsCount
|
|
);
|
|
|
|
this.#resetCategorizationRecorderData();
|
|
this.#init = false;
|
|
}
|
|
}
|
|
|
|
observe(subject, topic, _data) {
|
|
switch (topic) {
|
|
case "user-interaction-active": {
|
|
// If the user is already active, we don't want to overwrite the start
|
|
// time.
|
|
if (this.#userInteractionStartTime == null) {
|
|
this.#userInteractionStartTime = Date.now();
|
|
}
|
|
break;
|
|
}
|
|
case "user-interaction-inactive": {
|
|
let currentTime = Date.now();
|
|
let activityLimitInMs = lazy.activityLimit * 1000;
|
|
if (
|
|
this.#userInteractionStartTime &&
|
|
currentTime - this.#userInteractionStartTime >= activityLimitInMs
|
|
) {
|
|
this.submitPing("inactivity");
|
|
}
|
|
this.#userInteractionStartTime = null;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Helper function for recording the SERP categorization event.
|
|
*
|
|
* @param {RecordCategorizationParameters} resultToReport
|
|
* The object containing all the data required to report.
|
|
*/
|
|
recordCategorizationTelemetry(resultToReport) {
|
|
lazy.logConsole.debug(
|
|
"Reporting the following categorization result:",
|
|
resultToReport
|
|
);
|
|
Glean.serp.categorization.record(resultToReport);
|
|
|
|
this.#incrementCategorizationsCount();
|
|
}
|
|
|
|
/**
|
|
* Helper function for recording Glean telemetry when issues with the
|
|
* domain-to-categories map cause the categorization and impression not to be
|
|
* recorded.
|
|
*/
|
|
recordMissingImpressionTelemetry() {
|
|
lazy.logConsole.debug(
|
|
"Recording a missing impression due to an issue with the domain-to-categories map."
|
|
);
|
|
Glean.serp.categorizationNoMapFound.add();
|
|
this.#incrementCategorizationsCount();
|
|
}
|
|
|
|
/**
|
|
* Adds a Glean object metric to the custom SERP categorization ping if info
|
|
* about a single experiment has been requested via Nimbus config.
|
|
*/
|
|
maybeExtractAndRecordExperimentInfo() {
|
|
let targetExperiment =
|
|
lazy.NimbusFeatures.search.getVariable("targetExperiment");
|
|
if (!targetExperiment) {
|
|
lazy.logConsole.debug("No targetExperiment found.");
|
|
return;
|
|
}
|
|
|
|
lazy.logConsole.debug("Found targetExperiment:", targetExperiment);
|
|
|
|
// Try checking if an Experiment exists, otherwise check for a Rollout.
|
|
let metadata =
|
|
lazy.ExperimentAPI.getExperimentMetaData({
|
|
featureId: "search",
|
|
slug: targetExperiment,
|
|
}) ??
|
|
lazy.ExperimentAPI.getRolloutMetaData({
|
|
featureId: "search",
|
|
slug: targetExperiment,
|
|
});
|
|
if (!metadata) {
|
|
lazy.logConsole.debug(
|
|
"No experiment or rollout found that matches targetExperiment."
|
|
);
|
|
return;
|
|
}
|
|
|
|
let experimentToRecord = {
|
|
slug: metadata.slug,
|
|
branch: metadata.branch?.slug,
|
|
};
|
|
lazy.logConsole.debug("Experiment data:", experimentToRecord);
|
|
Glean.serp.experimentInfo.set(experimentToRecord);
|
|
}
|
|
|
|
submitPing(reason) {
|
|
if (!this.#serpCategorizationsCount) {
|
|
return;
|
|
}
|
|
|
|
// If experiment info has been requested via Nimbus config, we want to
|
|
// record it just before submitting the ping.
|
|
this.maybeExtractAndRecordExperimentInfo();
|
|
lazy.logConsole.debug("Submitting SERP categorization ping:", reason);
|
|
GleanPings.serpCategorization.submit(reason);
|
|
|
|
this.#serpCategorizationsCount = 0;
|
|
}
|
|
|
|
/**
|
|
* Tests are able to clear telemetry on demand. When that happens, we need to
|
|
* ensure we're doing to the same here or else the internal count in tests
|
|
* will be inaccurate.
|
|
*/
|
|
testReset() {
|
|
if (Cu.isInAutomation) {
|
|
this.#resetCategorizationRecorderData();
|
|
}
|
|
}
|
|
|
|
#incrementCategorizationsCount() {
|
|
this.#serpCategorizationsCount++;
|
|
|
|
if (
|
|
this.#serpCategorizationsCount >=
|
|
CATEGORIZATION_SETTINGS.PING_SUBMISSION_THRESHOLD
|
|
) {
|
|
this.submitPing("threshold_reached");
|
|
}
|
|
}
|
|
|
|
#resetCategorizationRecorderData() {
|
|
this.#serpCategorizationsCount = 0;
|
|
this.#userInteractionStartTime = null;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* @typedef {object} DomainToCategoriesRecord
|
|
* @property {boolean} isDefault
|
|
* Whether the record is a default if the user's region does not contain a
|
|
* more specific set of mappings.
|
|
* @property {Array<string>} includeRegions
|
|
* The region codes to include. If left blank, it applies to all regions.
|
|
* @property {Array<string>} excludeRegions
|
|
* The region codes to exclude.
|
|
* @property {number} version
|
|
* The version of the record.
|
|
*/
|
|
|
|
/**
|
|
* @typedef {object} DomainCategoryScore
|
|
* @property {number} category
|
|
* The index of the category.
|
|
* @property {number} score
|
|
* The score associated with the category.
|
|
*/
|
|
|
|
/**
|
|
* Maps domain to categories. Data is downloaded from Remote Settings and
|
|
* stored inside DomainToCategoriesStore.
|
|
*/
|
|
class DomainToCategoriesMap {
|
|
/**
|
|
* Latest version number of the attachments.
|
|
*
|
|
* @type {number | null}
|
|
*/
|
|
#version = null;
|
|
|
|
/**
|
|
* The Remote Settings client.
|
|
*
|
|
* @type {object | null}
|
|
*/
|
|
#client = null;
|
|
|
|
/**
|
|
* Whether this is synced with Remote Settings.
|
|
*
|
|
* @type {boolean}
|
|
*/
|
|
#init = false;
|
|
|
|
/**
|
|
* Callback when Remote Settings syncs.
|
|
*
|
|
* @type {Function | null}
|
|
*/
|
|
#onSettingsSync = null;
|
|
|
|
/**
|
|
* When downloading an attachment from Remote Settings fails, this will
|
|
* contain a timer which will eventually attempt to retry downloading
|
|
* attachments.
|
|
*/
|
|
#downloadTimer = null;
|
|
|
|
/**
|
|
* Number of times this has attempted to try another download. Will reset
|
|
* if the categorization preference has been toggled, or a sync event has
|
|
* been detected.
|
|
*
|
|
* @type {number}
|
|
*/
|
|
#downloadRetries = 0;
|
|
|
|
/**
|
|
* A reference to the data store.
|
|
*
|
|
* @type {DomainToCategoriesStore | null}
|
|
*/
|
|
#store = null;
|
|
|
|
/**
|
|
* Runs at application startup with startup idle tasks. If the SERP
|
|
* categorization preference is enabled, it creates a Remote Settings
|
|
* client to listen to updates, and populates the store.
|
|
*/
|
|
async init() {
|
|
if (this.#init) {
|
|
return;
|
|
}
|
|
lazy.logConsole.debug("Initializing domain-to-categories map.");
|
|
|
|
// Set early to allow un-init from an initialization.
|
|
this.#init = true;
|
|
|
|
try {
|
|
await this.#setupClientAndStore();
|
|
} catch (ex) {
|
|
lazy.logConsole.error(ex);
|
|
await this.uninit();
|
|
return;
|
|
}
|
|
|
|
// If we don't have a client and store, it likely means an un-init process
|
|
// started during the initialization process.
|
|
if (this.#client && this.#store) {
|
|
lazy.logConsole.debug("Initialized domain-to-categories map.");
|
|
Services.obs.notifyObservers(null, "domain-to-categories-map-init");
|
|
}
|
|
}
|
|
|
|
async uninit(shouldDeleteStore) {
|
|
if (this.#init) {
|
|
lazy.logConsole.debug("Un-initializing domain-to-categories map.");
|
|
this.#clearClient();
|
|
this.#cancelAndNullifyTimer();
|
|
|
|
if (this.#store) {
|
|
if (shouldDeleteStore) {
|
|
try {
|
|
await this.#store.dropData();
|
|
} catch (ex) {
|
|
lazy.logConsole.error(ex);
|
|
}
|
|
}
|
|
await this.#store.uninit();
|
|
this.#store = null;
|
|
}
|
|
|
|
lazy.logConsole.debug("Un-initialized domain-to-categories map.");
|
|
this.#init = false;
|
|
Services.obs.notifyObservers(null, "domain-to-categories-map-uninit");
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Given a domain, find categories and relevant scores.
|
|
*
|
|
* @param {string} domain Domain to lookup.
|
|
* @returns {Array<DomainCategoryScore>}
|
|
* An array containing categories and their respective score. If no record
|
|
* for the domain is available, return an empty array.
|
|
*/
|
|
async get(domain) {
|
|
if (!this.#store || this.#store.empty || !this.#store.ready) {
|
|
return [];
|
|
}
|
|
lazy.gCryptoHash.init(lazy.gCryptoHash.SHA256);
|
|
let bytes = new TextEncoder().encode(domain);
|
|
lazy.gCryptoHash.update(bytes, domain.length);
|
|
let hash = lazy.gCryptoHash.finish(true);
|
|
let rawValues = await this.#store.getCategories(hash);
|
|
if (rawValues?.length) {
|
|
let output = [];
|
|
// Transform data into a more readable format.
|
|
// [x, y] => { category: x, score: y }
|
|
for (let i = 0; i < rawValues.length; i += 2) {
|
|
output.push({ category: rawValues[i], score: rawValues[i + 1] });
|
|
}
|
|
return output;
|
|
}
|
|
return [];
|
|
}
|
|
|
|
/**
|
|
* If the map was initialized, returns the version number for the data.
|
|
* The version number is determined by the record with the highest version
|
|
* number. Even if the records have different versions, only records from the
|
|
* latest version should be available. Returns null if the map was not
|
|
* initialized.
|
|
*
|
|
* @returns {null | number} The version number.
|
|
*/
|
|
get version() {
|
|
return this.#version;
|
|
}
|
|
|
|
/**
|
|
* Whether the store is empty of data.
|
|
*
|
|
* @returns {boolean}
|
|
*/
|
|
get empty() {
|
|
if (!this.#store) {
|
|
return true;
|
|
}
|
|
return this.#store.empty;
|
|
}
|
|
|
|
/**
|
|
* Unit test-only function, used to override the domainToCategoriesMap so
|
|
* that tests can set it to easy to test values.
|
|
*
|
|
* @param {object} domainToCategoriesMap
|
|
* An object where the key is a hashed domain and the value is an array
|
|
* containing an arbitrary number of DomainCategoryScores.
|
|
* @param {number} version
|
|
* The version number for the store.
|
|
* @param {boolean} isDefault
|
|
* Whether the records should be considered default.
|
|
*/
|
|
async overrideMapForTests(
|
|
domainToCategoriesMap,
|
|
version = 1,
|
|
isDefault = false
|
|
) {
|
|
if (Cu.isInAutomation || Services.env.exists("XPCSHELL_TEST_PROFILE_DIR")) {
|
|
await this.#store.init();
|
|
await this.#store.dropData();
|
|
await this.#store.insertObject(domainToCategoriesMap, version, isDefault);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Given a list of records from Remote Settings, determine which ones should
|
|
* be matched based on the region.
|
|
*
|
|
* - If a set of records match the region, they should be derived from one
|
|
* source JSON file. The reason why it is split up is to make it less
|
|
* onerous to download and parse, though testing might find a single
|
|
* file to be sufficient.
|
|
* - If more than one set of records match the region, it would be from one
|
|
* set of records belonging to default mappings that apply to many regions.
|
|
* The more specific collection should override the default set.
|
|
*
|
|
* @param {Array<DomainToCategoriesRecord>} records
|
|
* The records from Remote Settings.
|
|
* @param {string|null} region
|
|
* The region to match.
|
|
* @returns {object|null}
|
|
*/
|
|
findRecordsForRegion(records, region) {
|
|
if (!region || !records?.length) {
|
|
return null;
|
|
}
|
|
|
|
let regionSpecificRecords = [];
|
|
let defaultRecords = [];
|
|
for (let record of records) {
|
|
if (this.recordMatchesRegion(record, region)) {
|
|
if (record.isDefault) {
|
|
defaultRecords.push(record);
|
|
} else {
|
|
regionSpecificRecords.push(record);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (regionSpecificRecords.length) {
|
|
return { records: regionSpecificRecords, isDefault: false };
|
|
}
|
|
|
|
if (defaultRecords.length) {
|
|
return { records: defaultRecords, isDefault: true };
|
|
}
|
|
|
|
return null;
|
|
}
|
|
|
|
/**
|
|
* Checks the record matches the region.
|
|
*
|
|
* @param {DomainToCategoriesRecord} record
|
|
* The record to check.
|
|
* @param {string|null} region
|
|
* The region the record to be matched against.
|
|
* @returns {boolean}
|
|
*/
|
|
recordMatchesRegion(record, region) {
|
|
if (!region || !record) {
|
|
return false;
|
|
}
|
|
|
|
if (record.excludeRegions?.includes(region)) {
|
|
return false;
|
|
}
|
|
|
|
if (record.isDefault) {
|
|
return true;
|
|
}
|
|
|
|
if (!record.includeRegions?.includes(region)) {
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
async syncMayModifyStore(syncData, region) {
|
|
if (!syncData || !region) {
|
|
return false;
|
|
}
|
|
|
|
let currentResult = this.findRecordsForRegion(syncData?.current, region);
|
|
if (this.#store.empty && !currentResult) {
|
|
lazy.logConsole.debug("Store was empty and there were no results.");
|
|
return false;
|
|
}
|
|
|
|
if (!this.#store.empty && !currentResult) {
|
|
return true;
|
|
}
|
|
|
|
let storeHasDefault = await this.#store.isDefault();
|
|
if (storeHasDefault != currentResult.isDefault) {
|
|
return true;
|
|
}
|
|
|
|
const recordsDifferFromStore = records => {
|
|
let result = this.findRecordsForRegion(records, region);
|
|
return result?.records.length && storeHasDefault == result.isDefault;
|
|
};
|
|
|
|
if (
|
|
recordsDifferFromStore(syncData.created) ||
|
|
recordsDifferFromStore(syncData.deleted) ||
|
|
recordsDifferFromStore(syncData.updated.map(obj => obj.new))
|
|
) {
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/**
|
|
* Connect with Remote Settings and retrieve the records associated with
|
|
* categorization. Then, check if the records match the store version. If
|
|
* no records exist, return early. If records exist but the version stored
|
|
* on the records differ from the store version, then attempt to
|
|
* empty the store and fill it with data from downloaded attachments. Only
|
|
* reuse the store if the version in each record matches the store.
|
|
*/
|
|
async #setupClientAndStore() {
|
|
if (this.#client && !this.empty) {
|
|
return;
|
|
}
|
|
lazy.logConsole.debug("Setting up domain-to-categories map.");
|
|
this.#client = lazy.RemoteSettings(TELEMETRY_CATEGORIZATION_KEY);
|
|
|
|
this.#onSettingsSync = event => this.#sync(event.data);
|
|
this.#client.on("sync", this.#onSettingsSync);
|
|
|
|
this.#store = new DomainToCategoriesStore();
|
|
await this.#store.init();
|
|
|
|
let records = await this.#client.get();
|
|
// Even though records don't exist, we still consider the store initialized
|
|
// since a sync event from Remote Settings could populate the store with
|
|
// records eligible for the client to download.
|
|
if (!records.length) {
|
|
lazy.logConsole.debug("No records found for domain-to-categories map.");
|
|
return;
|
|
}
|
|
|
|
// At least one of the records must be eligible for the region.
|
|
let result = this.findRecordsForRegion(records, lazy.Region.home);
|
|
let matchingRecords = result?.records;
|
|
let matchingRecordsAreDefault = result?.isDefault;
|
|
let hasMatchingRecords = !!matchingRecords?.length;
|
|
Services.prefs.setBoolPref(CATEGORIZATION_REGION_PREF, hasMatchingRecords);
|
|
|
|
if (!hasMatchingRecords) {
|
|
lazy.logConsole.debug(
|
|
"No domain-to-category records match the current region:",
|
|
lazy.Region.home
|
|
);
|
|
// If no matching record was found but the store is not empty,
|
|
// the user changed their home region.
|
|
if (!this.#store.empty) {
|
|
lazy.logConsole.debug(
|
|
"Drop store because it no longer matches the home region."
|
|
);
|
|
await this.#store.dropData();
|
|
}
|
|
return;
|
|
}
|
|
|
|
this.#version = this.#retrieveLatestVersion(matchingRecords);
|
|
let storeVersion = await this.#store.getVersion();
|
|
let storeIsDefault = await this.#store.isDefault();
|
|
if (
|
|
storeVersion == this.#version &&
|
|
!this.#store.empty &&
|
|
storeIsDefault == matchingRecordsAreDefault
|
|
) {
|
|
lazy.logConsole.debug("Reuse existing domain-to-categories map.");
|
|
Services.obs.notifyObservers(
|
|
null,
|
|
"domain-to-categories-map-update-complete"
|
|
);
|
|
return;
|
|
}
|
|
|
|
await this.#clearAndPopulateStore(records);
|
|
}
|
|
|
|
#clearClient() {
|
|
if (this.#client) {
|
|
lazy.logConsole.debug("Removing Remote Settings client.");
|
|
this.#client.off("sync", this.#onSettingsSync);
|
|
this.#client = null;
|
|
this.#onSettingsSync = null;
|
|
this.#downloadRetries = 0;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Inspects a list of records from the categorization domain bucket and finds
|
|
* the maximum version score from the set of records. Each record should have
|
|
* the same version number but if for any reason one entry has a lower
|
|
* version number, the latest version can be used to filter it out.
|
|
*
|
|
* @param {Array<DomainToCategoriesRecord>} records
|
|
* An array containing the records from a Remote Settings collection.
|
|
* @returns {number}
|
|
*/
|
|
#retrieveLatestVersion(records) {
|
|
return records.reduce((version, record) => {
|
|
if (record.version > version) {
|
|
return record.version;
|
|
}
|
|
return version;
|
|
}, 0);
|
|
}
|
|
|
|
/**
|
|
* Callback when Remote Settings has indicated the collection has been
|
|
* synced. Determine if the records changed should result in updating the map,
|
|
* as some of the records changed might not affect the user's region.
|
|
* Additionally, delete any attachment for records that no longer exist.
|
|
*
|
|
* @param {object} data
|
|
* Object containing records that are current, deleted, created, or updated.
|
|
*/
|
|
async #sync(data) {
|
|
lazy.logConsole.debug("Syncing domain-to-categories with Remote Settings.");
|
|
|
|
// Remove local files of deleted records.
|
|
let toDelete = data?.deleted.filter(d => d.attachment);
|
|
await Promise.all(
|
|
toDelete.map(record => this.#client.attachments.deleteDownloaded(record))
|
|
);
|
|
|
|
let couldModify = await this.syncMayModifyStore(data, lazy.Region.home);
|
|
if (!couldModify) {
|
|
lazy.logConsole.debug(
|
|
"Domain-to-category records had no changes that matched the region."
|
|
);
|
|
return;
|
|
}
|
|
|
|
this.#downloadRetries = 0;
|
|
|
|
try {
|
|
await this.#clearAndPopulateStore(data?.current);
|
|
} catch (ex) {
|
|
lazy.logConsole.error("Error populating map: ", ex);
|
|
await this.uninit();
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Clear the existing store and populate it with attachments found in the
|
|
* records. If no attachments are found, or no record containing an
|
|
* attachment contained the latest version, then nothing will change.
|
|
*
|
|
* @param {Array<DomainToCategoriesRecord>} records
|
|
* The records containing attachments.
|
|
* @throws {Error}
|
|
* Will throw if it was not able to drop the store data, or it was unable
|
|
* to insert data into the store.
|
|
*/
|
|
async #clearAndPopulateStore(records) {
|
|
// If we don't have a handle to a store, it would mean that it was removed
|
|
// during an uninitialization process.
|
|
if (!this.#store) {
|
|
lazy.logConsole.debug(
|
|
"Could not populate store because no store was available."
|
|
);
|
|
return;
|
|
}
|
|
|
|
if (!this.#store.ready) {
|
|
lazy.logConsole.debug(
|
|
"Could not populate store because it was not ready."
|
|
);
|
|
return;
|
|
}
|
|
|
|
// Empty table so that if there are errors in the download process, callers
|
|
// querying the map won't use information we know is probably outdated.
|
|
await this.#store.dropData();
|
|
|
|
this.#version = null;
|
|
this.#cancelAndNullifyTimer();
|
|
|
|
let result = this.findRecordsForRegion(records, lazy.Region.home);
|
|
let recordsMatchingRegion = result?.records;
|
|
let isDefault = result?.isDefault;
|
|
let hasMatchingRecords = !!recordsMatchingRegion?.length;
|
|
Services.prefs.setBoolPref(CATEGORIZATION_REGION_PREF, hasMatchingRecords);
|
|
|
|
// A collection with no records is still a valid init state.
|
|
if (!records?.length) {
|
|
lazy.logConsole.debug("No records found for domain-to-categories map.");
|
|
return;
|
|
}
|
|
|
|
if (!hasMatchingRecords) {
|
|
lazy.logConsole.debug(
|
|
"No domain-to-category records match the current region:",
|
|
lazy.Region.home
|
|
);
|
|
return;
|
|
}
|
|
|
|
let fileContents = [];
|
|
let start = Cu.now();
|
|
for (let record of recordsMatchingRegion) {
|
|
let fetchedAttachment;
|
|
// Downloading attachments can fail.
|
|
try {
|
|
fetchedAttachment = await this.#client.attachments.download(record);
|
|
} catch (ex) {
|
|
lazy.logConsole.error("Could not download file:", ex);
|
|
this.#createTimerToPopulateMap();
|
|
return;
|
|
}
|
|
fileContents.push(fetchedAttachment.buffer);
|
|
}
|
|
ChromeUtils.addProfilerMarker(
|
|
"SERPCategorization.#clearAndPopulateStore",
|
|
start,
|
|
"Download attachments."
|
|
);
|
|
|
|
this.#version = this.#retrieveLatestVersion(recordsMatchingRegion);
|
|
if (!this.#version) {
|
|
lazy.logConsole.debug("Could not find a version number for any record.");
|
|
return;
|
|
}
|
|
|
|
await this.#store.insertFileContents(
|
|
fileContents,
|
|
this.#version,
|
|
isDefault
|
|
);
|
|
|
|
lazy.logConsole.debug("Finished updating domain-to-categories store.");
|
|
Services.obs.notifyObservers(
|
|
null,
|
|
"domain-to-categories-map-update-complete"
|
|
);
|
|
}
|
|
|
|
#cancelAndNullifyTimer() {
|
|
if (this.#downloadTimer) {
|
|
lazy.logConsole.debug("Cancel and nullify download timer.");
|
|
this.#downloadTimer.cancel();
|
|
this.#downloadTimer = null;
|
|
}
|
|
}
|
|
|
|
#createTimerToPopulateMap() {
|
|
if (
|
|
this.#downloadRetries >=
|
|
TELEMETRY_CATEGORIZATION_DOWNLOAD_SETTINGS.maxTriesPerSession ||
|
|
!this.#client
|
|
) {
|
|
return;
|
|
}
|
|
if (!this.#downloadTimer) {
|
|
this.#downloadTimer = Cc["@mozilla.org/timer;1"].createInstance(
|
|
Ci.nsITimer
|
|
);
|
|
}
|
|
lazy.logConsole.debug("Create timer to retry downloading attachments.");
|
|
let delay =
|
|
TELEMETRY_CATEGORIZATION_DOWNLOAD_SETTINGS.base +
|
|
randomInteger(
|
|
TELEMETRY_CATEGORIZATION_DOWNLOAD_SETTINGS.minAdjust,
|
|
TELEMETRY_CATEGORIZATION_DOWNLOAD_SETTINGS.maxAdjust
|
|
);
|
|
this.#downloadTimer.initWithCallback(
|
|
async () => {
|
|
this.#downloadRetries += 1;
|
|
let records = await this.#client.get();
|
|
try {
|
|
await this.#clearAndPopulateStore(records);
|
|
} catch (ex) {
|
|
lazy.logConsole.error("Error populating store: ", ex);
|
|
await this.uninit();
|
|
}
|
|
},
|
|
delay,
|
|
Ci.nsITimer.TYPE_ONE_SHOT
|
|
);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Handles the storage of data containing domains to categories.
|
|
*/
|
|
export class DomainToCategoriesStore {
|
|
#init = false;
|
|
|
|
/**
|
|
* The connection to the store.
|
|
*
|
|
* @type {object | null}
|
|
*/
|
|
#connection = null;
|
|
|
|
/**
|
|
* Reference for the shutdown blocker in case we need to remove it before
|
|
* shutdown.
|
|
*
|
|
* @type {Function | null}
|
|
*/
|
|
#asyncShutdownBlocker = null;
|
|
|
|
/**
|
|
* Whether the store is empty of data.
|
|
*
|
|
* @type {boolean}
|
|
*/
|
|
#empty = true;
|
|
|
|
/**
|
|
* For a particular subset of errors, we'll attempt to rebuild the database
|
|
* from scratch.
|
|
*/
|
|
#rebuildableErrors = ["NS_ERROR_FILE_CORRUPTED"];
|
|
|
|
/**
|
|
* Initializes the store. If the store is initialized it should have cached
|
|
* a connection to the store and ensured the store exists.
|
|
*/
|
|
async init() {
|
|
if (this.#init) {
|
|
return;
|
|
}
|
|
lazy.logConsole.debug("Initializing domain-to-categories store.");
|
|
|
|
// Attempts to cache a connection to the store.
|
|
// If a failure occured, try to re-build the store.
|
|
let rebuiltStore = false;
|
|
try {
|
|
await this.#initConnection();
|
|
} catch (ex1) {
|
|
lazy.logConsole.error(`Error initializing a connection: ${ex1}`);
|
|
if (this.#rebuildableErrors.includes(ex1.name)) {
|
|
try {
|
|
await this.#rebuildStore();
|
|
} catch (ex2) {
|
|
await this.#closeConnection();
|
|
lazy.logConsole.error(`Could not rebuild store: ${ex2}`);
|
|
return;
|
|
}
|
|
rebuiltStore = true;
|
|
}
|
|
}
|
|
|
|
// If we don't have a connection, bail because the browser could be
|
|
// shutting down ASAP, or re-creating the store is impossible.
|
|
if (!this.#connection) {
|
|
lazy.logConsole.debug(
|
|
"Bailing from DomainToCategoriesStore.init because connection doesn't exist."
|
|
);
|
|
return;
|
|
}
|
|
|
|
// If we weren't forced to re-build the store, we only have the connection.
|
|
// We want to ensure the store exists so calls to public methods can pass
|
|
// without throwing errors due to the absence of the store.
|
|
if (!rebuiltStore) {
|
|
try {
|
|
await this.#initSchema();
|
|
} catch (ex) {
|
|
lazy.logConsole.error(`Error trying to create store: ${ex}`);
|
|
await this.#closeConnection();
|
|
return;
|
|
}
|
|
}
|
|
|
|
lazy.logConsole.debug("Initialized domain-to-categories store.");
|
|
this.#init = true;
|
|
}
|
|
|
|
async uninit() {
|
|
if (this.#init) {
|
|
lazy.logConsole.debug("Un-initializing domain-to-categories store.");
|
|
await this.#closeConnection();
|
|
this.#asyncShutdownBlocker = null;
|
|
lazy.logConsole.debug("Un-initialized domain-to-categories store.");
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Whether the store has an open connection to the physical store.
|
|
*
|
|
* @returns {boolean}
|
|
*/
|
|
get ready() {
|
|
return this.#init;
|
|
}
|
|
|
|
/**
|
|
* Whether the store is devoid of data.
|
|
*
|
|
* @returns {boolean}
|
|
*/
|
|
get empty() {
|
|
return this.#empty;
|
|
}
|
|
|
|
/**
|
|
* Clears information in the store. If dropping data encountered a failure,
|
|
* try to delete the file containing the store and re-create it.
|
|
*
|
|
* @throws {Error} Will throw if it was unable to clear information from the
|
|
* store.
|
|
*/
|
|
async dropData() {
|
|
if (!this.#connection) {
|
|
return;
|
|
}
|
|
let tableExists = await this.#connection.tableExists(
|
|
CATEGORIZATION_SETTINGS.STORE_NAME
|
|
);
|
|
if (tableExists) {
|
|
lazy.logConsole.debug("Drop domain_to_categories.");
|
|
// This can fail if the permissions of the store are read-only.
|
|
await this.#connection.executeTransaction(async () => {
|
|
await this.#connection.execute(`DROP TABLE domain_to_categories`);
|
|
const createDomainToCategoriesTable = `
|
|
CREATE TABLE IF NOT EXISTS
|
|
domain_to_categories (
|
|
string_id
|
|
TEXT PRIMARY KEY NOT NULL,
|
|
categories
|
|
TEXT
|
|
);
|
|
`;
|
|
await this.#connection.execute(createDomainToCategoriesTable);
|
|
await this.#connection.execute(`DELETE FROM moz_meta`);
|
|
await this.#connection.executeCached(
|
|
`
|
|
INSERT INTO
|
|
moz_meta (key, value)
|
|
VALUES
|
|
(:key, :value)
|
|
ON CONFLICT DO UPDATE SET
|
|
value = :value
|
|
`,
|
|
{ key: "version", value: 0 }
|
|
);
|
|
});
|
|
|
|
this.#empty = true;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Given file contents, try moving them into the store. If a failure occurs,
|
|
* it will attempt to drop existing data to ensure callers aren't accessing
|
|
* a partially filled store.
|
|
*
|
|
* @param {Array<ArrayBuffer>} fileContents
|
|
* Contents to convert.
|
|
* @param {number} version
|
|
* The version for the store.
|
|
* @param {boolean} isDefault
|
|
* Whether the file contents are from a default collection.
|
|
* @throws {Error}
|
|
* Will throw if the insertion failed and dropData was unable to run
|
|
* successfully.
|
|
*/
|
|
async insertFileContents(fileContents, version, isDefault = false) {
|
|
if (!this.#init || !fileContents?.length || !version) {
|
|
return;
|
|
}
|
|
|
|
try {
|
|
await this.#insert(fileContents, version, isDefault);
|
|
} catch (ex) {
|
|
lazy.logConsole.error(`Could not insert file contents: ${ex}`);
|
|
await this.dropData();
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Convenience function to make it trivial to insert Javascript objects into
|
|
* the store. This avoids having to set up the collection in Remote Settings.
|
|
*
|
|
* @param {object} domainToCategoriesMap
|
|
* An object whose keys should be hashed domains with values containing
|
|
* an array of integers.
|
|
* @param {number} version
|
|
* The version for the store.
|
|
* @param {boolean} isDefault
|
|
* Whether the mappings are from a default record.
|
|
* @returns {boolean}
|
|
* Whether the operation was successful.
|
|
*/
|
|
async insertObject(domainToCategoriesMap, version, isDefault) {
|
|
if (!Cu.isInAutomation || !this.#init) {
|
|
return false;
|
|
}
|
|
let buffer = new TextEncoder().encode(
|
|
JSON.stringify(domainToCategoriesMap)
|
|
).buffer;
|
|
await this.insertFileContents([buffer], version, isDefault);
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* Retrieves domains mapped to the key.
|
|
*
|
|
* @param {string} key
|
|
* The value to lookup in the store.
|
|
* @returns {Array<number>}
|
|
* An array of numbers corresponding to the category and score. If the key
|
|
* does not exist in the store or the store is having issues retrieving the
|
|
* value, returns an empty array.
|
|
*/
|
|
async getCategories(key) {
|
|
if (!this.#init) {
|
|
return [];
|
|
}
|
|
|
|
let rows;
|
|
try {
|
|
rows = await this.#connection.executeCached(
|
|
`
|
|
SELECT
|
|
categories
|
|
FROM
|
|
domain_to_categories
|
|
WHERE
|
|
string_id = :key
|
|
`,
|
|
{
|
|
key,
|
|
}
|
|
);
|
|
} catch (ex) {
|
|
lazy.logConsole.error(`Could not retrieve from the store: ${ex}`);
|
|
return [];
|
|
}
|
|
|
|
if (!rows.length) {
|
|
return [];
|
|
}
|
|
return JSON.parse(rows[0].getResultByName("categories")) ?? [];
|
|
}
|
|
|
|
/**
|
|
* Retrieves the version number of the store.
|
|
*
|
|
* @returns {number}
|
|
* The version number. Returns 0 if the version was never set or if there
|
|
* was an issue accessing the version number.
|
|
*/
|
|
async getVersion() {
|
|
if (this.#connection) {
|
|
let rows;
|
|
try {
|
|
rows = await this.#connection.executeCached(
|
|
`
|
|
SELECT
|
|
value
|
|
FROM
|
|
moz_meta
|
|
WHERE
|
|
key = "version"
|
|
`
|
|
);
|
|
} catch (ex) {
|
|
lazy.logConsole.error(`Could not retrieve version of the store: ${ex}`);
|
|
return 0;
|
|
}
|
|
if (rows.length) {
|
|
return parseInt(rows[0].getResultByName("value")) ?? 0;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Whether the data inside the store was derived from a default set of
|
|
* records.
|
|
*
|
|
* @returns {boolean}
|
|
*/
|
|
async isDefault() {
|
|
if (this.#connection) {
|
|
let rows;
|
|
try {
|
|
rows = await this.#connection.executeCached(
|
|
`
|
|
SELECT
|
|
value
|
|
FROM
|
|
moz_meta
|
|
WHERE
|
|
key = "is_default"
|
|
`
|
|
);
|
|
} catch (ex) {
|
|
lazy.logConsole.error(
|
|
`Could not retrieve if the store is using default records: ${ex}`
|
|
);
|
|
return false;
|
|
}
|
|
if (rows.length && parseInt(rows[0].getResultByName("value")) == 1) {
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/**
|
|
* Test only function allowing tests to delete the store.
|
|
*/
|
|
async testDelete() {
|
|
if (Cu.isInAutomation) {
|
|
await this.#closeConnection();
|
|
await this.#delete();
|
|
}
|
|
}
|
|
|
|
/**
|
|
* If a connection is available, close it and remove shutdown blockers.
|
|
*/
|
|
async #closeConnection() {
|
|
this.#init = false;
|
|
this.#empty = true;
|
|
if (this.#asyncShutdownBlocker) {
|
|
lazy.Sqlite.shutdown.removeBlocker(this.#asyncShutdownBlocker);
|
|
this.#asyncShutdownBlocker = null;
|
|
}
|
|
|
|
if (this.#connection) {
|
|
lazy.logConsole.debug("Closing connection.");
|
|
// An error could occur while closing the connection. We suppress the
|
|
// error since it is not a critical part of the browser.
|
|
try {
|
|
await this.#connection.close();
|
|
} catch (ex) {
|
|
lazy.logConsole.error(ex);
|
|
}
|
|
this.#connection = null;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Initialize the schema for the store.
|
|
*
|
|
* @throws {Error}
|
|
* Will throw if a permissions error prevents creating the store.
|
|
*/
|
|
async #initSchema() {
|
|
if (!this.#connection) {
|
|
return;
|
|
}
|
|
lazy.logConsole.debug("Create store.");
|
|
// Creation can fail if the store is read only.
|
|
await this.#connection.executeTransaction(async () => {
|
|
// Let outer try block handle the exception.
|
|
const createDomainToCategoriesTable = `
|
|
CREATE TABLE IF NOT EXISTS
|
|
domain_to_categories (
|
|
string_id
|
|
TEXT PRIMARY KEY NOT NULL,
|
|
categories
|
|
TEXT
|
|
) WITHOUT ROWID;
|
|
`;
|
|
await this.#connection.execute(createDomainToCategoriesTable);
|
|
const createMetaTable = `
|
|
CREATE TABLE IF NOT EXISTS
|
|
moz_meta (
|
|
key
|
|
TEXT PRIMARY KEY NOT NULL,
|
|
value
|
|
INTEGER
|
|
) WITHOUT ROWID;
|
|
`;
|
|
await this.#connection.execute(createMetaTable);
|
|
await this.#connection.setSchemaVersion(
|
|
CATEGORIZATION_SETTINGS.STORE_SCHEMA
|
|
);
|
|
});
|
|
|
|
let rows = await this.#connection.executeCached(
|
|
"SELECT count(*) = 0 FROM domain_to_categories"
|
|
);
|
|
this.#empty = !!rows[0].getResultByIndex(0);
|
|
}
|
|
|
|
/**
|
|
* Attempt to delete the store.
|
|
*
|
|
* @throws {Error}
|
|
* Will throw if the permissions for the file prevent its deletion.
|
|
*/
|
|
async #delete() {
|
|
lazy.logConsole.debug("Attempt to delete the store.");
|
|
try {
|
|
await IOUtils.remove(
|
|
PathUtils.join(
|
|
PathUtils.profileDir,
|
|
CATEGORIZATION_SETTINGS.STORE_FILE
|
|
),
|
|
{ ignoreAbsent: true }
|
|
);
|
|
} catch (ex) {
|
|
lazy.logConsole.error(ex);
|
|
}
|
|
this.#empty = true;
|
|
lazy.logConsole.debug("Store was deleted.");
|
|
}
|
|
|
|
/**
|
|
* Tries to establish a connection to the store.
|
|
*
|
|
* @throws {Error}
|
|
* Will throw if there was an issue establishing a connection or adding
|
|
* adding a shutdown blocker.
|
|
*/
|
|
async #initConnection() {
|
|
if (this.#connection) {
|
|
return;
|
|
}
|
|
|
|
// This could fail if the store is corrupted.
|
|
this.#connection = await lazy.Sqlite.openConnection({
|
|
path: PathUtils.join(
|
|
PathUtils.profileDir,
|
|
CATEGORIZATION_SETTINGS.STORE_FILE
|
|
),
|
|
});
|
|
|
|
await this.#connection.execute("PRAGMA journal_mode = TRUNCATE");
|
|
|
|
this.#asyncShutdownBlocker = async () => {
|
|
await this.#connection.close();
|
|
this.#connection = null;
|
|
};
|
|
|
|
// This could fail if we're adding it during shutdown. In this case,
|
|
// don't throw but close the connection.
|
|
try {
|
|
lazy.Sqlite.shutdown.addBlocker(
|
|
"SERPCategorization:DomainToCategoriesSqlite closing",
|
|
this.#asyncShutdownBlocker
|
|
);
|
|
} catch (ex) {
|
|
lazy.logConsole.error(ex);
|
|
await this.#closeConnection();
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Inserts into the store.
|
|
*
|
|
* @param {Array<ArrayBuffer>} fileContents
|
|
* The data that should be converted and inserted into the store.
|
|
* @param {number} version
|
|
* The version number that should be inserted into the store.
|
|
* @param {boolean} isDefault
|
|
* Whether the file contents are a default set of records.
|
|
* @throws {Error}
|
|
* Will throw if a connection is not present, if the store is not
|
|
* able to be updated (permissions error, corrupted file), or there is
|
|
* something wrong with the file contents.
|
|
*/
|
|
async #insert(fileContents, version, isDefault) {
|
|
let start = Cu.now();
|
|
await this.#connection.executeTransaction(async () => {
|
|
lazy.logConsole.debug("Insert into domain_to_categories table.");
|
|
for (let fileContent of fileContents) {
|
|
await this.#connection.executeCached(
|
|
`
|
|
INSERT INTO
|
|
domain_to_categories (string_id, categories)
|
|
SELECT
|
|
json_each.key AS string_id,
|
|
json_each.value AS categories
|
|
FROM
|
|
json_each(json(:obj))
|
|
`,
|
|
{
|
|
obj: new TextDecoder().decode(fileContent),
|
|
}
|
|
);
|
|
}
|
|
// Once the insertions have successfully completed, update the version.
|
|
await this.#connection.executeCached(
|
|
`
|
|
INSERT INTO
|
|
moz_meta (key, value)
|
|
VALUES
|
|
(:key, :value)
|
|
ON CONFLICT DO UPDATE SET
|
|
value = :value
|
|
`,
|
|
{ key: "version", value: version }
|
|
);
|
|
if (isDefault) {
|
|
await this.#connection.executeCached(
|
|
`
|
|
INSERT INTO
|
|
moz_meta (key, value)
|
|
VALUES
|
|
(:key, :value)
|
|
ON CONFLICT DO UPDATE SET
|
|
value = :value
|
|
`,
|
|
{ key: "is_default", value: 1 }
|
|
);
|
|
}
|
|
});
|
|
ChromeUtils.addProfilerMarker(
|
|
"DomainToCategoriesSqlite.#insert",
|
|
start,
|
|
"Move file contents into table."
|
|
);
|
|
|
|
if (fileContents?.length) {
|
|
this.#empty = false;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Deletes and re-build's the store. Used in cases where we encounter a
|
|
* failure and we want to try fixing the error by starting with an
|
|
* entirely fresh store.
|
|
*
|
|
* @throws {Error}
|
|
* Will throw if a connection could not be established, if it was
|
|
* unable to delete the store, or it was unable to build a new store.
|
|
*/
|
|
async #rebuildStore() {
|
|
lazy.logConsole.debug("Try rebuilding store.");
|
|
// Step 1. Close all connections.
|
|
await this.#closeConnection();
|
|
|
|
// Step 2. Delete the existing store.
|
|
await this.#delete();
|
|
|
|
// Step 3. Re-establish the connection.
|
|
await this.#initConnection();
|
|
|
|
// Step 4. If a connection exists, try creating the store.
|
|
await this.#initSchema();
|
|
}
|
|
}
|
|
|
|
function randomInteger(min, max) {
|
|
return Math.floor(Math.random() * (max - min + 1)) + min;
|
|
}
|
|
|
|
export var SERPDomainToCategoriesMap = new DomainToCategoriesMap();
|
|
export var SERPCategorization = new Categorizer();
|
|
export var SERPCategorizationRecorder = new CategorizationRecorder();
|
|
export var SERPCategorizationEventScheduler =
|
|
new CategorizationEventScheduler();
|