diff --git a/src/core/config/Categories.js b/src/core/config/Categories.js
index 2038ad50e..187cb4057 100755
--- a/src/core/config/Categories.js
+++ b/src/core/config/Categories.js
@@ -189,6 +189,7 @@ const Categories = [
             "Find / Replace",
             "Regular expression",
             "Offset checker",
+            "Hamming Distance",
             "Convert distance",
             "Convert area",
             "Convert mass",
diff --git a/src/core/config/OperationConfig.js b/src/core/config/OperationConfig.js
index 8b3b61ffd..41fbfe077 100755
--- a/src/core/config/OperationConfig.js
+++ b/src/core/config/OperationConfig.js
@@ -3913,6 +3913,29 @@ const OperationConfig = {
             }
         ]
     },
+    "Hamming Distance": {
+        module: "Default",
+        description: "In information theory, the Hamming distance between two strings of equal length is the number of positions at which the corresponding symbols are different. In other words, it measures the minimum number of substitutions required to change one string into the other, or the minimum number of errors that could have transformed one string into the other. In a more general context, the Hamming distance is one of several string metrics for measuring the edit distance between two sequences.",
+        inputType: "string",
+        outputType: "string",
+        args: [
+            {
+                name: "Delimiter",
+                type: "binaryShortString",
+                value: StrUtils.HAMMING_DELIM
+            },
+            {
+                name: "Unit",
+                type: "option",
+                value: StrUtils.HAMMING_UNIT
+            },
+            {
+                name: "Input type",
+                type: "option",
+                value: StrUtils.HAMMING_INPUT_TYPE
+            }
+        ]
+    }
 };
 
 
diff --git a/src/core/config/modules/Default.js b/src/core/config/modules/Default.js
index b36e00aa6..3963d9430 100644
--- a/src/core/config/modules/Default.js
+++ b/src/core/config/modules/Default.js
@@ -110,6 +110,7 @@ OpModules.Default = {
     "Unescape string": StrUtils.runUnescape,
     "Head": StrUtils.runHead,
     "Tail": StrUtils.runTail,
+    "Hamming Distance": StrUtils.runHamming,
     "Remove whitespace": Tidy.runRemoveWhitespace,
     "Remove null bytes": Tidy.runRemoveNulls,
     "Drop bytes": Tidy.runDropBytes,
diff --git a/src/core/operations/StrUtils.js b/src/core/operations/StrUtils.js
index 23b5eb26d..dd15b3277 100755
--- a/src/core/operations/StrUtils.js
+++ b/src/core/operations/StrUtils.js
@@ -509,6 +509,84 @@ const StrUtils = {
     },
 
 
+    /**
+     * @constant
+     * @default
+     */
+    HAMMING_DELIM: "\\n\\n",
+    /**
+     * @constant
+     * @default
+     */
+    HAMMING_INPUT_TYPE: ["Raw string", "Hex"],
+    /**
+     * @constant
+     * @default
+     */
+    HAMMING_UNIT: ["Byte", "Bit"],
+
+    /**
+     * Hamming Distance operation.
+     *
+     * Splits the input into two samples on the delimiter, decodes each sample
+     * to bytes and counts the positions (bytes or bits) at which they differ.
+     *
+     * @author GCHQ Contributor [2]
+     *
+     * @param {string} input - Two samples separated by the delimiter
+     * @param {Object[]} args - [delimiter, unit ("Byte" or "Bit"), input type ("Raw string" or "Hex")]
+     * @returns {string} The distance as a decimal string, or a message starting with "Error:"
+     */
+    runHamming: function(input, args) {
+        const delim = args[0],
+            byByte = args[1] === "Byte",
+            inputType = args[2],
+            samples = input.split(delim);
+
+        if (samples.length !== 2) {
+            return "Error: You can only calculate the edit distance between 2 strings. Please ensure exactly two inputs are provided, separated by the specified delimiter.";
+        }
+
+        // Decode before comparing lengths: the distance is taken over the
+        // decoded bytes, whereas the lengths of the undecoded samples are
+        // only character counts.
+        let lhsBytes, rhsBytes;
+
+        if (inputType === "Hex") {
+            lhsBytes = Utils.fromHex(samples[0]);
+            rhsBytes = Utils.fromHex(samples[1]);
+        } else {
+            lhsBytes = Utils.strToByteArray(samples[0]);
+            rhsBytes = Utils.strToByteArray(samples[1]);
+        }
+
+        if (lhsBytes.length !== rhsBytes.length) {
+            return "Error: Both inputs must be of the same length.";
+        }
+
+        let dist = 0;
+
+        for (let i = 0; i < lhsBytes.length; i++) {
+            const lhs = lhsBytes[i],
+                rhs = rhsBytes[i];
+
+            if (byByte && lhs !== rhs) {
+                dist++;
+            } else if (!byByte) {
+                // Count the set bits of the XOR: each pass clears the lowest set bit.
+                let xord = lhs ^ rhs;
+
+                while (xord) {
+                    dist++;
+                    xord &= xord - 1;
+                }
+            }
+        }
+
+        return dist.toString();
+    },
+
+
     /**
      * Adds HTML highlights to matches within a string.
      *