返回可能的百分比

小开

PHP.js 库的函数 similar_text怎么样？

它基于一个同名的 PHP 函数。

/**
 * Counts the characters two strings have in common.
 *
 * The algorithm finds the longest common substring, counts its length, and
 * then recursively applies itself to the pieces that lie to the left and to
 * the right of that substring in both inputs.
 *
 * discuss at: http://phpjs.org/functions/similar_text
 *
 * @param {*} first  First value; coerced to a string.
 * @param {*} second Second value; coerced to a string.
 * @returns {number} Number of matching characters; 0 when either argument is
 *   null or undefined.
 */
function similar_text (first, second) {
  if (first === null || second === null || typeof first === 'undefined' || typeof second === 'undefined') {
    return 0;
  }

  const a = first + '';
  const b = second + '';
  const firstLength = a.length;
  const secondLength = b.length;

  // Locate the first longest common substring: its length and its start
  // offset in each input.
  let pos1 = 0;
  let pos2 = 0;
  let max = 0;
  for (let p = 0; p < firstLength; p++) {
    for (let q = 0; q < secondLength; q++) {
      let l = 0;
      while (p + l < firstLength && q + l < secondLength && a.charAt(p + l) === b.charAt(q + l)) {
        l++;
      }
      if (l > max) {
        max = l;
        pos1 = p;
        pos2 = q;
      }
    }
  }

  if (max === 0) {
    return 0;
  }

  let sum = max;

  // Left remainders: each string must be cut at ITS OWN offset (pos1 for the
  // first, pos2 for the second). The recursion calls the function by name so
  // it does not depend on `this`, which is undefined in strict mode/modules.
  if (pos1 && pos2) {
    sum += similar_text(a.slice(0, pos1), b.slice(0, pos2));
  }

  // Right remainders: everything after the common substring in both inputs.
  if (pos1 + max < firstLength && pos2 + max < secondLength) {
    sum += similar_text(a.slice(pos1 + max), b.slice(pos2 + max));
  }

  return sum;
}

小开

这是我很快写出来的一个函数，也许足以满足你的需求:

/**
 * Position-by-position similarity of two strings, as a number from 0 to 1.
 *
 * Each index of strA is compared with the same index of strB:
 *   - identical characters cost nothing,
 *   - characters that differ only in case cost 1,
 *   - different characters cost 4,
 *   - every character of length difference costs 4.
 * The total cost is normalized by 2 * (strA.length + strB.length).
 *
 * @param {string} strA
 * @param {string} strB
 * @returns {number} 1 for identical strings (including two empty strings),
 *   lower values for less similar strings.
 */
function Compare(strA, strB) {
  const totalLength = strA.length + strB.length;
  // Two empty strings are identical; without this guard the formula is 0/0.
  if (totalLength === 0) {
    return 1;
  }

  let penalty = 0;
  for (let i = 0; i < strA.length; i++) {
    const charA = strA[i];
    const charB = strB[i];
    // Positions past the end of strB are covered by the length penalty below.
    if (typeof charB === 'undefined' || charA === charB) {
      continue;
    }
    penalty += charA.toLowerCase() === charB.toLowerCase() ? 1 : 4;
  }

  return 1 - (penalty + 4 * Math.abs(strA.length - strB.length)) / (2 * totalLength);
}

该函数对仅大小写不同的相同字符进行加权，其权重相当于完全不同或缺失字符的四分之一。它返回一个介于 0 和 1 之间的数字: 1 表示两个字符串完全相同，0 表示它们没有任何相似之处。例子:

Compare("Apple", "Apple")    // 1
Compare("Apples", "Apple")   // 0.8181818181818181
Compare("Apples", "apple")   // 0.7727272727272727
Compare("a", "A")            // 0.75
Compare("Apples", "appppp")  // 0.45833333333333337
Compare("a", "b")            // 0

小开

下面是一个非常简单的函数，它进行比较并根据等效性返回一个百分比。虽然还没有对所有可能的场景进行测试，但它可能会帮助您开始。

/**
 * Percentage of positions at which two strings hold the same character.
 *
 * Characters are compared index by index over the shorter string, and the
 * number of matches is divided by the length of the longer string.
 *
 * @param {string} a
 * @param {string} b
 * @returns {string} The match ratio times 100, followed by "%", e.g. "75%".
 *   Two empty strings yield "100%".
 */
function similar(a,b) {
  const minLength = Math.min(a.length, b.length);
  const maxLength = Math.max(a.length, b.length);

  // Two empty strings are identical; without this guard the result is "NaN%".
  if (maxLength === 0) {
    return "100%";
  }

  let equivalency = 0;
  for (let i = 0; i < minLength; i++) {
    if (a[i] === b[i]) {
      equivalency++;
    }
  }

  const weight = equivalency / maxLength;
  return (weight * 100) + "%";
}
alert(similar("test","tes"));   // 75%
alert(similar("test","test"));  // 100%
alert(similar("test","testt")); // 80%
alert(similar("test","tess"));  // 75%

小开

Here's an answer based on Levenshtein distance https://en.wikipedia.org/wiki/Levenshtein_distance

/**
 * Similarity of two strings as a ratio derived from their edit distance.
 *
 * @param {string} s1
 * @param {string} s2
 * @returns {number} (L - editDistance) / L, where L is the length of the
 *   longer input; 1.0 when both inputs are empty.
 */
function similarity(s1, s2) {
  // On equal lengths s1 is treated as the longer string.
  const [shorter, longer] = s1.length < s2.length ? [s1, s2] : [s2, s1];
  const longerLength = longer.length;

  if (longerLength === 0) {
    return 1.0;
  }

  const distance = editDistance(longer, shorter);
  return (longerLength - distance) / longerLength;
}

用于计算编辑距离

/**
 * Case-insensitive Levenshtein distance between two strings.
 *
 * Both inputs are lower-cased, then the distance is computed with a single
 * rolling row of the dynamic-programming table.
 *
 * @param {string} s1
 * @param {string} s2
 * @returns {number} Minimum number of single-character insertions, deletions
 *   and substitutions that turn one lower-cased string into the other.
 */
function editDistance(s1, s2) {
  const source = s1.toLowerCase();
  const target = s2.toLowerCase();

  // row[j] holds the distance between the first i characters of `source`
  // and the first j characters of `target`; it starts as the i = 0 row.
  const row = Array.from({ length: target.length + 1 }, (_, j) => j);

  for (let i = 1; i <= source.length; i++) {
    // `diagonal` is the previous row's value at column j - 1.
    let diagonal = row[0];
    row[0] = i;

    for (let j = 1; j <= target.length; j++) {
      const above = row[j];
      if (source.charAt(i - 1) === target.charAt(j - 1)) {
        row[j] = diagonal;
      } else {
        row[j] = Math.min(diagonal, row[j - 1], above) + 1;
      }
      diagonal = above;
    }
  }

  return row[target.length];
}

用法

similarity('Stack Overflow','Stack Ovrflw')

returns 0.8571428571428571

你可以在下面亲自试一试:

/**
 * Reads the two input fields (#lhsInput and #rhsInput), computes their
 * similarity and shows the result in the #output element.
 */
function checkSimilarity(){
  const str1 = document.getElementById("lhsInput").value;
  const str2 = document.getElementById("rhsInput").value;
  // The result is plain text, so write it with textContent rather than
  // having the browser parse it as HTML.
  document.getElementById("output").textContent = similarity(str1, str2);
}


/**
 * Similarity of two strings as a ratio derived from their edit distance.
 *
 * @param {string} s1
 * @param {string} s2
 * @returns {number} (L - editDistance) / L, where L is the length of the
 *   longer input; 1.0 when both inputs are empty.
 */
function similarity(s1, s2) {
  // On equal lengths s1 is treated as the longer string.
  const [shorter, longer] = s1.length < s2.length ? [s1, s2] : [s2, s1];
  const longerLength = longer.length;

  if (longerLength === 0) {
    return 1.0;
  }

  const distance = editDistance(longer, shorter);
  return (longerLength - distance) / longerLength;
}


/**
 * Case-insensitive Levenshtein distance between two strings.
 *
 * Both inputs are lower-cased, then the distance is computed with a single
 * rolling row of the dynamic-programming table.
 *
 * @param {string} s1
 * @param {string} s2
 * @returns {number} Minimum number of single-character insertions, deletions
 *   and substitutions that turn one lower-cased string into the other.
 */
function editDistance(s1, s2) {
  const source = s1.toLowerCase();
  const target = s2.toLowerCase();

  // row[j] holds the distance between the first i characters of `source`
  // and the first j characters of `target`; it starts as the i = 0 row.
  const row = Array.from({ length: target.length + 1 }, (_, j) => j);

  for (let i = 1; i <= source.length; i++) {
    // `diagonal` is the previous row's value at column j - 1.
    let diagonal = row[0];
    row[0] = i;

    for (let j = 1; j <= target.length; j++) {
      const above = row[j];
      if (source.charAt(i - 1) === target.charAt(j - 1)) {
        row[j] = diagonal;
      } else {
        row[j] = Math.min(diagonal, row[j - 1], above) + 1;
      }
      diagonal = above;
    }
  }

  return row[target.length];
}

<div><label for="lhsInput">String 1:</label> <input type="text" id="lhsInput" oninput="checkSimilarity()" /></div>
<div><label for="rhsInput">String 2:</label> <input type="text" id="rhsInput" oninput="checkSimilarity()" /></div>
<div>Match: <span id="output">No Input</span></div>

小开

使用[这个库](https://www.npmjs.com/package/string-similarity)来计算字符串相似度，对我来说效果非常好！

这里的例子-

var similarity = stringSimilarity.compareTwoStrings("Apples","apple");    // => 0.88

小开

Fuzzyset - 用于 JavaScript 的模糊字符串集。Fuzzyset 是一种数据结构，它对数据执行类似于全文搜索的操作，以确定可能的拼写错误和字符串近似匹配。注意，这是一个 Python 库的 JavaScript 移植版。

小开

要衡量两个字符串之间的相似程度，可用的方法不止一两种，但我主要倾向于使用 “Dice 系数”(Dice's Coefficient)。据我所知，它比使用[莱文斯坦距离](https://en.wikipedia.org/wiki/Levenshtein_distance)效果更好。

使用 npm 上的 string-similarity 包，您就可以实现我上面所说的内容。

一些简单的使用例子是

var stringSimilarity = require('string-similarity');


var similarity = stringSimilarity.compareTwoStrings('healed', 'sealed');


var matches = stringSimilarity.findBestMatch('healed', ['edward', 'sealed', 'theatre']);

如欲了解更多，请访问上面给出的链接。谢谢。

小开

在某种程度上，我喜欢 string-similarity 模块中内置的 Dice 系数思想。但是我觉得仅仅考虑二元组(bigram)而不考虑它们的重数(出现次数)，会丢失一些重要信息。下面是一个同时处理重数的版本，我认为总体上它的实现也更简单。我没有尝试沿用他们的 API，只是提供了一个函数: 它先对两个字符串做一些预处理(删除非字母数字字符、全部转为小写、压缩但不删除空白)，再调用一个不做任何预处理的比较函数。把它重新包装成他们的 API 很容易，但是我认为没有什么必要。

// Public entry point: normalizes both inputs with `prep`, then scores the
// normalized strings with `_stringSimilarity`.
const stringSimilarity = (a, b) => {
  const left = prep(a);
  const right = prep(b);
  return _stringSimilarity(left, right);
};


/**
 * Dice-style similarity over bigrams that respects how often each bigram
 * occurs.
 *
 * For every distinct bigram, the smaller of its two occurrence counts is
 * added to the overlap; the score is 2 * overlap / (total number of bigrams
 * in both strings).
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} A value from 0 to 1. When neither string has a bigram
 *   (both are shorter than two characters) the result is 1 if the strings
 *   are equal and 0 otherwise.
 */
const _stringSimilarity = (a, b) => {
  const bg1 = bigrams(a);
  const bg2 = bigrams(b);
  const totalBigrams = bg1.length + bg2.length;

  // Without this guard the formula below is 0 / 0 (NaN) for very short input.
  if (totalBigrams === 0) {
    return a === b ? 1 : 0;
  }

  const c1 = count(bg1);
  const c2 = count(bg2);
  const overlap = uniq([...bg1, ...bg2])
    .reduce((total, key) => total + Math.min(c1[key] || 0, c2[key] || 0), 0);

  return (2 * overlap) / totalBigrams;
};


// Normalizes a string for comparison: lower-cases it, turns every character
// that is neither a word character nor whitespace into a space, then
// collapses each whitespace run into a single space.
// TODO: unicode support?
const prep = (str) => {
  const lowered = str.toLowerCase();
  const withoutPunctuation = lowered.replace(/[^\w\s]/g, ' ');
  return withoutPunctuation.replace(/\s+/g, ' ');
};


/**
 * Splits a string into its overlapping pairs of adjacent characters.
 *
 * The string is split by code point once, and both halves of every pair are
 * taken from that same array, so characters outside the Basic Multilingual
 * Plane (e.g. emoji) are never cut in half.
 *
 * @param {string} str
 * @returns {string[]} The bigrams in order; empty for strings with fewer
 *   than two characters.
 */
const bigrams = (str) => {
  const chars = [...str];
  return chars.slice(0, -1).map((c, i) => c + chars[i + 1]);
};


// Builds a plain object that maps each element of `xs` to the number of
// times it occurs.
const count = (xs) => {
  const tally = {};
  xs.forEach((x) => {
    tally[x] = (tally[x] || 0) + 1;
  });
  return tally;
};


// Returns the distinct values of `xs`, in order of first appearance.
const uniq = (xs) => Array.from(new Set(xs));


console .log (stringSimilarity (
'foobar',
'Foobar'
)) //=> 1


console .log (stringSimilarity (
"healed",
"sealed"
))//=> 0.8


console .log (stringSimilarity (
"Olive-green table for sale, in extremely good condition.",
"For sale: table in very good  condition, olive green in colour."
)) //=> 0.7787610619469026


console .log (stringSimilarity (
"Olive-green table for sale, in extremely good condition.",
"For sale: green Subaru Impreza, 210,000 miles"
)) //=> 0.38636363636363635


console .log (stringSimilarity (
"Olive-green table for sale, in extremely good condition.",
"Wanted: mountain bike with at least 21 gears."
)) //=> 0.1702127659574468


console .log (stringSimilarity (
"The rain in Spain falls mainly on the plain.",
"The run in Spun falls munly on the plun.",
)) //=> 0.7560975609756098


console .log (stringSimilarity (
"Fa la la la la, la la la la",
"Fa la la la la, la la",
)) //=> 0.8636363636363636


console .log (stringSimilarity (
"car crash",
"carcrash",
)) //=> 0.8


console .log (stringSimilarity (
"Now is the time for all good men to come to the aid of their party.",
"Huh?",
)) //=> 0

.as-console-wrapper {max-height: 100% !important; top: 0}

Some of the test cases are from string-similarity, others are my own. They show some significant differences from that package, but nothing untoward. The only one I would call out is the difference between "car crash" and "carcrash", which string-similarity sees as identical and I report with a similarity of 0.8. My version finds more similarity in all the olive-green test-cases than does string-similarity, but as these are in any case fairly arbitrary numbers, I'm not sure how much difference it makes; they certainly position them in the same relative order.

小开

字符串相似性 lib vs Top answer(by@overloard1234) 性能比较，你可以在下面找到

根据@Tushar Walzade 关于使用字符串相似性库的建议，您可以发现，例如

stringSimilatityLib.findBestMatch('KIA','Kia').bestMatch.rating

将返回0.0

看起来最好先把字符串转成小写再进行比较。

更好的基本用法(对于数组) :

findBestMatch(str, strArr) {
const lowerCaseArr = strArr.map(element => element.toLowerCase());//creating lower case array
const match = stringSimilatityLib.findBestMatch(str.toLowerCase(), lowerCaseArr).bestMatch; //trying to find bestMatch
if (match.rating > 0) {
const foundIndex = lowerCaseArr.findIndex(x => x === match.target); //finding the index of found best case
return strArr[foundIndex]; //returning initial value from array
}
return null;
},

Performance

另外，我比较了这里的最佳答案(由@overloard1234提供)和字符串相似性库(v4.0.4)。

你可以在这里找到结果: https://jsbench.me/szkzojoskq/1

结果 : 字符串相似性快了 ~ 两倍

只是为了好玩: 2.0版的字符串相似性库比最新的4.0.4版慢大约2.2倍。因此，如果您仍在使用 < 3.0:) ，请更新它

小开

  const str1 = " pARTH PARmar r  ";
const str2 = "  parmar r par     ";


/**
 * Compares two strings in two ways.
 *
 * `score` is the percentage of characters of the longer (trimmed) string
 * that can be matched one-to-one, case-sensitively and regardless of
 * position, against characters of the shorter string.
 *
 * `result` tells whether every space-separated word of the first string
 * starts with the same (lower-cased) character as the word at the same
 * position in the second string; it is false when the second string has
 * fewer words than the first.
 *
 * @param {string} [str1=""]
 * @param {string} [str2=""]
 * @returns {{score: number, result: boolean}} `score` is from 0 to 100, and
 *   is 100 when both trimmed strings are empty.
 */
function calculateSimilarity(str1 = "", str2 = "") {
  let longer = str1.trim();
  let shorter = str2.trim();

  const words1 = longer.toLowerCase().split(" ");
  const words2 = shorter.toLowerCase().split(" ");
  // The bounds check avoids reading a first letter from a missing word.
  const result = words1.every(
    (word, i) => i < words2.length && word[0] === words2[i][0]
  );

  if (longer.length < shorter.length) {
    [longer, shorter] = [shorter, longer];
  }

  // Both strings are empty: identical, and the division below would be 0 / 0.
  if (longer.length === 0) {
    return { score: 100, result };
  }

  let count = 0;
  for (let i = 0; i < longer.length; i++) {
    if (shorter && shorter.includes(longer[i])) {
      // Consume the matched character so it cannot be matched twice.
      shorter = shorter.replace(longer[i], "");
      count++;
    }
  }

  return {
    score: (count * 100) / longer.length,
    result,
  };
}


console.log(calculateSimilarity(str1, str2));

小开

我使用了@overord1234函数，但纠正了 ь: ''，因为英语单词没有这个字母，接下来需要 return a[char] ?? char而不是 return a[char] || char