Skip to content

Commit

Permalink
update ngram and qgram with constructor
Browse files Browse the repository at this point in the history
  • Loading branch information
sumn2u committed Apr 10, 2024
1 parent c571430 commit 82e1835
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 75 deletions.
28 changes: 17 additions & 11 deletions lib/nGram.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,23 @@
*/
class Ngram {
  /**
   * @constructor
   * @param {number} n - The size of the n-grams to use for similarity calculation. Defaults to 2.
   */
  constructor(n = 2) {
    this.n = n;
  }

  /**
   * @method calculateSimilarity
   * @param {string} str1 - The first string to compare.
   * @param {string} str2 - The second string to compare.
   * @returns {number} - A number between 0 and 1 representing the similarity between the two strings.
   * @description Calculates the similarity between two strings as the number of
   * distinct n-grams they share divided by the number of distinct n-grams found
   * in either string. When neither string is long enough to produce an n-gram,
   * the result is 1 if the strings are identical and 0 otherwise.
   */
  calculateSimilarity(str1, str2) {
    const ngrams1 = this.getNgrams(str1);
    const ngrams2 = this.getNgrams(str2);
    const union = this.getUnion(ngrams1, ngrams2);

    // No n-grams on either side: dividing would give 0 / 0 (NaN).
    if (union.length === 0) {
      return str1 === str2 ? 1 : 0;
    }

    // getIntersection keeps repeated n-grams while getUnion does not, so the
    // shared n-grams are de-duplicated here to keep the ratio within [0, 1].
    const shared = new Set(this.getIntersection(ngrams1, ngrams2));
    return shared.size / union.length;
  }

  /**
   * @method getNgrams
   * @param {string} str - The string to generate n-grams from.
   * @returns {string[]} - An array of n-grams from the input string.
   * @description Generates all n-grams (every substring of length `this.n`, in
   * order of position) from the input string. Returns an empty array when the
   * string is shorter than `this.n`.
   */
  getNgrams(str) {
    const ngrams = [];
    for (let i = 0; i < str.length - this.n + 1; i++) {
      ngrams.push(str.substring(i, i + this.n));
    }
    return ngrams;
  }

  /**
   * @method getIntersection
   * @param {string[]} set1 - The first set of n-grams.
   * @param {string[]} set2 - The second set of n-grams.
   * @returns {string[]} - An array of n-grams that are present in both sets.
   * @description Gets the intersection of two sets of n-grams. The result keeps
   * the order (and any repeats) of `set1`.
   */
  getIntersection(set1, set2) {
    // One Set lookup per element instead of rescanning set2 every time.
    const lookup = new Set(set2);
    return set1.filter((ngram) => lookup.has(ngram));
  }

  /**
   * @method getUnion
   * @param {string[]} set1 - The first set of n-grams.
   * @param {string[]} set2 - The second set of n-grams.
   * @returns {string[]} - An array of all unique n-grams from both sets.
   * @description Gets the union of two sets of n-grams.
   */
  getUnion(set1, set2) {
    return [...new Set([...set1, ...set2])];
  }

  /**
   * @method calculateSimilarity
   * @static
   * @param {number} n - The size of the n-grams to use for similarity calculation. Defaults to 2.
   * @param {string} str1 - The first string to compare.
   * @param {string} str2 - The second string to compare.
   * @returns {number} - A number between 0 and 1 representing the similarity between the two strings.
   * @description Static form that takes the n-gram size per call; equivalent to
   * `new Ngram(n).calculateSimilarity(str1, str2)`.
   */
  static calculateSimilarity(n = 2, str1, str2) {
    return new Ngram(n).calculateSimilarity(str1, str2);
  }

  /**
   * @method getNgrams
   * @static
   * @param {string} str - The string to generate n-grams from.
   * @param {number} n - The size of the n-grams.
   * @returns {string[]} - An array of n-grams from the input string.
   * @description Static form of `getNgrams` that takes the size per call.
   */
  static getNgrams(str, n) {
    return new Ngram(n).getNgrams(str);
  }

  /**
   * @method getIntersection
   * @static
   * @param {string[]} set1 - The first set of n-grams.
   * @param {string[]} set2 - The second set of n-grams.
   * @returns {string[]} - An array of n-grams that are present in both sets.
   * @description Static form of `getIntersection`.
   */
  static getIntersection(set1, set2) {
    return new Ngram().getIntersection(set1, set2);
  }

  /**
   * @method getUnion
   * @static
   * @param {string[]} set1 - The first set of n-grams.
   * @param {string[]} set2 - The second set of n-grams.
   * @returns {string[]} - An array of all unique n-grams from both sets.
   * @description Static form of `getUnion`.
   */
  static getUnion(set1, set2) {
    return new Ngram().getUnion(set1, set2);
  }
}

export default Ngram;
113 changes: 59 additions & 54 deletions lib/qGram.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,60 +3,65 @@
* @description This class calculates the similarity between two strings using q-grams.
*/
class Qgram {
  /**
   * @constructor
   * @param {number} q - The size of the q-grams to use for similarity calculation. Defaults to 2.
   */
  constructor(q = 2) {
    this.q = q;
  }

  /**
   * @method calculateSimilarity
   * @param {string} str1 - The first string to compare.
   * @param {string} str2 - The second string to compare.
   * @returns {number} - A number between 0 and 1 representing the similarity between the two strings.
   * @description Calculates the similarity between two strings as the number of
   * distinct q-grams they share divided by the number of distinct q-grams found
   * in either string. When neither string is long enough to produce a q-gram,
   * the result is 1 if the strings are identical and 0 otherwise.
   */
  calculateSimilarity(str1, str2) {
    const qgrams1 = this.getQgrams(str1);
    const qgrams2 = this.getQgrams(str2);
    const union = this.getUnion(qgrams1, qgrams2);

    // No q-grams on either side: dividing would give 0 / 0 (NaN).
    if (union.length === 0) {
      return str1 === str2 ? 1 : 0;
    }

    // getIntersection keeps repeated q-grams while getUnion does not, so the
    // shared q-grams are de-duplicated here to keep the ratio within [0, 1].
    const shared = new Set(this.getIntersection(qgrams1, qgrams2));
    return shared.size / union.length;
  }

  /**
   * @method getQgrams
   * @param {string} str - The string to generate q-grams from.
   * @returns {string[]} - An array of q-grams from the input string.
   * @description Generates all q-grams (every substring of length `this.q`, in
   * order of position) from the input string. Returns an empty array when the
   * string is shorter than `this.q`.
   */
  getQgrams(str) {
    const qgrams = [];
    for (let i = 0; i < str.length - this.q + 1; i++) {
      qgrams.push(str.substring(i, i + this.q));
    }
    return qgrams;
  }

  /**
   * @method getIntersection
   * @param {string[]} set1 - The first set of q-grams.
   * @param {string[]} set2 - The second set of q-grams.
   * @returns {string[]} - An array of q-grams that are present in both sets.
   * @description Gets the intersection of two sets of q-grams. The result keeps
   * the order (and any repeats) of `set1`.
   */
  getIntersection(set1, set2) {
    // One Set lookup per element instead of rescanning set2 every time.
    const lookup = new Set(set2);
    return set1.filter((qgram) => lookup.has(qgram));
  }

  /**
   * @method getUnion
   * @param {string[]} set1 - The first set of q-grams.
   * @param {string[]} set2 - The second set of q-grams.
   * @returns {string[]} - An array of all unique q-grams from both sets.
   * @description Gets the union of two sets of q-grams.
   */
  getUnion(set1, set2) {
    return [...new Set([...set1, ...set2])];
  }

  /**
   * @method calculateSimilarity
   * @static
   * @param {number} q - The size of the q-grams to use for similarity calculation. Defaults to 2.
   * @param {string} str1 - The first string to compare.
   * @param {string} str2 - The second string to compare.
   * @returns {number} - A number between 0 and 1 representing the similarity between the two strings.
   * @description Static form that takes the q-gram size per call; equivalent to
   * `new Qgram(q).calculateSimilarity(str1, str2)`.
   */
  static calculateSimilarity(q = 2, str1, str2) {
    return new Qgram(q).calculateSimilarity(str1, str2);
  }

  /**
   * @method getQgrams
   * @static
   * @param {string} str - The string to generate q-grams from.
   * @param {number} q - The size of the q-grams.
   * @returns {string[]} - An array of q-grams from the input string.
   * @description Static form of `getQgrams` that takes the size per call.
   */
  static getQgrams(str, q) {
    return new Qgram(q).getQgrams(str);
  }

  /**
   * @method getIntersection
   * @static
   * @param {string[]} set1 - The first set of q-grams.
   * @param {string[]} set2 - The second set of q-grams.
   * @returns {string[]} - An array of q-grams that are present in both sets.
   * @description Static form of `getIntersection`.
   */
  static getIntersection(set1, set2) {
    return new Qgram().getIntersection(set1, set2);
  }

  /**
   * @method getUnion
   * @static
   * @param {string[]} set1 - The first set of q-grams.
   * @param {string[]} set2 - The second set of q-grams.
   * @returns {string[]} - An array of all unique q-grams from both sets.
   * @description Static form of `getUnion`.
   */
  static getUnion(set1, set2) {
    return new Qgram().getUnion(set1, set2);
  }
}

export default Qgram;
export default Qgram;
12 changes: 7 additions & 5 deletions test/nGram.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,20 @@ import Ngram from '../lib/nGram';

// Spec for the instance-based Ngram API: the n-gram size is fixed by the
// constructor and calculateSimilarity takes only the two strings.
describe("Ngram", function () {
  it("calculates similarity between two strings", function () {
    const ngram = new Ngram(); // default size (bigrams)
    const similarity = ngram.calculateSimilarity("hello world", "world hello");
    expect(similarity).toBeGreaterThan(0.5); // Allow for slight variations due to ordering
  });

  it("handles different n-gram sizes", function () {
    const trigram = new Ngram(3);
    const similarity = trigram.calculateSimilarity("computer science", "computational sciences");
    expect(similarity).toBeGreaterThan(0.4); // Adjust threshold based on n-gram size
  });

  it("returns 0 for completely dissimilar strings", function () {
    const ngram = new Ngram();
    const similarity = ngram.calculateSimilarity("apple banana", "cat dog");
    expect(similarity).toEqual(0);
  });
});
13 changes: 8 additions & 5 deletions test/qGram.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,20 @@ import Qgram from '../lib/qGram';

// Spec for the instance-based Qgram API: the q-gram size is fixed by the
// constructor and calculateSimilarity takes only the two strings.
describe("Qgram", function () {
  it("calculates similarity between two strings", function () {
    const qgram = new Qgram(2); // Create a Qgram object with q=2
    const similarity = qgram.calculateSimilarity("hello world", "world hello");
    expect(similarity).toBeGreaterThan(0.5); // Allow for slight variations due to ordering
  });

  it("handles different q-gram sizes", function () {
    const qgram = new Qgram(3); // Create a Qgram object with q=3
    const similarity = qgram.calculateSimilarity("computer science", "computational sciences");
    expect(similarity).toBeGreaterThan(0.4); // Adjust threshold based on q-gram size
  });

  it("returns 0 for completely dissimilar strings", function () {
    const qgram = new Qgram(2); // Create a Qgram object with q=2
    const similarity = qgram.calculateSimilarity("apple banana", "cat dog");
    expect(similarity).toEqual(0);
  });
});
Expand Down

0 comments on commit 82e1835

Please sign in to comment.