|
|
@ -1,55 +1,38 @@ |
|
|
|
// Module dependencies. Each binding is declared exactly once: redeclaring a
// `const` (as `vptree` previously was) is a SyntaxError that prevents the
// whole file from loading.
const express = require('express');
const bodyParser = require('body-parser');
const vptree = require('vptree'); // Assuming you have the vptree library available
const similarity = require('compute-cosine-similarity');
const fs = require('fs');
const path = require('path');

const app = express();

// `port` was declared twice (3000, then 2000); only the later declaration is
// kept. NOTE(review): confirm 2000 is the intended port.
const port = 2000;

// Register the JSON body-parsing middleware on the app.
app.use(bodyParser.json());
|
|
|
|
|
|
|
// Function to load JSON data
|
|
|
|
/**
 * Reads a file as UTF-8 text and parses its contents as JSON.
 *
 * @param {string} filePath - Path of the file to read.
 * @param {function(?Error, *): void} callback - Node-style callback. Receives
 *   `(err, null)` when reading or parsing fails and `(null, jsonData)` on
 *   success. It is invoked once per call.
 */
function loadJSON(filePath, callback) {
  fs.readFile(filePath, 'utf8', (err, data) => {
    if (err) {
      return callback(err, null);
    }

    let jsonData;
    try {
      jsonData = JSON.parse(data);
    } catch (parseError) {
      return callback(parseError, null);
    }

    // The success callback runs outside the try block on purpose: if the
    // callback itself throws, that exception must not be caught here and
    // reported back through a second callback invocation as a "parse error".
    callback(null, jsonData);
  });
}
|
|
|
|
|
|
|
// Sample word list; elsewhere in this file it is passed to vptree.load
// together with levenshteinDistance.
const dataset = [
  'culture',
  'democracy',
  'metaphor',
  'irony',
  'hypothesis',
  'science',
  'fastuous',
  'integrity',
  'synonym',
  'empathy', // and on and on...
];
|
|
|
|
|
|
|
// Example Levenshtein distance function
|
|
|
|
/**
 * Computes the Levenshtein (edit) distance between two strings: the minimum
 * number of single-character insertions, deletions and substitutions needed
 * to turn `a` into `b`.
 *
 * Characters are compared by index (`a[i]`), i.e. per UTF-16 code unit.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} The edit distance; `b.length` when `a` is empty and
 *   `a.length` when `b` is empty.
 */
function levenshteinDistance(a, b) {
  const alen = a.length;
  const blen = b.length;

  // Only the previous row of the DP table is needed to compute the next one.
  // Row 0 is the cost of building each prefix of `b` from the empty string.
  let previousRow = Array.from({ length: blen + 1 }, (_, j) => j);

  for (let i = 1; i <= alen; i++) {
    // Column 0 is the cost of deleting the first `i` characters of `a`.
    const currentRow = [i];

    for (let j = 1; j <= blen; j++) {
      const cost = a[i - 1] === b[j - 1] ? 0 : 1;
      currentRow[j] = Math.min(
        previousRow[j] + 1, // deletion
        currentRow[j - 1] + 1, // insertion
        previousRow[j - 1] + cost // substitution (or match)
      );
    }

    previousRow = currentRow;
  }

  return previousRow[blen];
}
|
|
|
/**
 * Distance function for the VP-tree, derived from cosine similarity as
 * sqrt(2 * (1 - cos)). For unit-length vectors this equals their Euclidean
 * distance.
 *
 * @param {number[]} vector1 - First vector.
 * @param {number[]} vector2 - Second vector.
 * @returns {number} A non-negative distance (0 for identically oriented
 *   vectors), or NaN if the similarity itself is NaN.
 */
function cosineDistanceMatching(vector1, vector2) {
  const cosineSimilarity = similarity(vector1, vector2);

  // Floating-point rounding can yield a similarity marginally above 1 for
  // (near-)identical vectors. Without the clamp the radicand goes slightly
  // negative and Math.sqrt returns NaN instead of 0.
  const squaredDistance = Math.max(0, 2 * (1 - cosineSimilarity));

  return Math.sqrt(squaredDistance);
}
|
|
|
|
|
|
|
// Module-level state assigned by the tree helpers in this file. `tree` was
// previously declared twice, which is a SyntaxError; each binding is now
// declared once.
let tree;
let vectors;
let jsonEmbeddings;

// NOTE(review): both start-up calls are kept as found. buildVPTree() assigns
// `vectors` asynchronously, while loadVPTree() passes `vectors` to
// vptree.load — confirm the intended ordering / whether both calls are wanted.
buildVPTree();
loadVPTree();
|
|
|
|
|
|
|
// Load tree endpoint
|
|
|
@ -61,9 +44,14 @@ app.post('/search/:word', (req, res) => { |
|
|
|
return res.status(400).send({ success: false, message: 'No word provided' }); |
|
|
|
} |
|
|
|
|
|
|
|
let idx = queryVPTree("democratic") |
|
|
|
// vector = [angry, disgust, fear, happy, sad, surprise, neutral]
|
|
|
|
let query_vec = [0, 0, 1.0, 0, 0, 0, 0] |
|
|
|
let idx = queryVPTree(query_vec) |
|
|
|
|
|
|
|
// Retrieve the filename from the stored JSON data
|
|
|
|
let filename = idx !== undefined && idx < jsonEmbeddings.length ? jsonEmbeddings[idx].filename : null; |
|
|
|
|
|
|
|
res.send({idx}); |
|
|
|
res.send({filename}); |
|
|
|
}); |
|
|
|
|
|
|
|
function queryVPTree(value){ |
|
|
@ -73,8 +61,31 @@ function queryVPTree(value){ |
|
|
|
} |
|
|
|
|
|
|
|
/**
 * Builds the VP-tree from the vectors in `embeddings.json` (located next to
 * this file) and writes the stringified tree to `tree.txt` in the same
 * directory.
 *
 * Side effects: assigns the module-level `jsonEmbeddings`, `vectors` and
 * `tree`. The work is asynchronous; failures are logged rather than thrown.
 */
function buildVPTree() {
  // The earlier `vptree.build(stringList, levenshteinDistance)` call was
  // removed: `stringList` is not defined in this file's visible scope, and the
  // tree is built from the embedding vectors below.

  // Load JSON data from embeddings.json
  const jsonFilePath = path.join(__dirname, 'embeddings.json');

  loadJSON(jsonFilePath, (err, jsonData) => {
    if (err) {
      console.error('Error loading JSON:', err);
      return;
    }

    // `.map` below requires an array; bail out with a clear message instead of
    // throwing an uncaught TypeError from inside the callback.
    if (!Array.isArray(jsonData)) {
      console.error('Error loading JSON:', new TypeError('embeddings.json must contain an array'));
      return;
    }

    // Extract vectors from the JSON data
    jsonEmbeddings = jsonData;
    vectors = jsonEmbeddings.map((item) => item.vector);
    tree = vptree.build(vectors, cosineDistanceMatching);

    const treeString = tree.stringify();
    const fileName = 'tree';
    const filePath = path.join(__dirname, `${fileName}.txt`);

    fs.writeFile(filePath, treeString, 'utf8', (writeErr) => {
      if (writeErr) {
        // Include the underlying error so the failure can be diagnosed.
        console.error('Tree did not save to file.', writeErr);
        return;
      }

      console.log('Tree saved successfully.');
    });
  });
}
|
|
|
|
|
|
|
function saveVPTree(fileName){ |
|
|
@ -94,7 +105,7 @@ function saveVPTree(fileName){ |
|
|
|
} |
|
|
|
|
|
|
|
function loadVPTree(){ |
|
|
|
loadTreeFromDisk("vp-tree-new", (err, treeData) => { |
|
|
|
loadTreeFromDisk("tree", (err, treeData) => { |
|
|
|
if (err) { |
|
|
|
console.error('Failed to load tree from disk:', err); |
|
|
|
return; |
|
|
@ -102,7 +113,7 @@ function loadVPTree(){ |
|
|
|
|
|
|
|
// Rebuild the VP-tree using the saved structure
|
|
|
|
try { |
|
|
|
tree = vptree.load(dataset, levenshteinDistance, treeData); |
|
|
|
tree = vptree.load(vectors, cosineDistanceMatching, treeData); |
|
|
|
console.log('Tree loaded successfully.'); |
|
|
|
|
|
|
|
} catch (loadError) { |
|
|
|