// VP-tree builder and query server for Node.js.
const express = require('express');
const bodyParser = require('body-parser');
const vptree = require('vptree');
const similarity = require('compute-cosine-similarity');
const fs = require('fs');
const path = require('path');

// HTTP server instance and the port it listens on.
const app = express();
const port = 2000;

// Parse JSON request bodies so route handlers can read them from req.body.
app.use(bodyParser.json());
/**
 * Reads a file as UTF-8 text and passes its parsed JSON content to a
 * Node-style callback.
 *
 * @param {string} filePath - Path of the JSON file to read.
 * @param {function(?Error, *): void} callback - Called with `(err, null)` when
 *   reading or parsing fails, otherwise with `(null, parsedData)`.
 */
function loadJSON(filePath, callback) {
  const handleContents = (readError, contents) => {
    if (readError) {
      return callback(readError, null);
    }
    try {
      // The success callback is deliberately invoked inside the try, so an
      // exception raised by it is also routed to the catch branch below.
      callback(null, JSON.parse(contents));
    } catch (parseError) {
      callback(parseError, null);
    }
  };

  fs.readFile(filePath, 'utf8', handleContents);
}
/**
 * Distance function handed to the VP-tree: turns the cosine similarity of two
 * vectors into a distance, sqrt(2 * (1 - similarity)). For unit-length vectors
 * this is the same value as their Euclidean distance.
 *
 * @param {number[]} vector1 - First vector.
 * @param {number[]} vector2 - Second vector.
 * @returns {number} 0 for vectors pointing the same way, up to 2 for opposite
 *   vectors.
 */
function cosineDistanceMatching(vector1, vector2) {
  const cosineSimilarity = similarity(vector1, vector2);
  // Floating-point rounding can report a similarity marginally above 1 for
  // (near-)parallel vectors, which would make the radicand negative and the
  // result NaN. Clamp at 0 so such pairs get distance 0 instead.
  const squaredDistance = Math.max(0, 2 * (1 - cosineSimilarity));
  return Math.sqrt(squaredDistance);
}
// Shared in-memory state, filled in asynchronously after startup:
//   tree           - the VP-tree queried by queryVPTree()
//   vectors        - the `vector` of every entry in embeddings.json
//   jsonEmbeddings - the parsed embeddings.json entries
let tree;
let vectors;
let jsonEmbeddings;

// NOTE(review): both calls only start asynchronous file reads and return
// immediately, so loadVPTree() can run before buildVPTree() has populated
// `vectors` or finished writing tree.txt. buildVPTree() already leaves a usable
// tree in memory, so the load looks redundant here - confirm whether it should
// instead be a fallback for when embeddings.json cannot be rebuilt.
buildVPTree();
loadVPTree();
// Nearest-neighbour search endpoint.
// Request body:  { "vector": [7 numbers] }
// Responses:
//   200 { filename }  - filename of the closest stored embedding, or null when
//                       no matching entry exists
//   400               - the vector is missing or malformed
//   503               - the tree/embeddings have not finished loading yet
app.post('/search/', (req, res) => {
  // Fall back to an empty object so a request without a parsed body is
  // answered with the 400 below instead of throwing on destructuring.
  const { vector } = req.body || {};

  const isValidVector =
    Array.isArray(vector) &&
    vector.length === 7 &&
    vector.every((num) => typeof num === 'number');
  if (!isValidVector) {
    return res.status(400).send({ success: false, message: 'Invalid vector provided. It must be an array of 7 floating-point numbers.' });
  }

  // The tree and embeddings are loaded asynchronously at startup; a request
  // arriving before that finishes must not dereference undefined state.
  if (!tree || !Array.isArray(jsonEmbeddings)) {
    return res.status(503).send({ success: false, message: 'Search index is not ready yet.' });
  }

  const idx = queryVPTree(vector);
  // Retrieve the filename from the stored JSON data
  const filename = idx !== undefined && idx < jsonEmbeddings.length ? jsonEmbeddings[idx].filename : null;
  res.send({ filename });
});
/**
 * Looks up the nearest neighbour of `value` in the shared VP-tree.
 *
 * @param {number[]} value - Query vector.
 * @returns {number|undefined} The `i` field of the first search result (the
 *   caller uses it as an index into the stored embeddings), or `undefined`
 *   when the search yields no result.
 */
function queryVPTree(value) {
  // Called with the query only; presumably the library then returns just the
  // single closest element - confirm against the vptree documentation.
  const nearest = tree.search(value);
  // An empty result (e.g. a tree built from zero vectors) has no first element;
  // report "no match" the way the caller already expects instead of throwing.
  if (!Array.isArray(nearest) || nearest.length === 0) {
    return undefined;
  }
  return nearest[0].i;
}
/**
 * Builds the VP-tree from `embeddings.json` (located next to this file),
 * publishes it together with the embeddings in the shared module state, and
 * saves the stringified tree through saveVPTree('tree').
 *
 * Runs asynchronously and returns nothing; failures are logged.
 */
function buildVPTree() {
  // Load JSON data from embeddings.json
  const jsonFilePath = path.join(__dirname, 'embeddings.json');
  loadJSON(jsonFilePath, (err, jsonData) => {
    if (err) {
      console.error('Error loading JSON:', err);
      return;
    }
    if (!Array.isArray(jsonData)) {
      console.error('Error loading JSON:', new TypeError('embeddings.json must contain an array of entries.'));
      return;
    }

    // Extract vectors from the JSON data and build into locals first, so the
    // shared state is only replaced once the whole build has succeeded.
    const loadedVectors = jsonData.map((item) => item.vector);
    const builtTree = vptree.build(loadedVectors, cosineDistanceMatching);

    jsonEmbeddings = jsonData;
    vectors = loadedVectors;
    tree = builtTree;

    // Persist via the shared helper rather than repeating its write logic.
    saveVPTree('tree');
  });
}
/**
 * Writes the stringified shared VP-tree to `<fileName>.txt` next to this file.
 *
 * The write is asynchronous; the outcome is only logged.
 *
 * @param {string} fileName - Target file name without the `.txt` extension.
 */
function saveVPTree(fileName) {
  // Nothing to save before a tree has been built or loaded; log instead of
  // throwing on `undefined.stringify()`.
  if (!tree) {
    console.error('Tree did not save to file.', new Error('No VP-tree has been built or loaded yet.'));
    return;
  }

  const treeString = tree.stringify();
  const filePath = path.join(__dirname, `${fileName}.txt`);
  fs.writeFile(filePath, treeString, 'utf8', (err) => {
    if (err) {
      // Include the underlying error so the cause of the failure is visible.
      console.error('Tree did not save to file.', err);
      return;
    }
    console.log('Tree saved successfully.');
  });
}
/**
 * Restores the shared VP-tree from the structure saved in `tree.txt`
 * (read through loadTreeFromDisk('tree')).
 *
 * Runs asynchronously and returns nothing; failures are logged and leave the
 * current `tree` untouched.
 */
function loadVPTree() {
  loadTreeFromDisk('tree', (err, treeData) => {
    if (err) {
      console.error('Failed to load tree from disk:', err);
      return;
    }

    // The saved structure is handed to vptree.load together with `vectors`.
    // If the embeddings have not been loaded yet, installing a tree now would
    // replace the shared tree with one that has no data behind it.
    if (!Array.isArray(vectors)) {
      console.error('Error loading the VP-tree:', new Error('Embedding vectors are not loaded yet.'));
      return;
    }

    // Rebuild the VP-tree using the saved structure
    try {
      tree = vptree.load(vectors, cosineDistanceMatching, treeData);
      console.log('Tree loaded successfully.');
    } catch (loadError) {
      console.error('Error loading the VP-tree:', loadError);
    }
  });
}
/**
 * Reads `<fileName>.txt` next to this file and converts its contents into a
 * JavaScript object describing a saved VP-tree.
 *
 * @param {string} fileName - File name without the `.txt` extension.
 * @param {function(?Error, ?Object): void} callback - Called exactly once, with
 *   `(err, null)` when reading or parsing fails, otherwise `(null, treeData)`.
 */
function loadTreeFromDisk(fileName, callback) {
  const filePath = path.join(__dirname, `${fileName}.txt`);
  fs.readFile(filePath, 'utf8', (err, data) => {
    if (err) {
      console.error('Error reading the file:', err);
      return callback(err, null);
    }

    let treeData;
    try {
      // SECURITY NOTE(review): eval executes whatever the file contains with
      // this process's privileges. It is kept because the file is written by
      // tree.stringify(), whose output is presumably a JavaScript literal
      // rather than strict JSON (confirm against the vptree documentation).
      // Only ever load files this application wrote itself.
      treeData = eval(`(${data})`); // Using eval to convert to object
    } catch (parseError) {
      console.error('Error parsing the tree data:', parseError);
      return callback(parseError, null);
    }

    // Invoked outside the try so that an exception thrown by the callback is
    // not mistaken for a parse error and the callback is not run a second time.
    callback(null, treeData);
  });
}
// Start accepting HTTP connections and report the local URL once listening.
app.listen(port, function onListening() {
  const address = `http://localhost:${port}`;
  console.log(`Server listening at ${address}`);
});