Vector Similarity Search

JS
S
JavaScript

A simple Node.js application that demonstrates vector similarity search using a local database. The application uses the OpenAI API to convert text to vectors and stores them in an in-memory SQLite database. It also provides a /search endpoint that accepts text, converts it to a vector, and ranks the stored vectors by their similarity to it. Please note that the similarity computed in the similarities map function is only a placeholder (a plain dot product); to properly calculate vector similarity, you should use a more sophisticated algorithm. In practice, cosine similarity is often used for this purpose, but implementing this requires additional code. Applications of vector similarity search include: Product Recommendations: Suggest similar products to customers based on their preferences. Search Engines: Retrieve relevant documents or websites matching a text query. Word/Sentence Similarity: Find words or sentences with similar meanings in NLP tasks. Image Search: Locate visually similar images in a database. Facial Recognition: Identify or authenticate individuals by comparing facial features. Voice Identification: Match voice samples for recognition or verification. Anomaly Detection: Spot unusual patterns in data, such as potential security breaches. Drug Research: Discover compounds with similar structures for developing new medications. Genetic Research: Compare DNA sequences to study genetic relationships or mutations. Customer Insights: Analyze reviews or feedback to gauge market trends and sentiments.

const express = require('express');
const sqlite3 = require('sqlite3').verbose();
// NOTE(review): Configuration/OpenAIApi are the v3-style exports of the `openai`
// package — confirm the installed major version still provides them.
const { Configuration, OpenAIApi } = require('openai');

const app = express();
// Honour a PORT supplied by the environment; fall back to the original default of 3000
// when it is unset or not a usable number.
const port = Number(process.env.PORT) || 3000;

// Configuration for the OpenAI API. The key is read from the environment so it never
// lives in source control; warn early because every embedding request fails without it.
if (!process.env.OPENAI_API_KEY) {
  console.warn('OPENAI_API_KEY is not set; embedding requests will fail.');
}
const configuration = new Configuration({
  apiKey: process.env.OPENAI_API_KEY,
});
const openai = new OpenAIApi(configuration);

// Parse JSON request bodies so handlers can read req.body.
app.use(express.json());

// Connect to an in-memory SQLite database (contents are lost when the process exits).
const db = new sqlite3.Database(':memory:', (err) => {
  if (err) {
    return console.error(err.message);
  }
  console.log('Connected to the in-memory SQLite database.');
});

// Create the table that stores vectors; each vector is kept as a JSON-encoded TEXT column.
db.run('CREATE TABLE IF NOT EXISTS vectors (id INTEGER PRIMARY KEY, vector TEXT)', (err) => {
  if (err) {
    console.error(`Failed to create vectors table: ${err.message}`);
  }
});
25
/**
 * Convert text to an embedding vector using OpenAI's embeddings endpoint.
 *
 * NOTE(review): the previous hard-coded model, "text-similarity-babbage-001", is a
 * first-generation embedding model that OpenAI lists as retired — confirm the default
 * below is the model you want.
 *
 * @param {string} text - Text to embed.
 * @param {string} [model='text-embedding-ada-002'] - Embedding model to request.
 * @returns {Promise<number[]|null>} The embedding for `text`, or null when the request
 *   fails or the response carries no embedding array. This function never rejects.
 */
async function textToVector(text, model = 'text-embedding-ada-002') {
  try {
    const response = await openai.createEmbedding({ model, input: text });
    // A single input is sent, so the first entry is the embedding for that input
    // (it is not a relevance ranking).
    const embedding = response.data.data[0].embedding;
    return Array.isArray(embedding) ? embedding : null;
  } catch (error) {
    // Log only the API error payload or the message: the full client error object can
    // carry the request configuration, including the Authorization header.
    const detail =
      (error && error.response && error.response.data) || (error && error.message) || error;
    console.error('Embedding request failed:', detail);
    return null;
  }
}
39
// Endpoint to add a new vector.
// Expects a JSON body of the form { "text": "..." }. The text is embedded via
// textToVector and the resulting vector is stored as a JSON string.
// Responses: 400 when no usable text is supplied, 500 when embedding or storage fails,
// otherwise a message containing the ID of the inserted row.
app.post('/add-vector', async (req, res) => {
  // req.body may be missing when the request carried no parsable JSON.
  const text = req.body && req.body.text;
  if (typeof text !== 'string' || text.trim() === '') {
    return res.status(400).send('No text provided');
  }

  const vector = await textToVector(text);
  if (!vector) {
    return res.status(500).send('Failed to generate vector');
  }

  // A regular function (not an arrow) is required here: sqlite3 exposes the inserted
  // row id as `this.lastID` on the callback's `this`.
  db.run('INSERT INTO vectors (vector) VALUES (?)', [JSON.stringify(vector)], function onInsert(err) {
    if (err) {
      console.error(err.message);
      // Always answer the client; otherwise the request would hang until it times out.
      return res.status(500).send('Failed to store vector');
    }
    res.send(`Vector added with ID: ${this.lastID}`);
  });
});
59
/**
 * Cosine similarity between two numeric vectors.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector.
 * @returns {number} dot(a, b) / (|a| * |b|); 0 when either argument is not an array,
 *   the lengths differ, the vectors are empty, or either has zero magnitude.
 */
function cosineSimilarity(a, b) {
  if (!Array.isArray(a) || !Array.isArray(b) || a.length !== b.length || a.length === 0) {
    return 0;
  }
  let dot = 0;
  let sumSquaresA = 0;
  let sumSquaresB = 0;
  for (let i = 0; i < a.length; i += 1) {
    dot += a[i] * b[i];
    sumSquaresA += a[i] * a[i];
    sumSquaresB += b[i] * b[i];
  }
  const magnitude = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB);
  return magnitude === 0 ? 0 : dot / magnitude;
}

// Endpoint to search for similar vectors.
// Expects a `text` query parameter, embeds it via textToVector, and responds with a JSON
// array of { id, similarity } for every stored vector, sorted by similarity, highest first.
// Responses: 400 when no usable text is supplied, 500 when embedding or the database
// read fails.
app.get('/search', async (req, res) => {
  const text = req.query.text;
  if (typeof text !== 'string' || text.trim() === '') {
    return res.status(400).send('No text provided for search');
  }

  const searchVector = await textToVector(text);
  if (!searchVector) {
    return res.status(500).send('Failed to generate vector for search');
  }

  db.all('SELECT * FROM vectors', [], (err, rows) => {
    if (err) {
      // Throwing inside this callback would escape Express and take the process down;
      // report the failure to the client instead.
      console.error(err.message);
      return res.status(500).send('Failed to read stored vectors');
    }

    let similarities;
    try {
      similarities = rows.map((row) => ({
        id: row.id,
        similarity: cosineSimilarity(JSON.parse(row.vector), searchVector),
      }));
    } catch (parseError) {
      // JSON.parse throws on a malformed stored vector.
      console.error(parseError.message);
      return res.status(500).send('Failed to read stored vectors');
    }

    // Sort by similarity - highest first
    similarities.sort((a, b) => b.similarity - a.similarity);
    res.json(similarities);
  });
});
90
// Start the HTTP server on the configured port and log once it is accepting connections.
app.listen(port, () => {
  console.log(`Server running on port ${port}`);
});

Created on 3/1/2024