mirror of
https://github.com/pupperpowell/bibdle.git
synced 2026-02-04 10:54:44 -05:00
created embeddings
This commit is contained in:
@@ -1,6 +1,9 @@
|
||||
import type { Handle } from '@sveltejs/kit';
|
||||
import * as auth from '$lib/server/auth';
|
||||
|
||||
import { initializeEmbeddings } from '$lib/server/bible-embeddings';
|
||||
import { getAllNKJVVerses } from '$lib/server/xml-bible';
|
||||
|
||||
const handleAuth: Handle = async ({ event, resolve }) => {
|
||||
const sessionToken = event.cookies.get(auth.sessionCookieName);
|
||||
|
||||
@@ -26,3 +29,7 @@ const handleAuth: Handle = async ({ event, resolve }) => {
|
||||
};
|
||||
|
||||
export const handle: Handle = handleAuth;
|
||||
|
||||
// Build (or load) the verse embeddings as part of evaluating this module.
// Because this is a top-level await, evaluation of the module does not finish
// until initializeEmbeddings() has settled.
await initializeEmbeddings(getAllNKJVVerses());
|
||||
98
src/lib/server/bible-embeddings.ts
Normal file
98
src/lib/server/bible-embeddings.ts
Normal file
@@ -0,0 +1,98 @@
|
||||
import { pipeline } from '@xenova/transformers';
|
||||
import type { FeatureExtractionPipeline, Tensor } from '@xenova/transformers';
|
||||
import fs from 'fs/promises';
|
||||
|
||||
let extractor: FeatureExtractionPipeline | null = null;
|
||||
const EMBEDDING_DIM = 384;
|
||||
let verseEmbeddings: Float32Array[] = [];
|
||||
let verses: Array<{ text: string; book: string; chapter: number; verse: number }> = [];
|
||||
|
||||
// Initialize once on server startup
|
||||
/**
 * Loads the embedding model and makes one embedding available per verse.
 *
 * The embeddings are read from a JSON cache file when a usable one exists;
 * otherwise they are computed in batches and the cache file is (re)written.
 * Calling this again after the model has been assigned is a no-op.
 *
 * @param bibleVerses - Verses to index. When a usable cache is found, the verse
 *   list stored in the cache is used instead of this argument.
 * @throws Error if the model returns a different number of values per verse than
 *   `EMBEDDING_DIM`, or if loading/running the model fails.
 */
export async function initializeEmbeddings(bibleVerses: Array<{ text: string; book: string; chapter: number; verse: number; }>) {
	if (extractor) return; // Already initialized

	console.log('Loading embedding model...');
	// Keep a non-nullable local handle so the batch loop needs no `!` assertion.
	const model = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L12-v2');
	extractor = model;
	// Alternatives that were tried: 'Xenova/all-MiniLM-L6-v2' (not used) and
	// 'Xenova/gte-base' (testing). Each model needs its own CACHE_PATH, and a model
	// with a different output width needs EMBEDDING_DIM updated or the width check
	// in the batch loop below will throw.

	verses = bibleVerses;

	const CACHE_PATH = './embeddings-cache-L12.json';
	// Matching cache names for the alternatives above:
	// './embeddings-cache-L6.json' (not used), './embeddings-cache-GTE-base.json' (testing).

	try {
		// Read directly instead of access()+readFile(): one syscall fewer and no
		// window in which the file can disappear between the two calls.
		const cached = JSON.parse(await fs.readFile(CACHE_PATH, 'utf-8'));
		const cachedEmbeddings: unknown = cached?.embeddings;
		const cachedVerses: unknown = cached?.verses;

		// Reject caches that would leave the module in a state where every
		// findSimilarVerses() call fails (empty, mismatched, or wrong vector width).
		if (
			!Array.isArray(cachedEmbeddings) ||
			!Array.isArray(cachedVerses) ||
			cachedEmbeddings.length === 0 ||
			cachedEmbeddings.length !== cachedVerses.length ||
			cachedEmbeddings.some((arr) => !Array.isArray(arr) || arr.length !== EMBEDDING_DIM)
		) {
			throw new Error(`Cache file ${CACHE_PATH} has an unexpected shape`);
		}

		verseEmbeddings = cachedEmbeddings.map((arr: number[]) => Float32Array.from(arr));
		verses = cachedVerses;
		console.log('Loaded embeddings from cache!');
		return;
	} catch (error) {
		const cacheMissing =
			typeof error === 'object' &&
			error !== null &&
			(error as { code?: unknown }).code === 'ENOENT';
		if (cacheMissing) {
			console.log('No cache found, computing embeddings...');
		} else {
			// Unreadable or corrupt cache: still fall back to recomputing, but say why.
			console.warn('Embedding cache unusable, computing embeddings...', error);
		}
	}

	console.log(`Encoding ${verses.length} verses in small batches to manage memory...`);

	const BATCH_SIZE = 128;
	const texts = verses.map((v) => v.text);
	// Collect locally and publish once complete, so the module-level array is
	// never observed half-filled.
	const computed: Float32Array[] = [];
	for (let start = 0; start < texts.length; start += BATCH_SIZE) {
		const batchTexts = texts.slice(start, start + BATCH_SIZE);
		console.log(`Processing batch ${Math.floor(start / BATCH_SIZE) + 1} (${batchTexts.length} verses)...`);

		const output = await model(batchTexts, { pooling: 'mean', normalize: true });
		const data = output.data as Float32Array;
		// The flat buffer is cut into fixed-width rows; verify the width first so
		// a model change cannot silently produce misaligned vectors.
		if (data.length !== batchTexts.length * EMBEDDING_DIM) {
			throw new Error(
				`Embedding dim mismatch: expected ${EMBEDDING_DIM} values per verse, got ${data.length / batchTexts.length}`
			);
		}
		for (let k = 0; k < batchTexts.length; k++) {
			// TypedArray slice() already returns an independent copy.
			computed.push(data.slice(k * EMBEDDING_DIM, (k + 1) * EMBEDDING_DIM));
		}
	}
	verseEmbeddings = computed;

	// Save to cache. This is best-effort: the embeddings are already in memory,
	// so a failed write should not make initialization fail.
	try {
		const embeddingsData = {
			embeddings: verseEmbeddings.map((e) => Array.from(e)),
			verses: verses
		};
		await fs.writeFile(CACHE_PATH, JSON.stringify(embeddingsData));
		console.log('Embeddings computed and cached to disk!');
	} catch (error) {
		console.warn('Embeddings computed, but the cache could not be written:', error);
	}
}
|
||||
|
||||
/**
 * Returns the dot product of two vectors.
 *
 * No normalisation happens here. The result equals the cosine similarity only
 * when both inputs are unit-length; the embeddings in this file are requested
 * with `normalize: true`.
 *
 * @param a - First vector; its length determines how many components are read.
 * @param b - Second vector, read at the same indices as `a`.
 * @returns Sum of the element-wise products (0 for an empty `a`).
 */
function cosineSimilarity(a: Float32Array, b: Float32Array): number {
	return a.reduce((dot, component, index) => dot + component * b[index], 0);
}
|
||||
|
||||
/**
 * Embeds `sentence` and returns the stored verses that score highest against it.
 *
 * @param sentence - Text to embed and compare with every stored verse embedding.
 * @param topK - Maximum number of verses to return (default 10). Fractions are
 *   rounded down; negative values and NaN yield an empty result.
 * @returns Verse records extended with a `score` field, highest score first.
 * @throws Error if the embeddings have not been initialized, if the stored
 *   embeddings and verses differ in length, or if the query embedding does not
 *   have `EMBEDDING_DIM` components.
 */
export async function findSimilarVerses(sentence: string, topK: number = 10) {
	if (!extractor || verseEmbeddings.length === 0) {
		throw new Error('Embeddings not initialized');
	}
	if (verseEmbeddings.length !== verses.length) {
		throw new Error(`Embeddings/verses length mismatch: ${verseEmbeddings.length} != ${verses.length}`);
	}

	// A negative count handed to slice() would mean "everything except the last
	// |topK| items", i.e. almost the whole corpus. Clamp to [0, verses.length].
	const limit = Number.isNaN(topK) ? 0 : Math.min(verses.length, Math.max(0, Math.floor(topK)));

	// Encode query sentence
	const queryOutput = await extractor(sentence, { pooling: 'mean', normalize: true });
	const queryEmbedding = queryOutput.data as Float32Array;
	if (queryEmbedding.length !== EMBEDDING_DIM) {
		throw new Error(`Query embedding dim mismatch: ${queryEmbedding.length} != ${EMBEDDING_DIM}`);
	}

	// Calculate similarities (one number per verse, same order as `verses`)
	const scores = verseEmbeddings.map((embedding) => cosineSimilarity(queryEmbedding, embedding));

	// Rank verse indices rather than full records, so result objects are only
	// built for the entries that are actually returned. Array sort is stable,
	// so ties keep their original verse order.
	const ranked = Array.from(scores.keys()).sort((a, b) => scores[b] - scores[a]);

	return ranked.slice(0, limit).map((idx) => ({
		...verses[idx],
		score: scores[idx]
	}));
}
|
||||
20
src/routes/api/similar-verses/+server.ts
Normal file
20
src/routes/api/similar-verses/+server.ts
Normal file
@@ -0,0 +1,20 @@
|
||||
import { json } from '@sveltejs/kit';
|
||||
import { findSimilarVerses } from '$lib/server/bible-embeddings';
|
||||
import type { RequestHandler } from './$types';
|
||||
|
||||
/** Largest number of matches a single request may ask for. */
const MAX_TOP_K = 100;

/**
 * POST handler: expects a JSON body `{ sentence: string, topK?: number }` and
 * responds with `{ results }` from `findSimilarVerses`.
 *
 * Responses:
 * - 400 `{ error }` when the body is not valid JSON, `sentence` is missing or not
 *   a non-empty string, or `topK` is not a finite number.
 * - 500 `{ error }` when the similarity search throws.
 *
 * `topK` defaults to 10 and is truncated and clamped to `[0, MAX_TOP_K]`.
 */
export const POST: RequestHandler = async ({ request }) => {
	let payload: unknown;
	try {
		payload = await request.json();
	} catch {
		// Malformed JSON is a client error, not a server failure.
		return json({ error: 'Invalid JSON body' }, { status: 400 });
	}

	// `payload ?? {}` keeps a literal `null` body from throwing on destructuring.
	const { sentence, topK = 10 } = (payload ?? {}) as { sentence?: unknown; topK?: unknown };

	if (!sentence || typeof sentence !== 'string') {
		return json({ error: 'Invalid sentence' }, { status: 400 });
	}
	if (typeof topK !== 'number' || !Number.isFinite(topK)) {
		return json({ error: 'Invalid topK' }, { status: 400 });
	}

	// Bound the result size so one request cannot ask for the whole corpus.
	const limit = Math.min(MAX_TOP_K, Math.max(0, Math.trunc(topK)));

	try {
		const results = await findSimilarVerses(sentence, limit);
		return json({ results });
	} catch (error) {
		console.error('Error finding similar verses:', error);
		return json({ error: 'Failed to find similar verses' }, { status: 500 });
	}
};
|
||||
214
src/routes/imposter/+page.svelte
Normal file
214
src/routes/imposter/+page.svelte
Normal file
@@ -0,0 +1,214 @@
|
||||
<script lang="ts">
|
||||
let sentence = $state("");
|
||||
let results = $state<
|
||||
Array<{
|
||||
book: string;
|
||||
chapter: number;
|
||||
verse: number;
|
||||
text: string;
|
||||
score: number;
|
||||
}>
|
||||
>([]);
|
||||
let loading = $state(false);
|
||||
|
||||
/**
 * Posts the current `sentence` to /api/similar-verses and stores the returned
 * matches in `results`.
 *
 * `loading` is true for the duration of the request. Any failure (network
 * error, non-2xx status, or an `error` field in the response) is logged to the
 * console and leaves `results` empty.
 */
async function searchVerses() {
	// A blank sentence cannot produce a meaningful search (an empty string is
	// rejected by the endpoint with 400), so skip the round trip and just
	// clear any previous matches.
	if (!sentence.trim()) {
		results = [];
		return;
	}

	loading = true;
	try {
		const response = await fetch("/api/similar-verses", {
			method: "POST",
			headers: { "Content-Type": "application/json" },
			body: JSON.stringify({ sentence, topK: 10 }),
		});

		if (!response.ok) {
			throw new Error(`HTTP ${response.status}: ${await response.text()}`);
		}

		const data = await response.json();
		if (data.error) {
			throw new Error(data.error);
		}
		results = data.results || [];
	} catch (error) {
		console.error("Search error:", error);
		results = [];
	} finally {
		// Always re-enable the search button, whether the request succeeded or not.
		loading = false;
	}
}
|
||||
</script>
|
||||
|
||||
<div class="page">
|
||||
<h1 class="title">Similar Verse Finder</h1>
|
||||
|
||||
<div class="search-section">
|
||||
<input
|
||||
bind:value={sentence}
|
||||
placeholder="Enter a sentence to find similar Bible verses..."
|
||||
class="input"
|
||||
/>
|
||||
<button onclick={searchVerses} disabled={loading} class="button">
|
||||
{loading ? "Searching..." : "Find Similar Verses"}
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{#if results.length > 0}
|
||||
<div class="results">
|
||||
{#each results as result, i (i)}
|
||||
<article class="result">
|
||||
<header>
|
||||
<strong>{result.book} {result.chapter}:{result.verse}</strong>
|
||||
<span class="score">Score: {result.score.toFixed(3)}</span>
|
||||
</header>
|
||||
<p>{result.text}</p>
|
||||
</article>
|
||||
{/each}
|
||||
</div>
|
||||
{:else if sentence.trim() && !loading}
|
||||
<p class="no-results">No similar verses found. Try another sentence!</p>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
<style>
|
||||
.page {
|
||||
max-width: 900px;
|
||||
margin: 0 auto;
|
||||
padding: 1rem 0.75rem;
|
||||
font-family:
|
||||
system-ui,
|
||||
-apple-system,
|
||||
sans-serif;
|
||||
}
|
||||
|
||||
.title {
|
||||
text-align: center;
|
||||
margin-bottom: 1.75rem;
|
||||
font-size: clamp(2rem, 5vw, 3rem);
|
||||
color: #2c3e50;
|
||||
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
||||
-webkit-background-clip: text;
|
||||
-webkit-text-fill-color: transparent;
|
||||
background-clip: text;
|
||||
}
|
||||
|
||||
.search-section {
|
||||
display: flex;
|
||||
gap: 0.75rem;
|
||||
margin-bottom: 1.5rem;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
|
||||
.input {
|
||||
flex: 1;
|
||||
min-width: 300px;
|
||||
padding: 0.75rem 1rem;
|
||||
border: 2px solid #e1e5e9;
|
||||
border-radius: 12px;
|
||||
font-size: 1.1rem;
|
||||
transition: all 0.2s ease;
|
||||
background: #fafbfc;
|
||||
}
|
||||
|
||||
.input:focus {
|
||||
outline: none;
|
||||
border-color: #667eea;
|
||||
box-shadow: 0 0 0 4px rgba(102, 126, 234, 0.1);
|
||||
background: white;
|
||||
}
|
||||
|
||||
.button {
|
||||
padding: 0.75rem 1.5rem;
|
||||
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
||||
color: white;
|
||||
border: none;
|
||||
border-radius: 12px;
|
||||
font-size: 1.1rem;
|
||||
font-weight: 500;
|
||||
cursor: pointer;
|
||||
transition: all 0.2s ease;
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
.button:hover:not(:disabled) {
|
||||
transform: translateY(-1px);
|
||||
box-shadow: 0 8px 25px rgba(102, 126, 234, 0.3);
|
||||
}
|
||||
|
||||
.button:disabled {
|
||||
background: #a0aec0;
|
||||
cursor: not-allowed;
|
||||
transform: none;
|
||||
box-shadow: none;
|
||||
}
|
||||
|
||||
.results {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 1rem;
|
||||
}
|
||||
|
||||
.result {
|
||||
background: linear-gradient(145deg, #ffffff 0%, #f8fafc 100%);
|
||||
border: 1px solid #e2e8f0;
|
||||
border-radius: 16px;
|
||||
padding: 1.25rem;
|
||||
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.08);
|
||||
transition: all 0.2s ease;
|
||||
}
|
||||
|
||||
.result:hover {
|
||||
transform: translateY(-2px);
|
||||
box-shadow: 0 12px 24px rgba(0, 0, 0, 0.15);
|
||||
}
|
||||
|
||||
.result header {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: flex-start;
|
||||
margin-bottom: 0.75rem;
|
||||
gap: 0.75rem;
|
||||
}
|
||||
|
||||
.result strong {
|
||||
font-size: 1.3rem;
|
||||
color: #1a202c;
|
||||
}
|
||||
|
||||
.score {
|
||||
font-size: 1rem;
|
||||
color: #718096;
|
||||
font-weight: 500;
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
.result p {
|
||||
margin: 0;
|
||||
line-height: 1.7;
|
||||
color: #4a5568;
|
||||
font-size: 1.1rem;
|
||||
}
|
||||
|
||||
.no-results {
|
||||
text-align: center;
|
||||
padding: 1.75rem 0.75rem;
|
||||
color: #a0aec0;
|
||||
font-size: 1.2rem;
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
@media (max-width: 768px) {
|
||||
.search-section {
|
||||
flex-direction: column;
|
||||
}
|
||||
|
||||
.input {
|
||||
min-width: unset;
|
||||
}
|
||||
|
||||
.result header {
|
||||
flex-direction: column;
|
||||
align-items: flex-start;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
Reference in New Issue
Block a user