embeddings fix

This commit is contained in:
George Powell
2025-12-26 01:11:45 -05:00
parent 0daefcb080
commit 280ed3424c
3 changed files with 63 additions and 2 deletions

View File

@@ -0,0 +1,28 @@
# Amongus Mode Implementation
Amongus Mode, or Imposter Mode, is the development name for the variation of Bibdle in which users are shown four verses: three random Bible verses and one completely fake verse. The structural changes needed to implement this feature are extensive.
## Features/changes needed
- User signup/login, session storage
- verse_submissions table and accepted_verse_submissions table
- daily_verses table with date, verses 1-3 and fake verse columns
- Random_verse function
### UI (mobile-first, single-column by default)
- UI for introducing users to this new mode
- Possible menu screen on root URL / that gives options of daily mode or amongus mode
- UI for submitting user-generated verses
- UI for seeing the state of your submitted verses, including approved/declined status, the number of people who successfully identified each verse as fake, and the number of people who were fooled by it (estimated difficulty?)
- UI for reviewing user-submitted verses, including:
- similarity scores to existing bible verses (complete) using embeddings and Xenova/all-MiniLM-L12-v2 embedding transformer model (embeddings are saved in ./embeddings-cache-L12.json)
- Options to approve or decline, with reasons including too similar, etc.
## Submission guidelines
1. Sufficiently different from existing verses in the Bible.
2. Could plausibly be confused for an actual Bible verse.
3. Respectful of the original source material (no profanity, etc.).

View File

@@ -47,9 +47,13 @@ export async function initializeEmbeddings(bibleVerses: Array<{ text: string; bo
const output = await extractor!(batchTexts, { pooling: 'mean', normalize: true });
const data = output.data as Float32Array;
const embeddingDim = EMBEDDING_DIM;
const embeddingDim = data.length / batchTexts.length;
if (Number.isInteger(embeddingDim) === false || embeddingDim !== EMBEDDING_DIM) {
throw new Error(`Invalid embedding shape: data.length=${data.length}, batch=${batchTexts.length}, dim=${embeddingDim}`);
}
for (let k = 0; k < batchTexts.length; k++) {
verseEmbeddings.push(Float32Array.from(data.slice(k * embeddingDim, (k + 1) * embeddingDim)));
const sliceData = data.slice(k * embeddingDim, (k + 1) * embeddingDim);
verseEmbeddings.push(Float32Array.from(sliceData));
}
}

View File

@@ -362,3 +362,32 @@ export function formatReference(bookName: string, chapter: number, startVerse: n
}
return `${bookName} ${chapter}:${startVerse}-${endVerse}`;
}
/**
 * Flatten the parsed NKJV Bible into a single list of verses for embedding generation.
 *
 * Walks every testament, book, chapter, and verse of the structure returned by
 * `loadBibleXml()`, in the order they appear there. The book name is resolved via
 * `getBookByNumber()`; when no book info is found, the placeholder `Book <number>`
 * is used instead.
 *
 * @returns One entry per verse holding its text (an empty string when the verse
 *   has no `_text`), its book name, and its numeric chapter and verse.
 */
export function getAllNKJVVerses(): Array<{ text: string; book: string; chapter: number; verse: number }> {
	const { bible: root } = loadBibleXml();
	const collected: ReturnType<typeof getAllNKJVVerses> = [];

	for (const testamentNode of root.testament) {
		for (const bookNode of testamentNode.book) {
			const bookIndex = +bookNode.number;

			// Fall back to a generic label when the book number is not recognised.
			let bookLabel = `Book ${bookIndex}`;
			const knownBook = getBookByNumber(bookIndex);
			if (knownBook) {
				bookLabel = knownBook.name;
			}

			for (const chapterNode of bookNode.chapter) {
				const chapter = +chapterNode.number;

				for (const verseNode of chapterNode.verse) {
					const verse = +verseNode.number;
					const text = verseNode._text || '';
					collected.push({ text, book: bookLabel, chapter, verse });
				}
			}
		}
	}

	return collected;
}