Skip to content

Commit

Permalink
Merge pull request #11 from oramasearch/feat/code-embeddings
Browse files Browse the repository at this point in the history
feat: adds custom models
  • Loading branch information
micheleriva authored Nov 5, 2024
2 parents 37c1f10 + 6e339cc commit ec297e0
Show file tree
Hide file tree
Showing 11 changed files with 440 additions and 37 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@

posting_storage
.idea
.custom_models
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions collection_manager/src/collection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ use std::{
use anyhow::anyhow;
use dashmap::DashMap;
use document_storage::DocumentStorage;
use nlp::{locales::Locale, TextParser};
use ordered_float::NotNan;
use nlp::locales::Locale;
use nlp::Parser;
use serde_json::Value;
use storage::Storage;
use string_index::{
Expand Down
3 changes: 2 additions & 1 deletion embeddings/.gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
.fastembed_cache
.fastembed_cache
.custom_models
6 changes: 6 additions & 0 deletions embeddings/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ edition = "2021"
name = "pq"
path = "./src/bin/pq.rs"

[[bin]]
name = "embeddings"
path = "./src/bin/embeddings.rs"

[dependencies]
anyhow = "1.0.92"
fastembed = { version = "4.1.0", features = ["ort-download-binaries"] }
Expand All @@ -20,3 +24,5 @@ rand = "0.8.5"
num-traits = "0.2.19"
reductive = { version = "0.9.0" }
rand_chacha = "0.3.1"
reqwest = { version = "0.12.9", features = ["blocking"] }
strum_macros = "0.26.4"
37 changes: 37 additions & 0 deletions embeddings/src/bin/embeddings.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
use anyhow::Result;
use embeddings::custom_models::{CustomModel, ModelFileConfig};
use embeddings::{load_models, OramaModels};

fn main() -> Result<()> {
let models = load_models();

let embedding = models.embed(
OramaModels::JinaV2BaseCode,
vec![r"
/**
* This method is needed to used because of issues like: https://github.com/askorama/orama/issues/301
* that issue is caused because the array that is pushed is huge (>100k)
*
* @example
* ```ts
* safeArrayPush(myArray, [1, 2])
* ```
*/
export function safeArrayPush<T>(arr: T[], newArr: T[]): void {
if (newArr.length < MAX_ARGUMENT_FOR_STACK) {
Array.prototype.push.apply(arr, newArr)
} else {
const newArrLength = newArr.length
for (let i = 0; i < newArrLength; i += MAX_ARGUMENT_FOR_STACK) {
Array.prototype.push.apply(arr, newArr.slice(i, i + MAX_ARGUMENT_FOR_STACK))
}
}
}
".to_string()],
Some(1),
)?;

dbg!(embedding.first().unwrap());

Ok(())
}
54 changes: 40 additions & 14 deletions embeddings/src/bin/pq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,49 @@ use embeddings::pq;
/// Demo entry point for the `pq` binary as it appears in this listing: loads the
/// models, runs three `embed` calls and debug-prints the result of the last one.
//
// NOTE(review): this listing looks like a diff rendered without +/- markers. The
// active `let vectors` / `let new_vector` statements duplicate the commented-out
// copies and their results are never read here, and the raw string further down
// contains Rust statements (`let quantizer…`, `let quantized…`, `dbg!(quantized);`)
// in the middle of the JavaScript sample — presumably removed lines. Confirm
// against the actual file before relying on this text.
fn main() -> Result<()> {
let models = load_models();

// Embeds six product descriptions with the MultilingualE5Small model; `vectors`
// is not read again in this listing.
let vectors = models.embed(embeddings::OramaModels::MultilingualE5Small, vec![
"CASUAL COMFORT, SPORTY STYLE.Slide into comfort in the lightweight and sporty Nike Benassi JDI Slide. It features the Nike logo on the foot strap, which is lined in super soft fabric. The foam midsole brings that beach feeling to your feet and adds spring to your kicked-back style.Benefits1-piece, synthetic leather strap is lined with super soft, towel-like fabric.The foam midsole doubles as an outsole, adding lightweight cushioning.Flex grooves let you move comfortably.Shown: Black/WhiteStyle: 343880-090".to_string(),
"CLASSIC SUPPORT AND COMFORT.The Nike Air Monarch IV gives you classic style with real leather and plenty of lightweight Nike Air cushioning to keep you moving in comfort.BenefitsLeather and synthetic leather team up for durability and classic comfort.An Air-Sole unit runs the length of your foot for cushioning, comfort and support.Rubber sole is durable and provides traction".to_string(),
"STAY TRUE TO YOUR TEAM ALL DAY, EVERY DAY, GAME DAY.\nRep your favorite team and player anytime in the NFL Baltimore Ravens Game Jersey, inspired by what they&apos;re wearing on the field and designed for total comfort.\nTAILORED FIT\nThis jersey features a tailored fit designed for movement.\n\nLIGHT, SOFT FEEL\nScreen-print numbers provide a light and soft feel".to_string(),
"STAY TRUE TO YOUR TEAM ALL DAY, EVERY DAY, GAME DAY.\nRep your favorite team and player anytime in the NFL Indianapolis Colts Game Jersey, inspired by what they&apos;re wearing on the field and designed for total comfort.\nTAILORED FIT\nThis jersey features a tailored fit designed for movement.\n\nCLEAN COMFORT\nThe no-tag neck label offers clean comfort.\n\nLIGHT, SOFT FEEL\nScreen-print numbers provide a light and soft feel.\n\nAdditional Details\n\n\nStrategic ventilation for breathability\nWoven jock tag at front lower left\nTPU shield at V-neck\n\n\n\nFabric: 100% recycled polyester\nMachine Wash\nImportedShown: Gym BlueStyle: 468955-442".to_string(),
"A GAME-DAY ESSENTIAL.Featuring comfortable, absorbent fabric, the Nike Swoosh Wristbands stretch with you and keep your hands dry, so you can play your best even when the game heat up.Product DetailsWidth: 3&quot;Sold in pairsSwoosh design embroideryFabric: 72% cotton/12% nylon/11% polyester/4% rubber/1% spandexMachine washImportedShown: White/BlackStyle: NNN04-101".to_string(),
"MATCH-READY COMFORT FOR YOUR FEET.The Nike Academy Socks are designed to keep you comfortable during play with soft, sweat-wicking fabric with arch support.BenefitsNike Dri-FIT technology moves sweat away from your skin for quicker evaporation, helping you stay dry and comfortable.Reinforced heel and toe add durability in high-wear areas.Snug band wraps around the arch for a supportive feel.Product DetailsLeft/right specific98% nylon/2% spandexMachine washImportedShown: Varsity Royal/WhiteStyle: SX4120-402".to_string()
], None)?;
// Commented-out copy of the product-quantization demo: it builds a
// `pq::ProductQuantizer` from `vectors` and quantizes `new_vector`.
// let vectors = models.embed(embeddings::OramaModels::MultilingualE5Small, vec![
// "CASUAL COMFORT, SPORTY STYLE.Slide into comfort in the lightweight and sporty Nike Benassi JDI Slide. It features the Nike logo on the foot strap, which is lined in super soft fabric. The foam midsole brings that beach feeling to your feet and adds spring to your kicked-back style.Benefits1-piece, synthetic leather strap is lined with super soft, towel-like fabric.The foam midsole doubles as an outsole, adding lightweight cushioning.Flex grooves let you move comfortably.Shown: Black/WhiteStyle: 343880-090".to_string(),
// "CLASSIC SUPPORT AND COMFORT.The Nike Air Monarch IV gives you classic style with real leather and plenty of lightweight Nike Air cushioning to keep you moving in comfort.BenefitsLeather and synthetic leather team up for durability and classic comfort.An Air-Sole unit runs the length of your foot for cushioning, comfort and support.Rubber sole is durable and provides traction".to_string(),
// "STAY TRUE TO YOUR TEAM ALL DAY, EVERY DAY, GAME DAY.\nRep your favorite team and player anytime in the NFL Baltimore Ravens Game Jersey, inspired by what they&apos;re wearing on the field and designed for total comfort.\nTAILORED FIT\nThis jersey features a tailored fit designed for movement.\n\nLIGHT, SOFT FEEL\nScreen-print numbers provide a light and soft feel".to_string(),
// "STAY TRUE TO YOUR TEAM ALL DAY, EVERY DAY, GAME DAY.\nRep your favorite team and player anytime in the NFL Indianapolis Colts Game Jersey, inspired by what they&apos;re wearing on the field and designed for total comfort.\nTAILORED FIT\nThis jersey features a tailored fit designed for movement.\n\nCLEAN COMFORT\nThe no-tag neck label offers clean comfort.\n\nLIGHT, SOFT FEEL\nScreen-print numbers provide a light and soft feel.\n\nAdditional Details\n\n\nStrategic ventilation for breathability\nWoven jock tag at front lower left\nTPU shield at V-neck\n\n\n\nFabric: 100% recycled polyester\nMachine Wash\nImportedShown: Gym BlueStyle: 468955-442".to_string(),
// "A GAME-DAY ESSENTIAL.Featuring comfortable, absorbent fabric, the Nike Swoosh Wristbands stretch with you and keep your hands dry, so you can play your best even when the game heat up.Product DetailsWidth: 3&quot;Sold in pairsSwoosh design embroideryFabric: 72% cotton/12% nylon/11% polyester/4% rubber/1% spandexMachine washImportedShown: White/BlackStyle: NNN04-101".to_string(),
// "MATCH-READY COMFORT FOR YOUR FEET.The Nike Academy Socks are designed to keep you comfortable during play with soft, sweat-wicking fabric with arch support.BenefitsNike Dri-FIT technology moves sweat away from your skin for quicker evaporation, helping you stay dry and comfortable.Reinforced heel and toe add durability in high-wear areas.Snug band wraps around the arch for a supportive feel.Product DetailsLeft/right specific98% nylon/2% spandexMachine washImportedShown: Varsity Royal/WhiteStyle: SX4120-402".to_string()
// ], None)?;
//
// let new_vector = models.embed(embeddings::OramaModels::MultilingualE5Small, vec![
// "COMFORTABLE COVERAGE FOR YOUR SHINS.Designed to take the impacts of the game, the Nike J Shin Guards are made with a tough composite shell and perforations for ventilated comfort.BenefitsAnatomical left/right construction contours for comfort.Perforations enhance ventilation.EVA foam provides soft cushioning.Product DetailsMaterials: 80% polyethylene/20% EVAImportedShown: Black/WhiteStyle: SP0040-009".to_string()
// ], None)?;
//
// let quantizer = pq::ProductQuantizer::try_new(vectors)?;
// let quantized = quantizer.quantize(new_vector);

// Embeds a single product description with the MultilingualE5Small model;
// `new_vector` is not read again in this listing.
let new_vector = models.embed(embeddings::OramaModels::MultilingualE5Small, vec![
"COMFORTABLE COVERAGE FOR YOUR SHINS.Designed to take the impacts of the game, the Nike J Shin Guards are made with a tough composite shell and perforations for ventilated comfort.BenefitsAnatomical left/right construction contours for comfort.Perforations enhance ventilation.EVA foam provides soft cushioning.Product DetailsMaterials: 80% polyethylene/20% EVAImportedShown: Black/WhiteStyle: SP0040-009".to_string()
], None)?;
// Embeds one code sample with the JinaV2BaseCode model. The sample is a raw
// string, so every line up to the closing quote is part of the model input.
let vector = models.embed(
embeddings::OramaModels::JinaV2BaseCode,
vec![r"
import { create, insert, search } from '@orama/orama'
const db = create({
schema: {
title: 'string',
description: 'string'
}
})
let quantizer = pq::ProductQuantizer::try_new(vectors)?;
let quantized = quantizer.quantize(new_vector);
insert(db, {
title: 'foo',
description: 'bar'
})
search(db, {
term: 'foo'
})
dbg!(quantized);
"
.to_string()],
Some(1),
)?;

// Debug-print the result of the code-sample embedding.
dbg!(vector);

Ok(())
}
Loading

0 comments on commit ec297e0

Please sign in to comment.