Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: initialize this project #1

Merged
merged 22 commits into from
Dec 2, 2024
52 changes: 52 additions & 0 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
name: Rust

on:
  push:
    branches:
      - main
  pull_request:

env:
  CARGO_TERM_COLOR: always
  # Disable full debug symbol generation to speed up CI builds and keep memory down.
  # "1" means line tables only, which is enough for readable panic backtraces.
  # NOTE(review): Swatinem/rust-cache@v2 folds RUSTFLAGS into its cache key, so
  # keep this value consistent across jobs — TODO confirm this is the env var the
  # original cache-key comment referred to.
  RUSTFLAGS: "-C debuginfo=1"
  RUST_BACKTRACE: "1"
  # According to https://matklad.github.io/2021/09/04/fast-rust-builds.html,
  # CI builds are faster with incremental compilation disabled.
  CARGO_INCREMENTAL: "0"
  CARGO_BUILD_JOBS: "1"

jobs:
  lint:
    runs-on: ubuntu-24.04
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@v4
      - uses: actions-rust-lang/setup-rust-toolchain@v1
        with:
          toolchain: 1.83
          components: rustfmt, clippy
      - uses: Swatinem/rust-cache@v2
      - name: Check formatting
        run: cargo fmt -- --check
      - name: Check clippy
        run: cargo clippy --tests --benches -- -D warnings

  test:
    strategy:
      matrix:
        machine:
          - ubuntu-24.04
          # - ubuntu-2404-4x-arm64
          - macos-14
    runs-on: ${{ matrix.machine }}
    # Match the lint job's timeout so a hung test cannot hold a runner indefinitely.
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@v4
      - uses: actions-rust-lang/setup-rust-toolchain@v1
        with:
          toolchain: 1.83
      - uses: rui314/setup-mold@v1
      - uses: Swatinem/rust-cache@v2
      - name: Run tests
        run: cargo test
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,8 @@ Cargo.lock
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
#.idea/

# Added by cargo

/target
30 changes: 30 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
[package]
name = "ocra"
version = "0.1.0"
authors = ["[email protected]"]
description = "OCRA: A Rust implementation of Cache in arrow-rs' ObjectStore interface"
edition = "2021"
license-file = "LICENSE"
keywords = ["cache", "object-store", "arrow"]
categories = ["caching"]

[dependencies]
async-trait = "0.1"
# Tilde (~) requirements pin the minor version (e.g. "~1.9" means >=1.9, <2.0
# for a 1.x crate, and >=0.4.0, <0.5.0 for a 0.x crate) so a `cargo update`
# cannot pull in a semver-incompatible 0.x bump.
bytes = "~1.9"
futures = "0.3"
log = "~0.4"
moka = { version = "~0.12", features = ["future"] }
num_cpus = "1.16"
# NOTE(review): "~0.11" resolves to >=0.11.0, <0.12.0; widen the range once a
# compatible object_store 0.12 is verified.
object_store = "~0.11"
sysinfo = "~0.32"
# Only the `sync` feature is needed by the library itself; tests/benches pull
# in the full runtime via dev-dependencies below.
tokio = { version = "1", features = ["sync"] }

[dev-dependencies]
criterion = { version = "~0.5", features = ["async_tokio"] }
rand = "~0.8"
tempfile = "~3.14"
tokio = { version = "1", features = ["full"] }

[[bench]]
# Criterion benchmarks need `harness = false` so criterion's own main() runs
# instead of the default libtest bench harness.
name = "memory"
harness = false
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
# ocra
OCRA: Object-store Cache in Rust for All
# OCRA

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we need an okra emoji lol

**OCRA**: (**A**) (**R**)ust (**C**)ache implementation using the _arrow-rs_ [(**O**)bjectStore](https://docs.rs/object_store/latest/object_store/) trait.
85 changes: 85 additions & 0 deletions benches/memory.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
//! Benchmark for in-memory page cache.
//!
//!

use std::{fs::File, io::Write, sync::Arc};

use criterion::{criterion_group, criterion_main, Criterion};
use object_store::{path::Path, ObjectStore};
use rand::Rng;

use ocra::{memory::InMemoryCache, paging::PageCache};

/// Measure hot-read latency of [`InMemoryCache`] for several page sizes.
///
/// A 1 GiB file of random bytes is written to a temp path, every page is
/// loaded once to warm the cache, then the benchmark reads random pages.
/// The loader closure in the benchmark panics, which asserts that every
/// benchmarked read is served from the warm cache.
fn memory_cache_bench(c: &mut Criterion) {
    let rt = tokio::runtime::Runtime::new().unwrap();
    let mut rng = rand::thread_rng();

    const FILE_SIZE: usize = 1024 * 1024 * 1024;
    // TODO: support other object stores later
    let store: Arc<dyn ObjectStore> = Arc::new(object_store::local::LocalFileSystem::new());
    let temp_file = tempfile::NamedTempFile::new().unwrap().into_temp_path();

    // Fill the backing file with FILE_SIZE random bytes, 128 KiB at a time.
    {
        let mut out = File::create(temp_file.to_str().unwrap()).unwrap();
        let mut chunk = vec![0_u8; 128 * 1024];
        for _ in 0..FILE_SIZE / (128 * 1024) {
            rng.fill(&mut chunk[..]);
            out.write_all(&chunk).unwrap();
        }
    }

    for page_size in &[1024 * 1024, 8 * 1024 * 1024] {
        // Capacity slightly exceeds the file so every page stays resident.
        let cache = Arc::new(InMemoryCache::new(FILE_SIZE + 32 * 1024, *page_size));
        let location = Path::from(temp_file.to_str().unwrap());

        // Warm up the cache: fetch every page once through the object store.
        println!("Starting warm up cache with page size: {}", page_size);
        rt.block_on(async {
            let path = location.clone();
            for page_idx in 0..FILE_SIZE / page_size {
                let loader = {
                    let store = store.clone();
                    let location = path.clone();
                    async move {
                        store
                            .get_range(&location, page_idx * page_size..(page_idx + 1) * page_size)
                            .await
                    }
                };
                let data = cache.get_with(&path, page_idx as u32, loader).await.unwrap();
                assert!(!data.is_empty());
            }
        });
        println!("Warm up cache done");

        let bench_name = format!("memory_cache,warm,page_size={}", page_size);
        c.bench_function(bench_name.as_str(), |b| {
            b.to_async(&rt).iter(|| {
                let mut rng = rand::thread_rng();
                let cache = cache.clone();
                let path = location.clone();
                async move {
                    let page_id = rng.gen_range(0..FILE_SIZE / page_size);

                    // The loader must never run: every page was warmed above.
                    let _data = cache
                        .get_with(&path, page_id as u32, async {
                            panic!("Should not be called page_id={}", page_id)
                        })
                        .await
                        .unwrap();
                }
            })
        });
    }
}

criterion_group!(
    name = benches;
    config = Criterion::default().significance_level(0.1).sample_size(10);
    targets = memory_cache_bench
);

criterion_main!(benches);
40 changes: 40 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
//! **OCRA**: (**A**) (**R**)ust (**C**)ache implementation for *arrow-rs*
//! [(**O**)bjectStore](object_store::ObjectStore).
//!
//! This crate provides `ObjectStore` implementations that layer a cache
//! in front of an existing store.
//!
//! For example, [`ReadThroughCache`] wraps an existing `ObjectStore`
//! instance with a [`PageCache`](paging::PageCache):
//!
//! ```no_run
//! # use std::sync::Arc;
//! # use tokio::runtime::Runtime;
//! use object_store::{ObjectStore, local::LocalFileSystem, path::Path};
//! use ocra::{ReadThroughCache, memory::InMemoryCache};
//!
//! # let mut rt = Runtime::new().unwrap();
//! # rt.block_on(async {
//! let fs = Arc::new(LocalFileSystem::new());
//! // Use 75% of system memory for cache
//! let memory_cache = Arc::new(InMemoryCache::with_sys_memory(0.75).build());
//! let cached_store: Arc<dyn ObjectStore> = Arc::new(ReadThroughCache::new(fs, memory_cache));
//!
//! // Now you can use `cached_store` as a regular ObjectStore
//! let path = Path::from("my-key");
//! let data = cached_store.get_range(&path, 1024..2048).await.unwrap();
//! # })
//! ```

// Public cache building blocks.
pub mod memory;
pub mod paging;

// Internal read-through wrapper; its public type is re-exported below.
mod read_through;

// A dedicated error module is planned but not yet wired in.
// pub mod error;

// We reuse `object_store` Error and Result to make this crate work well
// with the rest of object_store implementations.
pub use object_store::{Error, Result};

pub use read_through::ReadThroughCache;
Loading