-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: initialize this project #1
Changes from all commits
33aa751
5fc3dbe
e9d5e88
1936912
765d66b
b2333cf
8eec09a
b348979
c59b95b
9982f08
df38009
918e25a
925fc6a
1f468c9
a5ee3f8
27eceeb
0e47986
8c83417
4b7fbf7
2f4fe61
28eada3
a9aacb2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
name: Rust | ||
on: | ||
push: | ||
branches: | ||
- main | ||
pull_request: | ||
|
||
env: | ||
# This env var is used by Swatinem/rust-cache@v2 for the cache | ||
# key, so we set it to make sure it is always consistent. | ||
CARGO_TERM_COLOR: always | ||
# Disable full debug symbol generation to speed up CI build and keep memory down | ||
# "1" means line tables only, which is useful for panic tracebacks. | ||
RUSTFLAGS: "-C debuginfo=1" | ||
RUST_BACKTRACE: "1" | ||
# according to: https://matklad.github.io/2021/09/04/fast-rust-builds.html | ||
# CI builds are faster with incremental disabled. | ||
CARGO_INCREMENTAL: "0" | ||
CARGO_BUILD_JOBS: "1" | ||
|
||
jobs: | ||
lint: | ||
runs-on: ubuntu-24.04 | ||
timeout-minutes: 30 | ||
steps: | ||
- uses: actions/checkout@v4 | ||
- uses: actions-rust-lang/setup-rust-toolchain@v1 | ||
with: | ||
toolchain: 1.83 | ||
components: rustfmt, clippy | ||
- uses: Swatinem/rust-cache@v2 | ||
- name: Check formatting | ||
run: cargo fmt -- --check | ||
- name: Check clippy | ||
run: cargo clippy --tests --benches -- -D warnings | ||
test: | ||
strategy: | ||
matrix: | ||
machine: | ||
- ubuntu-24.04 | ||
# - ubuntu-2404-4x-arm64 | ||
- macos-14 | ||
runs-on: ${{ matrix.machine }} | ||
steps: | ||
- uses: actions/checkout@v4 | ||
- uses: actions-rust-lang/setup-rust-toolchain@v1 | ||
with: | ||
toolchain: 1.83 | ||
- uses: rui314/setup-mold@v1 | ||
- uses: Swatinem/rust-cache@v2 | ||
- name: Run tests | ||
run: cargo test |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
[package] | ||
name = "ocra" | ||
version = "0.1.0" | ||
authors = ["[email protected]"] | ||
description = "OCRA: A Rust implementation of Cache in arrow-rs' ObjectStore interface" | ||
edition = "2021" | ||
license-file = "LICENSE" | ||
keywords = ["cache", "object-store", "arrow"] | ||
categories = ["caching"] | ||
|
||
[dependencies] | ||
async-trait = "0.1" | ||
bytes = "~1.9" | ||
futures = "0.3" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
log = "~0.4" | ||
moka = { version = "~0.12", features = ["future"] } | ||
num_cpus = "1.16" | ||
object_store = "~0.11" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we should have a range here There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is |
||
sysinfo = "~0.32" | ||
tokio = { version = "1", features = ["sync"] } | ||
|
||
[dev-dependencies] | ||
criterion = { version = "~0.5", features = ["async_tokio"] } | ||
tempfile = "~3.14" | ||
tokio = { version = "1", features = ["full"] } | ||
rand = "~0.8" | ||
|
||
[[bench]] | ||
name = "memory" | ||
harness = false |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
# ocra | ||
OCRA: Object-store Cache in Rust for All | ||
# OCRA | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we need an okra emoji lol |
||
**OCRA**: (**A**) (**R**)ust (**C**)ache implementation using _arrow-rs_ [(**O**)bjectStore](https://docs.rs/object_store/latest/object_store/) trait. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
//! Benchmark for in-memory page cache. | ||
//! | ||
//! | ||
|
||
use std::{fs::File, io::Write, sync::Arc}; | ||
|
||
use criterion::{criterion_group, criterion_main, Criterion}; | ||
use object_store::{path::Path, ObjectStore}; | ||
use rand::Rng; | ||
|
||
use ocra::{memory::InMemoryCache, paging::PageCache}; | ||
|
||
fn memory_cache_bench(c: &mut Criterion) { | ||
let rt = tokio::runtime::Runtime::new().unwrap(); | ||
let mut rng = rand::thread_rng(); | ||
|
||
const FILE_SIZE: usize = 1024 * 1024 * 1024; | ||
// TODO: support other object stores later | ||
let store: Arc<dyn ObjectStore> = Arc::new(object_store::local::LocalFileSystem::new()); | ||
let temp_file = tempfile::NamedTempFile::new().unwrap().into_temp_path(); | ||
{ | ||
let mut writer = File::create(temp_file.to_str().unwrap()).unwrap(); | ||
let mut buf = vec![0_u8; 128 * 1024]; | ||
|
||
for _ in 0..FILE_SIZE / (128 * 1024) { | ||
rng.fill(&mut buf[..]); | ||
writer.write_all(&buf).unwrap(); | ||
} | ||
} | ||
|
||
for page_size in &[1024 * 1024, 8 * 1024 * 1024] { | ||
let cache = Arc::new(InMemoryCache::new(FILE_SIZE + 32 * 1024, *page_size)); | ||
let location = Path::from(temp_file.to_str().unwrap()); | ||
|
||
// Warm up the cache | ||
println!("Starting warm up cache with page size: {}", page_size); | ||
rt.block_on(async { | ||
let loc = location.clone(); | ||
for i in 0..FILE_SIZE / page_size { | ||
let data = cache | ||
.get_with(&loc, i as u32, { | ||
let store = store.clone(); | ||
let location = loc.clone(); | ||
async move { | ||
store | ||
.get_range(&location, i * page_size..(i + 1) * page_size) | ||
.await | ||
} | ||
}) | ||
.await | ||
.unwrap(); | ||
assert!(!data.is_empty()); | ||
} | ||
}); | ||
println!("Warm up cache done"); | ||
|
||
c.bench_function( | ||
format!("memory_cache,warm,page_size={}", page_size).as_str(), | ||
|b| { | ||
b.to_async(&rt).iter(|| { | ||
let mut rng = rand::thread_rng(); | ||
let cache = cache.clone(); | ||
let loc = location.clone(); | ||
async move { | ||
let page_id = rng.gen_range(0..FILE_SIZE / page_size); | ||
|
||
let _data = cache | ||
.get_with(&loc, page_id as u32, async { | ||
panic!("Should not be called page_id={}", page_id) | ||
}) | ||
.await | ||
.unwrap(); | ||
} | ||
}) | ||
}, | ||
); | ||
} | ||
} | ||
|
||
criterion_group!( | ||
name=benches; | ||
config = Criterion::default().significance_level(0.1).sample_size(10); | ||
targets = memory_cache_bench); | ||
|
||
criterion_main!(benches); |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
//! **OCRA**: (**A**) (**R**)ust (**C**)ache implementation for *arrow-rs* | ||
//! [(**O**)bjectStore](object_store::ObjectStore). | ||
//! | ||
//! It offers a few `ObjectStore` implementations that work with | ||
//! caches. | ||
//! | ||
//! For example, you can use [`ReadThroughCache`] to wrap an existing | ||
//! `ObjectStore` instance with a [`PageCache`](paging::PageCache). | ||
//! | ||
//! ```no_run | ||
//! # use std::sync::Arc; | ||
//! # use tokio::runtime::Runtime; | ||
//! use object_store::{ObjectStore, local::LocalFileSystem, path::Path}; | ||
//! use ocra::{ReadThroughCache, memory::InMemoryCache}; | ||
//! | ||
//! # let mut rt = Runtime::new().unwrap(); | ||
//! # rt.block_on(async { | ||
//! let fs = Arc::new(LocalFileSystem::new()); | ||
//! // Use 75% of system memory for cache | ||
//! let memory_cache = Arc::new( | ||
//! InMemoryCache::with_sys_memory(0.75).build()); | ||
//! let cached_store: Arc<dyn ObjectStore> = | ||
//! Arc::new(ReadThroughCache::new(fs, memory_cache)); | ||
//! | ||
//! // Now you can use `cached_store` as a regular ObjectStore | ||
//! let path = Path::from("my-key"); | ||
//! let data = cached_store.get_range(&path, 1024..2048).await.unwrap(); | ||
//! # }) | ||
//! ``` | ||
|
||
// pub mod error; | ||
pub mod memory; | ||
pub mod paging; | ||
mod read_through; | ||
|
||
// We reuse `object_store` Error and Result to make this crate work well | ||
// with the rest of object_store implementations. | ||
pub use object_store::{Error, Result}; | ||
|
||
pub use read_through::ReadThroughCache; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
~