Skip to content

Commit

Permalink
Merge branch 'main' into LanceSparkId
Browse files Browse the repository at this point in the history
  • Loading branch information
SaintBacchus authored Jan 13, 2025
2 parents 7cba14f + 2142594 commit 6df2f49
Show file tree
Hide file tree
Showing 23 changed files with 239 additions and 56 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/cargo-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ on:
workflow_dispatch:
inputs:
tag:
description: 'Tag to publish (e.g., v1.0.0)'
description: "Tag to publish (e.g., v1.0.0)"
required: true
type: string

Expand All @@ -24,7 +24,7 @@ jobs:
env:
# Need up-to-date compilers for kernels
CC: clang-18
CXX: clang-18
CXX: clang++-18
defaults:
run:
working-directory: .
Expand Down Expand Up @@ -53,5 +53,5 @@ jobs:
- uses: albertlockett/[email protected]
with:
registry-token: ${{ secrets.CARGO_REGISTRY_TOKEN }}
args: '--all-features'
args: "--all-features"
path: .
2 changes: 1 addition & 1 deletion .github/workflows/ci-benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
env:
# Need up-to-date compilers for kernels
CC: clang-18
CXX: clang-18
CXX: clang++-18
defaults:
run:
shell: bash
Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ jobs:
env:
# Need up-to-date compilers for kernels
CC: clang-18
CXX: clang-18
CXX: clang++-18
steps:
- uses: actions/checkout@v4
with:
Expand Down Expand Up @@ -67,8 +67,9 @@ jobs:
sudo apt install -y protobuf-compiler libssl-dev
- name: Lint Rust
run: |
ALL_FEATURES=`cargo metadata --format-version=1 --no-deps | jq -r '.packages[] | .features | keys | .[]' | grep -v protoc | sort | uniq | paste -s -d "," -`
cargo fmt --all -- --check
cargo clippy --locked --all-features --tests -- -D warnings
cargo clippy --locked --features ${ALL_FEATURES} --tests -- -D warnings
- name: Build
run: |
python -m venv venv
Expand Down
43 changes: 29 additions & 14 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,9 @@ jobs:
sudo apt install -y protobuf-compiler libssl-dev
- name: Run clippy
run: |
ALL_FEATURES=`cargo metadata --format-version=1 --no-deps | jq -r '.packages[] | .features | keys | .[]' | grep -v protoc | sort | uniq | paste -s -d "," -`
cargo clippy --version
cargo clippy --locked --all-features --tests --benches -- -D warnings
cargo clippy --locked --features ${ALL_FEATURES} --tests --benches -- -D warnings
linux-build:
runs-on: "ubuntu-24.04"
timeout-minutes: 45
Expand All @@ -59,7 +60,7 @@ jobs:
env:
# Need up-to-date compilers for kernels
CC: clang
CXX: clang
CXX: clang++
steps:
- uses: actions/checkout@v4
# pin the toolchain version to avoid surprises
Expand All @@ -81,13 +82,18 @@ jobs:
- name: Run tests
if: ${{ matrix.toolchain == 'stable' }}
run: |
cargo llvm-cov --locked --workspace --codecov --output-path coverage.codecov --all-features
ALL_FEATURES=`cargo metadata --format-version=1 --no-deps | jq -r '.packages[] | .features | keys | .[]' | grep -v protoc | sort | uniq | paste -s -d "," -`
cargo llvm-cov --locked --workspace --codecov --output-path coverage.codecov --features ${ALL_FEATURES}
- name: Build tests (nightly)
run: cargo test --locked --all-features --workspace --no-run
if: ${{ matrix.toolchain != 'stable' }}
run: |
ALL_FEATURES=`cargo metadata --format-version=1 --no-deps | jq -r '.packages[] | .features | keys | .[]' | grep -v protoc | sort | uniq | paste -s -d "," -`
cargo test --locked --features ${ALL_FEATURES} --workspace --no-run
- name: Run tests (nightly)
if: ${{ matrix.toolchain != 'stable' }}
run: |
cargo test --all-features --workspace
ALL_FEATURES=`cargo metadata --format-version=1 --no-deps | jq -r '.packages[] | .features | keys | .[]' | grep -v protoc | sort | uniq | paste -s -d "," -`
cargo test --features ${ALL_FEATURES} --workspace
- name: Upload coverage to Codecov
if: ${{ matrix.toolchain == 'stable' }}
uses: codecov/codecov-action@v4
Expand All @@ -113,20 +119,22 @@ jobs:
sudo apt install -y protobuf-compiler libssl-dev pkg-config
- name: Build tests
run: |
cargo test --locked --all-features --no-run
ALL_FEATURES=`cargo metadata --format-version=1 --no-deps | jq -r '.packages[] | .features | keys | .[]' | grep -v protoc | sort | uniq | paste -s -d "," -`
cargo test --locked --features ${ALL_FEATURES} --no-run
- name: Start DynamoDB local for tests
run: |
docker run -d -e AWS_ACCESS_KEY_ID=DUMMYKEY -e AWS_SECRET_ACCESS_KEY=DUMMYKEY -p 8000:8000 amazon/dynamodb-local
- name: Run tests
run: |
cargo test --locked --all-features
ALL_FEATURES=`cargo metadata --format-version=1 --no-deps | jq -r '.packages[] | .features | keys | .[]' | grep -v protoc | sort | uniq | paste -s -d "," -`
cargo test --locked --features ${ALL_FEATURES}
build-no-lock:
runs-on: ubuntu-24.04
timeout-minutes: 30
env:
# Need up-to-date compilers for kernels
CC: clang
CXX: clang
CXX: clang++
steps:
- uses: actions/checkout@v4
# Remove Cargo.lock to force a fresh build
Expand All @@ -139,7 +147,9 @@ jobs:
sudo apt update
sudo apt install -y protobuf-compiler libssl-dev
- name: Build all
run: cargo build --benches --all-features --tests
run: |
ALL_FEATURES=`cargo metadata --format-version=1 --no-deps | jq -r '.packages[] | .features | keys | .[]' | grep -v protoc | sort | uniq | paste -s -d "," -`
cargo build --benches --features ${ALL_FEATURES} --tests
mac-build:
runs-on: "macos-14"
timeout-minutes: 45
Expand All @@ -165,11 +175,14 @@ jobs:
run: |
rustup update ${{ matrix.toolchain }} && rustup default ${{ matrix.toolchain }}
- name: Build tests
run: cargo test --locked --all-features --no-run
run: |
cargo test --locked --features fp16kernels,cli,tensorflow,dynamodb,substrait --no-run
- name: Run tests
run: cargo test --all-features
run: |
cargo test --features fp16kernels,cli,tensorflow,dynamodb,substrait
- name: Check benchmarks
run: cargo check --benches --all-features
run: |
cargo check --benches --features fp16kernels,cli,tensorflow,dynamodb,substrait
windows-build:
runs-on: windows-latest
defaults:
Expand Down Expand Up @@ -203,7 +216,7 @@ jobs:
env:
# Need up-to-date compilers for kernels
CC: clang
CXX: clang
CXX: clang++
steps:
- uses: actions/checkout@v4
with:
Expand All @@ -218,4 +231,6 @@ jobs:
with:
toolchain: ${{ matrix.msrv }}
- name: cargo +${{ matrix.msrv }} check
run: cargo check --workspace --tests --benches --all-features
run: |
ALL_FEATURES=`cargo metadata --format-version=1 --no-deps | jq -r '.packages[] | .features | keys | .[]' | grep -v protoc | sort | uniq | paste -s -d "," -`
cargo check --workspace --tests --benches --features ${ALL_FEATURES}
25 changes: 23 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 0 additions & 2 deletions python/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 6 additions & 18 deletions python/python/lance/ray/sink.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,23 +46,8 @@ def _pd_to_arrow(
tbl.schema = tbl.schema.remove_metadata()
return tbl
elif isinstance(df, pa.Table):
fields = df.schema.names
new_columns = []
new_fields = []
for field in fields:
col = df[field]
new_field = pa.field(field, col.type)
if (
pa.types.is_null(col.type)
and schema.field_by_name(field).type == pa.string()
):
new_field = pa.field(field, pa.string())
col = pa.compute.if_else(pa.compute.is_null(col), NONE_ARROW_STR, col)
new_columns.append(col)
new_fields.append(new_field)
new_schema = pa.schema(fields=new_fields)
new_table = pa.Table.from_arrays(new_columns, schema=new_schema)
return new_table
if schema is not None:
return df.cast(schema)
return df


Expand Down Expand Up @@ -439,6 +424,7 @@ def write_lance(
output_uri: str,
*,
schema: Optional[pa.Schema] = None,
mode: Literal["create", "append", "overwrite"] = "create",
transform: Optional[
Callable[[pa.Table], Union[pa.Table, Generator[None, pa.Table, None]]]
] = None,
Expand Down Expand Up @@ -485,7 +471,9 @@ def write_lance(
),
batch_size=max_rows_per_file,
).write_datasink(
LanceCommitter(output_uri, schema=schema, storage_options=storage_options)
LanceCommitter(
output_uri, schema=schema, mode=mode, storage_options=storage_options
)
)


Expand Down
23 changes: 23 additions & 0 deletions python/python/tests/test_ray.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,3 +138,26 @@ def f(row):
assert len(pylist) == 10
for item in pylist:
assert item is None


@pytest.mark.filterwarnings("ignore::DeprecationWarning")
def test_ray_write_lance_none_str_datasink(tmp_path: Path):
    """Round-trip an all-``None`` string column through ``LanceDatasink``.

    Every row produced by the Ray pipeline sets ``"str"`` to ``None`` while the
    explicit schema declares that column as ``pa.string()``. The test checks
    that the written dataset has the expected row count, keeps the declared
    schema, and reads the column back as ``None`` for every row.
    """

    def f(row):
        return {
            "id": row["id"],
            "str": None,
        }

    schema = pa.schema([pa.field("id", pa.int64()), pa.field("str", pa.string())])

    sink = LanceDatasink(tmp_path, schema=schema)
    (ray.data.range(10).map(f).write_datasink(sink))
    ds = lance.dataset(tmp_path)
    # Must be asserted: a bare comparison is evaluated and discarded, so a
    # wrong row count would never fail the test.
    assert ds.count_rows() == 10
    assert ds.schema == schema

    tbl = ds.to_table()
    pylist = tbl["str"].to_pylist()
    assert len(pylist) == 10
    for item in pylist:
        assert item is None
8 changes: 8 additions & 0 deletions rust/lance-encoding-datafusion/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,17 @@ lance-datagen.workspace = true

[build-dependencies]
prost-build.workspace = true
protobuf-src = { version = "2.1", optional = true }

[target.'cfg(target_os = "linux")'.dev-dependencies]
pprof = { workspace = true }

[features]
# Opt-in feature: pulls in the optional `protobuf-src` build dependency so that
# build.rs can point PROTOC at the vendored protobuf compiler.
protoc = ["dep:protobuf-src"]

[package.metadata.docs.rs]
# docs.rs uses an older version of Ubuntu that does not have the necessary protoc version
features = ["protoc"]

[lints]
workspace = true
4 changes: 4 additions & 0 deletions rust/lance-encoding-datafusion/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ use std::io::Result;
fn main() -> Result<()> {
println!("cargo:rerun-if-changed=protos");

#[cfg(feature = "protoc")]
// Use vendored protobuf compiler if requested.
std::env::set_var("PROTOC", protobuf_src::protoc());

let mut prost_build = prost_build::Config::new();
prost_build.extern_path(".lance.encodings", "::lance_encoding::format::pb");
prost_build.protoc_arg("--experimental_allow_proto3_optional");
Expand Down
8 changes: 8 additions & 0 deletions rust/lance-encoding/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,18 @@ rand_xoshiro = "0.6.0"

[build-dependencies]
prost-build.workspace = true
protobuf-src = { version = "2.1", optional = true }

[target.'cfg(target_os = "linux")'.dev-dependencies]
pprof = { workspace = true }

[features]
protoc = ["dep:protobuf-src"]

[package.metadata.docs.rs]
# docs.rs uses an older version of Ubuntu that does not have the necessary protoc version
features = ["protoc"]

[[bench]]
name = "decoder"
harness = false
Expand Down
Loading

0 comments on commit 6df2f49

Please sign in to comment.