Skip to content

Commit

Permalink
feat(test): add Huggingface behavior test (apache#3712)
Browse files Browse the repository at this point in the history
* feat: add huggingface fs behavior support

* feat: add huggingface scheme to builder

* chore: change root dir

* feat: add huggingface to java/nodejs/python binding

* chore: fix typo

* chore: rename folder

* feat: disable random root of huggingface

* feat: allow StatusCode 206 in read operation

* chore: drop duplicated status code checking

* feat: fix content length not being set from the response body

* chore: remove unused import.

* feat: use relative path in lister
  • Loading branch information
morristai authored Dec 7, 2023
1 parent 989dedb commit bb1d328
Show file tree
Hide file tree
Showing 10 changed files with 57 additions and 13 deletions.
7 changes: 6 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -174,4 +174,9 @@ OPENDAL_B2_ROOT=/path/to/dir
OPENDAL_B2_BUCKET=<bucket>
OPENDAL_B2_BUCKET_ID=<bucket_id>
OPENDAL_B2_APPLICATION_KEY_ID=<key_id>
OPENDAL_B2_APPLICATION_KEY=<application_key>
OPENDAL_B2_APPLICATION_KEY=<application_key>
# huggingface
OPENDAL_HUGGINGFACE_REPO_TYPE=dataset
OPENDAL_HUGGINGFACE_REPO_ID=opendal/huggingface-testdata
OPENDAL_HUGGINGFACE_REVISION=main
OPENDAL_HUGGINGFACE_ROOT=/testdata/
33 changes: 33 additions & 0 deletions .github/services/huggingface/huggingface/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Composite action that prepares the environment for the Huggingface
# behavior tests. It performs no setup beyond exporting configuration.
name: huggingface
description: "Behavior test for Huggingface File System"

runs:
  using: "composite"
  steps:
    - name: Setup
      shell: bash
      # Append the service configuration to $GITHUB_ENV so that the variables
      # are visible to the steps that run after this action.
      # NOTE(review): OPENDAL_DISABLE_RANDOM_ROOT is presumably required because
      # the test data lives at the fixed /testdata/ root -- confirm.
      run: |
        cat << EOF >> $GITHUB_ENV
        OPENDAL_HUGGINGFACE_REPO_TYPE=dataset
        OPENDAL_HUGGINGFACE_REPO_ID=opendal/huggingface-testdata
        OPENDAL_HUGGINGFACE_REVISION=main
        OPENDAL_HUGGINGFACE_ROOT=/testdata/
        OPENDAL_DISABLE_RANDOM_ROOT=true
        EOF
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ jobs:
# FIXME how to support HDFS services in other platforms?
# services-hdfs
services-http
services-huggingface
services-ipfs
services-ipmfs
services-memcached
Expand Down
2 changes: 2 additions & 0 deletions bindings/java/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ services-all = [
"services-gdrive",
# FIXME how to support HDFS services in bindings?
# "services-hdfs",
"services-huggingface",
"services-ipfs",
"services-memcached",
"services-mini-moka",
Expand Down Expand Up @@ -120,6 +121,7 @@ services-ftp = ["opendal/services-ftp"]
services-gdrive = ["opendal/services-gdrive"]
services-gridfs = ["opendal/services-gridfs"]
services-hdfs = ["opendal/services-hdfs"]
services-huggingface = ["opendal/services-huggingface"]
services-ipfs = ["opendal/services-ipfs"]
services-libsql = ["opendal/services-libsql"]
services-memcached = ["opendal/services-memcached"]
Expand Down
2 changes: 2 additions & 0 deletions bindings/nodejs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ services-all = [
"services-gdrive",
# FIXME how to support HDFS services in bindings?
# "services-hdfs",
"services-huggingface",
"services-ipfs",
"services-memcached",
"services-mini-moka",
Expand Down Expand Up @@ -115,6 +116,7 @@ services-ftp = ["opendal/services-ftp"]
services-gdrive = ["opendal/services-gdrive"]
services-gridfs = ["opendal/services-gridfs"]
services-hdfs = ["opendal/services-hdfs"]
services-huggingface = ["opendal/services-huggingface"]
services-ipfs = ["opendal/services-ipfs"]
services-libsql = ["opendal/services-libsql"]
services-memcached = ["opendal/services-memcached"]
Expand Down
2 changes: 2 additions & 0 deletions bindings/python/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ services-all = [
"services-gdrive",
# FIXME how to support HDFS services in bindings?
# "services-hdfs",
"services-huggingface",
"services-ipfs",
"services-memcached",
"services-mini-moka",
Expand Down Expand Up @@ -114,6 +115,7 @@ services-ftp = ["opendal/services-ftp"]
services-gdrive = ["opendal/services-gdrive"]
services-gridfs = ["opendal/services-gridfs"]
services-hdfs = ["opendal/services-hdfs"]
services-huggingface = ["opendal/services-huggingface"]
services-ipfs = ["opendal/services-ipfs"]
services-libsql = ["opendal/services-libsql"]
services-memcached = ["opendal/services-memcached"]
Expand Down
4 changes: 3 additions & 1 deletion core/src/services/huggingface/backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ impl Accessor for HuggingfaceBackend {
let status = resp.status();

match status {
StatusCode::OK => {
StatusCode::OK | StatusCode::PARTIAL_CONTENT => {
let size = parse_content_length(resp.headers())?;
Ok((RpRead::new().with_size(size), resp.into_body()))
}
Expand Down Expand Up @@ -310,6 +310,8 @@ impl Accessor for HuggingfaceBackend {
)?);
}

meta.set_content_length(status.size);

match status.type_.as_str() {
"directory" => meta.set_mode(EntryMode::DIR),
"file" => meta.set_mode(EntryMode::FILE),
Expand Down
12 changes: 2 additions & 10 deletions core/src/services/huggingface/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,12 @@
use std::fmt::Debug;

use bytes::Bytes;
use http::header;
use http::Request;
use http::Response;
use http::{header, StatusCode};
use serde::Deserialize;

use super::backend::RepoType;
use super::error::parse_error;
use crate::raw::*;
use crate::*;

Expand Down Expand Up @@ -162,14 +161,7 @@ impl HuggingfaceCore {
.body(AsyncBody::Empty)
.map_err(new_request_build_error)?;

let resp = self.client.send(req).await?;

let status = resp.status();

match status {
StatusCode::OK => Ok(resp),
_ => Err(parse_error(resp).await?),
}
self.client.send(req).await
}
}

Expand Down
5 changes: 4 additions & 1 deletion core/src/services/huggingface/lister.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,10 @@ impl oio::PageList for HuggingfaceLister {
status.path.clone()
};

ctx.entries.push_back(oio::Entry::new(&path, meta));
ctx.entries.push_back(oio::Entry::new(
&build_rel_path(&self.core.root, &path),
meta,
));
}

Ok(())
Expand Down
2 changes: 2 additions & 0 deletions core/src/types/operator/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,8 @@ impl Operator {
Scheme::Hdfs => Self::from_map::<services::Hdfs>(map)?.finish(),
#[cfg(feature = "services-http")]
Scheme::Http => Self::from_map::<services::Http>(map)?.finish(),
#[cfg(feature = "services-huggingface")]
Scheme::Huggingface => Self::from_map::<services::Huggingface>(map)?.finish(),
#[cfg(feature = "services-ipfs")]
Scheme::Ipfs => Self::from_map::<services::Ipfs>(map)?.finish(),
#[cfg(feature = "services-ipmfs")]
Expand Down

0 comments on commit bb1d328

Please sign in to comment.