Skip to content

Commit

Permalink
feat(ExcelSheet): added a "visible" attribute (#285)
Browse files Browse the repository at this point in the history
closes #281

Signed-off-by: Luka Peschke <[email protected]>
  • Loading branch information
lukapeschke authored Sep 17, 2024
1 parent 7260b11 commit 21ea958
Show file tree
Hide file tree
Showing 6 changed files with 104 additions and 35 deletions.
6 changes: 6 additions & 0 deletions python/fastexcel/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
DTypeMap: TypeAlias = "dict[str | int, DType]"
ColumnNameFrom: TypeAlias = Literal["provided", "looked_up", "generated"]
DTypeFrom: TypeAlias = Literal["provided_by_index", "provided_by_name", "guessed"]
SheetVisible: TypeAlias = Literal["visible", "hidden", "veryhidden"]


class ExcelSheet:
Expand Down Expand Up @@ -81,6 +82,11 @@ def specified_dtypes(self) -> DTypeMap | None:
"""The dtypes specified for the sheet"""
return self._sheet.specified_dtypes

@property
def visible(self) -> SheetVisible:
    """Visibility of the sheet, as reported by the wrapped sheet object.

    The return type `SheetVisible` allows `"visible"`, `"hidden"` or `"veryhidden"`.
    """
    visibility: SheetVisible = self._sheet.visible
    return visibility

def to_arrow(self) -> pa.RecordBatch:
    """Return the sheet as a pyarrow `RecordBatch`, delegating to the wrapped sheet object."""
    record_batch = self._sheet.to_arrow()
    return record_batch
Expand Down
4 changes: 4 additions & 0 deletions python/fastexcel/_fastexcel.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ DType = Literal["null", "int", "float", "string", "boolean", "datetime", "date",
DTypeMap = dict[str | int, DType]
ColumnNameFrom = Literal["provided", "looked_up", "generated"]
DTypeFrom = Literal["provided_by_index", "provided_by_name", "guessed"]
SheetVisible = Literal["visible", "hidden", "veryhidden"]

class ColumnInfo:
def __init__(
Expand Down Expand Up @@ -56,6 +57,9 @@ class _ExcelSheet:
@property
def specified_dtypes(self) -> DTypeMap | None:
    """The dtypes specified for the sheet, as a `DTypeMap`, or `None`"""
@property
def visible(self) -> SheetVisible:
    """The visibility of the sheet: `"visible"`, `"hidden"` or `"veryhidden"`"""
def to_arrow(self) -> pa.RecordBatch:
    """Converts the sheet to a pyarrow `RecordBatch` and returns it"""

Expand Down
Binary file not shown.
13 changes: 13 additions & 0 deletions python/tests/test_sheet_visibility.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import fastexcel

from utils import path_for_fixture


def test_sheet_visibilities() -> None:
    """Each sheet of the fixture workbook must report its expected visibility."""
    reader = fastexcel.read_excel(
        path_for_fixture("fixture-sheets-different-visibilities.xlsx")
    )

    # Sheets are loaded by index 0, 1 and 2, in that order.
    expected_visibilities = ("visible", "hidden", "veryhidden")
    for sheet_idx, expected in enumerate(expected_visibilities):
        assert reader.load_sheet(sheet_idx).visible == expected
64 changes: 38 additions & 26 deletions src/types/python/excelreader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ use std::{

use arrow::{pyarrow::ToPyArrow, record_batch::RecordBatch};
use calamine::{
open_workbook_auto, open_workbook_auto_from_rs, Data, DataRef, Range, Reader, Sheets,
open_workbook_auto, open_workbook_auto_from_rs, Data, DataRef, Range, Reader,
Sheet as CalamineSheet, Sheets,
};
use pyo3::{prelude::PyObject, pyclass, pymethods, Bound, IntoPy, PyAny, PyResult, Python};

Expand Down Expand Up @@ -44,10 +45,10 @@ impl ExcelSheets {
}

#[allow(dead_code)]
fn sheet_names(&self) -> Vec<String> {
fn sheet_metadata(&self) -> &[CalamineSheet] {
match self {
Self::File(sheets) => sheets.sheet_names(),
Self::Bytes(sheets) => sheets.sheet_names(),
ExcelSheets::File(sheets) => sheets.sheets_metadata(),
ExcelSheets::Bytes(sheets) => sheets.sheets_metadata(),
}
}

Expand All @@ -74,8 +75,7 @@ impl ExcelSheets {
#[pyclass(name = "_ExcelReader")]
pub(crate) struct ExcelReader {
sheets: ExcelSheets,
#[pyo3(get)]
sheet_names: Vec<String>,
sheet_metadata: Vec<CalamineSheet>,
source: String,
}

Expand All @@ -92,10 +92,10 @@ impl ExcelReader {
let sheets = open_workbook_auto(path)
.map_err(|err| FastExcelErrorKind::CalamineError(err).into())
.with_context(|| format!("Could not open workbook at {path}"))?;
let sheet_names = sheets.sheet_names().to_owned();
let sheet_metadata = sheets.sheets_metadata().to_owned();
Ok(Self {
sheets: ExcelSheets::File(sheets),
sheet_names,
sheet_metadata,
source: path.to_owned(),
})
}
Expand Down Expand Up @@ -140,7 +140,7 @@ impl ExcelReader {
#[allow(clippy::too_many_arguments)]
fn build_sheet(
&mut self,
name: String,
sheet_meta: CalamineSheet,
header_row: Option<usize>,
column_names: Option<Vec<String>>,
skip_rows: usize,
Expand All @@ -155,7 +155,10 @@ impl ExcelReader {
let header = Header::new(header_row, column_names);
let selected_columns = Self::build_selected_columns(use_columns).into_pyresult()?;
if eager && self.sheets.supports_by_ref() {
let range = self.sheets.worksheet_range_ref(&name).into_pyresult()?;
let range = self
.sheets
.worksheet_range_ref(&sheet_meta.name)
.into_pyresult()?;
let pagination = Pagination::new(skip_rows, n_rows, &range).into_pyresult()?;
Self::load_sheet_eager(
&range.into(),
Expand All @@ -169,10 +172,13 @@ impl ExcelReader {
.into_pyresult()
.and_then(|rb| rb.to_pyarrow(py))
} else {
let range = self.sheets.worksheet_range(&name).into_pyresult()?;
let range = self
.sheets
.worksheet_range(&sheet_meta.name)
.into_pyresult()?;
let pagination = Pagination::new(skip_rows, n_rows, &range).into_pyresult()?;
let sheet = ExcelSheet::try_new(
name,
sheet_meta,
range.into(),
header,
pagination,
Expand Down Expand Up @@ -200,10 +206,10 @@ impl TryFrom<&[u8]> for ExcelReader {
let sheets = open_workbook_auto_from_rs(cursor)
.map_err(|err| FastExcelErrorKind::CalamineError(err).into())
.with_context(|| "Could not open workbook from bytes")?;
let sheet_names = sheets.sheet_names().to_owned();
let sheet_metadata = sheets.sheets_metadata().to_owned();
Ok(Self {
sheets: ExcelSheets::Bytes(sheets),
sheet_names,
sheet_metadata,
source: "bytes".to_owned(),
})
}
Expand Down Expand Up @@ -243,37 +249,35 @@ impl ExcelReader {
eager: bool,
py: Python<'_>,
) -> PyResult<PyObject> {
let name = idx_or_name
let sheet = idx_or_name
.try_into()
.and_then(|idx_or_name| match idx_or_name {
IdxOrName::Name(name) => {
if self.sheet_names.contains(&name) {
Ok(name)
if let Some(sheet) = self.sheet_metadata.iter().find(|s| s.name == name) {
Ok(sheet)
} else {
Err(FastExcelErrorKind::SheetNotFound(IdxOrName::Name(name.clone())).into()).with_context(|| {
let available_sheets = self.sheet_names.iter().map(|s| format!("\"{s}\"")).collect::<Vec<_>>().join(", ");
let available_sheets = self.sheet_metadata.iter().map(|s| format!("\"{}\"", s.name)).collect::<Vec<_>>().join(", ");
format!(
"Sheet \"{name}\" not found in file. Available sheets: {available_sheets}."
)
})
}
}
IdxOrName::Idx(idx) => self
.sheet_names
.get(idx)
.sheet_metadata .get(idx)
.ok_or_else(|| FastExcelErrorKind::SheetNotFound(IdxOrName::Idx(idx)).into())
.with_context(|| {
format!(
.with_context(|| { format!(
"Sheet index {idx} is out of range. File has {} sheets.",
self.sheet_names.len()
self.sheet_metadata.len()
)
})
.map(ToOwned::to_owned),
,
})
.into_pyresult()?;
.into_pyresult()?.to_owned();

self.build_sheet(
name,
sheet,
header_row,
column_names,
skip_rows,
Expand All @@ -286,4 +290,12 @@ impl ExcelReader {
py,
)
}

#[getter]
pub fn sheet_names(&self) -> Vec<&str> {
    // Names are borrowed from the stored sheet metadata, in the order the metadata lists them.
    let mut names = Vec::with_capacity(self.sheet_metadata.len());
    for sheet in &self.sheet_metadata {
        names.push(sheet.name.as_str());
    }
    names
}
}
52 changes: 43 additions & 9 deletions src/types/python/excelsheet/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
pub(crate) mod column_info;
pub(crate) mod sheet_data;

use calamine::{CellType, Range};
use calamine::{CellType, Range, Sheet as CalamineSheet, SheetVisible as CalamineSheetVisible};
use sheet_data::ExcelSheetData;
use std::{cmp, collections::HashSet, fmt::Debug, str::FromStr, sync::Arc};

Expand Down Expand Up @@ -334,10 +334,28 @@ impl TryFrom<Option<&Bound<'_, PyAny>>> for SelectedColumns {
}
}

/// Local newtype around calamine's sheet visibility value, so that traits can be
/// implemented for it in this crate.
#[derive(Debug, Clone)]
struct SheetVisible(CalamineSheetVisible);

impl ToPyObject for &SheetVisible {
    /// Converts the wrapped visibility into a Python object built from one of the
    /// strings `"visible"`, `"hidden"` or `"veryhidden"`.
    fn to_object(&self, py: Python<'_>) -> PyObject {
        // Pick the label first, then convert it in a single place.
        let label = match self.0 {
            CalamineSheetVisible::Visible => "visible",
            CalamineSheetVisible::Hidden => "hidden",
            CalamineSheetVisible::VeryHidden => "veryhidden",
        };
        label.to_object(py)
    }
}

impl From<CalamineSheetVisible> for SheetVisible {
fn from(value: CalamineSheetVisible) -> Self {
Self(value)
}
}

#[pyclass(name = "_ExcelSheet")]
pub(crate) struct ExcelSheet {
#[pyo3(get)]
pub(crate) name: String,
sheet_meta: CalamineSheet,
header: Header,
pagination: Pagination,
data: ExcelSheetData<'static>,
Expand All @@ -358,7 +376,7 @@ impl ExcelSheet {

#[allow(clippy::too_many_arguments)]
pub(crate) fn try_new(
name: String,
sheet_meta: CalamineSheet,
data: ExcelSheetData<'static>,
header: Header,
pagination: Pagination,
Expand All @@ -370,7 +388,7 @@ impl ExcelSheet {
let available_columns_info =
build_available_columns_info(&data, &selected_columns, &header)?;
let mut sheet = ExcelSheet {
name,
sheet_meta,
header,
pagination,
data,
Expand Down Expand Up @@ -528,7 +546,7 @@ impl TryFrom<&ExcelSheet> for RecordBatch {
(field_name, array, nullable)
}))
.map_err(|err| FastExcelErrorKind::ArrowError(err.to_string()).into())
.with_context(|| format!("could not convert sheet {} to RecordBatch", sheet.name))
.with_context(|| format!("could not convert sheet {} to RecordBatch", sheet.name()))
}
}
}
Expand Down Expand Up @@ -582,24 +600,40 @@ impl ExcelSheet {
self.dtypes.as_ref().map(|dtypes| dtypes.to_object(py))
}

#[getter]
pub fn name(&self) -> &str {
    // The name is read from the stored sheet metadata.
    self.sheet_meta.name.as_str()
}

#[getter]
pub fn visible<'p>(&'p self, py: Python<'p>) -> PyObject {
let visible: SheetVisible = self.sheet_meta.visible.into();
(&visible).to_object(py)
}

pub fn to_arrow(&self, py: Python<'_>) -> PyResult<PyObject> {
RecordBatch::try_from(self)
.with_context(|| format!("could not create RecordBatch from sheet \"{}\"", &self.name))
.with_context(|| {
format!(
"could not create RecordBatch from sheet \"{}\"",
self.name()
)
})
.and_then(|rb| {
rb.to_pyarrow(py)
.map_err(|err| FastExcelErrorKind::ArrowError(err.to_string()).into())
})
.with_context(|| {
format!(
"could not convert RecordBatch to pyarrow for sheet \"{}\"",
self.name
self.name()
)
})
.into_pyresult()
}

pub fn __repr__(&self) -> String {
format!("ExcelSheet<{}>", self.name)
format!("ExcelSheet<{}>", self.name())
}
}

Expand Down

0 comments on commit 21ea958

Please sign in to comment.