Skip to content

Commit

Permalink
[misc] read all formula attributes (#1250)
Browse files Browse the repository at this point in the history
* [load] add todo for more formula attributes

* [misc] formula attributes to `f_attr`

This should allow reading all attributes, even those that are currently unhandled. It requires working with this combined attribute string when reading or writing formulas.

* [misc] indent

* [read] check first for shared string

* [doc] roxygenize
  • Loading branch information
JanMarvin authored Jan 26, 2025
1 parent 840ba70 commit 93fcb9f
Show file tree
Hide file tree
Showing 17 changed files with 207 additions and 203 deletions.
2 changes: 1 addition & 1 deletion R/class-sheet-data.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ wb_sheet_data <- function() {
empty_sheet_data_cc <- function(n) {
create_char_dataframe(
colnames = c("r", "row_r", "c_r", "c_s", "c_t", "c_cm", "c_ph", "c_vm",
"v", "f", "f_t", "f_ref", "f_ca", "f_si", "is", "typ"),
"v", "f", "f_attr", "is", "typ"),
n = n
)
}
Expand Down
4 changes: 2 additions & 2 deletions R/class-workbook.R
Original file line number Diff line number Diff line change
Expand Up @@ -4042,7 +4042,7 @@ wbWorkbook <- R6::R6Class(

if (as_ref) {
from_sheet_name <- self$get_sheet_names(escape = TRUE)[[from_sheet]]
to_cc[c("c_t", "c_cm", "c_ph", "c_vm", "v", "f", "f_t", "f_ref", "f_ca", "f_si", "is")] <- ""
to_cc[c("c_t", "c_cm", "c_ph", "c_vm", "v", "f", "f_attr", "is")] <- ""
to_cc[c("f")] <- paste0(shQuote(from_sheet_name, type = "sh"), "!", from_dims)
}

Expand Down Expand Up @@ -9803,7 +9803,7 @@ wbWorkbook <- R6::R6Class(
ws$sheet_data$row_attr <- rows_attr[order(as.numeric(rows_attr[, "r"])), ]

cc_rows <- ws$sheet_data$row_attr$r
cc_out <- cc[cc$row_r %in% cc_rows, c("row_r", "c_r", "r", "v", "c_t", "c_s", "c_cm", "c_ph", "c_vm", "f", "f_t", "f_ref", "f_ca", "f_si", "is")]
cc_out <- cc[cc$row_r %in% cc_rows, c("row_r", "c_r", "r", "v", "c_t", "c_s", "c_cm", "c_ph", "c_vm", "f", "f_attr", "is")]

ws$sheet_data$cc_out <- cc_out[order(as.integer(cc_out[, "row_r"]), col2int(cc_out[, "c_r"])), ]
} else {
Expand Down
4 changes: 2 additions & 2 deletions R/class-worksheet.R
Original file line number Diff line number Diff line change
Expand Up @@ -634,11 +634,11 @@ wbWorksheet <- R6::R6Class(

if (numbers)
cc[sel & cc$c_t %in% c("b", "e", "n", ""),
c("c_t", "v", "f", "f_t", "f_ref", "f_ca", "f_si", "is")] <- ""
c("c_t", "v", "f", "f_attr", "is")] <- ""

if (characters)
cc[sel & cc$c_t %in% c("inlineStr", "s", "str"),
c("c_t", "c_ph", "v", "f", "f_t", "f_ref", "f_ca", "f_si", "is")] <- ""
c("c_t", "c_ph", "v", "f", "f_attr", "is")] <- ""

if (styles)
cc[sel, c("c_s", "c_cm", "c_vm")] <- ""
Expand Down
15 changes: 9 additions & 6 deletions R/helper-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -1345,6 +1345,10 @@ wb_upd_custom_pid <- function(wb) {
#' @param cc_shared a subset of the full frame with shared formulas
#' @noRd
shared_as_fml <- function(cc, cc_shared) {

ff <- rbindlist(xml_attr(paste0("<f ", cc_shared$f_attr, "/>"), "f"))
cc_shared$f_si <- ff$si

cc_shared <- cc_shared[order(as.integer(cc_shared$f_si)), ]

# carry forward the shared formula
Expand All @@ -1362,12 +1366,11 @@ shared_as_fml <- function(cc, cc_shared) {
repls[[i]] <- next_cell(cells[[i]], cc_shared$cols[i], cc_shared$rows[i])
}

cc_shared$f <- replace_a1_notation(cc_shared$f, repls)
cc_shared$cols <- NULL
cc_shared$rows <- NULL
cc_shared$f_t <- ""
cc_shared$f_si <- ""
cc_shared$f_ref <- ""
cc_shared$f <- replace_a1_notation(cc_shared$f, repls)
cc_shared$cols <- NULL
cc_shared$rows <- NULL
cc_shared$f_attr <- ""
cc_shared$f_si <- NULL

# reduce and assign
cc_shared <- cc_shared[which(cc_shared$r %in% cc$r), ]
Expand Down
6 changes: 4 additions & 2 deletions R/read.R
Original file line number Diff line number Diff line change
Expand Up @@ -440,12 +440,14 @@ wb_to_df <- function(

if (show_formula) {

if (any(cc$f_t == "shared")) {
if (any(grepl("shared", cc$f_attr))) {

f_t <- rbindlist(xml_attr(paste0("<f ", cc$f_attr, "/>"), "f"))$t
# depending on the sheet, this might require updates to many cells
# TODO reduce this to cells that are part of `cc`. Currently we
# might waste time updating cells that are not visible to the user
cc_shared <- wb$worksheets[[sheet]]$sheet_data$cc
cc_shared <- cc_shared[cc_shared$f_t == "shared", ]
cc_shared <- cc_shared[f_t == "shared", ]
cc <- shared_as_fml(cc, cc_shared)
}

Expand Down
21 changes: 14 additions & 7 deletions R/write.R
Original file line number Diff line number Diff line change
Expand Up @@ -97,17 +97,18 @@ inner_update <- function(
}

replacement <- c("r", cell_style, "c_t", "c_cm", "c_ph", "c_vm", "v",
"f", "f_t", "f_ref", "f_ca", "f_si", "is", "typ")
"f", "f_attr", "is", "typ")

sel <- match(x$r, cc$r)

# to avoid bricking the worksheet, we make sure that we do not overwrite the
# reference cell of a shared formula. To be on the safe side, we replace all
# values with the formula. If the entire cc is replaced with x, we can skip.
if (length(sf <- cc$f_si[sel & cc$f_t[sel] == "shared" & cc$f_ref[sel] != ""]) && !all(cc$r %in% x$r)) {
ff <- rbindlist(xml_attr(paste0("<f ", cc$f_attr, "/>"), "f"))
if (length(sf <- ff$si[sel & ff$t[sel] == "shared" & ff$ref[sel] != ""]) && !all(cc$r %in% x$r)) {

# collect all the shared formulas that we have to convert
sel_fsi <- cc$f_si %in% unique(sf)
sel_fsi <- ff$si %in% unique(sf)

cc_shared <- cc[sel_fsi, , drop = FALSE]

Expand Down Expand Up @@ -508,7 +509,11 @@ write_data2 <- function(
## only the reference cell has a formula
## only the reference cell has the formula reference

uni_si <- unique(wb$worksheets[[sheetno]]$sheet_data$cc$f_si)

uni_attrs <- unique(wb$worksheets[[sheetno]]$sheet_data$cc$f_attr)
f_xml <- paste0("<f ", uni_attrs, "/>")
uni_si <- unique(rbindlist(xml_attr(f_xml, "f"))$si)

int_si <- as.integer(
replace(
uni_si,
Expand All @@ -517,10 +522,12 @@ write_data2 <- function(
)
)

cc$f_t <- "shared"
cc[1, "f_ref"] <- dims
int_si <- max(int_si, -1L) + 1L

cc$f_attr <- sprintf("t=\"%s\"", "shared")
cc[1, "f_attr"] <- paste(cc[1, "f_attr"], sprintf("ref=\"%s\"", dims))
cc[2:nrow(cc), "f"] <- ""
cc$f_si <- max(int_si, -1L) + 1L
cc$f_attr <- paste(cc$f_attr, sprintf("si=\"%s\"", int_si))
}

if (is.null(wb$worksheets[[sheetno]]$sheet_data$cc)) {
Expand Down
2 changes: 1 addition & 1 deletion man/wb_dims.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

39 changes: 20 additions & 19 deletions src/helper_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,7 @@ void wide_to_long(
for (size_t i = 0; i < static_cast<size_t>(m); ++i) {
scols[i] = int_to_col(static_cast<size_t>(start_col) + i);
}
std::string f_attr;

bool has_refs = refed.isNotNull();

Expand All @@ -398,25 +399,23 @@ void wide_to_long(

// pointer magic. even though these are extracted, they just point to the
// memory in the data frame
Rcpp::CharacterVector zz_row_r = Rcpp::as<Rcpp::CharacterVector>(zz["row_r"]);
Rcpp::CharacterVector zz_c_cm = Rcpp::as<Rcpp::CharacterVector>(zz["c_cm"]);
Rcpp::CharacterVector zz_c_r = Rcpp::as<Rcpp::CharacterVector>(zz["c_r"]);
Rcpp::CharacterVector zz_v = Rcpp::as<Rcpp::CharacterVector>(zz["v"]);
Rcpp::CharacterVector zz_c_t = Rcpp::as<Rcpp::CharacterVector>(zz["c_t"]);
Rcpp::CharacterVector zz_is = Rcpp::as<Rcpp::CharacterVector>(zz["is"]);
Rcpp::CharacterVector zz_f = Rcpp::as<Rcpp::CharacterVector>(zz["f"]);
Rcpp::CharacterVector zz_f_t = Rcpp::as<Rcpp::CharacterVector>(zz["f_t"]);
Rcpp::CharacterVector zz_f_ref = Rcpp::as<Rcpp::CharacterVector>(zz["f_ref"]);
Rcpp::CharacterVector zz_typ = Rcpp::as<Rcpp::CharacterVector>(zz["typ"]);
Rcpp::CharacterVector zz_r = Rcpp::as<Rcpp::CharacterVector>(zz["r"]);
Rcpp::CharacterVector zz_row_r = Rcpp::as<Rcpp::CharacterVector>(zz["row_r"]);
Rcpp::CharacterVector zz_c_cm = Rcpp::as<Rcpp::CharacterVector>(zz["c_cm"]);
Rcpp::CharacterVector zz_c_r = Rcpp::as<Rcpp::CharacterVector>(zz["c_r"]);
Rcpp::CharacterVector zz_v = Rcpp::as<Rcpp::CharacterVector>(zz["v"]);
Rcpp::CharacterVector zz_c_t = Rcpp::as<Rcpp::CharacterVector>(zz["c_t"]);
Rcpp::CharacterVector zz_is = Rcpp::as<Rcpp::CharacterVector>(zz["is"]);
Rcpp::CharacterVector zz_f = Rcpp::as<Rcpp::CharacterVector>(zz["f"]);
Rcpp::CharacterVector zz_f_attr = Rcpp::as<Rcpp::CharacterVector>(zz["f_attr"]);
Rcpp::CharacterVector zz_typ = Rcpp::as<Rcpp::CharacterVector>(zz["typ"]);
Rcpp::CharacterVector zz_r = Rcpp::as<Rcpp::CharacterVector>(zz["r"]);

// Convert na_strings only once outside the loop.
na_strings = inline_strings ? txt_to_is(na_strings, 0, 1, 1) : txt_to_si(na_strings, 0, 1, 1);

R_xlen_t idx = 0;

SEXP blank_sexp = Rf_mkChar("");
SEXP array_sexp = Rf_mkChar("array");
SEXP inlineStr_sexp = Rf_mkChar("inlineStr");
SEXP bool_sexp = Rf_mkChar("b");
SEXP expr_sexp = Rf_mkChar("e");
Expand Down Expand Up @@ -506,16 +505,18 @@ void wide_to_long(
break;
case array_formula:
// f, f_t = "array", and f_ref
SET_STRING_ELT(zz_f, pos, vals_sexp);
SET_STRING_ELT(zz_f_t, pos, array_sexp);
SET_STRING_ELT(zz_f_ref, pos, Rf_mkChar(ref_str.c_str()));
f_attr = "t=\"array\" ref=\"" + ref_str + "\"";

SET_STRING_ELT(zz_f, pos, vals_sexp);
SET_STRING_ELT(zz_f_attr, pos, Rf_mkChar(f_attr.c_str()));
break;
case cm_formula:
// c_cm, f, f_t = "array", and f_ref
SET_STRING_ELT(zz_c_cm, pos, Rf_mkChar(c_cm.c_str()));
SET_STRING_ELT(zz_f, pos, vals_sexp);
SET_STRING_ELT(zz_f_t, pos, array_sexp);
SET_STRING_ELT(zz_f_ref, pos, Rf_mkChar(ref_str.c_str()));
f_attr = "t=\"array\" ref=\"" + ref_str + "\"";

SET_STRING_ELT(zz_c_cm, pos, Rf_mkChar(c_cm.c_str()));
SET_STRING_ELT(zz_f, pos, vals_sexp);
SET_STRING_ELT(zz_f_attr, pos, Rf_mkChar(f_attr.c_str()));
break;
}

Expand Down
27 changes: 8 additions & 19 deletions src/load_workbook.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,13 +171,10 @@ void loadvals(Rcpp::Environment sheet_data, XPtrXML doc) {
const std::string s_str = "s";
const std::string t_str = "t";
const std::string v_str = "v";
const std::string ca_str = "ca";
const std::string cm_str = "cm";
const std::string is_str = "is";
const std::string ph_str = "ph";
const std::string si_str = "si";
const std::string vm_str = "vm";
const std::string ref_str = "ref";

/*****************************************************************************
* Row information is returned as list of lists returning as much as possible.
Expand Down Expand Up @@ -256,25 +253,17 @@ void loadvals(Rcpp::Environment sheet_data, XPtrXML doc) {
single_xml_col.is = oss.str();
} // </is>

// <f>
if (val_name == f_str) {

if (val_name == f_str) { // <f>
// Store the content of <f> as single_xml_col.f
single_xml_col.f = val.text().get();

// additional attributes to <f>
// This currently handles
// * t=
// * ref=
// * ca=
// * si=
for (auto cattr : val.attributes()) {
buffer = cattr.value();
cattr_name = cattr.name();
if (cattr_name == t_str) single_xml_col.f_t = buffer;
if (cattr_name == ref_str) single_xml_col.f_ref = buffer;
if (cattr_name == ca_str) single_xml_col.f_ca = buffer;
if (cattr_name == si_str) single_xml_col.f_si = buffer;
// Serialize the attributes of <f> as single_xml_col.f_attr
std::ostringstream attr_stream;
for (auto f_attr : val.attributes()) {
attr_stream << f_attr.name() << "=\"" << f_attr.value() << "\" ";
}

single_xml_col.f_attr = attr_stream.str();
} // </f>

// <v>
Expand Down
82 changes: 35 additions & 47 deletions src/openxlsx2_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,16 @@
typedef struct {
std::string r;
std::string row_r;
std::string c_r; // CellReference
std::string c_s; // StyleIndex
std::string c_t; // DataType
std::string c_cm; // CellMetaIndex
std::string c_ph; // ShowPhonetic
std::string c_vm; // ValueMetaIndex
std::string v; // CellValue
std::string f; // CellFormula
std::string f_t;
std::string f_ref;
std::string f_ca;
std::string f_si;
std::string is; // inlineStr
std::string c_r; // CellReference
std::string c_s; // StyleIndex
std::string c_t; // DataType
std::string c_cm; // CellMetaIndex
std::string c_ph; // ShowPhonetic
std::string c_vm; // ValueMetaIndex
std::string v; // CellValue
std::string f; // CellFormula
std::string f_attr;
std::string is; // inlineStr
} xml_col;

typedef std::vector<std::string> vec_string;
Expand Down Expand Up @@ -77,57 +74,48 @@ inline SEXP wrap(const std::vector<xml_col>& x) {

Rcpp::CharacterVector v(no_init(n)); // <v> tag
Rcpp::CharacterVector f(no_init(n)); // <f> tag
Rcpp::CharacterVector f_t(no_init(n)); // <f t=""> attribute most likely shared
Rcpp::CharacterVector f_ref(no_init(n)); // <f ref=""> attribute most likely reference
Rcpp::CharacterVector f_ca(no_init(n)); // <f ca=""> attribute most likely conditional formatting
Rcpp::CharacterVector f_si(no_init(n)); // <f si=""> attribute most likely sharedString
Rcpp::CharacterVector f_attr(no_init(n)); // <f /> attributes
Rcpp::CharacterVector is(no_init(n)); // <is> tag

// struct to vector
// We have to convert utf8 inputs via Rcpp::String for non unicode R sessions
// Ideally there would be a function that calls Rcpp::String only if needed
for (R_xlen_t i = 0; i < n; ++i) {
size_t ii = static_cast<size_t>(i);
if (!x[ii].r.empty()) r[i] = std::string(x[ii].r);
if (!x[ii].row_r.empty()) row_r[i] = std::string(x[ii].row_r);
if (!x[ii].c_r.empty()) c_r[i] = std::string(x[ii].c_r);
if (!x[ii].c_s.empty()) c_s[i] = std::string(x[ii].c_s);
if (!x[ii].c_t.empty()) c_t[i] = std::string(x[ii].c_t);
if (!x[ii].c_cm.empty()) c_cm[i] = std::string(x[ii].c_cm);
if (!x[ii].c_ph.empty()) c_ph[i] = Rcpp::String(x[ii].c_ph);
if (!x[ii].c_vm.empty()) c_vm[i] = std::string(x[ii].c_vm);
if (!x[ii].r.empty()) r[i] = std::string(x[ii].r);
if (!x[ii].row_r.empty()) row_r[i] = std::string(x[ii].row_r);
if (!x[ii].c_r.empty()) c_r[i] = std::string(x[ii].c_r);
if (!x[ii].c_s.empty()) c_s[i] = std::string(x[ii].c_s);
if (!x[ii].c_t.empty()) c_t[i] = std::string(x[ii].c_t);
if (!x[ii].c_cm.empty()) c_cm[i] = std::string(x[ii].c_cm);
if (!x[ii].c_ph.empty()) c_ph[i] = Rcpp::String(x[ii].c_ph);
if (!x[ii].c_vm.empty()) c_vm[i] = std::string(x[ii].c_vm);
if (!x[ii].v.empty()) { // can only be utf8 if c_t = "str"
if (x[ii].c_t.empty() && x[ii].f_t.empty())
if (x[ii].c_t.empty() && x[ii].f_attr.empty())
v[i] = std::string(x[ii].v);
else
v[i] = Rcpp::String(x[ii].v);
}
if (!x[ii].f.empty()) f[i] = Rcpp::String(x[ii].f);
if (!x[ii].f_t.empty()) f_t[i] = std::string(x[ii].f_t);
if (!x[ii].f_ref.empty()) f_ref[i] = std::string(x[ii].f_ref);
if (!x[ii].f_ca.empty()) f_ca[i] = std::string(x[ii].f_ca);
if (!x[ii].f_si.empty()) f_si[i] = std::string(x[ii].f_si);
if (!x[ii].is.empty()) is[i] = Rcpp::String(x[ii].is);
if (!x[ii].f.empty()) f[i] = Rcpp::String(x[ii].f);
if (!x[ii].f_attr.empty()) f_attr[i] = std::string(x[ii].f_attr);
if (!x[ii].is.empty()) is[i] = Rcpp::String(x[ii].is);
}

// Assign and return a dataframe
return Rcpp::wrap(
Rcpp::DataFrame::create(
Rcpp::Named("r") = r,
Rcpp::Named("row_r") = row_r,
Rcpp::Named("c_r") = c_r,
Rcpp::Named("c_s") = c_s,
Rcpp::Named("c_t") = c_t,
Rcpp::Named("c_cm") = c_cm,
Rcpp::Named("c_ph") = c_ph,
Rcpp::Named("c_vm") = c_vm,
Rcpp::Named("v") = v,
Rcpp::Named("f") = f,
Rcpp::Named("f_t") = f_t,
Rcpp::Named("f_ref") = f_ref,
Rcpp::Named("f_ca") = f_ca,
Rcpp::Named("f_si") = f_si,
Rcpp::Named("is") = is,
Rcpp::Named("r") = r,
Rcpp::Named("row_r") = row_r,
Rcpp::Named("c_r") = c_r,
Rcpp::Named("c_s") = c_s,
Rcpp::Named("c_t") = c_t,
Rcpp::Named("c_cm") = c_cm,
Rcpp::Named("c_ph") = c_ph,
Rcpp::Named("c_vm") = c_vm,
Rcpp::Named("v") = v,
Rcpp::Named("f") = f,
Rcpp::Named("f_attr") = f_attr,
Rcpp::Named("is") = is,
Rcpp::Named("stringsAsFactors") = false
)
);
Expand Down
Loading

0 comments on commit 93fcb9f

Please sign in to comment.