From 68c82bbd18dca921eec28b552379beef846850f4 Mon Sep 17 00:00:00 2001 From: Jan Marvin Garbuszus Date: Fri, 10 Jan 2025 20:29:16 +0100 Subject: [PATCH] [xlsb] add xml_order_children() function and add test --- NAMESPACE | 1 + R/RcppExports.R | 4 +++ R/pugixml.R | 25 ++++++++++++++++++ R/wb_load.R | 24 ++++++++++++++++++ man/xml_order_children.Rd | 22 ++++++++++++++++ src/RcppExports.cpp | 15 +++++++++++ src/pugi.cpp | 45 +++++++++++++++++++++++++++++++++ src/xlsb.cpp | 43 +++++++++++++++---------------- tests/testthat/test-read_xlsb.R | 13 ++++++++++ 9 files changed, 170 insertions(+), 22 deletions(-) create mode 100644 man/xml_order_children.Rd diff --git a/NAMESPACE b/NAMESPACE index 9c07aff12..538f21581 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -169,6 +169,7 @@ export(xml_attr_mod) export(xml_node) export(xml_node_create) export(xml_node_name) +export(xml_order_children) export(xml_rm_child) export(xml_value) import(R6) diff --git a/R/RcppExports.R b/R/RcppExports.R index 77566d09a..ad5e073ca 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -244,6 +244,10 @@ xml_remove_child3 <- function(node, child, level1, level2, which, pointer) { .Call(`_openxlsx2_xml_remove_child3`, node, child, level1, level2, which, pointer) } +xml_order_children1 <- function(node, child, order, pointer) { + .Call(`_openxlsx2_xml_order_children1`, node, child, order, pointer) +} + xml_si_to_txt <- function(doc) { .Call(`_openxlsx2_xml_si_to_txt`, doc) } diff --git a/R/pugixml.R b/R/pugixml.R index 57716efee..c7d6f45a4 100644 --- a/R/pugixml.R +++ b/R/pugixml.R @@ -374,3 +374,28 @@ xml_rm_child <- function(xml_node, xml_child, level, which = 0, pointer = FALSE, return(z) } + +#' order xml children in node +#' @param xml_node an xml structure +#' @param level the xml root +#' @param order the wanted order as numeric +#' @param pointer pointer +#' @param ... additional arguments passed to `read_xml()` +#' @export +xml_order_children <- function(xml_node, level, order, pointer = FALSE, ...) { + + if (missing(xml_node)) + stop("need xml_node") + + if (missing(level)) + stop("need level") + + if (missing(order)) + stop("need order") + + if (!inherits(xml_node, "pugi_xml")) xml_node <- read_xml(xml_node, ...) + assert_class(level, "character") + + xml_order_children1(node = xml_node, child = level, order = order, pointer = pointer) + +} diff --git a/R/wb_load.R b/R/wb_load.R index ee346be9b..69d440a10 100644 --- a/R/wb_load.R +++ b/R/wb_load.R @@ -1538,6 +1538,30 @@ wb_load <- function( # correct sheet references and replace our replacement with it. if (!data_only && length(workbookBIN)) { + # we need to update the order of customSheetView children. Incorrect orders + # causes spreadsheet software to be unable to load and recover the file. + for (sheet in seq_along(wb$worksheets)) { + if (length(wb$worksheets[[sheet]]$customSheetViews) == 0) next + cvs <- xml_node(wb$worksheets[[sheet]]$customSheetViews, "customSheetViews", "customSheetView") + + for (i in seq_along(cvs)) { + exp_nams <- c("pane", "selection", "rowBreaks", "colBreaks", "pageMargins", "printOptions", "pageSetup", "headerFooter", "autoFilter", "extLst") + cv_nms <- xml_node_name(cvs[i], "customSheetView") + + ordr <- match(exp_nams, cv_nms) + ordr <- ordr[!is.na(ordr)] - 1L + ordr <- ordr + + cvs[i] <- xml_order_children(xml_node = cvs[i], level = "customSheetView", order = ordr, pointer = FALSE) + + # headerFooter cause issues. they are (a) not added to the correct node + # and (b) brick the entire XML structure + if ("headerFooter" %in% cv_nms) cvs[i] <- xml_rm_child(cvs[i], "headerFooter") + } + + wb$worksheets[[sheet]]$customSheetViews <- xml_node_create("customSheetViews", xml_children = cvs) + } + if (length(wb$workbook$xti)) { # create data frame containing sheet names for Xti entries xti <- rbindlist(xml_attr(wb$workbook$xti, "xti")) diff --git a/man/xml_order_children.Rd b/man/xml_order_children.Rd new file mode 100644 index 000000000..1e4d53d2f --- /dev/null +++ b/man/xml_order_children.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pugixml.R +\name{xml_order_children} +\alias{xml_order_children} +\title{order xml children in node} +\usage{ +xml_order_children(xml_node, level, order, pointer = FALSE, ...) +} +\arguments{ +\item{xml_node}{an xml structure} + +\item{level}{the xml root} + +\item{order}{the wanted order as numeric} + +\item{pointer}{pointer} + +\item{...}{additional arguments passed to \code{read_xml()}} +} +\description{ +order xml children in node +} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 87e042c7f..9e43f53ef 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -583,6 +583,20 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// xml_order_children1 +SEXP xml_order_children1(XPtrXML node, std::string child, const std::vector& order, bool pointer); +RcppExport SEXP _openxlsx2_xml_order_children1(SEXP nodeSEXP, SEXP childSEXP, SEXP orderSEXP, SEXP pointerSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< XPtrXML >::type node(nodeSEXP); + Rcpp::traits::input_parameter< std::string >::type child(childSEXP); + Rcpp::traits::input_parameter< const std::vector& >::type order(orderSEXP); + Rcpp::traits::input_parameter< bool >::type pointer(pointerSEXP); + rcpp_result_gen = Rcpp::wrap(xml_order_children1(node, child, order, pointer)); + return rcpp_result_gen; +END_RCPP +} // xml_si_to_txt SEXP xml_si_to_txt(XPtrXML doc); RcppExport SEXP _openxlsx2_xml_si_to_txt(SEXP docSEXP) { @@ -1015,6 +1029,7 @@ static const R_CallMethodDef CallEntries[] = { {"_openxlsx2_xml_remove_child1", (DL_FUNC) &_openxlsx2_xml_remove_child1, 4}, {"_openxlsx2_xml_remove_child2", (DL_FUNC) &_openxlsx2_xml_remove_child2, 5}, {"_openxlsx2_xml_remove_child3", (DL_FUNC) &_openxlsx2_xml_remove_child3, 6}, + {"_openxlsx2_xml_order_children1", (DL_FUNC) &_openxlsx2_xml_order_children1, 4}, {"_openxlsx2_xml_si_to_txt", (DL_FUNC) &_openxlsx2_xml_si_to_txt, 1}, {"_openxlsx2_is_to_txt", (DL_FUNC) &_openxlsx2_is_to_txt, 1}, {"_openxlsx2_si_to_txt", (DL_FUNC) &_openxlsx2_si_to_txt, 1}, diff --git a/src/pugi.cpp b/src/pugi.cpp index e321fbf40..5135a395a 100644 --- a/src/pugi.cpp +++ b/src/pugi.cpp @@ -750,3 +750,48 @@ SEXP xml_remove_child3(XPtrXML node, std::string child, std::string level1, std: return Rcpp::wrap(Rcpp::String(oss.str())); } } + +// [[Rcpp::export]] +SEXP xml_order_children1(XPtrXML node, std::string child, const std::vector& order, bool pointer) { + + uint32_t pugi_format_flags = pugi_format(node); + + pugi::xml_node root = node->child(child.c_str()); + if (!root) { + Rcpp::stop("Root node not found."); + } + + std::vector children; + for (pugi::xml_node child : root.children()) { + if (child.type() == pugi::node_element) { + children.push_back(child); + } + } + + if (order.size() != children.size()) { + Rcpp::stop("Order size (%d) does not match the number of children (%d).", + order.size(), children.size()); + } + + std::vector reordered_children(order.size()); + for (size_t i = 0; i < order.size(); ++i) { + if (order[i] < 0 || static_cast(order[i]) >= children.size()) { + Rcpp::stop("Invalid order index: %d", order[i]); + } + reordered_children[i] = children[order[i]]; + } + + root.remove_children(); + + for (const auto& child : reordered_children) { + root.append_copy(child); + } + + if (pointer) { + return node; + } else { + std::ostringstream oss; + root.print(oss, " ", pugi_format_flags); + return Rcpp::wrap(Rcpp::String(oss.str())); + } +} diff --git a/src/xlsb.cpp b/src/xlsb.cpp index 72ef2cbc9..72232c71c 100644 --- a/src/xlsb.cpp +++ b/src/xlsb.cpp @@ -2524,15 +2524,14 @@ int32_t worksheet_bin(std::string filePath, bool chartsheet, std::string outPath stHeaderEven << ": " << stFooterEven << ": " << stHeaderFirst << ": " << stFooterFirst << std::endl; - out << "" << - "" << stHeader <<"" << - "" << stFooter <<"" << - "" << stHeaderFirst <<"" << - "" << stFooterFirst <<"" << - "" << stHeaderEven <<"" << - "" << stHeaderEven <<"" << - // "" << <<"" << - "" << std::endl; + out << "" << std::endl; + if (!stHeader.empty()) out << "" << stHeader <<"" << std::endl; + if (!stFooter.empty()) out << "" << stFooter <<"" << std::endl; + if (!stHeaderEven.empty()) out << "" << stHeaderEven <<"" << std::endl; + if (!stFooterEven.empty()) out << "" << stFooterEven <<"" << std::endl; + if (!stHeaderFirst.empty()) out << "" << stHeaderFirst <<"" << std::endl; + if (!stFooterFirst.empty()) out << "" << stFooterFirst <<"" << std::endl; + out << "" << std::endl; break; } @@ -3736,19 +3735,19 @@ int32_t worksheet_bin(std::string filePath, bool chartsheet, std::string outPath out << ">" << std::endl; - // // order matters for - // out << "fHorizontal) - // out << " horizontalCentered = \"" << (int16_t)fields->fHorizontal << "\""; - // if (fields->fVertical) - // out << " verticalCentered = \"" << (int16_t)fields->fVertical << "\""; - // if (fields->fPrintRwCol) - // out << " headings = \"" << (int16_t)fields->fPrintRwCol << "\""; - // if (fields->fDspGridSv) - // out << " gridLines = \"" << (int16_t)fields->fDspGridSv << "\""; - // if (!fields->fPrintGrid) - // out << " gridLinesSet = \"" << (int16_t)fields->fPrintGrid << "\""; - // out << " />" << std::endl; + // order matters for + out << "fHorizontal) + out << " horizontalCentered = \"" << (int16_t)fields->fHorizontal << "\""; + if (fields->fVertical) + out << " verticalCentered = \"" << (int16_t)fields->fVertical << "\""; + if (fields->fPrintRwCol) + out << " headings = \"" << (int16_t)fields->fPrintRwCol << "\""; + if (fields->fDspGridSv) + out << " gridLines = \"" << (int16_t)fields->fDspGridSv << "\""; + if (!fields->fPrintGrid) + out << " gridLinesSet = \"" << (int16_t)fields->fPrintGrid << "\""; + out << " />" << std::endl; break; } diff --git a/tests/testthat/test-read_xlsb.R b/tests/testthat/test-read_xlsb.R index e1ea68573..9d2e7e190 100644 --- a/tests/testthat/test-read_xlsb.R +++ b/tests/testthat/test-read_xlsb.R @@ -163,3 +163,16 @@ test_that("shared formulas are detected correctly", { ) }) + +test_that("loading custom sheet view in xlsb files works", { + + skip_online_checks() + + fl <- testfile_path("custom_sheet_view.xlsb") + + wb <- wb_load(fl) + + exp <- "" + got <- wb$worksheets[[1]]$customSheetViews + expect_equal(exp, got) +})