Skip to content

Commit

Permalink
[xlsb] add xml_order_children() function and add test
Browse files Browse the repository at this point in the history
  • Loading branch information
JanMarvin committed Jan 10, 2025
1 parent 16863e3 commit 68c82bb
Show file tree
Hide file tree
Showing 9 changed files with 170 additions and 22 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ export(xml_attr_mod)
export(xml_node)
export(xml_node_create)
export(xml_node_name)
export(xml_order_children)
export(xml_rm_child)
export(xml_value)
import(R6)
Expand Down
4 changes: 4 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,10 @@ xml_remove_child3 <- function(node, child, level1, level2, which, pointer) {
.Call(`_openxlsx2_xml_remove_child3`, node, child, level1, level2, which, pointer)
}

xml_order_children1 <- function(node, child, order, pointer) {
.Call(`_openxlsx2_xml_order_children1`, node, child, order, pointer)
}

xml_si_to_txt <- function(doc) {
.Call(`_openxlsx2_xml_si_to_txt`, doc)
}
Expand Down
25 changes: 25 additions & 0 deletions R/pugixml.R
Original file line number Diff line number Diff line change
Expand Up @@ -374,3 +374,28 @@ xml_rm_child <- function(xml_node, xml_child, level, which = 0, pointer = FALSE,

return(z)
}

#' order xml children in node
#' @param xml_node an xml structure
#' @param level the xml root
#' @param order the wanted order as numeric
#' @param pointer pointer
#' @param ... additional arguments passed to `read_xml()`
#' @export
xml_order_children <- function(xml_node, level, order, pointer = FALSE, ...) {

if (missing(xml_node))
stop("need xml_node")

if (missing(level))
stop("need level")

if (missing(order))
stop("need order")

if (!inherits(xml_node, "pugi_xml")) xml_node <- read_xml(xml_node, ...)
assert_class(level, "character")

xml_order_children1(node = xml_node, child = level, order = order, pointer = pointer)

}
24 changes: 24 additions & 0 deletions R/wb_load.R
Original file line number Diff line number Diff line change
Expand Up @@ -1538,6 +1538,30 @@ wb_load <- function(
# correct sheet references and replace our replacement with it.
if (!data_only && length(workbookBIN)) {

# we need to update the order of customSheetView children. Incorrect orders
# causes spreadsheet software to be unable to load and recover the file.
for (sheet in seq_along(wb$worksheets)) {
if (length(wb$worksheets[[sheet]]$customSheetViews) == 0) next
cvs <- xml_node(wb$worksheets[[sheet]]$customSheetViews, "customSheetViews", "customSheetView")

for (i in seq_along(cvs)) {
exp_nams <- c("pane", "selection", "rowBreaks", "colBreaks", "pageMargins", "printOptions", "pageSetup", "headerFooter", "autoFilter", "extLst")
cv_nms <- xml_node_name(cvs[i], "customSheetView")

ordr <- match(exp_nams, cv_nms)
ordr <- ordr[!is.na(ordr)] - 1L
ordr <- ordr

cvs[i] <- xml_order_children(xml_node = cvs[i], level = "customSheetView", order = ordr, pointer = FALSE)

# headerFooter cause issues. they are (a) not added to the correct node
# and (b) brick the entire XML structure
if ("headerFooter" %in% cv_nms) cvs[i] <- xml_rm_child(cvs[i], "headerFooter")
}

wb$worksheets[[sheet]]$customSheetViews <- xml_node_create("customSheetViews", xml_children = cvs)
}

if (length(wb$workbook$xti)) {
# create data frame containing sheet names for Xti entries
xti <- rbindlist(xml_attr(wb$workbook$xti, "xti"))
Expand Down
22 changes: 22 additions & 0 deletions man/xml_order_children.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,20 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// xml_order_children1
SEXP xml_order_children1(XPtrXML node, std::string child, const std::vector<int>& order, bool pointer);
RcppExport SEXP _openxlsx2_xml_order_children1(SEXP nodeSEXP, SEXP childSEXP, SEXP orderSEXP, SEXP pointerSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< XPtrXML >::type node(nodeSEXP);
Rcpp::traits::input_parameter< std::string >::type child(childSEXP);
Rcpp::traits::input_parameter< const std::vector<int>& >::type order(orderSEXP);
Rcpp::traits::input_parameter< bool >::type pointer(pointerSEXP);
rcpp_result_gen = Rcpp::wrap(xml_order_children1(node, child, order, pointer));
return rcpp_result_gen;
END_RCPP
}
// xml_si_to_txt
SEXP xml_si_to_txt(XPtrXML doc);
RcppExport SEXP _openxlsx2_xml_si_to_txt(SEXP docSEXP) {
Expand Down Expand Up @@ -1015,6 +1029,7 @@ static const R_CallMethodDef CallEntries[] = {
{"_openxlsx2_xml_remove_child1", (DL_FUNC) &_openxlsx2_xml_remove_child1, 4},
{"_openxlsx2_xml_remove_child2", (DL_FUNC) &_openxlsx2_xml_remove_child2, 5},
{"_openxlsx2_xml_remove_child3", (DL_FUNC) &_openxlsx2_xml_remove_child3, 6},
{"_openxlsx2_xml_order_children1", (DL_FUNC) &_openxlsx2_xml_order_children1, 4},
{"_openxlsx2_xml_si_to_txt", (DL_FUNC) &_openxlsx2_xml_si_to_txt, 1},
{"_openxlsx2_is_to_txt", (DL_FUNC) &_openxlsx2_is_to_txt, 1},
{"_openxlsx2_si_to_txt", (DL_FUNC) &_openxlsx2_si_to_txt, 1},
Expand Down
45 changes: 45 additions & 0 deletions src/pugi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -750,3 +750,48 @@ SEXP xml_remove_child3(XPtrXML node, std::string child, std::string level1, std:
return Rcpp::wrap(Rcpp::String(oss.str()));
}
}

// [[Rcpp::export]]
SEXP xml_order_children1(XPtrXML node, std::string child, const std::vector<int>& order, bool pointer) {

uint32_t pugi_format_flags = pugi_format(node);

pugi::xml_node root = node->child(child.c_str());
if (!root) {
Rcpp::stop("Root node <root> not found.");
}

std::vector<pugi::xml_node> children;
for (pugi::xml_node child : root.children()) {
if (child.type() == pugi::node_element) {
children.push_back(child);
}
}

if (order.size() != children.size()) {
Rcpp::stop("Order size (%d) does not match the number of children (%d).",
order.size(), children.size());
}

std::vector<pugi::xml_node> reordered_children(order.size());
for (size_t i = 0; i < order.size(); ++i) {
if (order[i] < 0 || static_cast<size_t>(order[i]) >= children.size()) {
Rcpp::stop("Invalid order index: %d", order[i]);
}
reordered_children[i] = children[order[i]];
}

root.remove_children();

for (const auto& child : reordered_children) {
root.append_copy(child);
}

if (pointer) {
return node;
} else {
std::ostringstream oss;
root.print(oss, " ", pugi_format_flags);
return Rcpp::wrap(Rcpp::String(oss.str()));
}
}
43 changes: 21 additions & 22 deletions src/xlsb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2524,15 +2524,14 @@ int32_t worksheet_bin(std::string filePath, bool chartsheet, std::string outPath
stHeaderEven << ": " << stFooterEven << ": " <<
stHeaderFirst << ": " << stFooterFirst << std::endl;

out << "<headerFooter>" <<
"<oddHeader>" << stHeader <<"</oddHeader>" <<
"<oddFooter>" << stFooter <<"</oddFooter>" <<
"<firstHeader>" << stHeaderFirst <<"</firstHeader>" <<
"<firstFooter>" << stFooterFirst <<"</firstFooter>" <<
"<evenHeader>" << stHeaderEven <<"</evenHeader>" <<
"<evenFooter>" << stHeaderEven <<"</evenFooter>" <<
// "<drawingHF>" << <<"</drawingHF>" <<
"</headerFooter>" << std::endl;
out << "<headerFooter differentOddEven=\"1\" differentFirst=\"0\" scaleWithDoc=\"0\" alignWithMargins=\"0\">" << std::endl;
if (!stHeader.empty()) out << "<oddHeader>" << stHeader <<"</oddHeader>" << std::endl;
if (!stFooter.empty()) out << "<oddFooter>" << stFooter <<"</oddFooter>" << std::endl;
if (!stHeaderEven.empty()) out << "<evenHeader>" << stHeaderEven <<"</evenHeader>" << std::endl;
if (!stFooterEven.empty()) out << "<evenFooter>" << stFooterEven <<"</evenFooter>" << std::endl;
if (!stHeaderFirst.empty()) out << "<firstHeader>" << stHeaderFirst <<"</firstHeader>" << std::endl;
if (!stFooterFirst.empty()) out << "<firstFooter>" << stFooterFirst <<"</firstFooter>" << std::endl;
out << "</headerFooter>" << std::endl;

break;
}
Expand Down Expand Up @@ -3736,19 +3735,19 @@ int32_t worksheet_bin(std::string filePath, bool chartsheet, std::string outPath

out << ">" << std::endl;

// // order matters for <customSheetViews/>
// out << "<printOptions" << std::endl;
// if (fields->fHorizontal)
// out << " horizontalCentered = \"" << (int16_t)fields->fHorizontal << "\"";
// if (fields->fVertical)
// out << " verticalCentered = \"" << (int16_t)fields->fVertical << "\"";
// if (fields->fPrintRwCol)
// out << " headings = \"" << (int16_t)fields->fPrintRwCol << "\"";
// if (fields->fDspGridSv)
// out << " gridLines = \"" << (int16_t)fields->fDspGridSv << "\"";
// if (!fields->fPrintGrid)
// out << " gridLinesSet = \"" << (int16_t)fields->fPrintGrid << "\"";
// out << " />" << std::endl;
// order matters for <customSheetViews/>
out << "<printOptions" << std::endl;
if (fields->fHorizontal)
out << " horizontalCentered = \"" << (int16_t)fields->fHorizontal << "\"";
if (fields->fVertical)
out << " verticalCentered = \"" << (int16_t)fields->fVertical << "\"";
if (fields->fPrintRwCol)
out << " headings = \"" << (int16_t)fields->fPrintRwCol << "\"";
if (fields->fDspGridSv)
out << " gridLines = \"" << (int16_t)fields->fDspGridSv << "\"";
if (!fields->fPrintGrid)
out << " gridLinesSet = \"" << (int16_t)fields->fPrintGrid << "\"";
out << " />" << std::endl;

break;
}
Expand Down
13 changes: 13 additions & 0 deletions tests/testthat/test-read_xlsb.R
Original file line number Diff line number Diff line change
Expand Up @@ -163,3 +163,16 @@ test_that("shared formulas are detected correctly", {
)

})

test_that("loading custom sheet view in xlsb files works", {

skip_online_checks()

fl <- testfile_path("custom_sheet_view.xlsb")

wb <- wb_load(fl)

exp <- "<customSheetViews><customSheetView guid=\"{101E93B2-5AEC-EA4B-A037-FE6837BBF571}\" view=\"pageLayout\"><selection pane=\"topLeft\" activeCell=\"E23\" sqref=\"E23\"/><pageMargins left=\"0.7\" right=\"0.7\" top=\"0.75\" bottom=\"0.75\" header=\"0.3\" footer=\"0.3\"/><printOptions gridLines=\"1\"/><pageSetup copies=\"1\" firstPageNumber=\"1\" fitToHeight=\"1\" fitToWidth=\"1\" horizontalDpi=\"300\" paperSize=\"9\" scale=\"100\" verticalDpi=\"300\"/></customSheetView><customSheetView guid=\"{29911425-4959-8744-A301-8A3516CFBBC5}\" view=\"pageLayout\"><selection pane=\"topLeft\" activeCell=\"A1\" sqref=\"A1\"/><pageMargins left=\"0.7\" right=\"0.7\" top=\"0.75\" bottom=\"0.75\" header=\"0.3\" footer=\"0.3\"/><printOptions gridLines=\"1\"/><pageSetup copies=\"1\" firstPageNumber=\"1\" fitToHeight=\"1\" fitToWidth=\"1\" horizontalDpi=\"300\" paperSize=\"9\" scale=\"100\" verticalDpi=\"300\"/></customSheetView><customSheetView guid=\"{E70FF13A-F852-294B-B121-766AE66F850F}\"><selection pane=\"topLeft\" activeCell=\"A1\" sqref=\"A1\"/><pageMargins left=\"0.7\" right=\"0.7\" top=\"0.75\" bottom=\"0.75\" header=\"0.3\" footer=\"0.3\"/><printOptions gridLines=\"1\"/><pageSetup copies=\"1\" firstPageNumber=\"1\" fitToHeight=\"1\" fitToWidth=\"1\" horizontalDpi=\"300\" paperSize=\"9\" scale=\"100\" verticalDpi=\"300\"/></customSheetView><customSheetView guid=\"{F233A099-1241-ED48-AE78-14CEAAA87C95}\" hiddenColumns=\"1\"><selection pane=\"topLeft\" activeCell=\"F1\" sqref=\"F1:K1048576\"/><pageMargins left=\"0.7\" right=\"0.7\" top=\"0.75\" bottom=\"0.75\" header=\"0.3\" footer=\"0.3\"/><printOptions gridLines=\"1\"/><pageSetup copies=\"1\" firstPageNumber=\"1\" fitToHeight=\"1\" fitToWidth=\"1\" horizontalDpi=\"300\" paperSize=\"9\" scale=\"100\" verticalDpi=\"300\"/></customSheetView><customSheetView guid=\"{16591214-8D55-8A47-82DC-2F7E1C7FCAA3}\" filter=\"1\" showAutoFilter=\"1\" hiddenColumns=\"1\"><selection pane=\"topLeft\" activeCell=\"A1\" sqref=\"A1\"/><pageMargins left=\"0.7\" right=\"0.7\" top=\"0.75\" bottom=\"0.75\" header=\"0.3\" footer=\"0.3\"/><printOptions gridLines=\"1\"/><pageSetup copies=\"1\" firstPageNumber=\"1\" fitToHeight=\"1\" fitToWidth=\"1\" horizontalDpi=\"300\" paperSize=\"9\" scale=\"100\" verticalDpi=\"300\"/><autoFilter ref=\"A1:K33\"><filterColumn colId=\"0\"><customFilters and=\"1\"><customFilter operator=\"greaterThan\" val=\"15\"/></customFilters></filterColumn></autoFilter></customSheetView><customSheetView guid=\"{AB8C9049-5EEE-4D41-AFBB-D05935ABA15C}\" filter=\"1\" showAutoFilter=\"1\" hiddenColumns=\"1\"><selection pane=\"topLeft\" activeCell=\"A1\" sqref=\"A1\"/><pageMargins left=\"0.7\" right=\"0.7\" top=\"0.75\" bottom=\"0.75\" header=\"0.3\" footer=\"0.3\"/><printOptions gridLines=\"1\"/><pageSetup copies=\"1\" firstPageNumber=\"1\" fitToHeight=\"1\" fitToWidth=\"1\" horizontalDpi=\"300\" paperSize=\"9\" scale=\"100\" verticalDpi=\"300\"/><autoFilter ref=\"A1:K33\"><filterColumn colId=\"1\"><filters blank=\"0\"><filter val=\"4\"/></filters></filterColumn></autoFilter></customSheetView></customSheetViews>"
got <- wb$worksheets[[1]]$customSheetViews
expect_equal(exp, got)
})

0 comments on commit 68c82bb

Please sign in to comment.