diff --git a/grobid-core/src/main/java/org/grobid/core/data/Figure.java b/grobid-core/src/main/java/org/grobid/core/data/Figure.java index 55e902076c..7e01152c32 100644 --- a/grobid-core/src/main/java/org/grobid/core/data/Figure.java +++ b/grobid-core/src/main/java/org/grobid/core/data/Figure.java @@ -341,7 +341,7 @@ public String toTEI(GrobidAnalysisConfig config, Document doc, TEIFormatter form if (config.isGenerateTeiCoordinates("figure")) { List theBoxes = null; // non graphic elements - if (getLayoutTokens() != null && getLayoutTokens().size() > 0) { + if (CollectionUtils.isNotEmpty(getLayoutTokens())) { theBoxes = BoundingBoxCalculator.calculate(getLayoutTokens()); } @@ -352,7 +352,7 @@ public String toTEI(GrobidAnalysisConfig config, Document doc, TEIFormatter form // here we bound all figure graphics in one single box (given that we can have hundred graphics // in a single figure) BoundingBox theGraphicsBox = null; - if ((graphicObjects != null) && (graphicObjects.size() > 0)) { + if (CollectionUtils.isNotEmpty(graphicObjects)) { for (GraphicObject graphicObject : graphicObjects) { if (theGraphicsBox == null) { theGraphicsBox = graphicObject.getBoundingBox(); @@ -455,6 +455,16 @@ public String toTEI(GrobidAnalysisConfig config, Document doc, TEIFormatter form figureElement.appendChild(desc); } + + if (CollectionUtils.isNotEmpty(discardedPiecesTokens)) { + for (List discardedPieceTokens : discardedPiecesTokens) { + Element note = XmlBuilderUtils.teiElement("note"); + note.addAttribute(new Attribute("type", "other")); + note.appendChild(LayoutTokensUtil.normalizeText(LayoutTokensUtil.toText(discardedPieceTokens)).trim()); + figureElement.appendChild(note); + } + } + if ((graphicObjects != null) && (graphicObjects.size() > 0)) { for (GraphicObject graphicObject : graphicObjects) { Element go = XmlBuilderUtils.teiElement("graphic"); diff --git a/grobid-core/src/main/java/org/grobid/core/data/Table.java b/grobid-core/src/main/java/org/grobid/core/data/Table.java index 7553c78139..9163b555dc 100644 --- a/grobid-core/src/main/java/org/grobid/core/data/Table.java +++ b/grobid-core/src/main/java/org/grobid/core/data/Table.java @@ -1,5 +1,6 @@ package org.grobid.core.data; +import org.apache.commons.collections4.CollectionUtils; import org.grobid.core.GrobidModels; import org.apache.commons.lang3.StringUtils; import org.grobid.core.data.table.Cell; @@ -254,8 +255,18 @@ public String toTEI(GrobidAnalysisConfig config, Document doc, TEIFormatter form tableElement.appendChild(desc); tableElement.appendChild(contentEl); - if (noteNode != null) + if (noteNode != null) { tableElement.appendChild(noteNode); + } + + if (CollectionUtils.isNotEmpty(discardedPiecesTokens)) { + for (List discardedPieceTokens : discardedPiecesTokens) { + Element note = XmlBuilderUtils.teiElement("note"); + note.addAttribute(new Attribute("type", "other")); + note.appendChild(LayoutTokensUtil.normalizeText(LayoutTokensUtil.toText(discardedPieceTokens)).trim()); + tableElement.appendChild(note); + } + } return tableElement.toXML(); } diff --git a/grobid-core/src/main/java/org/grobid/core/engines/FigureParser.java b/grobid-core/src/main/java/org/grobid/core/engines/FigureParser.java index 5ff7f386b9..39763f4f15 100644 --- a/grobid-core/src/main/java/org/grobid/core/engines/FigureParser.java +++ b/grobid-core/src/main/java/org/grobid/core/engines/FigureParser.java @@ -34,11 +34,7 @@ class FigureParser extends AbstractParser { public Figure processing(List tokenizationFigure, String featureVector) { String res; try { -//System.out.println("---------------------featureVector-----------------------"); -//System.out.println(featureVector); res = label(featureVector);; -//System.out.println("---------------------res-----------------------"); -//System.out.println(res); } catch (Exception e) { throw new GrobidException("Sequence labeling with figure model fails.", e); } diff --git a/grobid-core/src/main/java/org/grobid/core/engines/FullTextParser.java b/grobid-core/src/main/java/org/grobid/core/engines/FullTextParser.java index d75b15e603..15124b2eb0 100755 --- a/grobid-core/src/main/java/org/grobid/core/engines/FullTextParser.java +++ b/grobid-core/src/main/java/org/grobid/core/engines/FullTextParser.java @@ -268,9 +268,6 @@ else if (config.getConsolidateCitations() == 2) figure.setLabeledCaption(captionProcess.getLeft()); figure.setCaptionLayoutTokens(captionProcess.getRight()); } - if (CollectionUtils.isNotEmpty(figure.getDiscardedPiecesTokens())) { - resHeader.getDiscardedPiecesTokens().addAll(figure.getDiscardedPiecesTokens()); - } } long numberFiguresFulltextModel = Arrays.stream(bodyResults.split("\n")) @@ -324,9 +321,6 @@ else if (config.getConsolidateCitations() == 2) table.setLabeledNote(noteProcess.getLeft()); table.setNoteLayoutTokens(noteProcess.getRight()); } - if (CollectionUtils.isNotEmpty(table.getDiscardedPiecesTokens())) { - resHeader.getDiscardedPiecesTokens().addAll(table.getDiscardedPiecesTokens()); - } } equations = processEquations(bodyResults, bodyLayoutTokens.getTokenization(), doc); @@ -352,12 +346,13 @@ else if (config.getConsolidateCitations() == 2) // callout in superscript is by error labeled as a numerical reference callout) List markerTypes = null; - if (bodyResults != null){ + if (bodyResults != null) { markerTypes = postProcessCallout(bodyResults, bodyLayoutTokens); } // final combination - toTEI(doc, // document + toTEI( + doc, // document bodyResults, annexResults, // labeled data for body and annex bodyLayoutTokens, @@ -367,7 +362,8 @@ else if (config.getConsolidateCitations() == 2) tables, equations, markerTypes, - config); + config + ); return doc; } catch (GrobidException e) { throw e;