diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c529d5befe..ed9b0413be1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -55,6 +55,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve - In case the library contains empty entries, they are not written to disk. [#8645](https://github.com/JabRef/jabref/issues/8645) - The formatter `remove_unicode_ligatures` is now called `replace_unicode_ligatures`. [#9890](https://github.com/JabRef/jabref/pull/9890) + ### Fixed - We fixed an issue where the browser import would add ' characters before the BibTeX entry on Linux. [#9588](https://github.com/JabRef/jabref/issues/9588) diff --git a/build.gradle b/build.gradle index 34ab610c984..6877e3b17df 100644 --- a/build.gradle +++ b/build.gradle @@ -261,7 +261,6 @@ processResources { task generateSource(dependsOn: ["generateBstGrammarSource", "generateSearchGrammarSource", - "generateEndnoteSource", "generateCitaviSource"]) { group = 'JabRef' description 'Generates all necessary (Java) source files.' @@ -289,14 +288,7 @@ tasks.register("generateSearchGrammarSource", JavaExec) { args = ["-o","src-gen/main/java/org/jabref/search" , "-visitor", "-no-listener", "-package", "org.jabref.search", "$projectDir/src/main/antlr4/org/jabref/search/Search.g4"] } -task generateEndnoteSource(type: XjcTask) { - group = 'JabRef' - description = "Generates java files for the endnote importer." - schemaFile = "src/main/resources/xjc/endnote/endnote.xsd" - outputDirectory = "src-gen/main/java/" - javaPackage = "org.jabref.logic.importer.fileformat.endnote" -} task generateCitaviSource(type: XjcTask) { group = 'JabRef' diff --git a/src/main/java/org/jabref/logic/importer/fileformat/EndnoteXmlImporter.java b/src/main/java/org/jabref/logic/importer/fileformat/EndnoteXmlImporter.java index 8ba682156cc..9134f9288e6 100644 --- a/src/main/java/org/jabref/logic/importer/fileformat/EndnoteXmlImporter.java +++ b/src/main/java/org/jabref/logic/importer/fileformat/EndnoteXmlImporter.java @@ -7,61 +7,38 @@ import java.net.MalformedURLException; import java.net.URL; import java.nio.charset.StandardCharsets; +import java.util.ArrayList; import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Locale; +import java.util.Map; import java.util.Objects; -import java.util.Optional; -import java.util.stream.Collectors; +import javax.xml.XMLConstants; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamReader; +import javax.xml.stream.events.XMLEvent; import org.jabref.logic.importer.ImportFormatPreferences; import org.jabref.logic.importer.Importer; +import org.jabref.logic.importer.ParseException; import org.jabref.logic.importer.Parser; import org.jabref.logic.importer.ParserResult; -import org.jabref.logic.importer.fileformat.endnote.Abstract; -import org.jabref.logic.importer.fileformat.endnote.Authors; -import org.jabref.logic.importer.fileformat.endnote.Contributors; -import org.jabref.logic.importer.fileformat.endnote.Dates; -import org.jabref.logic.importer.fileformat.endnote.ElectronicResourceNum; -import org.jabref.logic.importer.fileformat.endnote.Isbn; -import org.jabref.logic.importer.fileformat.endnote.Keyword; -import org.jabref.logic.importer.fileformat.endnote.Keywords; -import org.jabref.logic.importer.fileformat.endnote.Label; -import org.jabref.logic.importer.fileformat.endnote.Notes; -import org.jabref.logic.importer.fileformat.endnote.Number; -import org.jabref.logic.importer.fileformat.endnote.Pages; -import org.jabref.logic.importer.fileformat.endnote.PdfUrls; -import org.jabref.logic.importer.fileformat.endnote.Publisher; -import org.jabref.logic.importer.fileformat.endnote.Record; -import org.jabref.logic.importer.fileformat.endnote.RefType; -import org.jabref.logic.importer.fileformat.endnote.RelatedUrls; -import org.jabref.logic.importer.fileformat.endnote.SecondaryTitle; -import org.jabref.logic.importer.fileformat.endnote.Style; -import org.jabref.logic.importer.fileformat.endnote.Title; -import org.jabref.logic.importer.fileformat.endnote.Titles; -import org.jabref.logic.importer.fileformat.endnote.Url; -import org.jabref.logic.importer.fileformat.endnote.Urls; -import org.jabref.logic.importer.fileformat.endnote.Volume; -import org.jabref.logic.importer.fileformat.endnote.Xml; -import org.jabref.logic.importer.fileformat.endnote.Year; import org.jabref.logic.util.StandardFileType; import org.jabref.model.entry.BibEntry; +import org.jabref.model.entry.KeywordList; import org.jabref.model.entry.LinkedFile; +import org.jabref.model.entry.field.Field; import org.jabref.model.entry.field.StandardField; import org.jabref.model.entry.field.UnknownField; import org.jabref.model.entry.types.EntryType; import org.jabref.model.entry.types.IEEETranEntryType; import org.jabref.model.entry.types.StandardEntryType; import org.jabref.model.strings.StringUtil; -import org.jabref.model.util.OptionalUtil; -import jakarta.xml.bind.JAXBContext; -import jakarta.xml.bind.JAXBException; -import jakarta.xml.bind.Unmarshaller; +import com.google.common.base.Joiner; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -74,12 +51,15 @@ public class EndnoteXmlImporter extends Importer implements Parser { private static final Logger LOGGER = LoggerFactory.getLogger(EndnoteXmlImporter.class); private final ImportFormatPreferences preferences; - private Unmarshaller unmarshaller; public EndnoteXmlImporter(ImportFormatPreferences preferences) { this.preferences = preferences; } + private static String join(List list, String string) { + return Joiner.on(string).join(list); + } + @Override public String getName() { return "EndNote XML"; @@ -115,47 +95,110 @@ public boolean isRecognizedFormat(BufferedReader reader) throws IOException { } @Override - public ParserResult importDatabase(BufferedReader reader) throws IOException { - Objects.requireNonNull(reader); + public ParserResult importDatabase(BufferedReader input) throws IOException { + Objects.requireNonNull(input); + + List bibItems = new ArrayList<>(); + try { - Object unmarshalledObject = unmarshallRoot(reader); - if (unmarshalledObject instanceof Xml root) { - // We have an article set, an article, a book article or a book article set - List bibEntries = root - .getRecords().getRecord() - .stream() - .map(this::parseRecord) - .collect(Collectors.toList()); - return new ParserResult(bibEntries); - } else { - return ParserResult.fromErrorMessage("File does not start with xml tag."); + XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance(); + + // prevent xxe (https://rules.sonarsource.com/java/RSPEC-2755) + xmlInputFactory.setProperty(XMLConstants.ACCESS_EXTERNAL_SCHEMA, ""); + // required for reading Unicode characters such as ö + xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, true); + + XMLStreamReader reader = xmlInputFactory.createXMLStreamReader(input); + + while (reader.hasNext()) { + reader.next(); + if (isStartXMLEvent(reader)) { + String elementName = reader.getName().getLocalPart(); + if ("record".equals(elementName)) { + parseRecord(reader, bibItems, elementName); + } + } } - } catch (JAXBException | XMLStreamException e) { + } catch (XMLStreamException e) { LOGGER.debug("could not parse document", e); return ParserResult.fromError(e); } + return new ParserResult(bibItems); } - private Object unmarshallRoot(BufferedReader reader) throws XMLStreamException, JAXBException { - initUnmarshaller(); - - XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory(); - XMLStreamReader xmlStreamReader = xmlInputFactory.createXMLStreamReader(reader); - - // Go to the root element - while (!xmlStreamReader.isStartElement()) { - xmlStreamReader.next(); + private void parseRecord(XMLStreamReader reader, List bibItems, String startElement) + throws XMLStreamException { + + Map fields = new HashMap<>(); + EntryType entryType = StandardEntryType.Article; + + KeywordList keywordList = new KeywordList(); + List linkedFiles = new ArrayList<>(); + + while (reader.hasNext()) { + reader.next(); + if (isStartXMLEvent(reader)) { + String elementName = reader.getName().getLocalPart(); + switch (elementName) { + case "ref-type" -> { + String type = reader.getAttributeValue(null, "name"); + entryType = convertRefNameToType(type); + } + case "contributors" -> { + handleAuthorList(reader, fields, elementName); + } + case "titles" -> { + handleTitles(reader, fields, elementName); + } + case "pages" -> { + parseStyleContent(reader, fields, StandardField.PAGES, elementName); + } + case "volume" -> { + parseStyleContent(reader, fields, StandardField.VOLUME, elementName); + } + case "number" -> { + parseStyleContent(reader, fields, StandardField.NUMBER, elementName); + } + case "dates" -> { + parseYear(reader, fields); + } + case "notes" -> { + parseStyleContent(reader, fields, StandardField.NOTE, elementName); + } + case "urls" -> { + handleUrlList(reader, fields, linkedFiles); + } + case "keywords" -> { + handleKeywordsList(reader, keywordList, elementName); + } + case "abstract" -> { + parseStyleContent(reader, fields, StandardField.ABSTRACT, elementName); + } + case "isbn" -> { + parseStyleContent(reader, fields, StandardField.ISBN, elementName); + } + case "electronic-resource-num" -> { + parseStyleContent(reader, fields, StandardField.DOI, elementName); + } + case "publisher" -> { + parseStyleContent(reader, fields, StandardField.PUBLISHER, elementName); + } + case "label" -> { + parseStyleContent(reader, fields, new UnknownField("endnote-label"), elementName); + } + } + } + if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) { + break; + } } - return unmarshaller.unmarshal(xmlStreamReader); - } + BibEntry entry = new BibEntry(entryType); + entry.putKeywords(keywordList, preferences.bibEntryPreferences().getKeywordSeparator()); - private void initUnmarshaller() throws JAXBException { - if (unmarshaller == null) { - // Lazy init because this is expensive - JAXBContext context = JAXBContext.newInstance("org.jabref.logic.importer.fileformat.endnote"); - unmarshaller = context.createUnmarshaller(); - } + entry.setField(fields); + entry.setFiles(linkedFiles); + bibItems.add(entry); } private static EntryType convertRefNameToType(String refName) { @@ -170,156 +213,258 @@ private static EntryType convertRefNameToType(String refName) { }; } - private BibEntry parseRecord(Record endNoteRecord) { - BibEntry entry = new BibEntry(); - - entry.setType(getType(endNoteRecord)); - Optional.ofNullable(getAuthors(endNoteRecord)) - .ifPresent(value -> entry.setField(StandardField.AUTHOR, value)); - Optional.ofNullable(endNoteRecord.getTitles()) - .map(Titles::getTitle) - .map(Title::getStyle) - .map(this::mergeStyleContents) - .ifPresent(value -> entry.setField(StandardField.TITLE, clean(value))); - Optional.ofNullable(endNoteRecord.getTitles()) - .map(Titles::getSecondaryTitle) - .map(SecondaryTitle::getStyle) - .map(Style::getContent) - .ifPresent(value -> entry.setField(StandardField.JOURNAL, clean(value))); - Optional.ofNullable(endNoteRecord.getPages()) - .map(Pages::getStyle) - .map(Style::getContent) - .ifPresent(value -> entry.setField(StandardField.PAGES, value)); - Optional.ofNullable(endNoteRecord.getNumber()) - .map(Number::getStyle) - .map(Style::getContent) - .ifPresent(value -> entry.setField(StandardField.NUMBER, value)); - Optional.ofNullable(endNoteRecord.getVolume()) - .map(Volume::getStyle) - .map(Style::getContent) - .ifPresent(value -> entry.setField(StandardField.VOLUME, value)); - Optional.ofNullable(endNoteRecord.getDates()) - .map(Dates::getYear) - .map(Year::getStyle) - .map(Style::getContent) - .ifPresent(value -> entry.setField(StandardField.YEAR, value)); - Optional.ofNullable(endNoteRecord.getNotes()) - .map(Notes::getStyle) - .map(Style::getContent) - .ifPresent(value -> entry.setField(StandardField.NOTE, value.trim())); - getUrl(endNoteRecord) - .ifPresent(value -> entry.setField(StandardField.URL, value)); - entry.putKeywords(getKeywords(endNoteRecord), preferences.bibEntryPreferences().getKeywordSeparator()); - Optional.ofNullable(endNoteRecord.getAbstract()) - .map(Abstract::getStyle) - .map(Style::getContent) - .ifPresent(value -> entry.setField(StandardField.ABSTRACT, value.trim())); - entry.setFiles(getLinkedFiles(endNoteRecord)); - Optional.ofNullable(endNoteRecord.getIsbn()) - .map(Isbn::getStyle) - .map(Style::getContent) - .ifPresent(value -> entry.setField(StandardField.ISBN, clean(value))); - Optional.ofNullable(endNoteRecord.getElectronicResourceNum()) - .map(ElectronicResourceNum::getStyle) - .map(Style::getContent) - .ifPresent(doi -> entry.setField(StandardField.DOI, doi.trim())); - Optional.ofNullable(endNoteRecord.getPublisher()) - .map(Publisher::getStyle) - .map(Style::getContent) - .ifPresent(value -> entry.setField(StandardField.PUBLISHER, value)); - Optional.ofNullable(endNoteRecord.getLabel()) - .map(Label::getStyle) - .map(Style::getContent) - .ifPresent(value -> entry.setField(new UnknownField("endnote-label"), value)); - - return entry; + private void handleAuthorList(XMLStreamReader reader, Map fields, String startElement) throws XMLStreamException { + List authorNames = new ArrayList<>(); + + while (reader.hasNext()) { + reader.next(); + if (isStartXMLEvent(reader)) { + String elementName = reader.getName().getLocalPart(); + switch (elementName) { + case "author" -> { + parseAuthor(reader, authorNames); + } + } + } + + if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) { + break; + } + } + fields.put(StandardField.AUTHOR, join(authorNames, " and ")); + } + + private void parseAuthor(XMLStreamReader reader, List authorNames) throws XMLStreamException { + + while (reader.hasNext()) { + reader.next(); + if (isStartXMLEvent(reader)) { + String elementName = reader.getName().getLocalPart(); + switch (elementName) { + case "style" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + authorNames.add(reader.getText()); + } + } + } + } + + if (isEndXMLEvent(reader) && "author".equals(reader.getName().getLocalPart())) { + break; + } + } } - private EntryType getType(Record endNoteRecord) { - return Optional.ofNullable(endNoteRecord.getRefType()) - .map(RefType::getName) - .map(EndnoteXmlImporter::convertRefNameToType) - .orElse(StandardEntryType.Article); + private void parseStyleContent(XMLStreamReader reader, Map fields, Field field, String elementName) throws XMLStreamException { + while (reader.hasNext()) { + reader.next(); + if (isStartXMLEvent(reader)) { + String tag = reader.getName().getLocalPart(); + if ("style".equals(tag)) { + reader.next(); + if (isCharacterXMLEvent(reader)) { + if ("abstract".equals(elementName) || "electronic-resource-num".equals(elementName) || "notes".equals(elementName)) { + putIfValueNotNull(fields, field, reader.getText().trim()); + } else if ("isbn".equals(elementName) || "secondary-title".equals(elementName)) { + putIfValueNotNull(fields, field, clean(reader.getText())); + } else { + putIfValueNotNull(fields, field, reader.getText()); + } + } + } + } + if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(elementName)) { + break; + } + } } - private List getLinkedFiles(Record endNoteRecord) { - Optional urls = Optional.ofNullable(endNoteRecord.getUrls()) - .map(Urls::getPdfUrls); - return OptionalUtil.toStream(urls) - .flatMap(pdfUrls -> pdfUrls.getUrl().stream()) - .flatMap(url -> OptionalUtil.toStream(getUrlValue(url))) - .map(url -> { - try { - return new LinkedFile(new URL(url), "PDF"); - } catch (MalformedURLException e) { - LOGGER.info("Unable to parse {}", url); - return null; - } - }) - .collect(Collectors.toList()); + private void parseYear(XMLStreamReader reader, Map fields) throws XMLStreamException { + while (reader.hasNext()) { + reader.next(); + if (isStartXMLEvent(reader)) { + String elementName = reader.getName().getLocalPart(); + switch (elementName) { + case "style" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + putIfValueNotNull(fields, StandardField.YEAR, reader.getText()); + } + } + } + } + + if (isEndXMLEvent(reader) && "year".equals(reader.getName().getLocalPart())) { + break; + } + } + } + + private void handleKeywordsList(XMLStreamReader reader, KeywordList keywordList, String startElement) throws XMLStreamException { + + while (reader.hasNext()) { + reader.next(); + if (isStartXMLEvent(reader)) { + String elementName = reader.getName().getLocalPart(); + switch (elementName) { + case "keyword" -> { + parseKeyword(reader, keywordList); + } + } + } + if (isEndXMLEvent(reader) && reader.getName().getLocalPart().equals(startElement)) { + break; + } + } } - private Optional getUrl(Record endNoteRecord) { - Optional urls = Optional.ofNullable(endNoteRecord.getUrls()) - .map(Urls::getRelatedUrls); - return OptionalUtil.toStream(urls) - .flatMap(url -> url.getUrl().stream()) - .flatMap(url -> OptionalUtil.toStream(getUrlValue(url))) - .findFirst(); + private void parseKeyword(XMLStreamReader reader, KeywordList keywordList) throws XMLStreamException { + + while (reader.hasNext()) { + reader.next(); + if (isStartXMLEvent(reader)) { + String elementName = reader.getName().getLocalPart(); + switch (elementName) { + case "style" -> { + reader.next(); + if (isCharacterXMLEvent(reader)) { + if (reader.getText() != null) { + keywordList.add(reader.getText()); + } + } + } + } + } + + if (isEndXMLEvent(reader) && "keyword".equals(reader.getName().getLocalPart())) { + break; + } + } } - private String mergeStyleContents(List