Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MCR-3198 event handler for merging duplicate categories #2267

Open
wants to merge 19 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1026,7 +1026,7 @@ public Source resolve(String href, String base) {
}
} catch (Exception ex) {
LOGGER.info("MCRNotNullResolver caught exception: {}", ex.getLocalizedMessage());
LOGGER.debug(ex.getStackTrace());
LOGGER.debug(ex);
LOGGER.debug("MCRNotNullResolver returning empty xml");
return new JDOMSource(new Element("null"));
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
package org.mycore.mods.merger;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jdom2.Element;
import org.mycore.common.events.MCREvent;
import org.mycore.common.events.MCREventHandlerBase;
import org.mycore.datamodel.metadata.MCRObject;
import org.mycore.mods.MCRMODSSorter;
import org.mycore.mods.MCRMODSWrapper;
import org.mycore.mods.classification.MCRClassMapper;

import java.util.List;

/**
* Checks for and removes redundant classifications in Mods-Documents. If a classification category and
* the classification's child category are both present in the document, the parent classification will
* be removed. The processed document will be finally be sorted using {@link MCRMODSSorter}.
*/
/**
 * Event handler that removes redundant classification categories from the MODS of an object.
 * <p>
 * On create, update and repair events every direct child of the MODS root for which
 * {@link MCRClassMapper#getCategoryID(Element)} yields a category is compared pairwise with the others.
 * Whenever {@link MCRCategoryMerger#getElementWithParentCategory(Element, Element)} identifies one element
 * of a pair as the parent category of the other, that parent element is detached from the document.
 * Afterwards the MODS is sorted using {@link MCRMODSSorter}.
 */
public class MCRCategoryMergeEventHandler extends MCREventHandlerBase {

    private static final Logger LOGGER = LogManager.getLogger(MCRCategoryMergeEventHandler.class);

    @Override
    protected void handleObjectCreated(MCREvent evt, MCRObject obj) {
        mergeCategories(obj);
    }

    @Override
    protected void handleObjectUpdated(MCREvent evt, MCRObject obj) {
        mergeCategories(obj);
    }

    @Override
    protected void handleObjectRepaired(MCREvent evt, MCRObject obj) {
        mergeCategories(obj);
    }

    /**
     * Detaches every category element of the object's MODS whose category is a parent of another
     * category element in the same MODS, then sorts the MODS. Does nothing if the object has no MODS.
     *
     * @param obj the object whose MODS should be cleaned up
     */
    private void mergeCategories(MCRObject obj) {
        Element mods = new MCRMODSWrapper(obj).getMODS();
        if (mods == null) {
            return;
        }
        LOGGER.info("merge redundant classification categories for {}", obj.getId());

        // toList() takes a snapshot, so detaching elements below does not disturb the iteration
        List<Element> categoryElements = mods.getChildren().stream()
            .filter(element -> MCRClassMapper.getCategoryID(element) != null).toList();

        for (int i = 0; i < categoryElements.size(); i++) {
            Element first = categoryElements.get(i);
            if (first.getParent() == null) {
                // already removed as a parent category; its descendants cover the remaining comparisons
                continue;
            }
            for (int j = i + 1; j < categoryElements.size(); j++) {
                Element second = categoryElements.get(j);
                if (second.getParent() == null) {
                    continue;
                }
                Element parentElement = MCRCategoryMerger.getElementWithParentCategory(first, second);
                if (parentElement == null) {
                    continue;
                }
                parentElement.detach();
                if (parentElement == first) {
                    // nothing left to compare against once the outer element itself is gone
                    break;
                }
            }
        }

        MCRMODSSorter.sort(mods);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,11 @@
package org.mycore.mods.merger;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Optional;

import org.jdom2.Element;
import org.mycore.common.config.MCRConfiguration2;
import org.mycore.datamodel.classifications2.MCRCategory;
import org.mycore.datamodel.classifications2.MCRCategoryDAO;
Expand Down Expand Up @@ -88,12 +91,39 @@ static boolean oneIsDescendantOfTheOther(MCRCategoryID idThis, MCRCategoryID idO
}

/**
 * Builds a list of the given category and its parents, without the classification's root category.
 *
 * @param categoryID the ID of the category to start from
 * @return a new mutable list holding the parents reported by the DAO (minus the root category),
 *         followed by the category itself
 */
private static List<MCRCategory> getAncestorsAndSelf(MCRCategoryID categoryID) {
    // guard against the DAO returning null for the parents: fall back to an empty list
    List<MCRCategory> ancestorsAndSelf = new ArrayList<>(
        Optional.ofNullable(DAO.getParents(categoryID)).orElse(Collections.emptyList()));
    ancestorsAndSelf.remove(DAO.getRootCategory(categoryID, 0));
    ancestorsAndSelf.add(DAO.getCategory(categoryID, 0));
    return ancestorsAndSelf;
}

/**
 * Determines which of two category {@link Element Elements}, if any, carries the parent category
 * of the other.
 * <p>
 * Returns {@code null} when either element cannot be mapped to a category ID, when both belong to
 * the same classification and the configuration property {@code CONFIG_PREFIX + rootID} is
 * {@code false}, when both map to the same category, or when neither category is a descendant
 * of the other.
 *
 * @param element1 first Element to compare
 * @param element2 second Element to compare
 * @return the Element holding the parent category, or null if there is none
 */
public static Element getElementWithParentCategory(Element element1, Element element2) {
    MCRCategoryID firstId = MCRClassMapper.getCategoryID(element1);
    MCRCategoryID secondId = MCRClassMapper.getCategoryID(element2);
    if (firstId == null || secondId == null) {
        return null;
    }

    // the configuration is only consulted when both categories share a classification
    boolean sameClassification = firstId.getRootID().equals(secondId.getRootID());
    if (sameClassification
        && !MCRConfiguration2.getBoolean(CONFIG_PREFIX + firstId.getRootID()).orElse(true)) {
        return null;
    }

    if (firstId.equals(secondId)) {
        return null;
    }
    if (!oneIsDescendantOfTheOther(firstId, secondId)) {
        return null;
    }

    // if the first category's lineage contains the whole lineage of the second,
    // the second one is the ancestor
    List<MCRCategory> firstLineage = getAncestorsAndSelf(firstId);
    List<MCRCategory> secondLineage = getAncestorsAndSelf(secondId);
    if (firstLineage.containsAll(secondLineage)) {
        return element2;
    } else {
        return element1;
    }
}

@Override
public void mergeFrom(MCRMerger other) {
MCRCategoryMerger cmo = (MCRCategoryMerger) other;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
MCR.Metadata.Type.mods=true
MCR.Metadata.ShareAgent.mods=org.mycore.mods.MCRMODSMetadataShareAgent
MCR.EventHandler.MCRObject.040.Class=org.mycore.mods.MCRMODSLinksEventHandler
# MCR.EventHandler.MCRObject.016a.Class=org.mycore.mods.merger.MCRCategoryMergeEventHandler
MCR.MODS.NewObjectType=mods
MCR.MODS.Types=mods

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
package org.mycore.mods.merger;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.input.SAXBuilder;
import org.junit.Test;
import org.mycore.common.MCRConstants;
import org.mycore.common.MCRJPATestCase;
import org.mycore.common.MCRSessionMgr;
import org.mycore.common.MCRTransactionHelper;
import org.mycore.common.content.MCRJDOMContent;
import org.mycore.datamodel.classifications2.MCRCategory;
import org.mycore.datamodel.classifications2.MCRCategoryDAO;
import org.mycore.datamodel.classifications2.MCRCategoryDAOFactory;
import org.mycore.datamodel.classifications2.utils.MCRXMLTransformer;
import org.mycore.datamodel.metadata.MCRObject;
import org.mycore.mods.MCRMODSWrapper;

import java.io.IOException;
import java.net.URISyntaxException;
import java.util.List;

import static org.junit.Assert.assertEquals;

public class MCRCategoryMergeEventHandlerTest extends MCRJPATestCase {

public static final String TEST_DIRECTORY = "MCRCategoryMergeEventHandlerTest/";

private static final Logger LOGGER = LogManager.getLogger();

public MCRCategoryDAO getDAO() {
return MCRCategoryDAOFactory.getInstance();
}

@Override
public void setUp() throws Exception {
super.setUp();
}

/**
* Tests if parent genres are succesfully removed from mods.
*/
@Test
public void testHandleObjectCreatedMultipleGenres() throws IOException, JDOMException, URISyntaxException {

MCRSessionMgr.getCurrentSession();
MCRTransactionHelper.isTransactionActive();
ClassLoader classLoader = getClass().getClassLoader();
SAXBuilder saxBuilder = new SAXBuilder();

MCRCategory category = MCRXMLTransformer
.getCategory(saxBuilder.build(classLoader.getResourceAsStream(TEST_DIRECTORY + "genre.xml")));
erodde marked this conversation as resolved.
Show resolved Hide resolved
getDAO().addCategory(null, category);

Document document = saxBuilder.build(classLoader.getResourceAsStream(TEST_DIRECTORY + "testMods.xml"));
MCRObject mcro = new MCRObject();

MCRMODSWrapper mw = new MCRMODSWrapper(mcro);
mw.setMODS(document.getRootElement().detach());
mw.setID("junit", 1);

MCRCategoryMergeEventHandler mergeEventHandler = new MCRCategoryMergeEventHandler();
mergeEventHandler.handleObjectCreated(null, mcro);
Document xml = mcro.createXML();

LOGGER.info(new MCRJDOMContent(xml).asString());

List<Element> genres = mw.getMODS().getChildren("genre", MCRConstants.MODS_NAMESPACE);
assertEquals(2, genres.size());
String url = genres.get(0).getAttribute("valueURI").getValue();
String genre = url.substring(url.indexOf('#') + 1);
assertEquals("book", genre);

url = genres.get(1).getAttribute("valueURI").getValue();
genre = url.substring(url.indexOf('#') + 1);
assertEquals("subchapter", genre);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
<?xml version="1.0" encoding="UTF-8"?>
<mycoreclass xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="MCRClassification.xsd" ID="mir_genres">
<label xml:lang="en" text="genre" description="A list of publication types" />
<label xml:lang="x-uri" text="http://www.mycore.org/classifications/mir_genres" />
<label xml:lang="de" text="Genre" description="Liste der Publikationsarten" />
<categories>
<category ID="article" counter="1">
<label xml:lang="en" text="Article / Chapter" />
<label xml:lang="x-mapping" text="diniPublType:article" />
<label xml:lang="de" text="Artikel / Aufsatz" />
<label xml:lang="x-hosts" text="journal newspaper collection festschrift proceedings standalone" />
<category ID="chapter" counter="0">
<label xml:lang="en" text="Book chapter" />
<label xml:lang="de" text="Buchkapitel" />
<label xml:lang="x-hosts" text="book collection" />
<category ID="subchapter" counter="0">
<label xml:lang="en" text="Book subchapter" />
<label xml:lang="de" text="Buchunterkapitel" />
<label xml:lang="x-hosts" text="book collection" />
</category>
</category>
<category ID="entry" counter="0">
<label xml:lang="en" text="Encyclopedia entry" />
<label xml:lang="x-hosts" text="lexicon" />
<label xml:lang="de" text="Lexikoneintrag" />
</category>
<category ID="preface" counter="0">
<label xml:lang="de" text="Vorwort / Nachwort" />
<label xml:lang="x-hosts" text="journal collection festschrift proceedings lexicon" />
<label xml:lang="en" text="Preface (foreword) / Postface" />
</category>
<category ID="speech" counter="0">
<label xml:lang="en" text="Lecture / Speech" />
<label xml:lang="de" text="Vortrag" />
<label xml:lang="x-hosts" text="proceedings standalone" />
</category>
<category ID="review" counter="0">
<label xml:lang="de" text="Rezension" />
<label xml:lang="en" text="Review" />
<label xml:lang="x-hosts" text="journal newspaper collection festschrift proceedings standalone" />
</category>
</category>
<category ID="thesis" counter="2">
<label xml:lang="en" text="Thesis" />
<label xml:lang="de" text="Hochschulschriften" />
<label xml:lang="x-hosts" text="series standalone" />
<label xml:lang="x-editor" text="false" />
<category ID="exam" counter="0">
<label xml:lang="de" text="Examensarbeit" />
<label xml:lang="en" text="Exam" />
<label xml:lang="x-hosts" text="series standalone" />
</category>
<category ID="dissertation" counter="0">
<label xml:lang="de" text="Dissertation" />
<label xml:lang="x-hosts" text="series standalone" />
<label xml:lang="en" text="Dissertation" />
</category>
<category ID="habilitation" counter="0">
<label xml:lang="de" text="Habilitation" />
<label xml:lang="x-hosts" text="series standalone" />
<label xml:lang="en" text="Habilitation" />
</category>
<category ID="diploma_thesis" counter="0">
<label xml:lang="x-hosts" text="series standalone" />
<label xml:lang="en" text="Diploma thesis" />
<label xml:lang="de" text="Diplomarbeit" />
</category>
<category ID="master_thesis" counter="0">
<label xml:lang="x-mapping" text="diniPublType:masterThesis" />
<label xml:lang="en" text="Master thesis" />
<label xml:lang="de" text="Abschlussarbeit (Master)" />
<label xml:lang="x-hosts" text="series standalone" />
</category>
<category ID="bachelor_thesis" counter="2">
<label xml:lang="x-mapping" text="diniPublType:StudyThesis" />
<label xml:lang="de" text="Abschlussarbeit (Bachelor)" />
<label xml:lang="en" text="Bachelor thesis" />
<label xml:lang="x-hosts" text="series standalone" />
</category>
<category ID="student_resarch_project" counter="0">
<label xml:lang="en" text="Student research project" />
<label xml:lang="x-hosts" text="series standalone" />
<label xml:lang="de" text="Studienarbeit" />
</category>
<category ID="magister_thesis" counter="0">
<label xml:lang="en" text="Magister thesis" />
<label xml:lang="de" text="Magisterarbeit" />
<label xml:lang="x-hosts" text="series standalone" />
</category>
</category>
<category ID="collection" counter="0">
<label xml:lang="de" text="Sammelwerk" />
<label xml:lang="x-hosts" text="series standalone" />
<label xml:lang="en" text="Collection" />
<category ID="festschrift" counter="0">
<label xml:lang="en" text="Festschrift" />
<label xml:lang="x-hosts" text="series standalone" />
<label xml:lang="de" text="Festschrift" />
</category>
<category ID="proceedings" counter="0">
<label xml:lang="en" text="Proceedings" />
<label xml:lang="de" text="Tagungsband" />
<label xml:lang="x-hosts" text="series standalone" />
</category>
<category ID="lexicon" counter="0">
<label xml:lang="x-hosts" text="series standalone" />
<label xml:lang="en" text="Lexicon" />
<label xml:lang="de" text="Lexikon" />
</category>
</category>
<category ID="report" counter="0">
<label xml:lang="x-hosts" text="standalone" />
<label xml:lang="de" text="Report" />
<label xml:lang="en" text="Report" />
<category ID="research_results" counter="0">
<label xml:lang="x-hosts" text="standalone" />
<label xml:lang="en" text="Research Results" />
<label xml:lang="de" text="Forschungsergebnisse" />
</category>
<category ID="in_house" counter="0">
<label xml:lang="x-hosts" text="standalone" />
<label xml:lang="en" text="In house" />
<label xml:lang="de" text="Hausinterne Veröffentlichung" />
</category>
<category ID="press_release" counter="0">
<label xml:lang="x-hosts" text="standalone" />
<label xml:lang="de" text="Presseerklärung" />
<label xml:lang="en" text="Press release" />
</category>
<category ID="declaration" counter="0">
<label xml:lang="x-hosts" text="standalone" />
<label xml:lang="de" text="Fachliche Stellungnahme" />
<label xml:lang="en" text="Professional declaration" />
</category>
</category>
<category ID="teaching_material" counter="0">
<label xml:lang="de" text="Lehrmaterial" />
<label xml:lang="x-hosts" text="standalone lecture" />
<label xml:lang="en" text="Teaching Resource" />
<category ID="lecture_resource" counter="0">
<label xml:lang="x-hosts" text="standalone lecture" />
<label xml:lang="de" text="Vorlesungsmaterial" />
<label xml:lang="en" text="Lecture Resource" />
</category>
<category ID="course_resources" counter="0">
<label xml:lang="x-hosts" text="standalone lecture" />
<label xml:lang="de" text="Kurs- und Seminarmaterial" />
<label xml:lang="en" text="Course Resources" />
</category>
</category>
<category ID="book" counter="0">
<label xml:lang="de" text="Buch" />
<label xml:lang="x-hosts" text="series standalone" />
<label xml:lang="en" text="Book" />
</category>
</categories>
</mycoreclass>
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<?xml version="1.0" encoding="UTF-8"?>
<mods:mods xmlns:mods="http://www.loc.gov/mods/v3" xmlns:xlink="http://www.w3.org/1999/xlink">
<mods:genre type="intern" authorityURI="http://www.mycore.org/classifications/mir_genres" valueURI="http://www.mycore.org/classifications/mir_genres#chapter"/>
<mods:genre type="intern" authorityURI="http://www.mycore.org/classifications/mir_genres" valueURI="http://www.mycore.org/classifications/mir_genres#article"/>
<mods:genre type="intern" authorityURI="http://www.mycore.org/classifications/mir_genres" valueURI="http://www.mycore.org/classifications/mir_genres#book"/>
<mods:genre type="intern" authorityURI="http://www.mycore.org/classifications/mir_genres" valueURI="http://www.mycore.org/classifications/mir_genres#subchapter"/>
<mods:titleInfo xml:lang="de" xlink:type="simple">
<mods:title>Test-Titel</mods:title>
</mods:titleInfo>
<mods:relatedItem type="series" xlink:href="host">
<mods:classification authority="sdnb" displayLabel="sdnb">000</mods:classification>
<mods:genre type="intern" authorityURI="http://www.mycore.org/classifications/mir_genres" valueURI="http://www.mycore.org/classifications/mir_genres#collection"/>
</mods:relatedItem>
<mods:language>
<mods:languageTerm authority="rfc5646" type="code">de</mods:languageTerm>
</mods:language>
<mods:accessCondition type="use and reproduction" xlink:type="simple">cc_by-nc</mods:accessCondition>
<mods:typeOfResource>text</mods:typeOfResource>
</mods:mods>
Loading