From 05dc50e6c1460830405e7cb610a9e93a24b2244d Mon Sep 17 00:00:00 2001 From: Daniel Berthereau Date: Mon, 19 Feb 2024 00:00:00 +0000 Subject: [PATCH 1/4] Added an index is_public on fulltext_search. --- application/data/install/schema.sql | 1 + ...0240219000003_AddIndexFullTextIsPublic.php | 21 +++++++++++++++++++ application/src/Entity/FulltextSearch.php | 1 + 3 files changed, 23 insertions(+) create mode 100644 application/data/migrations/20240219000003_AddIndexFullTextIsPublic.php diff --git a/application/data/install/schema.sql b/application/data/install/schema.sql index 5aeaf9fe02..2db3c4c48e 100644 --- a/application/data/install/schema.sql +++ b/application/data/install/schema.sql @@ -33,6 +33,7 @@ CREATE TABLE `fulltext_search` ( `text` longtext COLLATE utf8mb4_unicode_ci, PRIMARY KEY (`id`,`resource`), KEY `IDX_AA31FE4A7E3C61F9` (`owner_id`), + KEY `is_public` (`is_public`), FULLTEXT KEY `IDX_AA31FE4A2B36786B3B8BA7C7` (`title`,`text`), CONSTRAINT `FK_AA31FE4A7E3C61F9` FOREIGN KEY (`owner_id`) REFERENCES `user` (`id`) ON DELETE SET NULL ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; diff --git a/application/data/migrations/20240219000003_AddIndexFullTextIsPublic.php b/application/data/migrations/20240219000003_AddIndexFullTextIsPublic.php new file mode 100644 index 0000000000..8f445bf46a --- /dev/null +++ b/application/data/migrations/20240219000003_AddIndexFullTextIsPublic.php @@ -0,0 +1,21 @@ +executeStatement($sql); + } catch (\Exception $e) { + // Index exists. 
+ } + } +} diff --git a/application/src/Entity/FulltextSearch.php b/application/src/Entity/FulltextSearch.php index 469db62225..fb887eeef5 100644 --- a/application/src/Entity/FulltextSearch.php +++ b/application/src/Entity/FulltextSearch.php @@ -5,6 +5,7 @@ * @Entity * @Table( * indexes={ + * @Index(name="is_public", columns={"is_public"}), * @Index(columns={"title", "text"}, flags={"fulltext"}) * } * ) From 6ec9e2a210872ce984a7c3a141554f2398922ab8 Mon Sep 17 00:00:00 2001 From: Daniel Berthereau Date: Mon, 19 Feb 2024 00:00:00 +0000 Subject: [PATCH 2/4] Separated record and text in full text search. --- application/Module.php | 2 +- .../__CG__OmekaEntityFulltextSearch.php | 26 ++++++++++- application/data/install/schema.sql | 4 +- ...00004_SeparateRecordAndTextForFullText.php | 45 +++++++++++++++++++ .../Adapter/AbstractResourceEntityAdapter.php | 17 ++++++- .../Adapter/FulltextSearchableInterface.php | 10 ++++- application/src/Api/Adapter/ItemAdapter.php | 16 +++++++ application/src/Api/Adapter/MediaAdapter.php | 12 +++++ .../src/Api/Adapter/SitePageAdapter.php | 5 +++ application/src/Entity/FulltextSearch.php | 18 +++++++- application/src/Job/IndexFulltextSearch.php | 2 +- .../Renderer/FulltextSearchableInterface.php | 2 +- application/src/Stdlib/FulltextSearch.php | 7 +-- 13 files changed, 154 insertions(+), 12 deletions(-) create mode 100644 application/data/migrations/20240219000004_SeparateRecordAndTextForFullText.php diff --git a/application/Module.php b/application/Module.php index 13723e5042..7ef977cd53 100644 --- a/application/Module.php +++ b/application/Module.php @@ -710,7 +710,7 @@ public function searchFulltext(ZendEvent $event) } $qb = $event->getParam('queryBuilder'); - $match = 'MATCH(omeka_fulltext_search.title, omeka_fulltext_search.text) AGAINST (:omeka_fulltext_search)'; + $match = 'MATCH(omeka_fulltext_search.title, omeka_fulltext_search.record, omeka_fulltext_search.text) AGAINST (:omeka_fulltext_search)'; if ('api.search.query' === 
$event->getName()) { diff --git a/application/data/doctrine-proxies/__CG__OmekaEntityFulltextSearch.php b/application/data/doctrine-proxies/__CG__OmekaEntityFulltextSearch.php index 5e8bd774a0..fa289b1570 100644 --- a/application/data/doctrine-proxies/__CG__OmekaEntityFulltextSearch.php +++ b/application/data/doctrine-proxies/__CG__OmekaEntityFulltextSearch.php @@ -67,10 +67,10 @@ public function __construct(?\Closure $initializer = null, ?\Closure $cloner = n public function __sleep() { if ($this->__isInitialized__) { - return ['__isInitialized__', 'id', 'resource', 'owner', 'isPublic', 'title', 'text']; + return ['__isInitialized__', 'id', 'resource', 'owner', 'isPublic', 'title', 'record', 'text']; } - return ['__isInitialized__', 'id', 'resource', 'owner', 'isPublic', 'title', 'text']; + return ['__isInitialized__', 'id', 'resource', 'owner', 'isPublic', 'title', 'record', 'text']; } /** @@ -273,6 +273,28 @@ public function getTitle() return parent::getTitle(); } + /** + * {@inheritDoc} + */ + public function setRecord($record) + { + + $this->__initializer__ && $this->__initializer__->__invoke($this, 'setRecord', [$record]); + + return parent::setRecord($record); + } + + /** + * {@inheritDoc} + */ + public function getRecord() + { + + $this->__initializer__ && $this->__initializer__->__invoke($this, 'getRecord', []); + + return parent::getRecord(); + } + /** * {@inheritDoc} */ diff --git a/application/data/install/schema.sql b/application/data/install/schema.sql index 2db3c4c48e..6a526b317f 100644 --- a/application/data/install/schema.sql +++ b/application/data/install/schema.sql @@ -30,11 +30,13 @@ CREATE TABLE `fulltext_search` ( `owner_id` int DEFAULT NULL, `is_public` tinyint(1) NOT NULL, `title` longtext COLLATE utf8mb4_unicode_ci, + `record` longtext COLLATE utf8mb4_unicode_ci, `text` longtext COLLATE utf8mb4_unicode_ci, PRIMARY KEY (`id`,`resource`), KEY `IDX_AA31FE4A7E3C61F9` (`owner_id`), KEY `is_public` (`is_public`), - FULLTEXT KEY 
`IDX_AA31FE4A2B36786B3B8BA7C7` (`title`,`text`), + FULLTEXT KEY `IDX_AA31FE4A2B36786B9B349F91` (`title`,`record`), + FULLTEXT KEY `IDX_AA31FE4A3B8BA7C7` (`text`), CONSTRAINT `FK_AA31FE4A7E3C61F9` FOREIGN KEY (`owner_id`) REFERENCES `user` (`id`) ON DELETE SET NULL ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; CREATE TABLE `item` ( diff --git a/application/data/migrations/20240219000004_SeparateRecordAndTextForFullText.php b/application/data/migrations/20240219000004_SeparateRecordAndTextForFullText.php new file mode 100644 index 0000000000..c3e64df37b --- /dev/null +++ b/application/data/migrations/20240219000004_SeparateRecordAndTextForFullText.php @@ -0,0 +1,45 @@ +jobDispatcher = $jobDispatcher; + } + + public function up(Connection $conn) + { + $sql = <<<'SQL' +TRUNCATE TABLE `fulltext_search`; + +ALTER TABLE `fulltext_search` +ADD `record` longtext COLLATE 'utf8mb4_unicode_ci' NULL AFTER `title`; + +ALTER TABLE `fulltext_search` +ADD FULLTEXT `IDX_AA31FE4A2B36786B9B349F91` (`title`, `record`), +ADD FULLTEXT `IDX_AA31FE4A3B8BA7C7` (`text`), +DROP INDEX `IDX_AA31FE4A2B36786B3B8BA7C7`; + +SQL; + $conn->executeStatement($sql); + + $this->jobDispatcher->dispatch(\Omeka\Job\IndexFulltextSearch::class); /* The table was truncated above, so the whole index must be rebuilt. */ + } + + public static function create(ServiceLocatorInterface $services) + { + return new self($services->get(\Omeka\Job\Dispatcher::class)); + } +} diff --git a/application/src/Api/Adapter/AbstractResourceEntityAdapter.php b/application/src/Api/Adapter/AbstractResourceEntityAdapter.php index 12d3c26618..c9b591b3a3 100644 --- a/application/src/Api/Adapter/AbstractResourceEntityAdapter.php +++ b/application/src/Api/Adapter/AbstractResourceEntityAdapter.php @@ -715,7 +715,17 @@ public function getFulltextTitle($resource) return $resource->getTitle(); } + public function getFulltextRecord($resource) + { + return $this->getFulltext($resource, 'record'); + } + public function getFulltextText($resource) + { + return $this->getFulltext($resource, 
'text'); + } + + protected function getFulltext($resource, string $type) { $services = $this->getServiceLocator(); $dataTypes = $services->get('Omeka\DataTypeManager'); @@ -723,7 +733,11 @@ public function getFulltextText($resource) $eventManager = $this->getEventManager(); $criteria = Criteria::create()->where(Criteria::expr()->eq('isPublic', true)); - $args = $eventManager->prepareArgs(['resource' => $resource, 'criteria' => $criteria]); + $args = $eventManager->prepareArgs([ + 'resource' => $resource, + 'type' => $type, + 'criteria' => $criteria, + ]); $event = new Event('api.get_fulltext_text.value_criteria', $this, $args); $eventManager->triggerEvent($event); $criteria = $args['criteria']; @@ -738,6 +752,7 @@ public function getFulltextText($resource) $valueAnnotationCriteria = Criteria::create()->where(Criteria::expr()->eq('isPublic', true)); $args = $eventManager->prepareArgs([ 'resource' => $resource, + 'type' => $type, 'value' => $value, 'criteria' => $valueAnnotationCriteria, ]); diff --git a/application/src/Api/Adapter/FulltextSearchableInterface.php b/application/src/Api/Adapter/FulltextSearchableInterface.php index ecdd450820..ddf339754b 100644 --- a/application/src/Api/Adapter/FulltextSearchableInterface.php +++ b/application/src/Api/Adapter/FulltextSearchableInterface.php @@ -28,7 +28,15 @@ public function getFulltextIsPublic($resource); public function getFulltextTitle($resource); /** - * Get the the text of the passed resource. + * Get the record of the passed resource. + * + * @param mixed $resource + * @return string + */ + public function getFulltextRecord($resource); + + /** + * Get the raw text (transcription, OCR, etc.) of the passed resource. 
* * @param mixed $resource * @return string diff --git a/application/src/Api/Adapter/ItemAdapter.php b/application/src/Api/Adapter/ItemAdapter.php index d85bebd435..a9074cf0ca 100644 --- a/application/src/Api/Adapter/ItemAdapter.php +++ b/application/src/Api/Adapter/ItemAdapter.php @@ -328,6 +328,22 @@ public function preprocessBatchUpdate(array $data, Request $request) return $data; } + public function getFulltextRecord($resource) + { + $texts = []; + $texts[] = parent::getFulltextRecord($resource); + // Get media text. + $mediaAdapter = $this->getAdapter('media'); + foreach ($resource->getMedia() as $media) { + $texts[] = $mediaAdapter->getFulltextRecord($media); + } + // Remove empty texts. + $texts = array_filter($texts, function ($text) { + return !is_null($text) && $text !== ''; + }); + return implode("\n", $texts); + } + public function getFulltextText($resource) { $texts = []; diff --git a/application/src/Api/Adapter/MediaAdapter.php b/application/src/Api/Adapter/MediaAdapter.php index d16f4e5a21..9823a3226a 100644 --- a/application/src/Api/Adapter/MediaAdapter.php +++ b/application/src/Api/Adapter/MediaAdapter.php @@ -202,6 +202,18 @@ public function preprocessBatchUpdate(array $data, Request $request) return $data; } + public function getFulltextRecord($resource) + { + $renderer = $this->getServiceLocator() + ->get('Omeka\Media\Renderer\Manager') + ->get($resource->getRenderer()); + $fulltextRecord = parent::getFulltextRecord($resource); + if ($renderer instanceof FulltextSearchableInterface && method_exists($renderer, 'getFulltextRecord')) { /* getFulltextRecord() is not declared by the renderer interface, so only call it when the renderer provides it. */ + $fulltextRecord .= ' ' . 
$renderer->getFulltextRecord($this->getRepresentation($resource)); + } + return $fulltextRecord; + } + public function getFulltextText($resource) { $renderer = $this->getServiceLocator() diff --git a/application/src/Api/Adapter/SitePageAdapter.php b/application/src/Api/Adapter/SitePageAdapter.php index 5a4fb6c83a..c6365e4c02 100644 --- a/application/src/Api/Adapter/SitePageAdapter.php +++ b/application/src/Api/Adapter/SitePageAdapter.php @@ -355,6 +355,11 @@ public function getFulltextTitle($resource) return $resource->getTitle(); } + public function getFulltextRecord($resource) + { + return ''; + } + public function getFulltextText($resource) { $services = $this->getServiceLocator(); diff --git a/application/src/Entity/FulltextSearch.php b/application/src/Entity/FulltextSearch.php index fb887eeef5..dc3bcfafc5 100644 --- a/application/src/Entity/FulltextSearch.php +++ b/application/src/Entity/FulltextSearch.php @@ -6,7 +6,8 @@ * @Table( * indexes={ * @Index(name="is_public", columns={"is_public"}), - * @Index(columns={"title", "text"}, flags={"fulltext"}) + * @Index(columns={"title", "record"}, flags={"fulltext"}), + * @Index(columns={"text"}, flags={"fulltext"}) * } * ) */ @@ -40,6 +41,11 @@ class FulltextSearch */ protected $title; + /** + * @Column(type="text", nullable=true) + */ + protected $record; + /** * @Column(type="text", nullable=true) */ @@ -95,6 +101,16 @@ public function getTitle() return $this->title; } + public function setRecord($record) + { + $this->record = $record; + } + + public function getRecord() + { + return $this->record; + } + public function setText($text) { $this->text = $text; diff --git a/application/src/Job/IndexFulltextSearch.php b/application/src/Job/IndexFulltextSearch.php index 8b1dd235e6..8d0b13be1b 100644 --- a/application/src/Job/IndexFulltextSearch.php +++ b/application/src/Job/IndexFulltextSearch.php @@ -21,7 +21,7 @@ public function perform() // First delete all rows from the fulltext table to clear out the // resources 
that don't belong. - $conn->executeStatement('DELETE FROM `fulltext_search`'); + $conn->executeStatement('TRUNCATE TABLE `fulltext_search`'); // Then iterate through all resource types and index the ones that are // fulltext searchable. Note that we don't index "resource" and "value diff --git a/application/src/Media/Renderer/FulltextSearchableInterface.php b/application/src/Media/Renderer/FulltextSearchableInterface.php index 5b0d5a10cb..003da923ce 100644 --- a/application/src/Media/Renderer/FulltextSearchableInterface.php +++ b/application/src/Media/Renderer/FulltextSearchableInterface.php @@ -8,7 +8,7 @@ interface FulltextSearchableInterface /** * Get the the text of the passed media. * - * @param Media $media + * @param MediaRepresentation $media * @return string */ public function getFulltextText(MediaRepresentation $media); diff --git a/application/src/Stdlib/FulltextSearch.php b/application/src/Stdlib/FulltextSearch.php index 8ec27a56f0..278da527f7 100644 --- a/application/src/Stdlib/FulltextSearch.php +++ b/application/src/Stdlib/FulltextSearch.php @@ -32,11 +32,11 @@ public function save(ResourceInterface $resource, AdapterInterface $adapter) $ownerId = $owner ? 
$owner->getId() : null; $sql = 'INSERT INTO `fulltext_search` ( - `id`, `resource`, `owner_id`, `is_public`, `title`, `text` + `id`, `resource`, `owner_id`, `is_public`, `title`, `record`, `text` ) VALUES ( - :id, :resource, :owner_id, :is_public, :title, :text + :id, :resource, :owner_id, :is_public, :title, :record, :text ) ON DUPLICATE KEY UPDATE - `owner_id` = :owner_id, `is_public` = :is_public, `title` = :title, `text` = :text'; + `owner_id` = :owner_id, `is_public` = :is_public, `title` = :title, `record` = :record, `text` = :text'; $stmt = $this->conn->prepare($sql); $stmt->bindValue('id', $resourceId, PDO::PARAM_INT); @@ -44,6 +44,7 @@ public function save(ResourceInterface $resource, AdapterInterface $adapter) $stmt->bindValue('owner_id', $ownerId, PDO::PARAM_INT); $stmt->bindValue('is_public', $adapter->getFulltextIsPublic($resource), PDO::PARAM_BOOL); $stmt->bindValue('title', $adapter->getFulltextTitle($resource), PDO::PARAM_STR); + $stmt->bindValue('record', $adapter->getFulltextRecord($resource), PDO::PARAM_STR); $stmt->bindValue('text', $adapter->getFulltextText($resource), PDO::PARAM_STR); $stmt->executeStatement(); } From e6c76e083612a9bd944f0c6cfa6fcf26645da9d9 Mon Sep 17 00:00:00 2001 From: Daniel Berthereau Date: Mon, 26 Feb 2024 00:00:00 +0000 Subject: [PATCH 3/4] Fixed migration for full text search. 
--- .../20240219000004_SeparateRecordAndTextForFullText.php | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/application/data/migrations/20240219000004_SeparateRecordAndTextForFullText.php b/application/data/migrations/20240219000004_SeparateRecordAndTextForFullText.php index c3e64df37b..a9d055d7ed 100644 --- a/application/data/migrations/20240219000004_SeparateRecordAndTextForFullText.php +++ b/application/data/migrations/20240219000004_SeparateRecordAndTextForFullText.php @@ -28,10 +28,14 @@ public function up(Connection $conn) ADD `record` longtext COLLATE 'utf8mb4_unicode_ci' NULL AFTER `title`; ALTER TABLE `fulltext_search` -ADD FULLTEXT `IDX_AA31FE4A2B36786B9B349F91` (`title`, `record`), -ADD FULLTEXT `IDX_AA31FE4A3B8BA7C7` (`text`), DROP INDEX `IDX_AA31FE4A2B36786B3B8BA7C7`; +ALTER TABLE `fulltext_search` +ADD FULLTEXT `IDX_AA31FE4A2B36786B9B349F91` (`title`, `record`); + +ALTER TABLE `fulltext_search` +ADD FULLTEXT `IDX_AA31FE4A3B8BA7C7` (`text`); + SQL; $conn->executeStatement($sql); From a9ac1457b886e220b5ff1a5c7984b1dbf6004048 Mon Sep 17 00:00:00 2001 From: Daniel Berthereau Date: Mon, 26 Feb 2024 00:00:00 +0000 Subject: [PATCH 4/4] Fixed search with multiple fulltext indexes. 
--- application/Module.php | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/application/Module.php b/application/Module.php index 7ef977cd53..e80a5b1f11 100644 --- a/application/Module.php +++ b/application/Module.php @@ -710,7 +710,8 @@ public function searchFulltext(ZendEvent $event) } $qb = $event->getParam('queryBuilder'); - $match = 'MATCH(omeka_fulltext_search.title, omeka_fulltext_search.record, omeka_fulltext_search.text) AGAINST (:omeka_fulltext_search)'; + $match = '(MATCH(omeka_fulltext_search.title, omeka_fulltext_search.record) AGAINST (:omeka_fulltext_search) > 0 OR MATCH(omeka_fulltext_search.text) AGAINST (:omeka_fulltext_search) > 0)'; + $matchOrder = '(MATCH(omeka_fulltext_search.title, omeka_fulltext_search.record) AGAINST (:omeka_fulltext_search) + MATCH(omeka_fulltext_search.text) AGAINST (:omeka_fulltext_search))'; /* Sum both scores so the sort reflects relevance; OR would reduce them to 0/1. */ if ('api.search.query' === $event->getName()) { @@ -727,7 +728,7 @@ public function searchFulltext(ZendEvent $event) $qb->innerJoin('Omeka\Entity\FulltextSearch', 'omeka_fulltext_search', 'WITH', $joinConditions); // Filter out resources with no similarity. - $qb->andWhere(sprintf('%s > 0', $match)); + $qb->andWhere($match); // Set visibility constraints. $acl = $this->getServiceLocator()->get('Omeka\Acl'); @@ -754,7 +755,7 @@ public function searchFulltext(ZendEvent $event) if (isset($query['sort_by_default']) || !$qb->getDQLPart('orderBy')) { $sortOrder = 'asc' === $query['sort_order'] ? 'ASC' : 'DESC'; - $qb->orderBy($match, $sortOrder); + $qb->orderBy($matchOrder, $sortOrder); } } }