From f3127295f2e65816c2bc8be964b9f3e0de013ff3 Mon Sep 17 00:00:00 2001 From: Luk Puk Date: Thu, 22 Dec 2016 10:55:13 +0100 Subject: [PATCH 1/3] Update search index on tag name change. Support for search index update --- application/controllers/TagsController.php | 6 +- application/models/Job/SearchTextIndex.php | 119 +++++++++++++++++++-- application/models/Tag.php | 20 +++- 3 files changed, 130 insertions(+), 15 deletions(-) diff --git a/application/controllers/TagsController.php b/application/controllers/TagsController.php index 253cae986c..47b7c9efe6 100644 --- a/application/controllers/TagsController.php +++ b/application/controllers/TagsController.php @@ -32,7 +32,7 @@ public function editAction() */ public function browseAction() { - $params = $this->_getAllParams(); + $params = $this->getAllParams(); //Check to see whether it will be tags for exhibits or for items //Default is Item @@ -93,10 +93,10 @@ public function renameAjaxAction() $csrf = new Omeka_Form_SessionCsrf; $oldTagId = $_POST['id']; $oldTag = $this->_helper->db->findById($oldTagId); - $oldName = $oldTag->name; + $oldName = trim($oldTag->name); $newName = trim($_POST['value']); - $oldTag->name = $newName; + $oldTag->setPostData(array('name' => $newName, 'nameChanged' => ($oldName !== $newName))); $this->_helper->viewRenderer->setNoRender(); if ($csrf->isValid($_POST) && $oldTag->save(false)) { $this->getResponse()->setBody($newName); diff --git a/application/models/Job/SearchTextIndex.php b/application/models/Job/SearchTextIndex.php index 0561a7b295..795a349ebb 100644 --- a/application/models/Job/SearchTextIndex.php +++ b/application/models/Job/SearchTextIndex.php @@ -16,6 +16,18 @@ class Job_SearchTextIndex extends Omeka_Job_AbstractJob */ public function perform() { + // when specific records are passed, only re-index those records + // (works safely only with hundreds of records, because `args` column is TEXT type) + if (!empty($this->_options['records'])) { + return
$this->_performUpdate($this->_options['records']); + } + // when custom SQL is passed, re-index only the records it returns + // (solves TEXT data type limitation when passing thousands of records to update) + if (!empty($this->_options['sql'])) { + $recordMap = $this->_getRecordMapFromSql($this->_options['sql']); + return $this->_performUpdate($recordMap); + } + // Truncate the `search_texts` table before indexing to clean out // obsolete records. $sql = "TRUNCATE TABLE {$this->_db->SearchText}"; @@ -25,17 +37,8 @@ public function perform() $recordType = is_string($key) ? $key : $value; - if (!class_exists($recordType)) { - // The class does not exist or cannot be found. - continue; - } - $record = new $recordType; - if (!($record instanceof Omeka_Record_AbstractRecord)) { - // The class is not a valid record. - continue; - } - if (!is_callable(array($record, 'addSearchText'))) { - // The record does not implement the search mixin. + $record = $this->_getIndexedRecordByType($recordType); + if (!$record) { continue; } @@ -54,9 +57,103 @@ public function perform() Zend_Log::ERR); } release_object($recordObject); + // TODO/Question - what about short usleep(5000); here as well?? spends less than half of cpu } $pageNumber++; } } } + /** + * Updates index for given record_type + record_id(s). + * + * @param array|ArrayObject $recordMap Map of record types and their ids, in format: + * + * [ + * (string) recordType => [(int) recordId, (int) recordId, ...], + * (string) recordType2 => [(int) recordId, (int) recordId, ...], + * ... + * ] + * + * @return void + */ + protected function _performUpdate($recordMap) + { + foreach (get_custom_search_record_types() as $key => $value) { + $recordType = is_string($key) ?
$key : $value; + + if (empty($recordMap[$recordType])) { + continue; + } + $record = $this->_getIndexedRecordByType($recordType); + if (!$record) { + continue; + } + + $recordTable = $record->getTable(); + $recordTableAlias = $recordTable->getTableAlias(); + $pageNumber = 0; + $perPage = 100; + // Find all record by given list of ids (paginated by 100). + while ($ids = array_slice($recordMap[$recordType], $pageNumber * $perPage, $perPage)) { + $recordObjects = $recordTable->fetchObjects($recordTable->getSelect()->where("$recordTableAlias.id IN (?)", $ids)); + foreach ($recordObjects as $recordObject) { + // Save the record object, which indexes its search text. + try { + $recordObject->save(); + } catch (Omeka_Validate_Exception $e) { + _log($e, Zend_Log::ERR); + _log(sprintf('Failed to index %s #%s', + get_class($recordObject), $recordObject->id), + Zend_Log::ERR); + } + release_object($recordObject); + usleep(5000); + } + $pageNumber++; + } + } + } + /** + * Retrieves map of records to update from given SQL query. + * + * @param string $sql Select query that must specify `record_type` and `record_id` columns; it is executed verbatim, so it must come from trusted code only. + * Example1: SELECT `record_type`, `record_id` FROM `records_tags` + * Example2: SELECT 'Item' AS `record_type`, `items`.`id` AS `record_id` FROM `items` + * @return ArrayObject $recordMap Map of records, where key is record type and value is an array of record ids. + * Returns an empty ArrayObject if nothing is found. + */ + protected function _getRecordMapFromSql($sql) + { + $recordMap = new ArrayObject(); + $records = $this->_db->fetchAll($sql); + foreach ($records as $record) { + if (isset($record['record_type'], $record['record_id'])) { + $recordMap[$record['record_type']][] = $record['record_id']; + } + } + return $recordMap; + } + /** + * Retrieves record that should be indexed by given record type. + * + * @param string $recordType + * @return null|Omeka_Record_AbstractRecord Returns null if record doesn't exist or doesn't implement search mixin.
+ */ + protected function _getIndexedRecordByType($recordType) + { + if (!class_exists($recordType)) { + // The class does not exist or cannot be found. + return null; + } + $record = new $recordType; + if (!($record instanceof Omeka_Record_AbstractRecord)) { + // The class is not a valid record. + return null; + } + if (!is_callable(array($record, 'addSearchText'))) { + // The record does not implement the search mixin. + return null; + } + return $record; + } } diff --git a/application/models/Tag.php b/application/models/Tag.php index a9251831b9..df41b9ae5f 100644 --- a/application/models/Tag.php +++ b/application/models/Tag.php @@ -28,7 +28,25 @@ class Tag extends Omeka_Record_AbstractRecord { public function __toString() { return $this->name; } - + /** + * Executes after the record is saved; queues a search re-index of tagged records when the tag name changed. + */ + protected function afterSave($args) { + // on tag name change/update, run SearchIndex for all record types using this tag + if (!empty($args['post']['nameChanged'])) { + $db = $this->getDb(); + $sql = " + SELECT `record_type`, `record_id` + FROM `{$db->RecordsTags}` + WHERE `tag_id` = " . (int) $this->id; + // safer to pass SQL here, it's unclear how many records need to be updated, + // and `args` column in `processes` table is only TEXT data type + Zend_Registry::get('bootstrap')->getResource('jobs') + ->sendLongRunning('Job_SearchTextIndex', array( + 'sql' => $sql + )); + } + } /** * Delete handling for a tag.
* From 1cc41f6d8d968064132ddce56e708f3351f7c49b Mon Sep 17 00:00:00 2001 From: Luk Puk Date: Thu, 22 Dec 2016 11:16:26 +0100 Subject: [PATCH 2/3] Typo --- application/models/Job/SearchTextIndex.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/application/models/Job/SearchTextIndex.php b/application/models/Job/SearchTextIndex.php index 795a349ebb..d8994f92a0 100644 --- a/application/models/Job/SearchTextIndex.php +++ b/application/models/Job/SearchTextIndex.php @@ -93,7 +93,7 @@ protected function _performUpdate($recordMap) $recordTableAlias = $recordTable->getTableAlias(); $pageNumber = 0; $perPage = 100; - // Find all record by given list of ids (paginated by 100). + // Find all records by given list of ids (paginated by 100). while ($ids = array_slice($recordMap[$recordType], $pageNumber * $perPage, $perPage)) { $recordObjects = $recordTable->fetchObjects($recordTable->getSelect()->where("$recordTableAlias.id IN (?)", $ids)); foreach ($recordObjects as $recordObject) { From 114141822282507b8e58c0a22747f8fb30f07042 Mon Sep 17 00:00:00 2001 From: Luk Puk Date: Wed, 5 Apr 2017 10:37:34 +0200 Subject: [PATCH 3/3] Update search index on tag delete as well --- application/models/Tag.php | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/application/models/Tag.php b/application/models/Tag.php index e8e8333b11..a5ea91ca96 100644 --- a/application/models/Tag.php +++ b/application/models/Tag.php @@ -61,9 +61,22 @@ protected function _delete() ->getTable('RecordsTags') ->findBySql('tag_id = ?', array((int) $this->id)); + $reindex = array(); foreach ($taggings as $tagging) { + $reindex[$tagging->record_type][] = $tagging->record_id; $tagging->delete(); } + + if (count($reindex)) { + // TODO - problem with MySQL TEXT data type (max. 65 535 chars) + // should we change `processes`.`args` to MEDIUMTEXT or even LONGTEXT??
+ // TODO - it's possible others have implemented batch-delete for tags that runs in background + // then this may open large amount of parallel processes and kill CPU/memory, is it safe?? + Zend_Registry::get('bootstrap')->getResource('jobs') + ->sendLongRunning('Job_SearchTextIndex', array( + 'records' => $reindex + )); + } } /**