Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update search index on tag delete/name change. #772

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions application/controllers/TagsController.php
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ public function editAction()
*/
public function browseAction()
{
$params = $this->_getAllParams();
$params = $this->getAllParams();

//Check to see whether it will be tags for exhibits or for items
//Default is Item
Expand Down Expand Up @@ -92,10 +92,10 @@ public function renameAjaxAction()
$csrf = new Omeka_Form_SessionCsrf;
$oldTagId = $_POST['id'];
$oldTag = $this->_helper->db->findById($oldTagId);
$oldName = $oldTag->name;
$oldName = trim($oldTag->name);
$newName = trim($_POST['value']);

$oldTag->name = $newName;
$oldTag->setPostData(array('name' => $newName, 'nameChanged' => ($oldName != $newName)));
$this->_helper->viewRenderer->setNoRender();
if ($csrf->isValid($_POST) && $oldTag->save(false)) {
$this->getResponse()->setBody($newName);
Expand Down
121 changes: 109 additions & 12 deletions application/models/Job/SearchTextIndex.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,25 +16,28 @@ class Job_SearchTextIndex extends Omeka_Job_AbstractJob
*/
public function perform()
{
// When specific records are passed, only update the index for those records.
// (This is safe only for a few hundred records, because the `args` column is a TEXT type.)
if (!empty($this->_options['records'])) {
return $this->_performUpdate($this->_options['records']);
}
// When custom SQL is passed, update the index for the records that the query finds.
// (This works around the TEXT size limit when thousands of records must be updated.)
if (!empty($this->_options['sql'])) {
$recordMap = $this->_getRecordMapFromSql($this->_options['sql']);
return $this->_performUpdate($recordMap);
}

// Truncate the `search_texts` table before indexing to clean out
// obsolete records.
$sql = "TRUNCATE TABLE {$this->_db->SearchText}";
$this->_db->query($sql);

foreach (get_custom_search_record_types() as $key => $value) {
$recordType = is_string($key) ? $key : $value;

if (!class_exists($recordType)) {
// The class does not exist or cannot be found.
continue;
}
$record = new $recordType;
if (!($record instanceof Omeka_Record_AbstractRecord)) {
// The class is not a valid record.
continue;
}
if (!is_callable(array($record, 'addSearchText'))) {
// The record does not implement the search mixin.

$record = $this->_getIndexedRecordByType($recordType);
if (!$record) {
continue;
}

Expand All @@ -53,9 +56,103 @@ public function perform()
Zend_Log::ERR);
}
release_object($recordObject);
usleep(5000);
}
$pageNumber++;
}
}
}
/**
 * Re-indexes the search text of an explicit set of records.
 *
 * Only the record types reported by get_custom_search_record_types() are
 * considered; map entries for any other type are ignored. Every matching
 * record is loaded and saved again, and saving is what indexes its search
 * text.
 *
 * @param mixed $recordMap Record ids grouped by record type, in the format:
 * <code>
 * [
 *     (string) recordType => [(int) recordId, (int) recordId, ...],
 *     (string) recordType2 => [(int) recordId, (int) recordId, ...],
 *     ...
 * ]
 * </code>
 * @return void
 */
protected function _performUpdate($recordMap)
{
    $batchSize = 100;

    foreach (get_custom_search_record_types() as $typeKey => $typeValue) {
        // The type list may be keyed by class name or may hold it as a value.
        $recordType = is_string($typeKey) ? $typeKey : $typeValue;

        // Nothing was requested for this record type.
        if (empty($recordMap[$recordType])) {
            continue;
        }

        $prototype = $this->_getIndexedRecordByType($recordType);
        if (!$prototype) {
            continue;
        }

        $table = $prototype->getTable();
        $alias = $table->getTableAlias();

        // Walk the requested ids in batches; an empty slice ends the loop.
        for (
            $offset = 0;
            $idBatch = array_slice($recordMap[$recordType], $offset, $batchSize);
            $offset += $batchSize
        ) {
            $select = $table->getSelect()->where("{$alias}.id IN (?)", $idBatch);

            foreach ($table->fetchObjects($select) as $indexedRecord) {
                try {
                    // Saving the record is what indexes its search text.
                    $indexedRecord->save();
                } catch (Omeka_Validate_Exception $e) {
                    _log($e, Zend_Log::ERR);
                    $failure = sprintf(
                        'Failed to index %s #%s',
                        get_class($indexedRecord),
                        $indexedRecord->id
                    );
                    _log($failure, Zend_Log::ERR);
                }
                release_object($indexedRecord);
                usleep(5000);
            }
        }
    }
}
/**
 * Runs a select query and groups the resulting record ids by record type.
 *
 * @param string $sql Select query that must expose `record_type` and `record_id` columns.
 * Example1: SELECT `record_type`, `record_id` FROM `records_tags`
 * Example2: SELECT 'Item' AS `record_type`, `items`.`id` AS `record_id` FROM `items`
 * @return ArrayObject Map whose keys are record types and whose values are arrays of record ids.
 * The map is empty when the query yields no usable rows.
 */
protected function _getRecordMapFromSql($sql)
{
    $idsByType = new ArrayObject();

    foreach ($this->_db->fetchAll($sql) as $row) {
        // Ignore rows where either required column is absent or NULL.
        if (!isset($row['record_type'], $row['record_id'])) {
            continue;
        }
        $idsByType[$row['record_type']][] = $row['record_id'];
    }

    return $idsByType;
}
/**
 * Builds an instance of the given record type if that type can be indexed.
 *
 * A type qualifies when its class exists, extends
 * Omeka_Record_AbstractRecord, and an instance exposes a callable
 * addSearchText() (i.e. it implements the search mixin).
 *
 * @param string $recordType Class name of the record type.
 * @return null|Omeka_Record_AbstractRecord A new instance of the record type,
 * or null if the class doesn't exist, is not a record, or doesn't implement
 * the search mixin.
 */
protected function _getIndexedRecordByType($recordType)
{
    if (!class_exists($recordType)) {
        // The class does not exist or cannot be found.
        return null;
    }
    // Verify the class hierarchy on the class name, before instantiating:
    // a class that is not a record must never be constructed here, since its
    // constructor may have side effects.
    if (!is_subclass_of($recordType, 'Omeka_Record_AbstractRecord')) {
        // The class is not a valid record.
        return null;
    }
    // The mixin check needs an instance, so it is made on the object rather
    // than on the class name.
    $record = new $recordType;
    if (!is_callable(array($record, 'addSearchText'))) {
        // The record does not implement the search mixin.
        return null;
    }
    return $record;
}
}
33 changes: 33 additions & 0 deletions application/models/Tag.php
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,26 @@ public function __toString()
return $this->name;
}

/**
 * Save hook: schedules a search re-index when the tag has been renamed.
 *
 * When the post data in $args carries a non-empty 'nameChanged' flag, a
 * long-running Job_SearchTextIndex is queued with a query selecting every
 * record tagged with this tag. Otherwise nothing happens.
 *
 * @param array $args Callback arguments; only $args['post']['nameChanged'] is read here.
 */
protected function afterSave($args)
{
    // Only a changed tag name requires the tagged records to be re-indexed.
    if (empty($args['post']['nameChanged'])) {
        return;
    }

    $db = $this->getDb();

    // A query is handed to the job instead of a list of ids: the number of
    // tagged records is unknown, and the `args` column of the `processes`
    // table is only a TEXT type.
    $sql = "\nSELECT `record_type`, `record_id`\nFROM `{$db->RecordsTags}`\nWHERE `tag_id` = " . (int) $this->id;

    $dispatcher = Zend_Registry::get('bootstrap')->getResource('jobs');
    $dispatcher->sendLongRunning('Job_SearchTextIndex', array('sql' => $sql));
}

/**
* Delete handling for a tag.
*
Expand All @@ -41,9 +61,22 @@ protected function _delete()
->getTable('RecordsTags')
->findBySql('tag_id = ?', array((int) $this->id));

$reindex = array();
foreach ($taggings as $tagging) {
$reindex[$tagging->record_type][] = $tagging->record_id;
$tagging->delete();
}

if (count($reindex)) {
// TODO: the MySQL TEXT data type holds at most 65,535 characters, which limits how many
// record ids fit in `processes`.`args`. Should that column be changed to MEDIUMTEXT or even LONGTEXT?
// TODO: others may have implemented a batch-delete for tags that runs in the background; in that
// case this could start a large number of parallel processes and exhaust CPU/memory. Is this safe?
Zend_Registry::get('bootstrap')->getResource('jobs')
->sendLongRunning('Job_SearchTextIndex', array(
'records' => $reindex
));
}
}

/**
Expand Down