diff --git a/src/S2/Rose/Indexer.php b/src/S2/Rose/Indexer.php index eef712e..9758b34 100644 --- a/src/S2/Rose/Indexer.php +++ b/src/S2/Rose/Indexer.php @@ -79,20 +79,23 @@ protected static function arrayFromStr(string $contents): array protected function addToIndex(ExternalId $externalId, string $title, ContentWithMetadata $content, string $keywords): void { $sentenceCollection = $content->getSentenceMap()->toSentenceCollection(); - $contentWords = $sentenceCollection->getWordsArray(); + $contentWordsArray = $sentenceCollection->getWordsArray(); - foreach ($contentWords as $i => $word) { + foreach ($contentWordsArray as $i => $word) { if ($this->storage->isExcludedWord($word)) { - unset($contentWords[$i]); + unset($contentWordsArray[$i]); } } - $this->storage->addMetadata($externalId, \count($contentWords), $content->getImageCollection()); + $titleWordsArray = self::arrayFromStr($title); + $keywordsArray = self::arrayFromStr($keywords); + + $this->storage->addMetadata($externalId, \count($titleWordsArray) + \count($contentWordsArray), $content->getImageCollection()); $this->storage->addSnippets($externalId, ...$sentenceCollection->getSnippetSources()); $this->storage->addToFulltextIndex( - $this->getStemsWithComponents(self::arrayFromStr($title)), - $this->getStemsWithComponents(self::arrayFromStr($keywords)), // TODO consider different semantics of space and comma? - $this->getStemsWithComponents($contentWords), + $this->getStemsWithComponents($titleWordsArray), + $this->getStemsWithComponents($keywordsArray), // TODO consider different semantics of space and comma? + $this->getStemsWithComponents($contentWordsArray), $externalId ); } diff --git a/tests/unit/Rose/IntegrationTest.php b/tests/unit/Rose/IntegrationTest.php index 859b5b8..16fc955 100644 --- a/tests/unit/Rose/IntegrationTest.php +++ b/tests/unit/Rose/IntegrationTest.php @@ -90,8 +90,8 @@ public function testFeatures( $this->assertEquals([ '20:id_2' => 2.5953804134970615, - '20:id_1' => 0.12778564557899275, - '10:id_1' => 0.08519043038599518, + '20:id_1' => 0.12828323517212156, + '10:id_1' => 0.08569157515491249, ], $resultSet2->getSortedRelevanceByExternalId()); $items = $resultSet2->getItems(); @@ -101,7 +101,7 @@ public function testFeatures( $this->assertEquals('url1', $items[2]->getUrl()); $this->assertEquals('Description can be used in snippets', $items[2]->getDescription()); $this->assertEquals(new \DateTime('2016-08-24 00:00:00'), $items[2]->getDate()); - $this->assertEquals(0.08519043038599518, $items[2]->getRelevance()); + $this->assertEquals(0.08569157515491249, $items[2]->getRelevance()); $this->assertEquals('I have changed the content.', $items[2]->getSnippet()); $this->assertEquals(2.5953804134970615, $items[0]->getRelevance()); @@ -112,7 +112,7 @@ public function testFeatures( $this->assertEquals([ '20:id_2' => 2.5953804134970615, - '20:id_1' => 0.12778564557899275 + '20:id_1' => 0.12828323517212156 ], $resultSet2->getSortedRelevanceByExternalId()); $this->assertEquals(3, $resultSet2->getTotalCount()); @@ -146,7 +146,7 @@ public function testFeatures( 'Тут есть тонкость - нужно проверить, как происходит экранировка в сущностях вроде + и +. Для этого нужно включить в текст само сочетание букв "plus".', $resultSet3->getItems()[0]->getSnippet() ); - $this->assertEquals(18.327969620020077, $resultSet3->getItems()[0]->getRelevance()); + $this->assertEquals(18.35150247903209, $resultSet3->getItems()[0]->getRelevance()); // Query 4 $resultSet4 = $finder->find(new Query('эпл')); @@ -171,7 +171,7 @@ public function testFeatures( 'Русский текст. Красным заголовком. АБВГ', $resultItems4[0]->getHighlightedTitle($stemmer) ); - $this->assertEquals( 38.858378912122475, $resultSet4->getItems()[0]->getRelevance()); + $this->assertEquals(38.86779205572728, $resultSet4->getItems()[0]->getRelevance()); // Query 5 $resultSet5 = $finder->find(new Query('русский')); @@ -189,7 +189,7 @@ public function testFeatures( // Query 6 $resultSet6 = $finder->find(new Query('учитель не должен')); $this->assertCount(1, $resultSet6->getItems()); - $this->assertEquals(55.06322790532708, $resultSet6->getItems()[0]->getRelevance()); + $this->assertEquals(55.0961739079439, $resultSet6->getItems()[0]->getRelevance()); // Query 7: Test empty queries $resultSet7 = $finder->find(new Query('')); @@ -266,11 +266,12 @@ public function testFeatures( $this->assertEquals('Alternative text', $img1->getAlt()); if ($readStorage instanceof PdoStorage && strpos($GLOBALS['s2_rose_test_db']['dsn'], 'sqlite') !== 0) { + $indexer->index(new Indexable('dummy', 'Dummy new', '')); $similarItems = $readStorage->getSimilar(new ExternalId('id_2', 20), false); $this->assertInstanceOf(TocEntryWithMetadata::class, $similarItems[0]['tocWithMetadata']); $this->assertEquals($right = [ 'toc_id' => '1', - 'word_count' => '13', + 'word_count' => '16', 'external_id' => 'id_1', 'instance_id' => '10', 'title' => 'Test page title',