Skip to content

Commit

Permalink
Merge branch 'release/1.0.0' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
Ne-Lexa committed Apr 20, 2022
2 parents c6da078 + 6a9a6d5 commit 480ff12
Show file tree
Hide file tree
Showing 8 changed files with 152 additions and 22 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
matrix:
os: [ 'ubuntu-latest' ]
php: [ '8.0', '8.1' ]
symfony_version: [ '5.3.*', '5.4.*', '6.0.*' ]
symfony_version: [ '6.0.*' ]
dependency-version: [ 'prefer-lowest', 'prefer-stable' ]

runs-on: ${{ matrix.os }}
Expand Down Expand Up @@ -65,7 +65,6 @@ jobs:
run: vendor/bin/simple-phpunit install

- name: Run psalm
if: matrix.symfony_version != '5.3.*'
run: vendor/bin/psalm

- name: Run tests with phpunit
Expand Down
1 change: 1 addition & 0 deletions .php-cs-fixer.php
Original file line number Diff line number Diff line change
Expand Up @@ -1337,6 +1337,7 @@
'phpdoc_to_comment' => [
'ignored_tags' => [
'noinspection',
'psalm-suppress',
],
],

Expand Down
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ Add `nelexa/roach-php-bundle` to your composer.json file:
composer require nelexa/roach-php-bundle
```

## Versions & Dependencies
| Bundle version | roach-php/core version | Symfony version | PHP version(s) |
|----------------|------------------------|-----------------|----------------|
| 0.3.0 | 0.3.0 | ^5.3 \| ^6.0 | >= 8.0 |
| 1.0.0 | 1.0.0 | ^6.0 | >= 8.0 |

#### Register the bundle:
Register the bundle in `config/bundles.php` (Symfony Flex does this automatically):
```php
Expand Down Expand Up @@ -58,6 +64,11 @@ php bin/console roach:php google --concurrency 8 --delay 2
```
These options override the `$concurrency` and `$requestDelay` public properties of your spider.

Add the `--output` (`-o`) option and you can save the collected data to a JSON file.
```bash
php bin/console roach:run google --output 'path/to/data.json'
```

### Starting the REPL

Roach ships with an [interactive shell](https://roach-php.dev/docs/repl) (often called a Read-Evaluate-Print Loop, or REPL for short) which makes prototyping our spiders a breeze. We can use the provided `roach:shell` command to launch a new REPL session.
Expand Down
22 changes: 13 additions & 9 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,18 @@
"type": "symfony-bundle",
"require": {
"php": ">= 8.0",
"roach-php/core": "^0.3.0",
"symfony/config": "^5.3 | ^6.0",
"symfony/dependency-injection": "^5.3 | ^6.0",
"symfony/http-kernel": "^5.3 | ^6.0",
"symfony/console": "^5.3 | ^6.0"
"roach-php/core": "^1.0",
"symfony/config": "^6.0",
"symfony/dependency-injection": "^6.0",
"symfony/http-kernel": "^6.0",
"symfony/console": "^6.0",
"symfony/serializer": "^6.0"
},
"require-dev": {
"roave/security-advisories": "dev-latest",
"symfony/phpunit-bridge": "^5.3 | ^6.0",
"symfony/var-dumper": "^5.3 | ^6.0",
"symfony/framework-bundle": "^5.3 | ^6.0",
"symfony/phpunit-bridge": "^6.0",
"symfony/var-dumper": "^6.0",
"symfony/framework-bundle": "^6.0",
"symfony/maker-bundle": "^1.37",
"vimeo/psalm": "^4.21",
"psalm/plugin-symfony": "^3.1",
Expand All @@ -37,5 +38,8 @@
"name": "Ne-Lexa",
"email": "[email protected]"
}
]
],
"suggest": {
"spatie/browsershot": "Required to execute Javascript in spiders"
}
}
42 changes: 40 additions & 2 deletions src/Command/RunSpiderCommand.php
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
use Symfony\Component\Console\Style\OutputStyle;
use Symfony\Component\Console\Style\SymfonyStyle;
use Symfony\Component\DependencyInjection\ServiceLocator;
use Symfony\Component\Serializer\Encoder\JsonEncode;
use Symfony\Component\Serializer\SerializerInterface;

final class RunSpiderCommand extends Command
{
Expand All @@ -33,7 +35,7 @@ final class RunSpiderCommand extends Command
/** @var array<class-string<\RoachPHP\Spider\SpiderInterface>, array<string>> */
private array $spiderNames;

public function __construct(private ServiceLocator $serviceLocator)
public function __construct(private ServiceLocator $serviceLocator, private SerializerInterface $serializer)
{
/** @var array<class-string<\RoachPHP\Spider\SpiderInterface>> $providedServices */
$providedServices = $this->serviceLocator->getProvidedServices();
Expand All @@ -55,6 +57,7 @@ protected function configure(): void
->addArgument('spider', InputArgument::OPTIONAL, rtrim($spiderArgDescription))
->addOption('delay', 't', InputOption::VALUE_OPTIONAL, 'The delay (in seconds) between requests.')
->addOption('concurrency', 'p', InputOption::VALUE_OPTIONAL, 'The number of concurrent requests.')
->addOption('output', 'o', InputOption::VALUE_OPTIONAL, 'Save to JSON file')
;
}

Expand Down Expand Up @@ -107,6 +110,8 @@ private function selectSpiderClassName(OutputStyle $io): string

protected function execute(InputInterface $input, OutputInterface $output): int
{
$io = new SymfonyStyle($input, $output);
$outputFilename = $input->getOption('output');
$spiderName = $input->getArgument('spider');
$spiderClassName = $this->findSpiderClass($spiderName);

Expand Down Expand Up @@ -135,7 +140,15 @@ protected function execute(InputInterface $input, OutputInterface $output): int
requestDelay: $delay,
);

Roach::startSpider($spiderClassName, $overrides);
if ($outputFilename !== null) {
$collectData = Roach::collectSpider($spiderClassName, $overrides);

if (!$this->saveCollectData($collectData, $outputFilename, $io)) {
return self::FAILURE;
}
} else {
Roach::startSpider($spiderClassName, $overrides);
}

return self::SUCCESS;
}
Expand All @@ -155,4 +168,29 @@ private function findSpiderClass(?string $spiderName): ?string

return null;
}

/**
 * Serializes the collected data to pretty-printed JSON and writes it to a file.
 *
 * The parent directory of the target file is created recursively when it
 * does not exist yet. Directory and write failures are reported through
 * $io and signalled by the return value instead of being thrown.
 *
 * @param array        $collectData    data collected by the spider run
 * @param string       $outputFilename path of the JSON file to write
 * @param SymfonyStyle $io             console style used for the error/success messages
 *
 * @return bool true when the file was written, false when the directory
 *              could not be created or the file could not be written
 */
private function saveCollectData(array $collectData, string $outputFilename, SymfonyStyle $io): bool
{
    $content = $this->serializer->serialize($collectData, 'json', [
        JsonEncode::OPTIONS => \JSON_UNESCAPED_UNICODE | \JSON_PRETTY_PRINT | \JSON_UNESCAPED_LINE_TERMINATORS | \JSON_UNESCAPED_SLASHES | \JSON_THROW_ON_ERROR,
    ]);

    $dirname = \dirname($outputFilename);

    // The trailing is_dir() re-check tolerates another process creating the
    // directory between the first check and the mkdir() call.
    if (!is_dir($dirname) && !mkdir($dirname, 0755, true) && !is_dir($dirname)) {
        $io->error(sprintf('Directory "%s" was not created', $dirname));

        return false;
    }

    if (file_put_contents($outputFilename, $content) === false) {
        // Report the file that could not be written, not its parent directory.
        $io->error(sprintf('An error occurred while saving output to file %s', $outputFilename));

        return false;
    }

    $io->success(sprintf('Collected data successfully saved to file %s', $outputFilename));

    return true;
}
}
37 changes: 37 additions & 0 deletions src/Normalizer/ItemNormalizer.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
<?php

declare(strict_types=1);

/*
* Copyright (c) 2022 Ne-Lexa <[email protected]>
*
* For the full copyright and license information, please view
* the LICENSE file that was distributed with this source code.
*
* @see https://github.com/Ne-Lexa/roach-php-bundle
*/

namespace Nelexa\RoachPhpBundle\Normalizer;

use RoachPHP\ItemPipeline\ItemInterface;
use Symfony\Component\Serializer\Exception\InvalidArgumentException;
use Symfony\Component\Serializer\Normalizer\CacheableSupportsMethodInterface;
use Symfony\Component\Serializer\Normalizer\NormalizerInterface;

/**
 * Serializer normalizer that converts Roach items into plain arrays.
 */
class ItemNormalizer implements NormalizerInterface, CacheableSupportsMethodInterface
{
    /**
     * supportsNormalization() only inspects the class of the data, never the
     * format, so its result is reported as cacheable.
     */
    public function hasCacheableSupportsMethod(): bool
    {
        return true;
    }

    /**
     * Accepts any value implementing ItemInterface, regardless of format.
     */
    public function supportsNormalization(mixed $data, ?string $format = null): bool
    {
        return $data instanceof ItemInterface;
    }

    /**
     * Returns the array produced by the item's all() method.
     *
     * @throws InvalidArgumentException when $object is not an ItemInterface
     */
    public function normalize(mixed $object, ?string $format = null, array $context = []): array
    {
        // Explicit guard instead of a blanket static-analysis suppression:
        // direct callers that skip supportsNormalization() get a clear
        // serializer exception rather than a fatal "call to a member function" error.
        if (!$object instanceof ItemInterface) {
            throw new InvalidArgumentException(sprintf('The object must implement "%s".', ItemInterface::class));
        }

        return $object->all();
    }
}
17 changes: 17 additions & 0 deletions src/Resources/config/services.php
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,16 @@
service('event_dispatcher'),
])
;
$services->alias(\RoachPHP\Core\EngineInterface::class, \RoachPHP\Core\Engine::class);

$services
->set(\RoachPHP\Core\Runner::class)
->args([
service('service_container'),
service(\RoachPHP\Core\EngineInterface::class),
])
;
$services->alias(\RoachPHP\Core\RunnerInterface::class, \RoachPHP\Core\Runner::class);

// Downloader and downloader middlewares
$services
Expand Down Expand Up @@ -125,6 +135,7 @@
->set(\Nelexa\RoachPhpBundle\Command\RunSpiderCommand::class)
->args([
tagged_locator('roach_php.spider'),
service('serializer'),
])
->tag('console.command')
;
Expand Down Expand Up @@ -162,4 +173,10 @@
->set(\Nelexa\RoachPhpBundle\Maker\Spider\MakeItemMiddleware::class)
->tag('maker.command')
;

// normalizers
$services
->set(\Nelexa\RoachPhpBundle\Normalizer\ItemNormalizer::class)
->tag('serializer.normalizer', [])
;
};
41 changes: 32 additions & 9 deletions tests/Command/RunSpiderCommandTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,14 @@

use Symfony\Bundle\FrameworkBundle\Console\Application;
use Symfony\Bundle\FrameworkBundle\Test\KernelTestCase;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Tester\CommandTester;

/**
* @internal
*
* @medium
*/
class RunSpiderCommandTest extends KernelTestCase
final class RunSpiderCommandTest extends KernelTestCase
{
public function testExecute(): void
{
Expand All @@ -36,11 +35,7 @@ public function testExecute(): void
'spider' => 'quotes',
]);

if (method_exists($commandTester, 'assertCommandIsSuccessful')) {
$commandTester->assertCommandIsSuccessful();
} else {
static::assertSame(Command::SUCCESS, $commandTester->getStatusCode());
}
$commandTester->assertCommandIsSuccessful();
}

public function testUnknownSpider(): void
Expand All @@ -54,7 +49,35 @@ public function testUnknownSpider(): void
'spider' => 'unknown_spider',
]);

static::assertNotSame(0, $commandTester->getStatusCode());
static::assertStringContainsString('[ERROR] Unknown spider unknown_spider', $commandTester->getDisplay());
self::assertNotSame(0, $commandTester->getStatusCode());
self::assertStringContainsString('[ERROR] Unknown spider unknown_spider', $commandTester->getDisplay());
}

/**
 * Runs the "quotes" spider with the --output option and verifies that a
 * non-empty, valid JSON file is written to the requested path.
 */
public function testSpiderCommandOutputToJsonFile(): void
{
    // A unique name per run keeps a stale file from an earlier run, or a
    // test process running in parallel, from interfering with the assertions.
    $outputFilename = sys_get_temp_dir() . '/_roach-export-' . bin2hex(random_bytes(8)) . '.json';

    $kernel = self::bootKernel();
    $application = new Application($kernel);

    $command = $application->find('roach:run');

    try {
        $commandTester = new CommandTester($command);
        $commandTester->execute([
            'spider' => 'quotes',
            '--output' => $outputFilename,
        ]);

        $commandTester->assertCommandIsSuccessful();

        self::assertFileExists($outputFilename);

        // file_get_contents() returns false on failure; fail with a clear
        // message instead of passing false on to json_decode().
        $content = file_get_contents($outputFilename);
        self::assertIsString($content, 'The output file could not be read');

        $json = json_decode($content, false, 512, \JSON_THROW_ON_ERROR);
        self::assertNotEmpty($json);
    } finally {
        // Always remove the export so repeated runs start from a clean state.
        if (is_file($outputFilename)) {
            unlink($outputFilename);
        }
    }
}
}

0 comments on commit 480ff12

Please sign in to comment.