diff --git a/books/settings.py b/books/settings.py index cec717c8..08518a2a 100644 --- a/books/settings.py +++ b/books/settings.py @@ -7,18 +7,3 @@ ROBOTSTXT_OBEY = True HTTPCACHE_ENABLED = True - -SPIDER_MIDDLEWARES = { - 'scrapy_deltafetch.DeltaFetch': 100, - 'scrapy_magicfields.MagicFieldsMiddleware': 200, -} - -DELTAFETCH_ENABLED = True - -MAGICFIELDS_ENABLED = True -MAGIC_FIELDS = { - "timestamp": "$time", - "spider": "$spider:name", - "url": "scraped from $response:url", - "domain": "$response:url,r'https?://([\w\.]+)/']", -} diff --git a/books/spiders/toscrape.py b/books/spiders/books.py similarity index 89% rename from books/spiders/toscrape.py rename to books/spiders/books.py index 4a862977..99ab4346 100644 --- a/books/spiders/toscrape.py +++ b/books/spiders/books.py @@ -2,8 +2,8 @@ import scrapy -class ToscrapeSpider(scrapy.Spider): - name = "toscrape" +class BooksSpider(scrapy.Spider): + name = "books" allowed_domains = ["books.toscrape.com"] start_urls = [ 'http://books.toscrape.com/', @@ -26,4 +26,5 @@ def parse_book_page(self, response): item['description'] = response.xpath( "//div[@id='product_description']/following-sibling::p/text()" ).extract_first() + item['price'] = response.css('p.price_color ::text').extract_first() yield item