Skip to content

Commit

Permalink
Merge pull request #1 from scrapy/cleanup
Browse files (browse the repository at this point in the history)
rename spider and clean it up
  • Loading branch information
redapple authored Jan 18, 2017
2 parents caa80bc + 645fe88 commit 959a7dc
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 17 deletions.
15 changes: 0 additions & 15 deletions books/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,3 @@

 ROBOTSTXT_OBEY = True
 HTTPCACHE_ENABLED = True
-
-SPIDER_MIDDLEWARES = {
-    'scrapy_deltafetch.DeltaFetch': 100,
-    'scrapy_magicfields.MagicFieldsMiddleware': 200,
-}
-
-DELTAFETCH_ENABLED = True
-
-MAGICFIELDS_ENABLED = True
-MAGIC_FIELDS = {
-    "timestamp": "$time",
-    "spider": "$spider:name",
-    "url": "scraped from $response:url",
-    "domain": "$response:url,r'https?://([\w\.]+)/']",
-}
5 changes: 3 additions & 2 deletions books/spiders/toscrape.py → books/spiders/books.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
import scrapy


-class ToscrapeSpider(scrapy.Spider):
-    name = "toscrape"
+class BooksSpider(scrapy.Spider):
+    name = "books"
     allowed_domains = ["books.toscrape.com"]
     start_urls = [
         'http://books.toscrape.com/',
Expand All @@ -26,4 +26,5 @@ def parse_book_page(self, response):
         item['description'] = response.xpath(
             "//div[@id='product_description']/following-sibling::p/text()"
         ).extract_first()
+        item['price'] = response.css('p.price_color ::text').extract_first()
         yield item

0 comments on commit 959a7dc

Please sign in to comment.