diff --git a/books/spiders/toscrape.py b/books/spiders/toscrape.py index 32960481..4a862977 100644 --- a/books/spiders/toscrape.py +++ b/books/spiders/toscrape.py @@ -20,11 +20,10 @@ def parse_book_page(self, response): item = {} product = response.css("div.product_main") item["title"] = product.css("h1 ::text").extract_first() - item["price"] = product.css("p.price_color ::text").re_first("(\d+.\d+)") - item["stock"] = product.xpath( - "normalize-space(./p[contains(@class, 'availability')])" - ).re_first("(\d+) available") item['category'] = response.xpath( "//ul[@class='breadcrumb']/li[@class='active']/preceding-sibling::li[1]/a/text()" ).extract_first() + item['description'] = response.xpath( + "//div[@id='product_description']/following-sibling::p/text()" + ).extract_first() yield item