clean commit

Shah13079 · Mar 8, 2023 · 598a6ea · 598a6ea
commit 598a6ea
Show file tree

Hide file tree

Showing 15 changed files with 422 additions and 0 deletions.
diff --git a/RedFinCounties.csv b/RedFinCounties.csv
@@ -0,0 +1,36 @@
+County,State,For Sale,Sold in 1mo,Sold in 3mo
+Hampshire County,WV,,,
+Mineral County,CO,,,
+Pinal County,AZ,,,
+Teller County,CO,,,
+Jackson,NC,,,
+Park,CO,,,
+Maricopa County,AZ,,,
+Coconino County,AZ,,,
+Costilla County,CO,,,
+Deschutes County,OR,,,
+Fremont County,CO,,,
+Jackson County,GA,,,
+Las Animas,CO,,,
+Saguache County,CO,,,
+White,GA,,,
+Tyler,TX,,,
+Yakima County,WA,,,
+Douglas County,WA,,,
+Archuleta County,CO,,,
+Haywood,NC,,,
+Yavapai County,AZ,,,
+Hidalgo County,TX,,,
+Duchesne County,UT,,,
+Hardin,TX,,,
+Chambers,TX,,,
+San Bernardino,CA,,,
+Mitchell County,NC,,,
+Burnet County,TX,,,
+Rutherford,NC,,,
+Jefferson,TX,,,
+Apache County,AZ,,,
+Ferry County,WA,,,
+Blanco County,TX,,,
+Orange,TX,,,
+Conejos County,CO,,,
diff --git a/redfin_sales/__init__.py b/redfin_sales/__init__.py
diff --git a/redfin_sales/__pycache__/__init__.cpython-39.pyc b/redfin_sales/__pycache__/__init__.cpython-39.pyc
diff --git a/redfin_sales/__pycache__/settings.cpython-39.pyc b/redfin_sales/__pycache__/settings.cpython-39.pyc
diff --git a/redfin_sales/items.py b/redfin_sales/items.py
@@ -0,0 +1,12 @@
+# Define here the models for your scraped items
+#
+# See documentation in:
+# https://docs.scrapy.org/en/latest/topics/items.html
+
+import scrapy
+
+
+class RedfinSalesItem(scrapy.Item):
+    # No fields are declared yet. Add one attribute per scraped field, e.g.:
+    # name = scrapy.Field()
+    pass  # placeholder body: the class currently adds nothing to scrapy.Item
diff --git a/redfin_sales/middlewares.py b/redfin_sales/middlewares.py
@@ -0,0 +1,103 @@
+# Define here the models for your spider middleware
+#
+# See documentation in:
+# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
+
+from scrapy import signals
+
+# useful for handling different item types with a single interface
+from itemadapter import is_item, ItemAdapter
+
+
+class RedfinSalesSpiderMiddleware:
+    # Not all methods need to be defined. If a method is not defined,
+    # scrapy acts as if the spider middleware does not modify the
+    # passed objects.
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        # Builds the middleware instance (not a spider) and connects its spider_opened handler to the signal.
+        s = cls()
+        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
+        return s
+
+    def process_spider_input(self, response, spider):
+        # Called for each response that goes through the spider
+        # middleware and into the spider.
+
+        # Should return None or raise an exception.
+        return None  # nothing is inspected or changed here
+
+    def process_spider_output(self, response, result, spider):
+        # Called with the results returned from the Spider, after
+        # it has processed the response.
+
+        # Must return an iterable of Request or item objects; every result is re-yielded unchanged.
+        for i in result:
+            yield i
+
+    def process_spider_exception(self, response, exception, spider):
+        # Called when a spider or process_spider_input() method
+        # (from other spider middleware) raises an exception.
+
+        # Should return either None or an iterable of Request or item objects.
+        pass  # implicitly returns None
+
+    def process_start_requests(self, start_requests, spider):
+        # Called with the start requests of the spider, and works
+        # similarly to the process_spider_output() method, except
+        # that it does not have a response associated.
+
+        # Must return only requests (not items); every start request is re-yielded unchanged.
+        for r in start_requests:
+            yield r
+
+    def spider_opened(self, spider):
+        spider.logger.info('Spider opened: %s' % spider.name)  # signal handler wired up in from_crawler
+
+
+class RedfinSalesDownloaderMiddleware:
+    # Not all methods need to be defined. If a method is not defined,
+    # scrapy acts as if the downloader middleware does not modify the
+    # passed objects.
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        # Builds the middleware instance (not a spider) and connects its spider_opened handler to the signal.
+        s = cls()
+        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
+        return s
+
+    def process_request(self, request, spider):
+        # Called for each request that goes through the downloader
+        # middleware.
+
+        # Must either:
+        # - return None: continue processing this request
+        # - or return a Response object
+        # - or return a Request object
+        # - or raise IgnoreRequest: process_exception() methods of
+        #   installed downloader middleware will be called
+        return None  # the request is left untouched
+
+    def process_response(self, request, response, spider):
+        # Called with the response returned from the downloader.
+
+        # Must either:
+        # - return a Response object
+        # - return a Request object
+        # - or raise IgnoreRequest
+        return response  # the response is handed back unmodified
+
+    def process_exception(self, request, exception, spider):
+        # Called when a download handler or a process_request()
+        # (from other downloader middleware) raises an exception.
+
+        # Must either:
+        # - return None: continue processing this exception
+        # - return a Response object: stops process_exception() chain
+        # - return a Request object: stops process_exception() chain
+        pass  # implicitly returns None, i.e. the first option above
+
+    def spider_opened(self, spider):
+        spider.logger.info('Spider opened: %s' % spider.name)  # signal handler wired up in from_crawler
diff --git a/redfin_sales/pipelines.py b/redfin_sales/pipelines.py
@@ -0,0 +1,13 @@
+# Define your item pipelines here
+#
+# Don't forget to add your pipeline to the ITEM_PIPELINES setting
+# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
+
+
+# useful for handling different item types with a single interface
+from itemadapter import ItemAdapter
+
+
+class RedfinSalesPipeline:
+    def process_item(self, item, spider):
+        return item  # pass-through: every item is returned unchanged
diff --git a/redfin_sales/settings.py b/redfin_sales/settings.py
@@ -0,0 +1,19 @@
+# Scrapy settings for redfin_sales project
+#
+# For simplicity, this file contains only settings considered important or
+# commonly used. You can find more settings consulting the documentation:
+#
+#     https://docs.scrapy.org/en/latest/topics/settings.html
+#     https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
+#     https://docs.scrapy.org/en/latest/topics/spider-middleware.html
+
+# from shutil import which
+# from webdriver_manager.chrome import ChromeDriverManager
+BOT_NAME = 'redfin_sales'
+
+SPIDER_MODULES = ['redfin_sales.spiders']
+NEWSPIDER_MODULE = 'redfin_sales.spiders'
+
+# robots.txt rules are NOT obeyed: the setting below is explicitly disabled
+ROBOTSTXT_OBEY = False
+LOG_LEVEL = "INFO"
diff --git a/redfin_sales/spiders/__init__.py b/redfin_sales/spiders/__init__.py
@@ -0,0 +1,4 @@
+# This package will contain the spiders of your Scrapy project
+#
+# Please refer to the documentation for information on how to create and manage
+# your spiders.
diff --git a/redfin_sales/spiders/__pycache__/__init__.cpython-39.pyc b/redfin_sales/spiders/__pycache__/__init__.cpython-39.pyc
diff --git a/redfin_sales/spiders/__pycache__/final_spider.cpython-39.pyc b/redfin_sales/spiders/__pycache__/final_spider.cpython-39.pyc
diff --git a/redfin_sales/spiders/__pycache__/test.cpython-39.pyc b/redfin_sales/spiders/__pycache__/test.cpython-39.pyc