From e63e75ae06b3b543f9e9588081dfa6296bb5129d Mon Sep 17 00:00:00 2001 From: Richard Date: Tue, 22 Nov 2022 19:39:59 -0600 Subject: [PATCH] update --- .env | 7 - .../__pycache__/settings.cpython-310.pyc | Bin 0 -> 1226 bytes Raw DB Tasks/empty collection.py | 19 ++ settings.py => Raw DB Tasks/settings.py | 50 +++-- .../extractors/__init__.py => __init__.py | 0 __pycache__/config.cpython-310.pyc | Bin 0 -> 8880 bytes __pycache__/settings.cpython-310.pyc | Bin 1065 -> 0 bytes config.py | 136 ++++++++++++ extractors/__init__.py | 0 .../__pycache__/__init__.cpython-310.pyc | Bin 0 -> 158 bytes extractors/__pycache__/items.cpython-310.pyc | Bin 0 -> 664 bytes .../__pycache__/middlewares.cpython-310.pyc | Bin 0 -> 3020 bytes .../__pycache__/pipelines.cpython-310.pyc | Bin 0 -> 3407 bytes .../__pycache__/settings.cpython-310.pyc | Bin 0 -> 768 bytes extractors/__pycache__/utils.cpython-310.pyc | Bin 0 -> 914 bytes .../__pycache__/__init__.cpython-310.pyc | Bin 169 -> 0 bytes .../__pycache__/items.cpython-310.pyc | Bin 615 -> 0 bytes .../__pycache__/middlewares.cpython-310.pyc | Bin 3031 -> 0 bytes .../__pycache__/pipelines.cpython-310.pyc | Bin 3106 -> 0 bytes .../__pycache__/settings.cpython-310.pyc | Bin 1370 -> 0 bytes .../__pycache__/utils.cpython-310.pyc | Bin 1750 -> 0 bytes .../amazon_find_spider.cpython-310.pyc | Bin 3441 -> 0 bytes .../extractors/spiders/amazon_find_spider.py | 137 ------------ extractors/extractors/utils.py | 43 ---- extractors/{extractors => }/items.py | 4 +- extractors/{extractors => }/middlewares.py | 5 +- extractors/{extractors => }/pipelines.py | 71 ++++--- .../__pycache__/amazon.cpython-310.pyc | Bin 0 -> 1841 bytes extractors/selectors/amazon.py | 32 +++ extractors/{extractors => }/settings.py | 78 +++---- .../{extractors => }/spiders/__init__.py | 0 .../__pycache__/__init__.cpython-310.pyc | Bin .../amazon_find_spider - Copy.cpython-310.pyc | Bin .../amazon_find_spider.cpython-310.pyc | Bin 0 -> 4267 bytes .../__pycache__/amazon_spyder.cpython-310.pyc | Bin .../__pycache__/newegg.cpython-310.pyc | Bin 0 -> 1844 bytes extractors/spiders/amazon_find_spider.py | 201 ++++++++++++++++++ extractors/spiders/newegg.py | 43 ++++ extractors/utils.py | 29 +++ main.py | 29 +++ extractors/scrapy.cfg => scrapy.cfg | 0 41 files changed, 596 insertions(+), 288 deletions(-) delete mode 100644 .env create mode 100644 Raw DB Tasks/__pycache__/settings.cpython-310.pyc create mode 100644 Raw DB Tasks/empty collection.py rename settings.py => Raw DB Tasks/settings.py (58%) rename extractors/extractors/__init__.py => __init__.py (100%) create mode 100644 __pycache__/config.cpython-310.pyc delete mode 100644 __pycache__/settings.cpython-310.pyc create mode 100644 config.py create mode 100644 extractors/__init__.py create mode 100644 extractors/__pycache__/__init__.cpython-310.pyc create mode 100644 extractors/__pycache__/items.cpython-310.pyc create mode 100644 extractors/__pycache__/middlewares.cpython-310.pyc create mode 100644 extractors/__pycache__/pipelines.cpython-310.pyc create mode 100644 extractors/__pycache__/settings.cpython-310.pyc create mode 100644 extractors/__pycache__/utils.cpython-310.pyc delete mode 100644 extractors/extractors/__pycache__/__init__.cpython-310.pyc delete mode 100644 extractors/extractors/__pycache__/items.cpython-310.pyc delete mode 100644 extractors/extractors/__pycache__/middlewares.cpython-310.pyc delete mode 100644 extractors/extractors/__pycache__/pipelines.cpython-310.pyc delete mode 100644 extractors/extractors/__pycache__/settings.cpython-310.pyc delete mode 100644 extractors/extractors/__pycache__/utils.cpython-310.pyc delete mode 100644 extractors/extractors/spiders/__pycache__/amazon_find_spider.cpython-310.pyc delete mode 100644 extractors/extractors/spiders/amazon_find_spider.py delete mode 100644 extractors/extractors/utils.py rename extractors/{extractors => }/items.py (88%) rename extractors/{extractors => }/middlewares.py (97%) rename extractors/{extractors => }/pipelines.py (66%) create mode 100644 extractors/selectors/__pycache__/amazon.cpython-310.pyc create mode 100644 extractors/selectors/amazon.py rename extractors/{extractors => }/settings.py (63%) rename extractors/{extractors => }/spiders/__init__.py (100%) rename extractors/{extractors => }/spiders/__pycache__/__init__.cpython-310.pyc (100%) rename extractors/{extractors => }/spiders/__pycache__/amazon_find_spider - Copy.cpython-310.pyc (100%) create mode 100644 extractors/spiders/__pycache__/amazon_find_spider.cpython-310.pyc rename extractors/{extractors => }/spiders/__pycache__/amazon_spyder.cpython-310.pyc (100%) create mode 100644 extractors/spiders/__pycache__/newegg.cpython-310.pyc create mode 100644 extractors/spiders/amazon_find_spider.py create mode 100644 extractors/spiders/newegg.py create mode 100644 extractors/utils.py create mode 100644 main.py rename extractors/scrapy.cfg => scrapy.cfg (100%) diff --git a/.env b/.env deleted file mode 100644 index ab6950d..0000000 --- a/.env +++ /dev/null @@ -1,7 +0,0 @@ -USER = "" -PASSWD = "" -CLUSTER_ARG = "" - -SCRAPER_API_KEY = "" - -export FLASK_DEBUG="development" \ No newline at end of file diff --git a/Raw DB Tasks/__pycache__/settings.cpython-310.pyc b/Raw DB Tasks/__pycache__/settings.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dc5bf7af368329b1cd88cb331fa6891070ecd964 GIT binary patch literal 1226 zcmZ8hTXWk)6qe+R6zAg5LNWs{8lLLbmh4IF=0c#38)!*mf}PUok{KauPh?GGtT(Gpput7MI? z(e+;>vQ9T>nQqb#=pA~Oen>z1O(h#t?n{-ecW6cGzLQZVny~pbx9A|z!idqaBRDB% zyN)fuWzmdmWI!K$A0kffX8L*LIq<~eB7JGq4#PeO;RP;75A7R|lFgUxFTZQ;(c{ta zw?o$bniF+AjIO5M%t?J%4h1(kTpe8eV-!MqBUAaU{2HTvCH*SV-0!*9@~hPQe3z8_ zonHI=q-V9ypIWCUFXPrwi15HPxf9tT;PMQIjRO(QEQe7z8gqlf4f<+{SqMiI__bi+ zLHy|>3}ia1afogpGX8UYbg);iH}r~NB0bdX%!#~MztZv$i!v%4&!#ilr zTK2&LOV&m#a2eVkW7Ns!QVVU)CoG~#d5JjwmANkb*49k}N^##DD`dfL+TpL6aGe@h(P#u@p&w(M)IB z-_T>*OK<%L{U5!#$DaBRIyv=S8yvhQU28p9V_RBV`{;Szk9~I~_ujqf1ib!y`F-WD z=ZVDM*WwvUH{U)f7M{&S`GVw{pjjA?>iN|>eS6{-;Oy^)5weCrK)P?-KCVgb}^$vcel#UJ-Vj`dLG07`dH zhz1Etm4al4h@lRkELVtVU`cM0a&NL@b4)`>awFctk(pjoiYlg|l#6XiU#196a%k_&Or z%tMV^@0sqZ!6-t?ySO0&lLitarUtViI!3x@K&?`^yM!GK6x^;mDEnZk=psnV!4klr zYBqJzP`W`QrnEYYT?#mQd}+z-QD{Y5I76~$-T0fkk@t_4IkC`p3qVDc(7D6=9W^72eLZ^%nw`ebE1V%F5U zK?-?Jg;kR*$aM?sPZC@aF`>v;=)~KX zxC+N&(TyThjV~@SClArK7s;mNrpDnBx~%{(8e_Cz%$gI;dF1|L34B~XBC^H`COsc9NWmQ?Qhz1 z)_Nh6&lXCV{C4S8_P`#`jV)9bc;VS7DW+S;~?*MFZQ5LUKW+}_(H|836h=Jqyo0Jv{u3(fiK zqkP7)UgruM&AIEA^__jIm@U|2(D|E#=EPPuvym-W&58ZYW_GV={hsh2Lb`8#yjQc! zZ)I6Zmv$YvJCAE6i#-0|!F}|%aC1L!LL4kX%LP1U@wVdDkDRLLT{b7bI)z6wYWQpN m$VMQ-$7o`72)5zYM)umQ^ffgb=T#)(@2yPDaZZ@lYo=i$%H?~({5^7LY@@I@ zs3@V5ON_}X84{HZnWl^sO&e*NF)}o3WNFUGQN>W`6wQAk#uP2kX*xq^X_3y+`7c=` zPo*{~E&Ray$l(tOwNSXl`pl#qp#-5%&uq>_G1)aO4qZPSiP;2bgX==b#Dl~>3LP7E z9meC23RV!bfpf>}GCTgm-H>sULTRF~MO2(zOE*L!T)s|G=|;LDRK6si2~B-XT}hWQ z$Mm5nwwuR|qt>z6IC^dFwcbay9_PV^t}{Ed0zh#z+duPoI5KUY!qyqnD4ZiBlaB(} zqQLsx4>qF3ZKOLKl^KNR5NiJ%A8f2uDpj?V7Fp_Z@Xn2b>s!>E7((`$$aW!ilw>2H z97RGR7vJdsyCvZ)KIwO@wQ*XlR_f+~WA`j@&34Q(z+mVX&|wY-Rz~Ck<9EuEC~3h+ zOyTY3nBROH9;^eRNO@^mN~gj(G@|gpj~v&v^i{2*K6&SO)W2Zr;fY$UXq8PBf!fNZ zI;^df)J71v@D4hA4%b(g>)LWneX_TEa<09LzZ3uW&oUG-2%j=$5UAPBY9wrz= zDN;L!CnDeUJrC@dQmGIYFi2tCNJa*^zNXj$9n-T0VBA$LGr84q!FZHZEi>^wxIKeSLaaHvug`PMU@Nec_CMVc9VIsHNYV5R*+vB?UOBIo$P@WI4$y|65 aH{ZA!6r#xKcuIFu$;i{PEK8G_|Mdq>i9SXE diff --git a/config.py b/config.py new file mode 100644 index 0000000..d2ef341 --- /dev/null +++ b/config.py @@ -0,0 +1,136 @@ +HOST = "127.0.0.1" +PORT = 3000 +DEBUG = True + +SCRAPER_API_KEY = "ef991d16e237a6680f215c650206cf8e" + +APP_ID = 1 +# database details where products, categories, etc. are stored +DB_NAME = "tasks_db" +PRODUCT_COL = "products" +PRODUCT_CATEGORY_COL = "productCategory" +PRODUCT_SELLER_COL = "productSellers" +PRODUCT_PRICE_HISTORY_COL = "productPriceHistory" +APP_SETTING_COL = "appSettings" + +MONGODB_USER = "david" +MONGODB_PASSWORD ="YAFV68dBmBQhoNJs" +MONGODB_CLUSTER = "uhrfxiy" + + +# refer to https://www.scraperapi.com/documentation/python/#getting-started to learn more about the ScraperAPI +# get proxy from ScraperAPI +PROXY = f"http://scraperapi.country_code=us.device_type=desktop:{SCRAPER_API_KEY}@proxy-server.scraperapi.com:8001 " + + +# User agents to experiment with + +# headers = { +# 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) ' +# 'Chrome/71.0.3578.98 Safari/537.36', +# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', +# 'Accept-Language': 'en-US,en;q=0.5', +# 'Accept-Encoding': 'gzip', +# 'DNT': '1', # Do Not Track Request Header +# 'Connection': 'close' +# } + +# headers = {"User-Agent": "Mozilla/5.0", +# "Accept-Language": "en-US,en;q=0.9"} + + +# headers = { +# 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 ' +# 'Safari/537.36 OPR/91.0.4516.95', +# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', +# 'Accept-Language': 'en-US,en;q=0.5', +# 'Accept-Encoding': 'gzip', +# 'Referer': 'https://www.google.com/', +# 'DNT': '1', # Do Not Track Request Header +# 'Connection': 'close' +# } + +HEADERS = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) ' + 'Chrome/105.0.0.0 Safari/537.36 OPR/91.0.4516.95', + 'Accept-Language': 'en-US,en;q=0.9,es;q=0.8', + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,' + 'application/signed-exchange;v=b3', + 'Accept-Encoding': 'gzip', + 'Referer': 'https://www.google.com/', + 'Upgrade-Insecure-Requests': '1' +} + +UAGENTS = [ + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36 Edg/106.0.1370.52', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 YaBrowser/21.8.1.468 Yowser/2.5 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:106.0) Gecko/20100101 Firefox/106.0', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:105.0) Gecko/20100101 Firefox/105.0', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36', + 'Mozilla/5.0 (X11; Linux x86_64; rv:105.0) Gecko/20100101 Firefox/105.0', + 'Mozilla/5.0 (X11; CrOS x86_64 14440.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4807.0 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14467.0.2022) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4838.0 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14469.7.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.13 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14455.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4827.0 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14469.11.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.17 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14436.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4803.0 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14475.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4840.0 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14469.3.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.9 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14471.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4840.0 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14388.37.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.9 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14409.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4829.0 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14395.0.2021) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4765.0 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14469.8.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.14 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14484.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4840.0 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14450.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4817.0 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14473.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4840.0 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14324.72.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.91 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14454.0.2022) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4824.0 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14453.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4816.0 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14447.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4815.0 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14477.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4840.0 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14476.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4840.0 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14469.8.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.9 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14588.67.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14588.67.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14526.69.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.82 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14695.25.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.22 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14526.89.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.133 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14526.57.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.64 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14526.89.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.133 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14526.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.93 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14469.59.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14588.91.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.55 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14695.23.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.20 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14695.36.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.36 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14588.41.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.26 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14695.11.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.6 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14588.67.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14685.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.4992.0 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14526.69.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.82 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14682.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.16 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14695.9.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.5 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14574.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4937.0 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14388.52.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.88 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14716.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5002.0 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14268.81.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.88 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14469.41.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.48 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14388.61.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14695.37.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.37 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14588.51.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.32 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14526.89.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.133 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14588.92.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.56 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14526.43.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.54 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14505.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4870.0 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14526.16.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.25 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14526.28.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.44 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14543.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4918.0 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14588.11.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.6 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14526.89.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.133 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14588.31.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.19 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14526.6.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.13 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14658.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.4975.0 Safari/537.36', + 'Mozilla/5.0 (X11; CrOS x86_64 14695.25.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5002.0 Safari/537.36' +] \ No newline at end of file diff --git a/extractors/__init__.py b/extractors/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/extractors/__pycache__/__init__.cpython-310.pyc b/extractors/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fa8e1a807b5f08c8d10fab64ba848203b73d04cb GIT binary patch literal 158 zcmd1j<>g`kf;FYt$sqbMh(HF6K#l_t7qb9~6oz01O-8?!3`HPe1o6w!)hfo&(7-(2 zH#0dSF*PSXIJqdXpdd3ZJ*GT0sW`JFwOAJ@Ql67q6q8y}Qk0lnl3!FD6Ca2KczG$)edBIF%ytrVE_PSKPArq literal 0 HcmV?d00001 diff --git a/extractors/__pycache__/items.cpython-310.pyc b/extractors/__pycache__/items.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..453ea70a1e20e18a8bd8b4a4347931ec972761d2 GIT binary patch literal 664 zcmZva%}yIJ5P-eAN!V->3P^COxaZIcPDoXSDnyHrAVLjXR*@{%9)XGX2ip;aYk3fQ z?2YH~wWr=Vb7H)41VWva=bIVN&e*eC4F(xOIeWb24>2Ksz4KaH=zPItHc<&8B1Lj? zKtwE(J0cPwm(i~zk3}>l!`45{AtUbfYr(qn8Ws&8c_c_4Lvr8xP4aeNCvYh+4eSPH zfxW~0@Lk|a;A(TS*k#8|Pr)8qC`Q9Imr`1?0w{GEr9~xX%G-TPuVzezosQv} z)iV2HkP;W zhv;=2dZV6pzkZr*Z*T3;W63WVC_45Rtg+wZ8h#jQ!F<5THQr$YKdolmR@zK3UYL*d zjrQNMi_5%6m69}0qyIhqsL32Ax`TPH{6 literal 0 HcmV?d00001 diff --git a/extractors/__pycache__/middlewares.cpython-310.pyc b/extractors/__pycache__/middlewares.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..52289c9fb0808a69964a51cb1ce4c4ddd9db5f29 GIT binary patch literal 3020 zcmbVOOK%)S5bmDW&OYqeVQoS{!4eo)lB{jQGelryM-f6#_0PlL_bY__ zg@f#4!eA4c+y%i1qao?gCZ*UnLZf3g&5qTy^wvNEf{yU6AM-@Lkh|0ofDki(rdoM|p9VN1UCR z$_zpY)51RtxlmM|5wQq6{@K2NcXwlB{Uh&TD|qbl(A$AE-7a`$_mKZA!Hx3vaMvy$ z8?xIO+aT-RV^!G`amO33TS5aI5RO@;7VW=%z60vneF1u4^kAPt)80V0FakEyhDKd8 zl_hz&r(D5hH;yC+M`(lfbSkdGCRK&&4!D%w&>yX++e==9nP=!P%kHmT0B+Xa0I`f7 z8#<^bfF_qg{0djIO@6eWSOe=@z<*#OY=KiLa9g~L{c9kU1K@jMB3{7(ZRMda<&KYg z31EK%7E2d|@r!G_)cUu-et8R06;mdlIU_#{ytxcmD+9L3j{@FJT5+WD=D}XefgP{D zi7HBBGMtW5kN;$Jc1-^X>O-KAP?Z0yY`YG0E)Jn?rJk;-bs^oc{nDs*J zS3K{a=ZC|YauE7bc6jnQX5uA8a2*9wm{>u9F{@!@pdtAPOi_VMu>eh?aEW1;sOz`| z*L4e(qMpU{ntDtaZ9@q<+jrKaH`u zS+j#T21R&l-1gj|CLh&w&Tqt1%xA8lYY5&#>ZY`I?MR5za-bPVJv4%pY=^gC7 zkKzLqcueN#Nhq(w#OKiDJcx0IR@`ExByjGz{KWX$geEceNBNl+P)(DWkb9O|+Um(9 z?mQCl(Q&3C!qgr;*vGa7O{+>YO+M8`$&7mhf37vNq3lQBeEt0o%7!~TI)>uF9Tj)V zgpyBLz)il`04*8wH20tfV$_a^tF*s56I3SD^dF=FA(V5fcID*SCcgU+6i0Xh=7>uw zRN$9Jtqc_lR$9kt;a0|a)lVx`fi??Vud3;qdM{~(P-vy4tH82De1ZbGQ(uABnz3#S cK)Lr)+Wrjx^7;lV_JUb49An;`U%R^YA8d@iZ2$lO literal 0 HcmV?d00001 diff --git a/extractors/__pycache__/pipelines.cpython-310.pyc b/extractors/__pycache__/pipelines.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9396f41126393e671d3f4391bf8cf07a860f7dfb GIT binary patch literal 3407 zcmaJ@OK%&=5$>KBhvblyD0)}2<$YxHSO&Y=b%G#4pcvLGUIH>Dh;lZbC>e~VTk^>B zV0wm;MUX)N`3LMh7wH%v|0Van=Hx>TL2dy8q^f5~$;u)#=$@+Tn(FHA>aQwoG-?E% ze?-&ZuN^}Ej?C>x2j&U1Yz!SEj7B7;E~SKNOpml!cXgl%Ga@54T~pC!WW^P?qI9gt zj;n4}$tzJUuDLZu+fm)E!++Jti5qT1$!bwEZn-T=o)T7P&KtshqZKQD0=)LTe&wt!z4sa!3xMWx$bA;{DH&X>p$Jw-t?Xy^x!Brz5d3t9ogAG*xP=-b?9wv9PT{bJ9y!3?d{6?TnbEk z#te3LcXtky!pcmxf6&|7@xJN}4rl+iHum?u!Or1fum2QvUDF>9drVp=?`_Lk#-oS} z3^3OoWqeaoAzb*Ypx@c@)n_wUV}pC3JX^yN>zXJPQt=aDxE1UL=1&p~nyZXfa2x$uKL71`0SjM;2$cp>fe$d8Yi|Mu z<6><+La(yNKooy=?+J`}Tz{D40wZ7Qa3`e|s4FTpcI>u;@`i4uu#G_;rpe7d-H4XU zRSwi*ijngLH`64R6sQ8c=fI~w-p1t1hc0B1STTo&M`Qa z0~X};Qu~IAF97)f^RD!3g2=)sAWC~e2P7m@19%N^!(bYq3c#+SbwwMBHWY0t+Elcq zXiL!*MOPGUE813cRnb*N*A!h-bY0PPMFYCQyv{nwjVlwHbxi=_4Ay{A#4eiwKA%+; zZkZF4HI;sWezlTYSSNsR25Xn!rFC2t;&%PYnE;NTHKvtGWn!@oST$G+Y*-3GVID=_ zTPNdY-ppIyLy(KHXnbz1x6KEYWsXidfWcW|E zIIZW+NuAyQv!T5G7-p%0)$b^;y9UYI-_VOsDVaKyj92reiKDXMyXye$J1YY_Uy~OQ z#8c;wPGT0{b9#LMbmPz#*$-7tA2 z>+?$v%NCTc_hfrzUl?}o%cWaTK z<^{Zr6X?KJ0=u6pIIkBpXOK>%c@m|5E)Z>` zb&`r0nC~G|-FY4G;>14$zS~fUCk;r5ae;fQAa)z2--qeI{{x@1Md}e7pdy}Nw~gH< zbgmPOgn**6*trHPx2M jsxZIQ1Xlr}=KeNUq2MW&`55nBhKS%)ZuY;}>9+n~(B*-N literal 0 HcmV?d00001 diff --git a/extractors/__pycache__/settings.cpython-310.pyc b/extractors/__pycache__/settings.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6c993ea54c20b057b98a72301a23be8885377669 GIT binary patch literal 768 zcmY*W-A>yu7>(P|rln0gxZwfNr0yrcE{250MoT>usrgxws+(2G3b83hX%fX@T_f#D zrb*l7Udgw++ADCgxfBGmeHC6th}{c?mjZiSVm)(P;OYO&kqOg zoxolIMd@veO3Fo*FS$^&u(%YpFw1B{M4hla3FlPQ!*hOnqhWl(Vv4b-MQL)uW~gO3 z_L(yr;IR)q)V*hJ-}fhuH$;Vy)6?VW`#!roIA{=9uO_SH2P5Ps9v&{JkvFi_3kaNW znv;ld#+`2@qTJ;*$c%g3hwN}VBw=t5on(+uO#c>QoYaS{x)3U|CL6MX%;{$)8=MUh zJAhlbV2n^tb`oMU-NqkopQ75*!A8V18a@pft3Z)|dtx$SATSiCY zE!c1H`lZrI8Fdny|GOTDxk%HfQ`o$B9a2-%na%|Ow}>a=JCQXI_3QHL_2~Tk<;Bg1 zEV+$EeltvDJe_7mI+}~I%Cu0YP?_gKj>JOCIMJn4qnXZfbv9j^!&Kf%9Kn zj3FCH{7@k?wZ$~nx5mju5xSH!vc#0!ki5l&PvBGTi!6?)vtv5k;Qu0|YE|IVbAOs{}L9^NT E3mS&jJOBUy literal 0 HcmV?d00001 diff --git a/extractors/extractors/__pycache__/__init__.cpython-310.pyc b/extractors/extractors/__pycache__/__init__.cpython-310.pyc deleted file mode 100644 index 34b24e3af4cad22f9102577ffd7f0f1fd69b0c69..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 169 zcmd1j<>g`kf;FYt$sqbMh(HF6K#l_t7qb9~6oz01O-8?!3`HPe1o6wq)hfo&(7-(2 zH#0dSF*PSXIJqdXpdd3ZJ*GT0sW`JFwOAJ@Ql67q6q8y}Qk0lnl3!Ge%#4rE%*!l^ bkJl@xyv1RYo1apelWGUDrI-mwurL4sToWpV diff --git a/extractors/extractors/__pycache__/items.cpython-310.pyc b/extractors/extractors/__pycache__/items.cpython-310.pyc deleted file mode 100644 index fdb5e2f6baed0242011b980a8e843bc8dcac42c2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 615 zcmZ8fO-~y!5cTdRVc8^r3PL?`&jAD{B!p0fXi?M>A;M`D$ujl`OuQep9a7Hi5245Y z4E~0%J@qeeV!SbG#8{s9#(w)W8gJF_X9R;kKk% z1NH*;1Fi*JZ#s!xcER)%?9f6n8f>_f(vlTGsmmxWDlt=@_bB~7V=CNq4A-oFbnPz^ zlyJRTqmJ8C%}U`ugz5}XC2K3I(q#se0zE=ww<-&E3*Tfpb=@FsiZ_>QX%)Dxu@#?= zEweM@vY7#W#jGrE-P+qX6=!NFT)WnigG(zVEo`5gmA z$G-6zeI|3bG17wB#gjSqHh~{jGj1zwmMKaKvsXWA|1>FX^RXdmoJQ|gFKSeSM0Zf* dT7IO>$^VWPY|CG#$BXHnMBdf^J!yM8eg%<{r&0g_ diff --git a/extractors/extractors/__pycache__/middlewares.cpython-310.pyc b/extractors/extractors/__pycache__/middlewares.cpython-310.pyc deleted file mode 100644 index 07ffe368ccacc5abb5d74eb1a2e0654bef1b0961..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3031 zcmbVOTW=dh6rS0e*O#P8aY89li6{X~TPLAhqpAu@s|uvL6_F~if>z7UB;Byyb!OH| zV!0A1dF4OQ$NU9;%DnQ#bKiJ@b7pO4y>-%vk>>2o&h0yA&iUpfGcy$e<w1@gVgiPt!2ol>x5LmofGY_{&L~GSW7<$tW{U$T4kO%w*OHY1_B?DuNoEj}q$;q@nH3e>^N)}jXg+;0kHy>~*y`gEsxZ6wLg^^ciI?L{_TmWv`+5oYP ziVY3iCx9v!LHq`{^nmW=ExvXcO(yo3h7+cQGX^&%^yaPgOjxldxWd z?V9K9_xxx$QVk+s$_`JTCrrGB2(F_*HWN!IZlXXKIcUg30z*_FQ^0dV76*H6W{L;NK^!Ii<(OKO;9P-GDDX)XGbpr1 zXKJA#@P-A_VgJ^}CtZ#Q3-Jm3k7S}sL&u&Pm{&$lJ)HC%tdQCREB)phKoypr%Nr6S z6Q8Hox#Lq9(QG#r+}zm-8I3<1qbuc^dD`VPi)+t?S1?;+NNXC}dx&pJ)TsH<_G2#c z#@O6E=U#|yy!c!K5Fa3dWfUKx(0o~NB@CSq2XBGs-+sfb6=Jw7ZsXEBD4Hl%Q0Oqd zi>=R4e2(JW2wjEI`%vX9h;fA0+;XiVaO}1CME}}=D$(~x@tNgPZIhW0dzM<*?8!9g zJQc~|Q7$>c&>j_RW8H+Rw@NilzSKm?jC%wh*M`|r_S5g5{P3f);m!_^AUSYHd7d&M z`%@NhlP}g_mtv5j1!CMC5!Y#dc`B%!tLYb{1uj%Dsy5}>wVlzx7qD>$Prw*)NsS8p z(`r;9W5J~BD9hohn7aB|(kf7Af%8>8o73#2VFZa*mc$M>j1-w6C-}~Y`_Gk2O%m_ztDm;<_t-%`3A-(Ju1C^ z$tMq4G%dM!Lo1!TABk7I#1^pVj(HhQMSjBLGAwvmCfT^ap8I(=&Uey;XC+kIV73#L ze4LB3K{Vl@av0RhWWonfv3Gf#OrjK^Ud&#`IR5IRLFHp8#ScMn5>Q40jZvn3Ndlee z%z&@KOlHB?WH!vfk!=;i?xVwG%F`s{F*;d39smQ%4wT|QAWCvU7TR39q>R3%3tb@% zkfD$c$W+JxWGQ36r;LQUK{M;E86GWvyrzgz|3f44iF)}2~U80^hkk-S) z!?3>#EB+`PJlx-t8;3^+yU%t;;m*U+-jjo)=i$!5)2DkoqyE7_cHTB#>DHIJ;U3r> zt(rerHXR=IclN?h`oqyRhqrL};ofM}A3Ry@>Fpm3o*Y0-&yM=ijHc5*lcd)cCj1(y z!|o9lA}dxqN^0z-?8Nnk+R4)twqKHG@&;^0HjB#@)ty>3pVoX*i4p zLdM3Sm%CoSF5W^~d3c{kU?$UQ& z-KSS?M|1UC)YZ;6uI1L&inn+2408^P%Ci;#*rVi}2S6EjVliFa0!kE;+LkeS38_ZG|liFam!kE+s za}>s;Hdteul*XkA#d=L3sSV~rD><<*9R@^n(!f*)`Z1ZO?0wbh*0ts~VJ+y>uD@&B z1foYW@h<&2(A7zEVVAABeE}!Q+@4$P2J|U&C0@!z~Nhu(vM z^lLVGB%+MTPF>sOMJ$r3A}1GE0kRYGUN+#g8}p}0_DXuSsj4i!jFK`1S^-XqUk%Hs zoE6fY6^2%di!dp6gq`JPwvC zF#If0EcKWJcjhTmCV_p#$FnpN!CF0%QjgB2JaBiDBF<-7$=Mr*k#=j7rT$M*I^z=J zQdECXyX$X&vWc0u^L$+1UFc=IV#{nu^P$0o*)Mj3(_m z=%87HQ9g`5=WnakAtio|Vf+fk?@%Du1%5me0&Y{)`ZZa-NY!E(IPdT9C5#dCq6l&P z--D0hB?x45L-(~SRGYq90h7H=HcHtgw{ zI(w@dhp8;oSzqlSBhKOYv{`jw6%80#YJ#hspvL~bP^Gh7P4kbKdj%f2zJ2uva8-R+ G;r|07G&E8G diff --git a/extractors/extractors/__pycache__/settings.cpython-310.pyc b/extractors/extractors/__pycache__/settings.cpython-310.pyc deleted file mode 100644 index 4f7d2575f03ed9b1dc8eb5e42661dbb5da0041b9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1370 zcmZuwO>f&q5G5s9qGVZ;-NcUTex!{9G)+P!IV6Bk6rsh9O@Ln|DJWhTC1~zKVnB%m zmyRO+iXIEJ1(IVg{Xx9;)W48J+o3Gls?jC*usb_%-|Wn6w6)dXVE8}3^}p$G+@JYk zebiv_5`Ojv0O1H9a+sss8~%NXls@qMrLBz1wEVvEfx{cL{HBU)fC8=qHt;536K?^w za2v3Lw*hzXCxEx`ZNOc;2Y3hXlQO;i3r8w+4{(F-16F^l;ZI2oAArYa_;VuQFGzjF z<1a}a-z5$F71_jJlP1|BtzS#{9%++Kz~e*0+uVcgzaVB#Uwn&t-KS5Vyl}O^AA2-( zZ9nniIGB#QGkVT~l(NTQnT0f2v?kGX6p{0XEP31Oc0cmHMdPI(Ua*uVPsGKxxY`$| zH0=U0b#Q~eOB2seql7JXuID0)146-8ekQ(HJWpot$h;3t3wrNyJQ!zTeDA)R>%Wnv$qiza3gy_;)S=0%U?x15V4noYyV zgHYBv<1~%qoFSgT>?ZecO(_my8U|C)(el*DN9M{9Jj%nmpOx91WtG@V$5|E9N~a_f z7)?{iFT?c_P2Hss-Y_lW^(o!}ABEtmWSXub;~F`4r{09(ntbHyl7?^#+{uNXGB@?k zLyC8RClC6L+c#7d^&Q2~@!pk>@1=AUCG(Z@!IiV*pku>2U1#lHu^0`Z+J(Y?L9!7j zSnHI76$S1P_|zRk5wFGW7Ch5Z`p7*|Z0CmJ4qVnPlCcVLH@~@!97oZQ*QJ`P)^aZx zO>k2)^kV~ZGO&?_JH=k?F*b`5f;+{&DcSbWuw>jWd{*s>+l5PC9oP<>vRN1gmXftB zPLZ7UWlOOJqTUbcn!JG*d+y(j6%;W7c+tkTWEMl+=gZWxKL5)Kw$bb2UYl zRWy_=5NIh5(p*z9k*es({+&y<;b|}7ql$WG|Np$tZa(j`>%Lx}76lUHc_w@}f$j_G s_t4xNdk!Gfo4mjarE00luYJqaol>P-EmdHz!dKvj319uYA_)Kd3*gqQm;e9( diff --git a/extractors/extractors/__pycache__/utils.cpython-310.pyc b/extractors/extractors/__pycache__/utils.cpython-310.pyc deleted file mode 100644 index 1d12361e86b98241edb46ca48913449ce4187019..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1750 zcmah~&2QT_6qjV#ahy2L+NP%h^NazFgDP5v0t1F-aN>4=_u&O~E^}f8l*lNAWl|$4 zCpMhh{)`>k-S!s^;C0s>cHUvzqhu#eQeZS5`A9uJzV~|{=GAJ&f-(Dj9R5|ZtUt_V zd2GO@1tj0Xge`3UZ1pYd-~y~Rb|V`Xfl84bS$O4F8<%0XiYt+W*MM6Gsse2Q)qpmE zJ_365OSSLd$33h0B(y>CVi?{5Y6HnvFwLxY)_dz3R%{O)QNd2n@CD#Ev77Nl;Gg1B z#;*YX43~RgDd=A9>e9goO*!jtp=@m+FaZ0X~K%wG7F0-kkvD{52x)u?&c(w~q81&>5n96q9cJGk; zuYd?!p%GaOV~(K=@Eu^Vb?wZo*VbU;x-hffS+l}>8%`W-rM7rvTQdm2Pp+ApcGoy~ zl^}J|boHtvG0E)IWxzJ!OcK2UXCuOtE-$`j0OV5Dpb=HNWcxX&e(J z;eHzXF}?Hl_KiP)HDu0_;O8_Z$AU%?5gmN%8<}f{_tO{&4=E*RkSgMtB;9ZHc#P}8 zB)9S;2Xnu%kcM&z_?2gHsv{4)4Ew`KeE{d1kSyQ1XY9`M z>nIfMR4P6q;w>@3J2tsE4-^-R+B}6K-?%*hF`+C%x(2&yP1UzvXf`6*1$*Vf#M3$3AKyG6%w NOp9>Me(|gP`5&cb8jJt{ diff --git a/extractors/extractors/spiders/__pycache__/amazon_find_spider.cpython-310.pyc b/extractors/extractors/spiders/__pycache__/amazon_find_spider.cpython-310.pyc deleted file mode 100644 index ee95bbc5be3b8483386008b7236b778d764a8a6f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3441 zcmaJE%WfRUaeCf6`yja_R}|$)Ysz-KvP4ONkRXJRSfUNZfW#Q4>;Pj)o8fem8fqS^ z?pZNf%mPyAtAgB<0K4!>mwZ9Kq>)=r0`dhqSgGn6KGrZY%jv1EM^|-KbyqQKHa!iV zm;ZVe-dWPL|KQ~GW5VQ9c*S2}pc*By=Hpk7Nuv8YDRG8x0M3Zb#PTh`nbeBy#PJ;k z+p(L}{95Aq^`zl9l#CNMla}97up7@M^ZvYoYw<$T_S*{f0ABPLiFQY$b=vq&qYb9* zFC7~GG9}wuulWXS(zbd;*6(HMUbY!WEX`rMw8!%PfM@$G%=?1nd6e#nH##r_(YqJ$ zCoKOWXGzbHbHKV8yS(Juo;OPf#_!M4o83v~LglfJ{3Aj)9O=?i{JI%MKMQ!-n z)S)hX9a@9Uda_YjHi#(WzFp2noyU1T5F1yo9vmF32g>&KFiS!L3XUFxF_37EScT!G zmTUW@(DrqB4N6|=-)QJ^(4%j?)};aVOE(PScqa&-^h{}skOzb3()o%#9Ws&o^{{f< z*TC5t1YEHG5egh&3Ks9a*7!0gd;93~jR)7SU;9=6UKBnKSlr*n#Rd?R2M26NL^%^z z0CNyC{($`{=YewkX;7#=iwEWY`nyp|`z3B2jB1HI;CY{yrinK061S_rSTRQDPJ3;A zmNUd~96c@oHoVG7{{-NrR*(r~{FF?{RO8J;o9I-Z5~Afs4#7GL^h15D7y4(~!|vEn zSQD_up}tQrh_A@lEKL4O>J;YRw14PhE4TLT!UCr|6etm)?kfXb|Fol|Yvr29(sp6< zuX7jpYlRKE93}6-+@sz}&{W{+TW{MJLr6#PG5nzy85g7GW{K+VX2X<97mfuThPjYt zAfi<2Ten_AzEZXu4C8#X`qSBTl@6}<3_b^5;qx#^Ghulk-4w$0Ww6J13y{6KG(sU{ zvx*^viHlqsakj_Nxw3YAsQ3b|gn^RHGIuJocmr|HP9PX07GH!}&*gA2Y0~9P46;-( zepc}uNnk$v^=j6K#N`(dC7m z41R;Uh4D<|H*#m<(pq6m^+}C-Q{YHj70+#2S3I{1W8%>UZBBKRpVX%sehZzpDlK!R zmPrFBCT$ibto+BqoHRiOGP}fxKhSwaNfc#XQ4~f{x}Yc{MOjc3g%MBb!lYH0Qv%xB zmA1v%JppC0qAbl)#@3!Twqb{Jbh)s=dZ%#URJF%$;lhqOYDWmq(=)Jhx3DJ*g^Svs zeqY$I?$S*-jr(nYRw@l=XEjU~ry5YtR@Bv5>KHsa_UO5By{J(ey#9{9SJa+q;tl;7 ze))fa=B11DyqdkE7sj57N4KbfwT+^|-xZB2F7IUkBU~QyTaZ%vVi{l zjHciVQ*uTdw_v@G#~=+BHNm2uvglG}(I+Pwz|K!9?ByBkW$^AviMX=GS0LF(t(mK( z%hgG2RVVRo2Ib=m>RUTJNGU%D0{lIcs(ll|mEX-Dv(fS~*!_6@(rchIu#J?JD`vYRq=?Zj_eO?v-xFp{#dBmZcnv z*J$PUDae$5nc?l}vVO`2>8Z&kT<rhbnVKoANs8m0@%+- zE9L5yUF8HTq0k#?qD2llAZ_R;7^E3q#L_BpMwCMJ5yPF{oT?!FBP5#Gw)r&}1}cyE6~Ou)v?P#9 z_n~I^F4FEs(Bzm1NrPtx{3>#t4*e!{di=UlmjqCA`7ad5iBFWreq4P%048kx#Ghe+ z>Z5za)>}r$nAexcJk%YRxOyAvkw@Hjr^lAD3S6cR9PoO`2|VWsRx1a7ZCeanGV|R$Bk5*^8qnT2?)pMoW@27BZ`hDs3`w27-F~W_0 z|LHJ@D~gF<{vqs#V_j7Rmg7E8?OoMiReqa5z*k_QcQr6xcXiilEOW#{+F)nkJA>y5 zcJv$@G=2jHxm0$b>%%;X#X1~xZ0@;%BnvvoM5qjtmZEcP!P+)I+=r7oQiCzb+|6v1$9VOXT>=xbAY>+moyW@Qy?>ouu?adl~etP$F^rl&> z{b3i?&o(YPIP@`2v=%$miJiDc-CrDf16Ot0h+TRU|J}mT#Ib>66US}3g-^EW9h`Sw zZ@s9|yJxlbC$ryz;6bm$=#i&^sA%ZD3@DrQ4yr$aPKa39LL+OqI(mRdbgv^l*67!nNB#F9ttVIYqwicwccwtVVZ>;4u_0V5NP=_=nNHH%z^+z zFT+NhXuaoYKOr#_#4ss*0awPGJG5`iKD916-f+s9fUdZZh}odoZhQ7c5E!)R|U0!A&A8ugWRmg+ONba zef==y9E?W)ln0z$J9~G5M*~`^Qzb7LhjYc^7*v-ou|XLb&rdl~J~2kIFg4%{GKM4X zOmf-^r$v^%PEO{LH@6PcX(+FJ%TtT^h(2N>_Vb&&PKF-@I#Mh#OB(n0TKBtS62mc< z(de7=Z|_?g3&!uExvj1$dFRY1tl94sG!X~`>O^vUyi7NNs{sW)GBPRl7i!AkL{h=2O>o6%URPLD1;#aDkP=wC zg{8bAHA)QekqxYg5lYjj)Jn2s%T{V}UEznbUm{p!__z@GJkj+_vKq*hluzXuZ$ErS zd7=LkL!E1+pcX7CYm|If-uOr8{!hW$TUoR0Ao@kq_O4TuP1eYpPj(xtB9|66wrsP| z`!m#Tw!y5rzGq^TZI$l1hO!+j)M#{Oum|bK=$!4O8tWEYqliyrDol1~xq)UXYvi?` zHL)4wMfR-iWE*%CL`Ew8Yfaf#P(J8>yniLU_G@(}$+us#`+nH2lUG;4k}#Zh5}VT(8>- zesM7D{_~2W{D%hTe+C90;!FO4hAB)96qkA}P(#ht)B>lwI&gYmgeA8GoWV+g8J69$ zgw4PTD{dvUT|2D0Rf#JHwXp8i<$Nn>giW_8;Yu(Q&bqS_wu8BF-kq0l74U+)AmJL| zMR!rcYOv%kNxhccWnMZ_+?9_NR%eZG71rR&;iVJZU1jRF(rSK>9VlBZHLu=_qy6|! z;PWWOaABXPot}sfc{lAOJWc&*Klxq*Gl=#*Pdwu3Cn*nGdfo)<9WUkkv3RoOg*>-` zxEt^glJXhA_q~Y4;peyZfk-gDlJEdxV?hJcOPJrKC95Rw7V~;37sS;U0gt*d;~11N zIE?*Bg3;rCoboPde*Q<%=ndj%yXP}5T(cMrmJZUimu#-DA0HpLJ;~AT#$i{*0;fNMEqtU-@-~`plvFuX zGv!djr>90*8ktNTtFqU|>Ivb?O#jaKLcwhrSidybww-go2M2!QggiZnnFA<|9h{=m zjiVGN=)`-@zJJ6cr#m@x2Zz(13YY?U`SV*J-`#q!otJ-i_tr;u?{7afavi+6)%Aj4 z*XusY%Lm-Uj+5NLa^6ajXf$BL+D%1C3lI8{k;olL(3`iV9wjg_I6X&)&M|2WiB zb5v$VrjIpo^$Z?atTa}KMx-%wXrvWfyq)PM+OQ;f%2^5gx6fekTi`b(znK{#&J4_{ z9;txPk7}7A^{9^;7&pON1_zAzrjpJa&SoaCbLo7#Fj{0bOpf}hG`TXX6$bZsMed?r z?BY;m4c7cl9a@xgMYFQSnZ&Gr$9M-aX(_YHIbVPY}VUZf3tnH zwX^;`d1$T-UM-w?XMJb=%dd9UAHH?g6LWzRlG*SQKN2&TCuXUcBMDXn?LULUxwdud zDMAwCdtN_Cw_3}&-c6FcKG_}2R3vGx2l2ir({!?~+=Oeu>vBzS_!##@_aHCri@4tt zb)uJdy@bQX+}dOlpZZCfAUyVZ99uKGF)ZFX;_`E7CjQL0^`~f4C&Go+zUP{E-Ub(N3PxNCutns!(PN| z#9gLS0jqTLqInT`xg;?p8)&qz8)aLa(=Dy0FQ^9EqBnF~t>Mp7-&D1)mvjSgTQz=s z&Ur7!oD0s|g!B?V>Ae4iTmO>to{+<;hlLAk%z)Ft5m{;K3djm@1wGT5)mOzktdi+R zig+h2!!hBwV{KF+cN*$ZW5RQr)g;gDOdr{lVaFQH&otJYNS~R6iz>o}!5WzXi94Aw zs(}|FrNAWHY*tcyNtu-tiAny%<|O4vQsyK@Vv>-}jp`H-u-g1hg_-CVCaW$!x2Mv+ zGBmM|23w+_gRoIX;F=@1hu+IlZ-Sd_1^cryb2O776sgMNzapq;Z1E<79s-QUE=^Wg zy|}_?ZmfX1I-y>^NFCzbhBmu0tY#HvKGMYx?1ijyq$K}hFH$f54VoRzvzO%P2j&cI zdFn4`6=++_YT}2iHaY!jI-l7mD!ang{*D05UYV@<>g1#mw1@SqPIvehRm?)p0(%YT zTp#PhIzsD}VO`$)>*x3zS!2Tgo?Q2hjBXO^PO$HuuFl?ERwV3<)n#SafV|cav!PQR zI$f0-y*1J3+Qn0VuGc1%>p!7f2W640bF{?TW0g6m6EwC?*mN2A6-eK>tW5AXa37ki zA1nmkw+A0jA)5Og|x#%773b50#Cy{xTanvDx zFZJW-nd;=KozG3y|LBb3+Nd#5GEmj&R{Rjb`BOxxL31te#6Dl!T>J8?wTD;N29+t| znYt!^eSW5$uUa0h8E z6~D#IyxB*{zVA^TF0lji%da-BzxU8>oDrbE7_1c1CwdhFDu0q^>gGV-*xYDoZdFJ@ z#GjlFq*T{FljfFs!tZj|iUanUgl1TL90jq*Isr=0sQbh{y%gk#E>DsNe#m_VJAIz8 z?GJeCl4xNk;wm+7QA3ft$Yj$zDXtT)P0c!*yexPx@VZ=3QJZvnQ>lOp4-Hgf{5?;a1S} zq!EfuU|k!PD(vt9(wu7%jq%9Hk!IMS2PZCR^hk`|XAUVzsRv!EF z3G*E={Q+O{EgB@ux~-aeLtoNov;}pBlB9+NiiBzX&(hA?25?ITQYz>*mC`Aw`iq!n zs0QE})kJ!w#A<+Q>Y8f*I9IlTH?%p(p*br^zJ{jhCTOeXs=B1F7`8T8y--14C@|=u zV0AhX{I1ivSIXyYvZrQbKf+(%avfgpay(b|(RRtCC(p>U|;?2_Tr+$#M5hv)?Ds<5E5}p=eA}uX1 zNxCp;o-{amy|^>e*J!(VW)$^Wl|p4V@REclg5C&ul|*^P`&3j3Q|pT$@ORttYy@d7 zYdO&%h~9#*O!@YewODQz&r7+RpF4oux>=m#`xJc=2aTmrb%YuTMnEl9OY~>qYE5lP F`#;+jdkg>o literal 0 HcmV?d00001 diff --git a/extractors/extractors/spiders/__pycache__/amazon_spyder.cpython-310.pyc b/extractors/spiders/__pycache__/amazon_spyder.cpython-310.pyc similarity index 100% rename from extractors/extractors/spiders/__pycache__/amazon_spyder.cpython-310.pyc rename to extractors/spiders/__pycache__/amazon_spyder.cpython-310.pyc diff --git a/extractors/spiders/__pycache__/newegg.cpython-310.pyc b/extractors/spiders/__pycache__/newegg.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a372bddee030546d3aadc3b6a92acf4f65a687e1 GIT binary patch literal 1844 zcmZ`)QEwYX5Z>K8pU+NWAe5?VMU@s&#faqMR1oA6Frrmoz(NFga8jKvx0}nQ=ex`9 zI&LgiYSq5>6(Lnf&I?i>`8)f{Q-1(2eJL|{cA8LFX=mqlW@l#So7u}27U~4XZ-0-% zKOI8;!jH4VhK~+Rbpr?|oF=4KuCzz*63)2wlyFOs1NOw~S)6W>ru`ChNjF4#b};zp zz*K8M1nE&udW=&z#d^^E;ODBK((Vci$ZSOb|DHw=23ka!JpLIpD|sN5Xi{SNwuSj45*#yMZsx9=|E~tK2TrS6HinH5aUbWWsc9Y&`+{U=eLw1!vtR_vRkjg*6+_*JtMsHwyrq z?&zB5?GCig((R6pbs{#ecOgO7?|B^_t6(<~Tv~8}k#i#tBC#nk8KbM77i@RJBv5Me zIwZHhQk=z8`%>&diB%rtf!;R@(`5U5u>>ToB#TTH4Rr^7Sp`$lMN&h8xi(HDG?W5p ziqtF=So^um_#o8Hikw3gR|=J9sS?Ib!Tb+ktv5f_`7TcHD>HZItEi8XEX=$hqb>nq zj~cX2UD}{aw809VU7+^s(c(Fzi!jas3>{7|E#IQ0KY+~OJ)qdh&nQ5QfZG5OLx^}~ zY#kB#;nH>EP$tQ)xDI~>?y2h)8WTTK&3rVq~R(W+|Bg6nbdU2;-?Bc6- zLQkWBBAQgjr9bSrdUy1Z=k2#n!JB646h93aU2YoI{RUdB%EewVNc8Aa&*SmYTZeWB zNg}iY*awN1#NPBUc}15Dt&a{pFYZU%9i@-4uKIzD;w55zc9^s^&j?1L;$<$JINI#_h_B8q2K_7gUWS)AtXwtxpK&M_=g zi~CTHScRruI_(C1@$VY^0E}IMsjdQ9gz9s#3SYbI4Xs-ZYY{4PbouQ{oHazZSu?Kh zr{KZ&P2KnV&?gD<^S*yL2$HG9##JtYud>k^$YHlh4CG93_>{_`OYkwnPFJYw5}Vr0 rrY>`>`G!PU(=NK#Fe!{v{zr=a|1xIWFU!Eb!heIp)^UkTFS36ClNjR{ literal 0 HcmV?d00001 diff --git a/extractors/spiders/amazon_find_spider.py b/extractors/spiders/amazon_find_spider.py new file mode 100644 index 0000000..a978181 --- /dev/null +++ b/extractors/spiders/amazon_find_spider.py @@ -0,0 +1,201 @@ +import scrapy +from pymongo import MongoClient +import re +from scrapy.utils.project import get_project_settings + +from ..items import MarketItem +from ..utils import getCategoryName, getElement, getRandomUAgents +from ..selectors.amazon import selectors + +from dataclasses import asdict +from itemadapter import ItemAdapter +from urllib.parse import urlencode +from urllib.parse import urljoin +from urllib.parse import unquote +import copy + +import random + +settings = get_project_settings() + +class AmazonSpider(scrapy.Spider): + name = "Amazon" + + baseUrl = "https://www.amazon.com" + + + def start_requests(self): + ''' + This method is to get content of given category url. + + ''' + + # request with category url + # self.custom_request(url=self.categoryUrl, callback=self.parse_category) + yield scrapy.Request(url=self.categoryUrl, callback=self.parse_category, headers = getRandomUAgents(settings.get('UAGENTS'), settings.get('HEADERS')), meta=self.meta) + + def parse_category(self, response): + ''' + This method is to extract product pages from given category + + ''' + + # check if the Captcha exists. + if response.css('#captchacharacters').extract_first(): + self.log("Captcha found") + + # get products from the category + products = getElement(selectors["products"], response).getall() + + for productLink in products: + + # get asin + if re.search(r'dp\/(.*)\/', productLink): + asin = re.search(r'dp\/(.*)\/', productLink).group(1) + else: + asin = "" + + # get current link + productUrl = urljoin(self.baseUrl, productLink) + + # get rid of unnecessary query params + if re.search(r'https:\/\/[^\/]+\/[^\/]+\/dp\/[^\/]+',productUrl): + realProductlink = re.search(r'https:\/\/[^\/]+\/[^\/]+\/dp\/[^\/]+',productUrl).group(0) + else: + realProductlink = "" + + # get product page + if asin: + if asin not in self.productLists: + self.productLists.append(asin) + customMeta = copy.deepcopy(self.meta) + customMeta['asin'] = asin + yield scrapy.Request(url=realProductlink, callback=self.parse_product,headers = getRandomUAgents(settings.get('UAGENTS'), settings.get('HEADERS')), meta=customMeta) + + # get next page url + nextPage = getElement(selectors["nextPage"], response).extract_first(default="NA") + if nextPage: + nextUrl = urljoin(self.baseUrl, nextPage) + yield scrapy.Request(url=nextUrl, callback=self.parse_category, headers = getRandomUAgents(settings.get('UAGENTS'), settings.get('HEADERS')), meta=self.meta) + + def parse_product(self, response): + ''' + This method is to extract data from product page. + ''' + + # try: + # with open('response.html', 'w', encoding='utf-8') as file: + # file.write(response.body.decode('utf-8')) + # file.close() + # except Exception: + # print(Exception) + + # check if the recaptcha exists. + if response.css('#captchacharacters').extract_first(): + self.log("Captcha found ") + + # initialize the item + Item = MarketItem() + + # Asin + Item["productLocalId"] = response.meta['asin'] + + # brand + tempBrand = getElement(selectors["brand"], response).extract_first(default="NA") + + if tempBrand is not None and "Visit the" in tempBrand: + tempBrand = re.search(r'Visit the (.*?) Store', tempBrand).group(1) + elif tempBrand is not None and "Brand:" in tempBrand: + tempBrand = tempBrand.replace('Brand: ', "") + + Item["productBrand"] = tempBrand + + # description + productDescription = getElement(selectors["description"], response).getall() + + ## get rid of blank rows. + while '' in productDescription: + productDescription.remove('') + while ' ' in productDescription: + productDescription.remove(' ') + while '\n' in productDescription: + productDescription.remove('\n') + + Item["productDescription"] = "\n".join(productDescription) + + # sellername + Item["sellerName"] = "NA" + # Item["sellerName"] = getElement(selectors["sellerName"], response).extract_first(default="NA") + + # imagelinks + ScriptText = getElement(selectors["imageLink"], response).extract_first(default="NA") + + tempList = [] + temp = re.findall(r'"large":"[^"]*"', ScriptText) + + for row in temp: + row = row.replace('"large":"', "") + row = row.rstrip('"') + tempList.append(row) + + Item["imageLink"] = tempList + + # productLink + Item["productLink"] = response.url + + # productTitle + Item["productTitle"] = getElement(selectors["productTitle"], response).extract_first(default="NA").strip() + + # StockStatus and StockCount: out of stock 0, in stock 1, low stock 2 + stockStatusDesc = getElement(selectors["stockStatusDesc"], response).extract_first(default="NA") + stockStatusCode = 1 + stockCount = 0 + + if stockStatusDesc != "NA": + if 'Currently unavailable' in stockStatusDesc or 'Temporarily out of stock' in stockStatusDesc: + stockStatusCode = 0 + elif "left in stock - order soon" in stockStatusDesc: + stockStatusCode = 2 + match = re.search(r'Only (.*?) left in stock', stockStatusDesc) + if match: + stockCount = match.group(1) + + Item["stockStatus"] = { + "stockStatus": int(stockStatusCode), + "stockCount": 0 + # "stockCount": int(stockCount) + } + + # userRating + userRatingCount = getElement(selectors["userRatingCount"], response).extract_first() + + if userRatingCount is not None: + userRatingCount = re.sub('[^0-9]', '', userRatingCount) + else: + userRatingCount = 0 + + userRatingStars = getElement(selectors["userRatingStar"], response).extract_first() + if userRatingStars is not None: + match = re.search(r'(.*?) out of (.*?) stars', userRatingStars) + if match is not None: + userRatingStars = match.group(1) + ':' + match.group(2) + else: + userRatingStars = "0:0" + + Item["userRating"] = { + "ratingStars": userRatingStars, + "ratingCount": int(userRatingCount) + } + + # price + Item["price"] = getElement(selectors["price"], response).extract_first(default = "NA") + Item["oldPrice"] = getElement(selectors["oldPrice"], response).extract_first(default = "NA") + + #productPricessTime + Item["productProcessTime"] = round(response.meta.get('download_latency'),2) + # print(download_latency) + + #productProcessSize + Item["productProcessSize"] = round(len(response.body)/1024,2) + + yield Item diff --git a/extractors/spiders/newegg.py b/extractors/spiders/newegg.py new file mode 100644 index 0000000..b814143 --- /dev/null +++ b/extractors/spiders/newegg.py @@ -0,0 +1,43 @@ +import scrapy +import re + +class NewEggSpider(scrapy.Spider): + name = "newegg" + + pageNum = 1 + categoryUrl = "https://www.newegg.com/p/pl?N=100008225%20600030002" + + def start_requests(self): + yield scrapy.Request(url = self.categoryUrl, callback = self.parse) + + def parse(self, response): + nextPage = response.xpath('//button[@title="Next"]/@disabled').extract_first() + self.log(nextPage) + if nextPage is None: + self.pageNum += 1 + yield scrapy.Request(url = f'{self.categoryUrl}&page={self.pageNum}', callback = self.parse) + + productLinks = response.xpath('//a[@class="item-title"]/@href').getall() + self.log(len(productLinks)) + self.log(productLinks) + + for productLink in productLinks: + yield scrapy.Request(url = productLink, callback = self.parse_product) + + def parse_product(self, response): + productTitle = response.xpath('//h1[@class="product-title"]/text()').extract_first(default = "NA") + productDescription = response.xpath('//div[@class="product-bullets"]/ul/li/text()').getall() + while '' in productDescription: + productDescription.remove('') + while ' ' in productDescription: + productDescription.remove(' ') + while '\n' in productDescription: + productDescription.remove('\n') + + productDescription = "\n".join(productDescription) + + # imageLink = response.xpath('//div[@class="product-view-container"]/div[@style="display: none;"]/img/@src').getall() + imageLink = response.xpath('//img[@style="margin:auto;transform:scale(1);transform-origin:top left;transition-duration:300ms;opacity:1"]/@src').getall() + + sellerName = response.xpath('//div[@class="product-seller"]/strong/text()').extract_first(default="NA") + self.log(sellerName) \ No newline at end of file diff --git a/extractors/utils.py b/extractors/utils.py new file mode 100644 index 0000000..3b29478 --- /dev/null +++ b/extractors/utils.py @@ -0,0 +1,29 @@ +import random + +def getCategoryName(name): + name = name.title() + if name == "Amazon": + return "amazonCategoryAddress" + if name == "Bestbuy": + return "bestbuyCategoryAddress" + if name == "Costco": + return "costcoCategoryAddress" + if name == "Newegg": + return "neweggCategoryAddress" + + +def getElement(selectors, response): + element = None + for selector in selectors: + element = response.xpath(selector) + # print(element) + if len(element) != 0: + break + return element + + +def getRandomUAgents(agents, headers): + randIndex = random.randint(0, len(agents)-1) + headers["'User-Agent'"] = agents[randIndex] + + return headers \ No newline at end of file diff --git a/main.py b/main.py new file mode 100644 index 0000000..b2a2ea0 --- /dev/null +++ b/main.py @@ -0,0 +1,29 @@ + +import subprocess +from datetime import datetime +from flask import Flask, render_template, request +from twisted.internet import reactor +from scrapy.crawler import CrawlerRunner +from extractors.spiders.amazon_find_spider import AmazonSpider +import config + +app = Flask(__name__) + +@app.route('/') +def scrape(): + """ + Run spider in another process and store items in file. Simply issue command: + + > scrapy crawl spidername + + wait for this command to finish, and read output.json to client. + """ + try: + spider_name = "Amazon" + subprocess.check_output(['scrapy', 'crawl', spider_name]) + except subprocess.CalledProcessError as e: + raise RuntimeError("command '{}' return with error (code {}): {}".format(e.cmd, e.returncode, e.output)) + return "" + +if __name__ == '__main__': + app.run(host=config.HOST, port=config.PORT, debug=config.DEBUG) diff --git a/extractors/scrapy.cfg b/scrapy.cfg similarity index 100% rename from extractors/scrapy.cfg rename to scrapy.cfg