-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrapper_angeloni.py
74 lines (67 loc) · 2.51 KB
/
scrapper_angeloni.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#standard imports
import os
import time
from datetime import date
#third party imports
import requests
from bs4 import BeautifulSoup
import pandas as pd
class Scrapper():
    """Scrape the Angeloni supermarket product listing into a CSV file.

    Listing pages are requested one after another, every ``box-produto``
    element on a page becomes one row, and the accumulated rows are written
    to ``<supermarket>_data.csv`` in the current working directory.
    """

    # Date stamped on every row; evaluated once, when the class is defined.
    today = date.today()
    #angeloni URL sorted low to high price, 48 items/page
    # The page offset (``No``) is inserted between the two fragments.
    base_url = [
        "https://www.angeloni.com.br/super/c?No=",
        "&Nrpp=48&Ns=dim.product.inStock|1||sku.activePrice|0"
    ]
    # Offset step between consecutive pages (matches ``Nrpp=48`` above).
    _PAGE_SIZE = 48
    # Seconds to wait on the server before a request is abandoned; without a
    # timeout ``requests.get`` can block indefinitely.
    _REQUEST_TIMEOUT = 30
    # Column order of the generated CSV.
    _COLUMNS = [
        'insert_date',
        'market_name',
        'product_description',
        'product_price',
        'product_availability'
    ]

    def __init__(self, supermarket='angeloni', index=0):
        """Store the scraping parameters.

        Args:
            supermarket: Name written to the ``market_name`` column and used
                as the prefix of the output file name.
            index: Offset of the first product to request (the ``No`` query
                parameter).
        """
        self.supermarket = supermarket
        self.index = index
        # Loop sentinel: scraping continues while this stays at 200.
        self.response = 200

    def _page_url(self):
        """Return the listing URL for the current ``self.index`` offset."""
        return "{}{}{}".format(
            self.base_url[0],
            str(self.index),
            self.base_url[1]
        )

    def _build_row(self, element):
        """Convert one ``box-produto`` element into a CSV row dict.

        A product without a price span is recorded as unavailable with
        price 0. A missing cents span or description heading is recorded as
        an empty string instead of raising ``AttributeError``.
        """
        item_desc = element.find('h2', class_='box-produto__desc-prod')
        item_preco_int = element.find('span', class_='box-produto__preco__valor')
        item_preco_dec = element.find('span', class_='box-produto__preco__centavos')
        if item_preco_int is None:
            disponibilidade = 0
            item_preco = 0
        else:
            disponibilidade = 1
            item_preco = '{}{}'.format(
                item_preco_int.text,
                item_preco_dec.text if item_preco_dec is not None else ''
            )
        return {
            'insert_date': self.today.strftime('%m-%d-%Y'),
            'market_name': self.supermarket,
            'product_description': item_desc.text if item_desc is not None else '',
            'product_price': item_preco,
            'product_availability': disponibilidade
        }

    def _write_csv(self, rows):
        """Write all rows collected so far to ``<supermarket>_data.csv``."""
        pd.DataFrame(rows, columns=self._COLUMNS).to_csv(
            '{}_data.csv'.format(self.supermarket)
        )

    def _get_item_information(self):
        """Scrape every listing page and save the products to CSV.

        Starting at ``self.index``, pages are fetched until one contains no
        ``box-produto`` element; ``self.response`` is then set to 404, which
        ends the loop. The CSV is rewritten after each page so that rows
        already collected survive a failure on a later page.
        ``self.index`` is advanced by the page size after every page.

        Raises:
            requests.exceptions.RequestException: If a page cannot be
                fetched (connection error or timeout).
        """
        # Rows are gathered in a plain list: DataFrame.append copied the
        # whole frame on every call and no longer exists in pandas >= 2.0.
        rows = []
        while self.response == 200:
            page = requests.get(self._page_url(), timeout=self._REQUEST_TIMEOUT)
            soup = BeautifulSoup(page.content, 'html.parser')
            elements = soup.find_all(class_='box-produto')
            if not elements:
                # An empty page marks the end of the listing.
                self.response = 404
            rows.extend(self._build_row(element) for element in elements)
            self._write_csv(rows)
            self.index = self.index + self._PAGE_SIZE
        print('Extraction {} data finished without errors!'.format(self.supermarket))
# Script entry point: scrape with the default settings when run directly.
if __name__ == '__main__':
    Scrapper()._get_item_information()