-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathscra.py
69 lines (55 loc) · 2.59 KB
/
scra.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import urllib2
import sys
from bs4 import BeautifulSoup
import os
import json
import re
def main():
    """Crawl the fare-card index page and dump every fare record to disk.

    Fetches the index page, locates the table that follows the
    'MC_lblTaxiRates' span and the table that follows the 'MC_lblCabRates'
    span, calls scrape() on every link found in them (taxi links first,
    then cab links) and writes the collected list of dicts as JSON to
    ./autotaxi.txt.

    A fare page that cannot be fetched or parsed is reported on stdout and
    skipped, so one bad link does not lose the records already collected.

    Raises:
        AttributeError: if either span, or the table after it, is missing
            from the index page.
    """
    base_url = 'http://www.taxiautofare.com/'
    index_url = base_url + 'Taxi-Fare-Card.aspx'
    soup = BeautifulSoup(urllib2.urlopen(index_url).read())

    # Resolve both tables before scraping anything, so a changed page
    # layout fails immediately rather than after a long partial crawl.
    taxi_table = soup.find('span', {'id': 'MC_lblTaxiRates'}).findNext('table')
    cab_table = soup.find('span', {'id': 'MC_lblCabRates'}).findNext('table')

    fares = []
    for table in (taxi_table, cab_table):
        for link in table.find_all('a'):
            href = link.get('href')
            if not href:
                # Anchor without a target: nothing to fetch.
                continue
            fare_url = base_url + href
            print(fare_url)
            # Both tables get the same protection; previously only the cab
            # links were guarded. 'except Exception' (not a bare except)
            # lets Ctrl-C / SystemExit still stop the crawl.
            try:
                fares.append(scrape(fare_url))
            except Exception:
                print("Could not access this fare")

    with open("./autotaxi.txt", "w") as f:
        f.write(json.dumps(fares))
def scrape(url='http://www.taxiautofare.com/taxi-fare-card/Chandigarh-Auto-fare'):
    """Fetch one fare-card page and return its fare details as a dict.

    The heading span 'BottomContent_lblFareBreakup' supplies the city and
    operator; the table that follows it supplies the individual charges.

    Args:
        url: address of the fare-card page to download.

    Returns:
        A dict with the keys 'operator', 'city', 'booking_fee', 'min_fare',
        'min_dist', 'fare_per_km' and 'waiting_charges' (all strings).
        If the page does not have the expected structure, "Failed" is
        printed and the dict is returned with only the keys that were
        filled in before the problem was hit (possibly none).

    Errors raised while downloading the page are not caught here.

    The night booking fee and night extra fare spans
    ('BottomContent_lblNightBookingFee', 'BottomContent_lblNightExtraFare')
    are not extracted.
    """
    fare = {}
    soup = BeautifulSoup(urllib2.urlopen(url).read())
    heading = soup.find('span', {'id': 'BottomContent_lblFareBreakup'})
    try:
        title = heading.get_text()
        fare['operator'] = title

        # Look the table up once; every charge lives in the same table.
        table = heading.findNext('table')

        def cell_text(span_id):
            # Text of the span with the given id inside the fare table.
            return table.find('span', {'id': span_id}).get_text()

        # NOTE(review): the [2:4] / [2:] slices presumably drop a
        # two-character currency prefix; [2:4] also caps the value at two
        # characters -- confirm against the live page format.
        fare['booking_fee'] = cell_text('BottomContent_lblBookingFee')[2:4]

        min_fare_parts = cell_text('BottomContent_lblMinimumFare').split(' ')
        fare['min_fare'] = min_fare_parts[0][2:]
        # First character of the last space-separated token.
        fare['min_dist'] = min_fare_parts[-1][0]

        fare['fare_per_km'] = cell_text('BottomContent_lblFarePerUnitDistance')[2:4]

        waiting = cell_text('BottomContent_lblWaitingCharges')
        fare['waiting_charges'] = waiting

        # Heading looks like "Delhi Mega cabs fare breakup":
        # first word is the city, the last two words are dropped,
        # whatever is in between is the operator ("Mega cabs").
        words = title.split(' ')
        fare['operator'] = ' '.join(words[1:-2])
        fare['city'] = words[0]

        # Keep only the text between the first 's' and the first '.'.
        fare['waiting_charges'] = waiting[waiting.find('s') + 1:waiting.find('.')]
    except Exception:
        # Best effort: report and hand back whatever was collected.
        # 'except Exception' leaves KeyboardInterrupt / SystemExit alone.
        print("Failed")
    return fare
# Start the full crawl only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
    # For a quick single-page check, call scrape() here instead:
    # print scrape()