forked from EliotAndres/facebook-friend-graph
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfacebookFOF.py
133 lines (101 loc) · 3.77 KB
/
facebookFOF.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import getpass
import os
import pickle
import re
import time
from html.parser import HTMLParser

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from tqdm import tqdm
# Ask for the Facebook credentials interactively; getpass reads the password
# without echoing it to the terminal.
username = input("Facebook username:")
password = getpass.getpass('Password:')

# Chrome content-setting value 2 means "block": the notification permission
# prompt is suppressed for the whole session.
chrome_options = webdriver.ChromeOptions()
prefs = {"profile.default_content_setting_values.notifications": 2}
chrome_options.add_experimental_option("prefs", prefs)

# `options=` is the supported keyword; the older `chrome_options=` spelling
# was deprecated and later removed from Selenium.
driver = webdriver.Chrome(options=chrome_options)
# Load the login form over HTTPS since credentials are typed into it.
driver.get('https://www.facebook.com/')

# Authenticate to the Facebook account. `find_element(By.ID, ...)` replaces
# the `find_element_by_id` helper that Selenium 4 removed.
elem = driver.find_element(By.ID, "email")
elem.send_keys(username)
elem = driver.find_element(By.ID, "pass")
elem.send_keys(password)
elem.send_keys(Keys.RETURN)

# Fixed pause after submitting the form, before any further navigation.
time.sleep(5)

# Seconds to wait after each scroll step in get_fb_page before re-measuring
# the page height.
SCROLL_PAUSE_TIME = 2
def get_fb_page(url):
    """Load *url* in the shared browser, scroll to the very bottom, return the HTML.

    The page is scrolled repeatedly until its scroll height stops growing,
    i.e. until a scroll step no longer causes additional content to load.
    Uses the module-level ``driver`` and ``SCROLL_PAUSE_TIME``.

    Args:
        url: Address of the page to open.

    Returns:
        The page source (str) as reported by the driver after scrolling.
    """
    height_script = "return document.body.scrollHeight"

    # Short fixed delay before every navigation.
    time.sleep(2)
    driver.get(url)

    previous_height = driver.execute_script(height_script)
    reached_bottom = False
    while not reached_bottom:
        # Jump to the current bottom of the document, then give the page
        # time to append more content.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(SCROLL_PAUSE_TIME)

        # An unchanged height means nothing new was loaded by the last scroll.
        current_height = driver.execute_script(height_script)
        reached_bottom = current_height == previous_height
        previous_height = current_height

    return driver.page_source
# Numeric profile link: ".../profile.php?id=<digits>", either at the end of
# the URL or followed by further query parameters.
_PROFILE_ID_RE = re.compile(r'com/profile\.php\?id=(\d+)(?:&|$)')
# Vanity link: everything between "com/" and the query string (if any).
_VANITY_NAME_RE = re.compile(r'com/([^?]*)')


def find_friend_from_url(url):
    """Extract the friend identifier from a Facebook profile URL.

    Two URL shapes are recognised:

    * ``.../profile.php?id=12345[&...]`` -> ``'12345'`` (numeric id)
    * ``.../some.username[?...]``        -> ``'some.username'``

    Args:
        url: Profile link as found in an anchor's ``href``.

    Returns:
        The numeric id or the vanity username, as a string.

    Raises:
        ValueError: If *url* contains no ``com/`` path segment at all, so no
            identifier can be extracted.
    """
    # Try the numeric form first: a profile.php link would otherwise be
    # picked up by the vanity pattern and yield "profile.php".
    match = _PROFILE_ID_RE.search(url) or _VANITY_NAME_RE.search(url)
    if match is None:
        raise ValueError(
            'Not a recognisable Facebook profile URL: {!r}'.format(url))
    return match.group(1)
class MyHTMLParser(HTMLParser):
    """HTML parser that collects profile-looking links from ``<a>`` tags.

    After ``feed()``, ``self.urls`` holds every ``href`` value that contains
    one of the markers ``?href``, ``&href``, ``hc_loca`` or ``?fref`` and does
    not point below ``.com/pages``. Values are stored in document order and
    may contain duplicates.
    """

    # Markers that must appear in an href for it to be collected.
    _PROFILE_MARKERS = re.compile(r'\?href|&href|hc_loca|\?fref')
    # Links below ".com/pages" are excluded from the result.
    _PAGES_LINK = re.compile(r'\.com/pages')

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # One list per instance: a class-level list would be shared, so links
        # collected by one parser would show up in every other parser.
        self.urls = []

    def error(self, message):
        """No-op override of the parser's error hook; *message* is ignored."""
        pass

    def handle_starttag(self, tag, attrs):
        """Record the href of an anchor tag when it matches the link filter.

        Args:
            tag: Lower-cased tag name supplied by ``HTMLParser``.
            attrs: List of ``(name, value)`` pairs for the tag's attributes.
        """
        # Only anchors carry the links of interest.
        if tag != "a":
            return
        for name, value in attrs:
            # ``value`` is None for a bare attribute such as ``<a href>``;
            # skip it instead of handing None to the regex engine.
            if name != "href" or not value:
                continue
            if (self._PROFILE_MARKERS.search(value)
                    and not self._PAGES_LINK.search(value)):
                self.urls.append(value)
# Friends list of the logged-in account (HTTPS, like the mutual-friends URLs
# built further down).
my_url = 'https://www.facebook.com/' + username + '/friends'

# Cache files written to the current working directory.
# NOTE(security): pickle.load can execute arbitrary code contained in the
# file it reads -- only use cache files produced by this script itself.
UNIQ_FILENAME = 'uniq_urls.pickle'
GRAPH_FILENAME = 'friend_graph.pickle'

try:
    # Step 1: obtain the set of friend profile URLs, from cache if present.
    if os.path.isfile(UNIQ_FILENAME):
        with open(UNIQ_FILENAME, 'rb') as f:
            uniq_urls = pickle.load(f)
        print('We loaded {} uniq friends'.format(len(uniq_urls)))
    else:
        friends_page = get_fb_page(my_url)
        parser = MyHTMLParser()
        parser.feed(friends_page)
        uniq_urls = set(parser.urls)
        print('We found {} friends, saving it'.format(len(uniq_urls)))
        with open(UNIQ_FILENAME, 'wb') as f:
            pickle.dump(uniq_urls, f)

    # Step 2: load a previously saved graph so already-scraped friends are
    # skipped. The graph maps a friend id to a list starting with the
    # logged-in username, followed by the mutual friends found.
    friend_graph = {}
    if os.path.isfile(GRAPH_FILENAME):
        with open(GRAPH_FILENAME, 'rb') as f:
            friend_graph = pickle.load(f)
        print('Loaded existing graph, found {} keys'.format(len(friend_graph.keys())))

    # Step 3: scrape the mutual-friends page of every friend not yet in the
    # graph.
    for url in tqdm(uniq_urls):
        friend_username = find_friend_from_url(url)
        if friend_username in friend_graph:
            continue

        mutual_url = 'https://www.facebook.com/{}/friends_mutual'.format(friend_username)
        mutual_page = get_fb_page(mutual_url)

        parser = MyHTMLParser()
        # Start from an empty list so links gathered by an earlier parser
        # are never carried over into this friend's result.
        parser.urls = []
        parser.feed(mutual_page)
        mutual_friends_urls = set(parser.urls)
        print('Found {} urls'.format(len(mutual_friends_urls)))

        # Build the complete adjacency list first and store it only once it
        # is finished, so the graph never holds a half-filled entry.
        neighbours = [username]
        neighbours.extend(
            find_friend_from_url(link) for link in mutual_friends_urls)
        friend_graph[friend_username] = neighbours

        # Checkpoint after every friend so an interrupted run can resume
        # from the saved graph instead of starting over.
        with open(GRAPH_FILENAME, 'wb') as f:
            pickle.dump(friend_graph, f)
finally:
    # Always close the browser, even when scraping fails part-way through.
    driver.quit()