-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathstage2.py
56 lines (53 loc) · 2.73 KB
/
stage2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from dependencies import *
from login import *
# Stage 2: visit every profile link listed in names_and_positions.csv and
# append one CSV row per position entry found on the page
# (person name, company name, duration, extra information).
#
# `pd`, `writer`, `soup` and `driver` are not defined in this file; they are
# expected to come from the star imports above.


def _clean_text(text, drop_none=True):
    """Normalise a scraped text fragment for the CSV row.

    Newlines are removed, surrounding spaces are stripped and commas are
    replaced with '|'. When *drop_none* is true every occurrence of the
    substring "None" is removed as well.
    """
    cleaned = text.replace("\n", "").strip(" ").replace(",", "|")
    if drop_none:
        cleaned = cleaned.replace("None", "")
    return cleaned


def _company_name(entry):
    """Return the cleaned company name of one position entry, or "".

    Three page layouts are tried in order; a layout is skipped when one of
    its elements is missing (`find` returns None, so the chained attribute
    access raises AttributeError).
    """
    title_attrs = {"class": "pv-entity__secondary-title t-14 t-black t-normal"}
    summary_classes = (
        "pv-entity__summary-info pv-entity__summary-info--background-section mb2",
        "pv-entity__summary-info pv-entity__summary-info--background-section",
    )
    for summary_class in summary_classes:
        try:
            raw = entry.find("div", {"class": summary_class}).find("p", title_attrs).text
        except AttributeError:
            continue
        return _clean_text(raw)
    try:
        raw = entry.find("div", {"class": "pv-entity__company-summary-info"}).h3.text
    except AttributeError:
        # No known layout matched: fall back to an empty field (as the
        # duration and information columns do) instead of aborting the run.
        return ""
    # This fallback layout never had the "None" removal applied.
    return _clean_text(raw, drop_none=False)


def _duration(entry):
    """Return the cleaned duration text of one position entry, or ""."""
    try:
        raw = (
            entry.find("div", {"class": "display-flex"})
            .find("h4", {"class": "t-14 t-black--light t-normal"})
            .find("span", {"class": "pv-entity__bullet-item-v2"})
            .text
        )
    except AttributeError:
        pass
    else:
        return _clean_text(raw)
    try:
        raw = entry.find("div", {"class": "pv-entity__company-summary-info"}).h4.text
    except AttributeError:
        return ""
    # This fallback layout never had the "None" removal applied.
    return _clean_text(raw, drop_none=False)


def _extra_info(entry):
    """Return the cleaned extra-details text of one position entry, or ""."""
    try:
        raw = entry.find("div", {"class": "pv-entity__extra-details t-14 t-black--light ember-view"}).text
    except AttributeError:
        return ""
    return _clean_text(raw)


df = pd.read_csv("names_and_positions.csv")
profileLinks = df['ProfileLinks']
names = df['Name']
headers = ["Name", "Company_Name", "Duration", "Information"]
filename = "roles_of_person_in_pervious_list.csv"
count = 0
# newline='' is what the csv module requires of files handed to a writer
# (otherwise blank rows appear on Windows); an explicit UTF-8 encoding keeps
# non-ASCII scraped text from failing under a legacy locale encoding.
# The with-block closes the file even if scraping raises part-way through.
with open(filename, 'a', newline='', encoding='utf-8') as File:
    writer_object = writer(File)
    # Append mode starts at the end of the file, so position 0 means the file
    # is new or empty. Only then write the header, so repeated runs do not
    # scatter header rows through the data.
    if File.tell() == 0:
        writer_object.writerow(headers)
    for link, name in zip(profileLinks, names):
        # The cap is checked once per profile, so the last profile visited
        # may push the number of written rows past 1000.
        if count > 999:
            break
        print(link)
        driver.get(link)
        # Best effort: expand the truncated section when the toggle button is
        # present; pages without it are scraped as they are.
        # NOTE(review): find_element_by_xpath was removed in Selenium 4.3;
        # if a newer Selenium is installed this always fails silently and
        # should become find_element(By.XPATH, ...) - confirm pinned version.
        try:
            driver.find_element_by_xpath("//button[@class='pv-profile-section__see-more-inline pv-profile-section__text-truncate-toggle artdeco-button artdeco-button--tertiary artdeco-button--muted']").click()
        except Exception:
            # Exception (not a bare except) so Ctrl-C still stops the run.
            pass
        parser = soup(driver.page_source, "html.parser")
        profiles = parser.find_all("li", {"class": "pv-entity__position-group-pager pv-profile-section__list-item ember-view"})
        for entry in profiles:
            count += 1
            writer_object.writerow(
                [name, _company_name(entry), _duration(entry), _extra_info(entry)]
            )