Skip to content

Commit

Permalink
bug fixing + twitter plugin
Browse files Browse the repository at this point in the history
Fixed a bug in the ASK search, removed ASK from the LinkedIn plugin, and added a Twitter plugin
  • Loading branch information
maldevel committed Apr 23, 2016
1 parent de6e297 commit a9c79ac
Show file tree
Hide file tree
Showing 11 changed files with 193 additions and 32 deletions.
21 changes: 12 additions & 9 deletions EmailHarvester.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,13 @@
colorama.init()

class myparser:
def __init__(self, results, word):

def __init__(self):
self.temp = []

def extract(self, results, word):
self.results = results
self.word = word
self.temp = []

def genericClean(self):
for e in '''<KW> </KW> </a> <b> </b> </div> <em> </em> <p> </span>
Expand Down Expand Up @@ -85,7 +88,7 @@ def __init__(self, userAgent, proxy):
self.plugins = {}
self.proxy = proxy
self.userAgent = userAgent

self.parser = myparser()
path = "plugins/"
plugins = {}

Expand All @@ -94,7 +97,7 @@ def __init__(self, userAgent, proxy):
fname, ext = os.path.splitext(f)
if ext == '.py':
mod = __import__(fname)
plugins[fname] = mod.Plugin(self)
plugins[fname] = mod.Plugin(self, {'useragent':userAgent, 'proxy':proxy})

def register_plugin(self, search_method, functions):
self.plugins[search_method] = functions
Expand All @@ -105,18 +108,18 @@ def get_plugins(self):
def show_message(self, msg):
print(green(msg))

def init_search(self, urlPattern, word, limit, counterInit, counterStep):
def init_search(self, url, word, limit, counterInit, counterStep):
self.results = ""
self.totalresults = ""
self.limit = int(limit)
self.counter = int(counterInit)
self.urlPattern = urlPattern
self.url = url
self.step = int(counterStep)
self.word = word

def do_search(self):
try:
urly = self.urlPattern.format(counter=str(self.counter), word=self.word)
urly = self.url.format(counter=str(self.counter), word=self.word)
headers = {'User-Agent': self.userAgent}
if(self.proxy):
proxies = {self.proxy.scheme: "http://" + self.proxy.netloc}
Expand All @@ -139,8 +142,8 @@ def process(self):
print("\tSearching " + str(self.counter) + " results...")

def get_emails(self):
rawres = myparser(self.totalresults, self.word)
return rawres.emails()
self.parser.extract(self.totalresults, self.word)
return self.parser.emails()

###################################################################

Expand Down
61 changes: 54 additions & 7 deletions plugins/ask.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,69 @@
For more see the file 'LICENSE' for copying permission.
"""

#config = None
import requests
import time
import sys

config = None
app_emailharvester = None


class AskSearch(object):
    """Page-by-page scraper for Ask.com result pages.

    The URL template is expected to carry ``{page}`` and ``{word}``
    placeholders. Proxy and user-agent settings are read from the
    module-level ``config`` dict, and e-mail extraction is delegated to the
    parser owned by the module-level ``app_emailharvester`` object.
    """

    def __init__(self, url, word, limit):
        """Prepare a search.

        url   -- URL template formatted with ``page`` and ``word``.
        word  -- search term substituted into the template (the domain).
        limit -- stop once the result counter reaches this value; coerced
                 with ``int()``.
        """
        self.results = ""
        self.totalresults = ""
        self.limit = int(limit)
        self.page = 1
        self.url = url
        self.word = word
        # NOTE(review): proxy is presumably a urlparse() result, since
        # do_search() reads .scheme and .netloc from it -- confirm at caller.
        self.proxy = config["proxy"]
        self.userAgent = config["useragent"]
        self.counter = 0

    @staticmethod
    def _decode_body(content, encoding):
        """Decode a raw response body to text without raising.

        ``encoding`` may be None (no charset reported by the server) or an
        unknown codec name; both fall back to UTF-8. Undecodable bytes are
        replaced rather than aborting the whole harvest.
        """
        try:
            return content.decode(encoding or "utf-8", "replace")
        except LookupError:
            # The server advertised a charset Python does not know.
            return content.decode("utf-8", "replace")

    def do_search(self):
        """Fetch the current page and append its text to ``totalresults``.

        Any error while building or sending the request is printed and the
        process exits with status 4.
        """
        try:
            urly = self.url.format(page=str(self.page), word=self.word)
            headers = {'User-Agent': self.userAgent}
            proxies = None
            if self.proxy:
                proxies = {self.proxy.scheme: "http://" + self.proxy.netloc}
            # proxies=None is equivalent to not passing the argument.
            r = requests.get(urly, headers=headers, proxies=proxies)
        except Exception as e:
            print(e)
            sys.exit(4)

        self.results = self._decode_body(r.content, r.encoding)
        self.totalresults += self.results

    def process(self):
        """Fetch successive pages until ``counter`` reaches ``limit``.

        Each iteration sleeps one second between requests, advances the
        counter by 10 and the page number by 1, and prints progress.
        """
        while self.counter < self.limit:
            self.do_search()
            time.sleep(1)
            self.counter += 10
            self.page += 1
            print("\tSearching " + str(self.counter) + " results...")

    def get_emails(self):
        """Run the shared application parser over everything fetched so far
        and return whatever its ``emails()`` method yields."""
        app_emailharvester.parser.extract(self.totalresults, self.word)
        return app_emailharvester.parser.emails()


def search(domain, limit):
app_emailharvester.show_message("\n[+] Searching in ASK..\n")
url = "http://www.ask.com/web?q=%40{word}"
app_emailharvester.init_search(url, domain, limit, 0, 100)
app_emailharvester.process()
return app_emailharvester.get_emails()
url = "http://www.ask.com/web?q=%40{word}&page={page}"
search = AskSearch(url, domain, limit)
search.process()
return search.get_emails()


class Plugin:
def __init__(self, app):#, conf
def __init__(self, app, conf):
global app_emailharvester, config
#config = conf
config = conf
app.register_plugin('ask', {'search': search})
app_emailharvester = app

2 changes: 1 addition & 1 deletion plugins/baidu.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def search(domain, limit):


class Plugin:
def __init__(self, app):#, conf
def __init__(self, app, conf):#
global app_emailharvester, config
#config = conf
app.register_plugin('baidu', {'search': search})
Expand Down
2 changes: 1 addition & 1 deletion plugins/bing.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def search(domain, limit):


class Plugin:
def __init__(self, app):#, conf
def __init__(self, app, conf):#
global app_emailharvester, config
#config = conf
app.register_plugin('bing', {'search': search})
Expand Down
2 changes: 1 addition & 1 deletion plugins/dogpile.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def search(domain, limit):


class Plugin:
def __init__(self, app):#, conf
def __init__(self, app, conf):#
global app_emailharvester, config
#config = conf
app.register_plugin('dogpile', {'search': search})
Expand Down
2 changes: 1 addition & 1 deletion plugins/exalead.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def search(domain, limit):


class Plugin:
def __init__(self, app):#, conf
def __init__(self, app, conf):#
global app_emailharvester, config
#config = conf
app.register_plugin('exalead', {'search': search})
Expand Down
4 changes: 2 additions & 2 deletions plugins/google.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@

def search(domain, limit):
app_emailharvester.show_message("\n[+] Searching in Google..\n")
url = 'http://www.google.com/search?num=100&start={counter}&hl=en&q="%40{word}"'
url = 'https://www.google.com/search?num=100&start={counter}&hl=en&q="%40{word}"'
app_emailharvester.init_search(url, domain, limit, 0, 100)
app_emailharvester.process()
return app_emailharvester.get_emails()


class Plugin:
def __init__(self, app):#, conf
def __init__(self, app, conf):#
global app_emailharvester, config
#config = conf
app.register_plugin('google', {'search': search})
Expand Down
44 changes: 44 additions & 0 deletions plugins/googleplus.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""
This file is part of EmailHarvester
Copyright (C) 2016 @maldevel
https://github.com/maldevel/EmailHarvester
EmailHarvester - A tool to retrieve Domain email addresses from Search Engines.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
For more see the file 'LICENSE' for copying permission.
"""

#config = None
app_emailharvester = None


def search(domain, limit):
    """Harvest e-mail addresses for *domain* from Google+ pages.

    Google+ has no search of its own here; the query is sent to Google's
    web search restricted to plus.google.com. The shared application object
    performs the requests (starting at offset 0, stepping by 100) and its
    get_emails() result is returned unchanged.
    """
    # Google is used because it is expected to index Google+ best.
    query_url = (
        'https://www.google.com/search?num=100&start={counter}&hl=en'
        '&q=site%3Aplus.google.com+intext:"Works at"+-inurl:photos'
        '+-inurl:about+-inurl:posts+-inurl:plusones+%40{word}'
    )
    harvester = app_emailharvester
    harvester.show_message("\n[+] Searching in Google+..\n")
    harvester.init_search(query_url, domain, limit, 0, 100)
    harvester.process()
    return harvester.get_emails()


class Plugin:
    """Entry point that hooks the Google+ search into the application."""

    def __init__(self, app, conf):
        """Register this module's ``search`` under the name 'googleplus'
        and remember *app* in the module-level ``app_emailharvester``.

        *conf* is accepted but not used by this plugin.
        """
        global app_emailharvester, config
        handlers = {'search': search}
        app.register_plugin('googleplus', handlers)
        app_emailharvester = app

12 changes: 3 additions & 9 deletions plugins/linkedin.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,9 @@
def search(domain, limit):
all_emails = []
app_emailharvester.show_message("\n[+] Searching in Linkedin..\n")

app_emailharvester.show_message("\n[+] Searching in ASK + Linkedin..\n")
askUrl = "http://www.ask.com/web?q=site%3Alinkedin.com+%40{word}"
app_emailharvester.init_search(askUrl, domain, limit, 0, 100)
app_emailharvester.process()
all_emails += app_emailharvester.get_emails()

app_emailharvester.show_message("\n[+] Searching in Yahoo + Linkedin..\n")
yahooUrl = "http://search.yahoo.com/search?p=%40{word}&n=100&ei=UTF-8&va_vt=any&vo_vt=any&ve_vt=any&vp_vt=any&vd=all&vst=0&vf=all&vm=p&fl=0&fr=yfp-t-152&xargs=0&pstart=1&b={counter}"
yahooUrl = "http://search.yahoo.com/search?p=site%3Alinkedin.com+%40{word}&n=100&ei=UTF-8&va_vt=any&vo_vt=any&ve_vt=any&vp_vt=any&vd=all&vst=0&vf=all&vm=p&fl=0&fr=yfp-t-152&xargs=0&pstart=1&b={counter}"
app_emailharvester.init_search(yahooUrl, domain, limit, 1, 100)
app_emailharvester.process()
all_emails += app_emailharvester.get_emails()
Expand All @@ -48,7 +42,7 @@ def search(domain, limit):
all_emails += app_emailharvester.get_emails()

app_emailharvester.show_message("\n[+] Searching in Google + Linkedin..\n")
googleUrl = 'http://www.google.com/search?num=100&start={counter}&hl=en&q=site%3Alinkedin.com+"%40{word}"'
googleUrl = 'https://www.google.com/search?num=100&start={counter}&hl=en&q=site%3Alinkedin.com+"%40{word}"'
app_emailharvester.init_search(googleUrl, domain, limit, 0, 100)
app_emailharvester.process()
all_emails += app_emailharvester.get_emails()
Expand All @@ -71,7 +65,7 @@ def search(domain, limit):


class Plugin:
def __init__(self, app):#, conf
def __init__(self, app, conf):#
global app_emailharvester, config
#config = conf
app.register_plugin('linkedin', {'search': search})
Expand Down
73 changes: 73 additions & 0 deletions plugins/twitter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
"""
This file is part of EmailHarvester
Copyright (C) 2016 @maldevel
https://github.com/maldevel/EmailHarvester
EmailHarvester - A tool to retrieve Domain email addresses from Search Engines.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
For more see the file 'LICENSE' for copying permission.
"""

#config = None
app_emailharvester = None


def search(domain, limit):
    """Harvest e-mail addresses for *domain* from Twitter pages.

    Twitter is queried indirectly: each supported search engine is asked
    for twitter.com pages mentioning @domain, one engine after another, and
    the addresses returned by every pass are concatenated into one list.
    """
    # One row per engine: (banner, URL template, first counter, counter step).
    engines = (
        (
            "\n[+] Searching in Yahoo + Twitter..\n",
            'http://search.yahoo.com/search?p=site%3Atwitter.com+intitle:"on Twitter"+%40{word}&n=100&ei=UTF-8&va_vt=any&vo_vt=any&ve_vt=any&vp_vt=any&vd=all&vst=0&vf=all&vm=p&fl=0&fr=yfp-t-152&xargs=0&pstart=1&b={counter}',
            1,
            100,
        ),
        (
            "\n[+] Searching in Bing + Twitter..\n",
            'http://www.bing.com/search?q=site%3Atwitter.com+intitle:"on Twitter"+%40{word}&count=50&first={counter}',
            0,
            50,
        ),
        (
            "\n[+] Searching in Google + Twitter..\n",
            'https://www.google.com/search?num=100&start={counter}&hl=en&q=site%3Atwitter.com+intitle:"on Twitter"+"%40{word}"',
            0,
            100,
        ),
        (
            "\n[+] Searching in Baidu + Twitter..\n",
            'http://www.baidu.com/search/s?wd=site%3Atwitter.com+intitle:"on Twitter"+"%40{word}"&pn={counter}',
            0,
            10,
        ),
        (
            "\n[+] Searching in Exalead + Twitter..\n",
            'http://www.exalead.com/search/web/results/?q=site%3Atwitter.com+intitle:"on Twitter"+%40{word}&elements_per_page=10&start_index={counter}',
            0,
            50,
        ),
        # Dogpile is omitted: it does not seem to support the site: operator.
    )

    harvester = app_emailharvester
    collected = []
    harvester.show_message("\n[+] Searching in Twitter..\n")

    for banner, template, first, step in engines:
        harvester.show_message(banner)
        harvester.init_search(template, domain, limit, first, step)
        harvester.process()
        collected.extend(harvester.get_emails())

    return collected


class Plugin:
    """Entry point that hooks the Twitter search into the application."""

    def __init__(self, app, conf):
        """Register this module's ``search`` under the name 'twitter'
        and remember *app* in the module-level ``app_emailharvester``.

        *conf* is accepted but not used by this plugin.
        """
        global app_emailharvester, config
        handlers = {'search': search}
        app.register_plugin('twitter', handlers)
        app_emailharvester = app

2 changes: 1 addition & 1 deletion plugins/yahoo.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def search(domain, limit):


class Plugin:
def __init__(self, app):#, conf
def __init__(self, app, conf):#
global app_emailharvester, config
#config = conf
app.register_plugin('yahoo', {'search': search})
Expand Down

0 comments on commit a9c79ac

Please sign in to comment.