Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PEP8 fixes for Gensim #983

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 25 additions & 7 deletions ez_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,19 +32,23 @@
DEFAULT_VERSION = "1.3.2"
DEFAULT_URL = "https://pypi.python.org/packages/source/s/setuptools/"


def _python_cmd(*args):
    """
    Run the current Python interpreter with the given command-line arguments.

    Returns True when the child process exits with status 0, False otherwise.
    """
    command = (sys.executable,) + args
    exit_status = subprocess.call(command)
    return exit_status == 0


def _check_call_py24(cmd, *args, **kwargs):
    """
    Minimal stand-in for ``subprocess.check_call``.

    Runs ``cmd`` through ``subprocess.call`` (extra positional and keyword
    arguments are forwarded unchanged) and checks the exit status.

    Returns 0 when the command succeeds, mirroring ``subprocess.check_call``.
    Raises a locally defined ``CalledProcessError`` (an ``Exception``
    subclass) when the command exits with a non-zero status.
    """
    res = subprocess.call(cmd, *args, **kwargs)
    if res != 0:
        # The exception class stays local to the function, as in the original,
        # so no new module-level name is introduced.
        class CalledProcessError(Exception):
            pass
        msg = "Command '%s' returned non-zero exit status %d" % (cmd, res)
        raise CalledProcessError(msg)
    return 0


# Install the fallback only when subprocess does not already provide
# check_call; setdefault leaves an existing implementation untouched.
vars(subprocess).setdefault('check_call', _check_call_py24)


def _install(tarball, install_args=()):
# extracting the tarball
tmpdir = tempfile.mkdtemp()
Expand Down Expand Up @@ -137,11 +141,11 @@ def use_setuptools(version=DEFAULT_VERSION, download_base=DEFAULT_URL,
e = sys.exc_info()[1]
if was_imported:
sys.stderr.write(
"The required version of setuptools (>=%s) is not available,\n"
"and can't be installed while this script is running. Please\n"
"install a more recent version first, using\n"
"'easy_install -U setuptools'."
"\n\n(Currently using %r)\n" % (version, e.args[0]))
"The required version of setuptools (>=%s) is not available,\n"
"and can't be installed while this script is running. Please\n"
"install a more recent version first, using\n"
"'easy_install -U setuptools'."
"\n\n(Currently using %r)\n" % (version, e.args[0]))
sys.exit(2)
else:
del pkg_resources, sys.modules['pkg_resources'] # reload ok
Expand All @@ -151,6 +155,7 @@ def use_setuptools(version=DEFAULT_VERSION, download_base=DEFAULT_URL,
return _do_download(version, download_base, to_dir,
download_delay)


def _clean_check(cmd, target):
"""
Run the command to download target. If the command fails, clean up before
Expand All @@ -163,6 +168,7 @@ def _clean_check(cmd, target):
os.unlink(target)
raise


def download_file_powershell(url, target):
"""
Download the file at url to target using Powershell (which will validate
Expand All @@ -172,10 +178,12 @@ def download_file_powershell(url, target):
cmd = [
'powershell',
'-Command',
"(new-object System.Net.WebClient).DownloadFile(%(url)r, %(target)r)" % vars(),
"(new-object System.Net.WebClient).DownloadFile(%(url)r, %(target)r)" %
vars(),
]
_clean_check(cmd, target)


def has_powershell():
if platform.system() != 'Windows':
return False
Expand All @@ -192,10 +200,12 @@ def has_powershell():

download_file_powershell.viable = has_powershell


def download_file_curl(url, target):
    """
    Download the file at url to target by running the ``curl`` executable.

    The command is handed to ``_clean_check`` together with target.
    """
    curl_argv = ['curl', url, '--silent', '--output', target]
    _clean_check(curl_argv, target)


def has_curl():
cmd = ['curl', '--version']
devnull = open(os.path.devnull, 'wb')
Expand All @@ -210,10 +220,12 @@ def has_curl():

download_file_curl.viable = has_curl


def download_file_wget(url, target):
    """
    Download the file at url to target by running the ``wget`` executable.

    The command is handed to ``_clean_check`` together with target.
    """
    wget_argv = ['wget', url, '--quiet', '--output-document', target]
    _clean_check(wget_argv, target)


def has_wget():
cmd = ['wget', '--version']
devnull = open(os.path.devnull, 'wb')
Expand All @@ -228,6 +240,7 @@ def has_wget():

download_file_wget.viable = has_wget


def download_file_insecure(url, target):
"""
Use Python to download the file, even though it cannot authenticate the
Expand All @@ -253,6 +266,7 @@ def download_file_insecure(url, target):

download_file_insecure.viable = lambda: True


def get_best_downloader():
downloaders = [
download_file_powershell,
Expand All @@ -265,6 +279,7 @@ def get_best_downloader():
if dl.viable():
return dl


def download_setuptools(version=DEFAULT_VERSION, download_base=DEFAULT_URL,
to_dir=os.curdir, delay=15,
downloader_factory=get_best_downloader):
Expand Down Expand Up @@ -350,6 +365,7 @@ def _build_install_args(options):
install_args.append('--user')
return install_args


def _parse_args():
"""
Parse the command line for options
Expand All @@ -371,10 +387,12 @@ def _parse_args():
# positional arguments are ignored
return options


def main(version=DEFAULT_VERSION):
    """Install or upgrade setuptools and EasyInstall

    Parses the command line, downloads the setuptools tarball for
    ``version`` and installs it with the arguments derived from the parsed
    options. Returns whatever ``_install`` returns.
    """
    options = _parse_args()
    # Forward the requested version; previously the parameter was accepted
    # but never used, so the default version was always downloaded.
    tarball = download_setuptools(
        version=version,
        download_base=options.download_base,
        downloader_factory=options.downloader_factory)
    return _install(tarball, _build_install_args(options))

Expand Down
8 changes: 5 additions & 3 deletions gensim/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,18 @@
import logging

# Best-effort lookup of the installed gensim version via pkg_resources.
try:
    __version__ = __import__(
        'pkg_resources').get_distribution('gensim').version
except Exception:
    # pkg_resources is missing or gensim is not installed as a distribution.
    # Catching Exception (not a bare except) lets SystemExit and
    # KeyboardInterrupt propagate.
    __version__ = '?'


class NullHandler(logging.Handler):
    """
    Logging handler that discards every record.

    For python versions <= 2.6; same as `logging.NullHandler` in 2.7.
    """

    def emit(self, record):
        """Ignore `record`; nothing is written anywhere."""
        return None

# Give the package logger a do-nothing handler unless one is already attached.
logger = logging.getLogger('gensim')
if not logger.handlers:  # To ensure reload() doesn't add another one
    logger.addHandler(NullHandler())
3 changes: 2 additions & 1 deletion gensim/corpora/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
"""

# bring corpus classes directly into package namespace, to save some typing
from .indexedcorpus import IndexedCorpus # must appear before the other classes
# must appear before the other classes
from .indexedcorpus import IndexedCorpus

from .mmcorpus import MmCorpus
from .bleicorpus import BleiCorpus
Expand Down
36 changes: 25 additions & 11 deletions gensim/corpora/bleicorpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,11 @@ def __init__(self, fname, fname_vocab=None):
fname_base, _ = path.splitext(fname)
fname_dir = path.dirname(fname)
for fname_vocab in [
utils.smart_extension(fname, '.vocab'),
utils.smart_extension(fname, '/vocab.txt'),
utils.smart_extension(fname_base, '.vocab'),
utils.smart_extension(fname_dir, '/vocab.txt'),
]:
utils.smart_extension(fname, '.vocab'),
utils.smart_extension(fname, '/vocab.txt'),
utils.smart_extension(fname_base, '.vocab'),
utils.smart_extension(fname_dir, '/vocab.txt'),
]:
if path.exists(fname_vocab):
break
else:
Expand All @@ -79,7 +79,9 @@ def __iter__(self):
def line2doc(self, line):
    """
    Convert one line of the corpus file into a list of (int, float) pairs.

    The first whitespace-separated token must equal the number of tokens
    that follow it; otherwise ValueError is raised. Each following token is
    split on its last ':' into an integer part and a float part.
    """
    tokens = utils.to_unicode(line).split()
    declared_count = int(tokens[0])
    if declared_count != len(tokens) - 1:
        raise ValueError(
            "invalid format in %s: %s" % (self.fname, repr(line)))
    pairs = (token.rsplit(':', 1) for token in tokens[1:])
    return [(int(left), float(right)) for left, right in pairs]
Expand All @@ -96,7 +98,8 @@ def save_corpus(fname, corpus, id2word=None, metadata=False):
call it directly, call `serialize` instead.
"""
if id2word is None:
logger.info("no word id mapping provided; initializing from corpus")
logger.info(
"no word id mapping provided; initializing from corpus")
id2word = utils.dict_from_corpus(corpus)
num_terms = len(id2word)
else:
Expand All @@ -109,14 +112,25 @@ def save_corpus(fname, corpus, id2word=None, metadata=False):
doc = list(doc)
offsets.append(fout.tell())
parts = ["%i:%g" % p for p in doc if abs(p[1]) > 1e-7]
fout.write(utils.to_utf8("%i %s\n" % (len(doc), ' '.join(parts))))
fout.write(
utils.to_utf8(
"%i %s\n" %
(len(doc), ' '.join(parts))))

# write out vocabulary, in a format compatible with Blei's topics.py script
# write out vocabulary, in a format compatible with Blei's topics.py
# script
fname_vocab = utils.smart_extension(fname, '.vocab')
logger.info("saving vocabulary of %i words to %s" % (num_terms, fname_vocab))
logger.info(
"saving vocabulary of %i words to %s" %
(num_terms, fname_vocab))
with utils.smart_open(fname_vocab, 'wb') as fout:
for featureid in xrange(num_terms):
fout.write(utils.to_utf8("%s\n" % id2word.get(featureid, '---')))
fout.write(
utils.to_utf8(
"%s\n" %
id2word.get(
featureid,
'---')))

return offsets

Expand Down
3 changes: 2 additions & 1 deletion gensim/corpora/csvcorpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ def __init__(self, fname, labels):
head = ''.join(itertools.islice(utils.smart_open(self.fname), 5))
self.headers = csv.Sniffer().has_header(head)
self.dialect = csv.Sniffer().sniff(head)
logger.info("sniffed CSV delimiter=%r, headers=%s" % (self.dialect.delimiter, self.headers))
logger.info("sniffed CSV delimiter=%r, headers=%s" %
(self.dialect.delimiter, self.headers))

def __iter__(self):
"""
Expand Down
Loading