Skip to content

Commit

Permalink
import developer information from github
Browse files Browse the repository at this point in the history
  • Loading branch information
Trilarion committed Jan 12, 2021
1 parent 45dffe5 commit 023ca7e
Show file tree
Hide file tree
Showing 914 changed files with 55,843 additions and 9,699 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ __pycache__
/code/archive/**
/code/lgw-import
/code/html/images-download
/private.properties
59 changes: 49 additions & 10 deletions code/github_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,24 @@
gh_entries_file = os.path.join(c.code_path, 'github_entries.txt')
prefix = 'https://github.com/'

# Exact-match rewrites for contributor blog URLs as reported by GitHub: plain-http or
# malformed values (keys) are replaced by the corrected URL (values) in github_import().
blog_alias = {'http://k776.tumblr.com/': 'https://k776.tumblr.com/', 'http://timpetricola.com': 'https://timpetricola.com',
'http:/code.schwitzer.ca': 'https://code.schwitzer.ca/', 'http:\\www.vampier.net': 'https://www.vampier.net/'}
# Blog URLs that are discarded (set to None) in github_import(); compared after aliasing
# and after a missing 'http' prefix has been replaced by 'https://'.
ignored_blogs = ('https://uto.io',)

# Languages reported for a repository that are never added to an entry's 'Code language' list.
ignored_languages = ('CSS', 'HTML', 'CMake', 'XSLT', 'ShaderLab')
# Maps a reported language name to the name used in entries; applied before the ignore check.
language_aliases = {'VBA': 'Visual Basic', 'Common Lisp': 'Lisp', 'Game Maker Language': 'Game Maker Script', 'NewLisp': 'Lisp'}

# Repository URLs that github_import() filters out before querying GitHub.
ignored_repos = ('https://github.com/jtc0de/Blitwizard.git','https://github.com/IceReaper/KKnD.git',
'https://github.com/KaidemonLP/Open-Fortress-Source.git', 'https://github.com/danielcrenna/TrueCraft.git')

# Maps a contributor's name (or login, when no name is set) to the preferred developer name.
name_aliases = {'Andreas Rosdal': 'Andreas Røsdal', 'davefancella': 'Dave Fancella', 'himiloshpetrov': 'Milosh Petrov',
'Jeremy Monin': 'Jeremy D. Monin', 'lennertclaeys': 'Lennert Claeys', 'malignantmanor': 'Malignant Manor',
'turulomio': 'Turulomio', '_Shaman': 'Shaman', 'alexandreSalconiDenis': 'Alexandre Salconi-Denis',
'buginator': 'Buginator', 'CiprianKhlud': 'Ciprian Khlud', 'dericpage': 'Deric Page',
'DI Murat Sari': 'Murat Sari', 'DolceTriade': 'Dolce Triade', 'DreamingPsion': 'Dreaming Psion',
'edwardlii': 'Edward Lii', 'erik-vos': 'Erik Vos', 'joevenzon': 'Joe Venzon', 'noamgat': 'Noam Gat',
'Dr. Martin Brumm': 'Martin Brumm'}


def collect_github_entries():
"""
Expand Down Expand Up @@ -36,6 +54,8 @@ def github_import():
:return:
"""
private_properties = json.loads(utils.read_text(c.private_properties_file))

files = json.loads(utils.read_text(gh_entries_file))

all_developers = osg.read_developers()
Expand All @@ -51,10 +71,14 @@ def github_import():
entry = osg.read_entry(file)
code_repositories = entry['Code repository']
repos = [x.value for x in code_repositories if x.startswith(prefix)]
repos[0] += ' @add'
repos = [x for x in repos if '@add' in x]
repos = [x.split(' ')[0] for x in repos]
repos = [x for x in repos if x not in ignored_repos]
for repo in repos:
print(' GH repo {}'.format(repo))

info = osg_github.retrieve_repo_info(repo)
info = osg_github.retrieve_repo_info(repo, private_properties['github-token'])

new_comments = []
# is archived
Expand All @@ -75,18 +99,24 @@ def github_import():

# update comment
for r in code_repositories:
if r.value == repo:
if r.value.startswith(repo):
break
comments = r.comment
if comments:
comments = comments.split(',')
comments = [c.strip() for c in comments if not c.startswith('@')]
r.comment = ', '.join(comments + new_comments)
comments = [c.strip() for c in comments]
comments = [c for c in comments if not c.startswith('@')] # delete old ones
comments += new_comments
else:
comments = new_comments
r.comment = ', '.join(comments)

# language in languages
language = info['language']
if language not in entry['Code language']:
entry['Code language'].append(language)
language = language_aliases.get(language, language)
if language and language not in entry['Code language'] and language not in ignored_languages:
entry['Code language'].append(osg_parse.ValueWithComment(language))
print(' added to languages: {}'.format(language))

# contributors
for contributor in info['contributors']:
Expand All @@ -98,7 +128,15 @@ def github_import():
name = contributor.name
if not name:
name = contributor.login
name = name_aliases.get(name, name)
nickname = '{}@GH'.format(contributor.login)
blog = contributor.blog
if blog:
blog = blog_alias[blog] if blog in blog_alias else blog
if not blog.startswith('http'):
blog = 'https://' + blog
if blog in ignored_blogs:
blog = None

# look up author in entry developers
if name not in entry.get('Developer', []):
Expand All @@ -114,13 +152,14 @@ def github_import():
if any(x.endswith('@GH') for x in dev.get('Contact', [])):
print('warning: already GH contact')
dev['Contact'] = dev.get('Contact', []) + [nickname]
if contributor.blog and contributor.blog not in dev.get('Home', []):
dev['Home'] = dev.get('Home', []) + [contributor.blog]
if blog and blog not in dev.get('Home', []):
dev['Home'] = dev.get('Home', []) + [blog]
# TODO add to games entries!
else:
print(' dev "{}" ({}) added to developer database'.format(name, nickname))
all_developers[name] = {'Name': name, 'Contact': [nickname], 'Games': [entry['Title']]}
if contributor.blog:
all_developers[name]['Home'] = [contributor.blog]
if blog:
all_developers[name]['Home'] = [blog]


entry['Code repository'] = code_repositories
Expand Down
16 changes: 10 additions & 6 deletions code/maintenance_developers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@
"""
# TODO bag of words (split, strip, lowercase) on dev names and try to detect sex and nationality
# TODO for duplicate names, create ignore list
# TODO split devs with multiple gh or sf accounts (unlikely), start with most (like name Adam) - naming convention @01 etc.
# TODO check for devs without contact after gitlab/bitbucket/..
# TODO gitlab/bitbucket import

from utils import osg_ui
from utils import osg
import time
from utils import osg, osg_ui


class DevelopersMaintainer:
Expand All @@ -30,12 +33,13 @@ def check_for_duplicates(self):
if not self.developers:
print('developers not yet loaded')
return
start_time = time.process_time()
developer_names = list(self.developers.keys())
for index, name in enumerate(developer_names):
for other_name in developer_names[index + 1:]:
if osg.name_similarity(str.casefold(name), str.casefold(other_name)) > 0.85:
print(' {} - {} is similar'.format(name, other_name))
print('duplicates checked')
print('duplicates checked (took {:.3f}s)'.format(time.process_time()-start_time))

def check_for_orphans(self):
if not self.developers:
Expand All @@ -44,7 +48,7 @@ def check_for_orphans(self):
for dev in self.developers.values():
if not dev['Games']:
print(' {} has no games'.format(dev['Name']))
print('orphanes checked')
print('orphans checked')

def check_for_missing_developers_in_entries(self):
if not self.developers:
Expand Down Expand Up @@ -81,12 +85,12 @@ def update_developers_from_entries(self):
entry_name = entry['Title']
entry_devs = entry.get('Developer', [])
for entry_dev in entry_devs:
entry_dev = entry_dev.value # ignored the comment
entry_dev = entry_dev.value # ignore a possible comment
if entry_dev in self.developers:
self.developers[entry_dev]['Games'].append(entry_name)
else:
# completely new developer
self.developers[entry_dev] = {'Name': entry_dev, 'Games': entry_name}
self.developers[entry_dev] = {'Name': entry_dev, 'Games': [entry_name]}
print('developers updated')

def read_entries(self):
Expand Down
5 changes: 5 additions & 0 deletions code/maintenance_entries.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@
Sorts the entries in the contents files of each sub folder alphabetically.
"""

# TODO check within an entry for similar dev names
# TODO special mode (load all and save all)
# TODO sort devs alphabetically upon save (if not done yet)
# TODO statistics on git repositories (created, stars, forks) and meaningful categories

import os
import re
import datetime
Expand Down
7 changes: 6 additions & 1 deletion code/sourceforge_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,12 @@
'Wraitii': 'Lancelot de Ferrière', 'Simzer': 'Simon Laszlo', 'armin bajramovic': 'Armin Bajramovic',
'bleu tailfly': 'bleutailfly', 'dlh': 'DLH', 'Bjorn Hansen': 'Bjørn Hansen', 'Louens Veen': 'Lourens Veen',
'linley_henzell': 'Linley Henzell', 'Patrice DUHAMEL': 'Patrice Duhamel', 'Etienne SOBOLE': 'Etienne Sobole',
'L. H. [Lubomír]': 'L. H. Lubomír'}
'L. H. [Lubomír]': 'L. H. Lubomír', 'davidjoffe': 'David Joffe', 'EugeneLoza': 'Eugene Loza',
'Kenneth Gangsto': 'Kenneth Gangstø', 'Lucas GAUTHERON': 'Lucas Gautheron', 'Per I Mathisen': 'Per Inge Mathisen',
'wrtlprnft': 'Wrzlprnft', 'daniel_santos': 'Daniel Santos', 'Dark_Sylinc': 'darksylinc',
'Don Llopis': 'Don E. Llopis', 'dwachs': 'Dwachs', 'Pierre-Loup Griffais': 'Pierre-Loup A. Griffais',
'Richard Gobeille': 'Richard C. Gobeille', 'timfelgentreff': 'Tim Felgentreff',
'Dr. Martin Brumm': 'Martin Brumm', 'Dr. Wolf-Dieter Beelitz': 'Wolf-Dieter Beelitz'}

SF_ignore_list = ('', 'Arianne Integration Bot')

Expand Down
2 changes: 1 addition & 1 deletion code/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
web_template_path = os.path.join(code_path, 'html')
web_css_path = os.path.join(web_path, 'css')


private_properties_file = os.path.join(root_path, 'private.properties')
inspirations_file = os.path.join(root_path, 'inspirations.md')
developer_file = os.path.join(root_path, 'developers.md')

Expand Down
14 changes: 10 additions & 4 deletions code/utils/osg_github.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Everything specific to the Github API (via PyGithub).
"""

from github import Github
from github import Github, GithubException


def normalize_repo_name(repo):
Expand All @@ -26,7 +26,7 @@ def repo_get_contributors(repo):
return contributors


def retrieve_repo_info(repos):
def retrieve_repo_info(repos, token=None):
"""
For a list of Github repos, retrieves repo information.
Expand All @@ -36,10 +36,16 @@ def retrieve_repo_info(repos):
if single_repo:
repos = (repos,)
result = []
g = Github()
if token:
g = Github(token)
else:
g = Github()
for repo in repos:
repo = normalize_repo_name(repo)
r = g.get_repo(repo)
try:
r = g.get_repo(repo)
except GithubException as e:
raise RuntimeError(e) # TODO what to do if repo does not exist?
e = {'archived': r.archived, 'contributors': repo_get_contributors(r), 'created': r.created_at, 'description': r.description,
'forks': r.forks_count, 'language': r.language, 'last modified': r.last_modified, 'name': r.name,
'open issues count': r.open_issues_count, 'owner': r.owner, 'stars': r.stargazers_count, 'topics': r.get_topics(), 'repo': repo}
Expand Down
Loading

0 comments on commit 023ca7e

Please sign in to comment.