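# Data Publish
#
# Workflow file for run #817 of the "Data Publish" workflow.
# (The "Skip to content" page-navigation text of the original capture is not part of the workflow.)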

# Display name of the workflow.
name: Data Publish

# Grants the workflow token write access to repository contents.
permissions:
  contents: write

on:
  # push:
  # Manual run. `games` selects what to crawl; each `skip_*` input is a free-text
  # flag that the jobs test for truthiness, so any non-empty value enables the skip.
  workflow_dispatch:
    inputs:
      games:
        description: 'Game to Crawl'
        required: true
        default: 'arknights,fgo,girlsfrontline,genshin,azurlane,neuralcloud,bluearchive,pathtonowhere,nikke,starrail'
      skip_crawl:
        description: 'Skip Crawl (Type Anything for Yes)'
        required: false
        default: ''
      skip_skin_crawl:
        description: 'Skip Skin Crawl (Type Anything for Yes)'
        required: false
        default: ''
      skip_tag_matches:
        description: 'Skip Tag Matches (Type Anything for Yes)'
        required: false
        default: ''
      skip_pixiv_names:
        description: 'Skip Pixiv Names (Type Anything for Yes)'
        required: false
        default: ''
      skip_pixiv_characters:
        description: 'Skip Pixiv Characters (Type Anything for Yes)'
        required: false
        default: ''
  # Daily run at 16:30 (GitHub evaluates cron expressions in UTC).
  schedule:
    - cron: '30 16 * * *'

# Workflow-wide settings, forwarded as command-line options to the
# `zoo.resources.pixiv` crawlers in the names/characters jobs.
# Kept as quoted strings so the values are not re-typed as floats/ints.
env:
  PIXIV_INTERVAL: '1.7'
  PIXIV_MIN_INTERVAL: '1.5'
  PIXIV_SLEEP_TIME: '30'
  PIXIV_SLEEP_EVERY: '70'
  PIXIV_ENSURE_TIMES: '2'

jobs:
# Runs the per-game index crawler (one matrix leg per game) and uploads each
# game's directory as `<game>.zip` into the shared `character-database` artifact.
data_crawl:
  name: Data Crawl
  runs-on: ${{ matrix.os }}
  strategy:
    # One failing game must not cancel the other matrix legs.
    fail-fast: false
    matrix:
      game:
        - 'bluearchive'
        - 'arknights'
        - 'fgo'
        - 'azurlane'
        - 'girlsfrontline'
        - 'genshin'
        - 'neuralcloud'
        - 'pathtonowhere'
        - 'nikke'
        - 'starrail'
      os:
        - 'ubuntu-latest'
      python-version:
        - '3.8'
  steps:
    - name: Get system version for Linux
      if: ${{ contains(matrix.os, 'ubuntu') }}
      shell: bash
      run: |
        echo "OS_NAME=Linux" >> $GITHUB_ENV
        echo "IS_WIN=" >> $GITHUB_ENV
        echo "IS_MAC=" >> $GITHUB_ENV
    - name: Set environment for Cpython
      if: ${{ !contains(matrix.python-version, 'pypy') }}
      shell: bash
      run: |
        echo "IS_PYPY=" >> $GITHUB_ENV
    - name: Checkout code
      uses: actions/checkout@v2
      with:
        fetch-depth: 20
        submodules: 'recursive'
    - name: Set up system dependencies on Linux
      if: ${{ env.OS_NAME == 'Linux' }}
      shell: bash
      run: |
        sudo apt-get update
        sudo apt-get install -y tree cloc wget curl make zip
    - name: Set up python ${{ matrix.python-version }}
      uses: actions/setup-python@v2
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install dependencies
      shell: bash
      run: |
        python -m pip install --upgrade pip
        pip install --upgrade flake8 setuptools wheel twine
        pip install -r requirements.txt
        pip install -r requirements-test.txt
        pip install -r requirements-crawl.txt
    # Exactly one of the next three steps runs and writes SCHEDULED into the job
    # environment; the crawler below runs only when that value is exactly 'yes'.
    - name: Check if is scheduled when not workflow dispatched
      if: ${{ github.event_name != 'workflow_dispatch' }}
      shell: bash
      run: |
        echo "SCHEDULED=$(python -m zoo schedule -g ${{ matrix.game }})" >> $GITHUB_ENV
    - name: Check if is scheduled when is workflow dispatched
      if: ${{ github.event_name == 'workflow_dispatch' && !github.event.inputs.skip_crawl }}
      shell: bash
      env:
        # The free-text dispatch input is handed over through the environment and
        # quoted at the point of use, so it is never expanded into the script text
        # (prevents shell injection and word splitting).
        DISPATCHED_GAMES: ${{ github.event.inputs.games }}
      run: |
        echo "SCHEDULED=$(python -m zoo scheck -g ${{ matrix.game }} -s "$DISPATCHED_GAMES")" >> $GITHUB_ENV
    - name: Check if is scheduled when is workflow dispatched and skipped
      if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.skip_crawl }}
      shell: bash
      run: |
        echo "SCHEDULED=no" >> $GITHUB_ENV
    - name: Test the basic environment
      shell: bash
      run: |
        python -V
        pip --version
        pip list
        tree .
        mkdir -p ${{ matrix.game }}
    - name: Run Game Crawler
      if: ${{ env.SCHEDULED == 'yes' }}
      uses: nick-fields/retry@v2
      env:
        CI: 'true'
      with:
        shell: bash
        timeout_minutes: 180
        max_attempts: 5
        retry_on: any
        command: |
          python -m zoo.games.${{ matrix.game }} --help && \
          python -m zoo.games.${{ matrix.game }} index_export -O ${{ matrix.game }}
    - name: Package The Data
      env:
        CI: 'true'
      shell: bash
      # `mkdir -p` keeps the zip step working even when the crawler was skipped.
      run: |
        mkdir -p ${{ matrix.game }}
        zip -r ${{ matrix.game }}.zip ${{ matrix.game }}
    # NOTE(review): every matrix leg uploads into the same artifact name, which
    # depends on upload-artifact v3 behaviour; v3 is deprecated and v4 rejects
    # repeated names, so upgrading needs a coordinated change in the download jobs.
    - name: Upload the character databases
      uses: actions/upload-artifact@v3
      with:
        name: character-database
        path: ${{ matrix.game }}.zip
# Collects the zips produced by `data_crawl`, unpacks them into a clone of the
# Hugging Face dataset repository and pushes the result.
data_crawl_upload:
  name: Data Crawl Upload
  needs:
    - data_crawl
  runs-on: ${{ matrix.os }}
  strategy:
    fail-fast: false
    matrix:
      os:
        - 'ubuntu-latest'
      python-version:
        - '3.8'
  steps:
    - name: Prepare the repository
      shell: bash
      # Refresh the package index first (as the crawl jobs do) so the install
      # does not fail on a stale index.
      run: |
        sudo apt-get update
        sudo apt-get install -y tree
        git clone https://huggingface.co/datasets/deepghs/game_characters
        ls -al game_characters
    - name: Download from artifact
      uses: actions/download-artifact@v3
      with:
        name: character-database
        path: game_characters
    - name: See what is in this path
      shell: bash
      # The loop variable is quoted so archive names are passed to unzip intact.
      run: |
        cd game_characters
        for zfile in *.zip; do unzip -o "$zfile"; done
        rm -rf *.zip
        tree .
    # The push URL authenticates with the HF_TOKEN secret exported to this step;
    # the host is the same one the repository was cloned from.
    # NOTE(review): the user.email value below is an e-mail-obfuscation placeholder
    # left by the page capture; restore the real address before use.
    - name: Push models to hugging face repostory
      env:
        HF_TOKEN: ${{ secrets.HF_TOKEN }}
      run: |
        cd game_characters
        git config user.name 'narugo1992'
        git config user.email '[email protected]'
        git add -A
        git diff-index --quiet HEAD || git commit -a -m "dev(narugo): auto sync $(date -R)"
        git push "https://narugo:${HF_TOKEN}@huggingface.co/datasets/deepghs/game_characters" main
# Runs the skins crawler (`python -m zoo.skins`) for each game, one matrix leg
# at a time, after the crawled data has been uploaded.
skin_crawl:
  name: Skin Crawl
  runs-on: ${{ matrix.os }}
  needs:
    - data_crawl_upload
  strategy:
    fail-fast: false
    # Games are processed strictly one after another.
    max-parallel: 1
    matrix:
      game:
        - 'bluearchive'
        - 'arknights'
        - 'fgo'
        - 'azurlane'
        - 'girlsfrontline'
        - 'genshin'
        - 'neuralcloud'
        - 'pathtonowhere'
        - 'nikke'
        - 'starrail'
      os:
        - 'ubuntu-latest'
      python-version:
        - '3.8'
  steps:
    - name: Get system version for Linux
      if: ${{ contains(matrix.os, 'ubuntu') }}
      shell: bash
      run: |
        echo "OS_NAME=Linux" >> $GITHUB_ENV
        echo "IS_WIN=" >> $GITHUB_ENV
        echo "IS_MAC=" >> $GITHUB_ENV
    - name: Set environment for Cpython
      if: ${{ !contains(matrix.python-version, 'pypy') }}
      shell: bash
      run: |
        echo "IS_PYPY=" >> $GITHUB_ENV
    - name: Checkout code
      uses: actions/checkout@v2
      with:
        fetch-depth: 20
        submodules: 'recursive'
    - name: Set up system dependencies on Linux
      if: ${{ env.OS_NAME == 'Linux' }}
      shell: bash
      run: |
        sudo apt-get update
        sudo apt-get install -y tree cloc wget curl make zip
    - name: Set up python ${{ matrix.python-version }}
      uses: actions/setup-python@v2
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install dependencies
      shell: bash
      run: |
        python -m pip install --upgrade pip
        pip install --upgrade flake8 setuptools wheel twine
        pip install -r requirements.txt
        pip install -r requirements-test.txt
        pip install -r requirements-crawl.txt
    # Exactly one of the next three steps runs and writes SCHEDULED into the job
    # environment; the crawler below runs only when that value is exactly 'yes'.
    - name: Check if is scheduled when not workflow dispatched
      if: ${{ github.event_name != 'workflow_dispatch' }}
      shell: bash
      run: |
        echo "SCHEDULED=$(python -m zoo schedule -g ${{ matrix.game }})" >> $GITHUB_ENV
    - name: Check if is scheduled when is workflow dispatched
      if: ${{ github.event_name == 'workflow_dispatch' && !github.event.inputs.skip_skin_crawl }}
      shell: bash
      env:
        # The free-text dispatch input is handed over through the environment and
        # quoted at the point of use, so it is never expanded into the script text
        # (prevents shell injection and word splitting).
        DISPATCHED_GAMES: ${{ github.event.inputs.games }}
      run: |
        echo "SCHEDULED=$(python -m zoo scheck -g ${{ matrix.game }} -s "$DISPATCHED_GAMES")" >> $GITHUB_ENV
    - name: Check if is scheduled when is workflow dispatched and skipped
      if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.skip_skin_crawl }}
      shell: bash
      run: |
        echo "SCHEDULED=no" >> $GITHUB_ENV
    - name: Test the basic environment
      shell: bash
      run: |
        python -V
        pip --version
        pip list
        tree .
    - name: Run Pixiv Skins Crawler
      if: ${{ env.SCHEDULED == 'yes' }}
      uses: nick-fields/retry@v2
      env:
        CI: 'true'
        HF_TOKEN: ${{ secrets.HF_TOKEN }}
      with:
        shell: bash
        timeout_minutes: 300
        max_attempts: 5
        retry_on: any
        command: |
          python -m zoo.skins -g ${{ matrix.game }}
# Runs the Pixiv names crawler per game (one leg at a time), writing
# `<game>/pixiv_names.json`, and publishes the zipped game directory twice:
# into the shared `character-database` artifact and into a per-game artifact
# that `chars_crawl` downloads.
names_crawl:
  name: Names Crawl
  runs-on: ${{ matrix.os }}
  needs:
    - data_crawl_upload
  strategy:
    fail-fast: false
    # Games are processed strictly one after another.
    max-parallel: 1
    matrix:
      game:
        - 'bluearchive'
        - 'arknights'
        - 'fgo'
        - 'azurlane'
        - 'girlsfrontline'
        - 'genshin'
        - 'neuralcloud'
        - 'pathtonowhere'
        - 'nikke'
        - 'starrail'
      os:
        - 'ubuntu-latest'
      python-version:
        - '3.8'
  steps:
    - name: Get system version for Linux
      if: ${{ contains(matrix.os, 'ubuntu') }}
      shell: bash
      run: |
        echo "OS_NAME=Linux" >> $GITHUB_ENV
        echo "IS_WIN=" >> $GITHUB_ENV
        echo "IS_MAC=" >> $GITHUB_ENV
    - name: Set environment for Cpython
      if: ${{ !contains(matrix.python-version, 'pypy') }}
      shell: bash
      run: |
        echo "IS_PYPY=" >> $GITHUB_ENV
    - name: Checkout code
      uses: actions/checkout@v2
      with:
        fetch-depth: 20
        submodules: 'recursive'
    - name: Set up system dependences on Linux
      if: ${{ env.OS_NAME == 'Linux' }}
      shell: bash
      run: |
        sudo apt-get update
        sudo apt-get install -y tree cloc wget curl make zip
    - name: Set up python ${{ matrix.python-version }}
      uses: actions/setup-python@v2
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install dependencies
      shell: bash
      run: |
        python -m pip install --upgrade pip
        pip install --upgrade flake8 setuptools wheel twine
        pip install -r requirements.txt
        pip install -r requirements-test.txt
        pip install -r requirements-crawl.txt
    # Exactly one of the next three steps runs and writes SCHEDULED into the job
    # environment; the crawler below runs only when that value is exactly 'yes'.
    - name: Check if is scheduled when not workflow dispatched
      if: ${{ github.event_name != 'workflow_dispatch' }}
      shell: bash
      run: |
        echo "SCHEDULED=$(python -m zoo schedule -g ${{ matrix.game }})" >> $GITHUB_ENV
    - name: Check if is scheduled when is workflow dispatched
      if: ${{ github.event_name == 'workflow_dispatch' && !github.event.inputs.skip_pixiv_names }}
      shell: bash
      env:
        # The free-text dispatch input is handed over through the environment and
        # quoted at the point of use, so it is never expanded into the script text
        # (prevents shell injection and word splitting).
        DISPATCHED_GAMES: ${{ github.event.inputs.games }}
      run: |
        echo "SCHEDULED=$(python -m zoo scheck -g ${{ matrix.game }} -s "$DISPATCHED_GAMES")" >> $GITHUB_ENV
    - name: Check if is scheduled when is workflow dispatched and skipped
      if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.skip_pixiv_names }}
      shell: bash
      run: |
        echo "SCHEDULED=no" >> $GITHUB_ENV
    - name: Test the basic environment
      shell: bash
      run: |
        python -V
        pip --version
        pip list
        tree .
    - name: Run Pixiv Names Crawler
      if: ${{ env.SCHEDULED == 'yes' }}
      env:
        CI: 'true'
        HF_TOKEN: ${{ secrets.HF_TOKEN }}
        REMOTE_PIXIV_SESSION_REPO: ${{ secrets.REMOTE_PIXIV_SESSION_REPO }}
      shell: bash
      # Pacing options come from the workflow-level PIXIV_* environment values.
      run: |
        mkdir -p ${{ matrix.game }} &&\
        python -m zoo.resources.pixiv names \
          -g ${{ matrix.game }} \
          -o ${{ matrix.game }}/pixiv_names.json \
          --interval ${{ env.PIXIV_INTERVAL }} \
          --min_interval ${{ env.PIXIV_MIN_INTERVAL }} \
          --sleep_time ${{ env.PIXIV_SLEEP_TIME }} \
          --sleep_every ${{ env.PIXIV_SLEEP_EVERY }} \
          --ensure_times ${{ env.PIXIV_ENSURE_TIMES }}
    - name: Package The Data
      env:
        CI: 'true'
      shell: bash
      # `mkdir -p` keeps the zip step working even when the crawler was skipped.
      run: |
        mkdir -p ${{ matrix.game }}
        zip -r ${{ matrix.game }}.zip ${{ matrix.game }}
    # NOTE(review): every matrix leg uploads into the same artifact name, which
    # depends on upload-artifact v3 behaviour; v3 is deprecated and v4 rejects
    # repeated names, so upgrading needs a coordinated change in the download jobs.
    - name: Upload the character databases
      uses: actions/upload-artifact@v3
      with:
        name: character-database
        path: ${{ matrix.game }}.zip
    # Per-game copy of the same zip, fetched by the matching `chars_crawl` leg.
    - name: Upload the specific database
      uses: actions/upload-artifact@v3
      with:
        name: character-names-db-${{ matrix.game }}
        path: ${{ matrix.game }}.zip
# Runs the Pixiv characters crawler per game (one leg at a time). Each leg first
# restores the game's directory from the per-game artifact written by
# `names_crawl`, then reads `<game>/pixiv_names.json` and writes
# `<game>/pixiv_characters.json`.
chars_crawl:
  name: Chars Crawl
  runs-on: ${{ matrix.os }}
  needs:
    - data_crawl_upload
    - names_crawl
  strategy:
    fail-fast: false
    # Games are processed strictly one after another.
    max-parallel: 1
    matrix:
      game:
        - 'bluearchive'
        - 'arknights'
        - 'fgo'
        - 'azurlane'
        - 'girlsfrontline'
        - 'genshin'
        - 'neuralcloud'
        - 'pathtonowhere'
        - 'nikke'
        - 'starrail'
      os:
        - 'ubuntu-latest'
      python-version:
        - '3.8'
  steps:
    - name: Get system version for Linux
      if: ${{ contains(matrix.os, 'ubuntu') }}
      shell: bash
      run: |
        echo "OS_NAME=Linux" >> $GITHUB_ENV
        echo "IS_WIN=" >> $GITHUB_ENV
        echo "IS_MAC=" >> $GITHUB_ENV
    - name: Set environment for Cpython
      if: ${{ !contains(matrix.python-version, 'pypy') }}
      shell: bash
      run: |
        echo "IS_PYPY=" >> $GITHUB_ENV
    - name: Checkout code
      uses: actions/checkout@v2
      with:
        fetch-depth: 20
        submodules: 'recursive'
    - name: Set up system dependences on Linux
      if: ${{ env.OS_NAME == 'Linux' }}
      shell: bash
      run: |
        sudo apt-get update
        sudo apt-get install -y tree cloc wget curl make zip
    - name: Set up python ${{ matrix.python-version }}
      uses: actions/setup-python@v2
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install dependencies
      shell: bash
      run: |
        python -m pip install --upgrade pip
        pip install --upgrade flake8 setuptools wheel twine
        pip install -r requirements.txt
        pip install -r requirements-test.txt
        pip install -r requirements-crawl.txt
    # Exactly one of the next three steps runs and writes SCHEDULED into the job
    # environment; the crawler below runs only when that value is exactly 'yes'.
    - name: Check if is scheduled when not workflow dispatched
      if: ${{ github.event_name != 'workflow_dispatch' }}
      shell: bash
      run: |
        echo "SCHEDULED=$(python -m zoo schedule -g ${{ matrix.game }})" >> $GITHUB_ENV
    - name: Check if is scheduled when is workflow dispatched
      if: ${{ github.event_name == 'workflow_dispatch' && !github.event.inputs.skip_pixiv_characters }}
      shell: bash
      env:
        # The free-text dispatch input is handed over through the environment and
        # quoted at the point of use, so it is never expanded into the script text
        # (prevents shell injection and word splitting).
        DISPATCHED_GAMES: ${{ github.event.inputs.games }}
      run: |
        echo "SCHEDULED=$(python -m zoo scheck -g ${{ matrix.game }} -s "$DISPATCHED_GAMES")" >> $GITHUB_ENV
    - name: Check if is scheduled when is workflow dispatched and skipped
      if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.skip_pixiv_characters }}
      shell: bash
      run: |
        echo "SCHEDULED=no" >> $GITHUB_ENV
    - name: Download from artifact
      uses: actions/download-artifact@v3
      with:
        name: character-names-db-${{ matrix.game }}
    - name: Load Index file from upstream
      shell: bash
      run: |
        ls -al ${{ matrix.game }}.zip
        unzip -o ${{ matrix.game }}.zip
        ls -al ${{ matrix.game }}
    - name: Test the basic environment
      shell: bash
      run: |
        python -V
        pip --version
        pip list
        tree .
    - name: Run Pixiv Characters Crawler
      if: ${{ env.SCHEDULED == 'yes' }}
      env:
        CI: 'true'
        HF_TOKEN: ${{ secrets.HF_TOKEN }}
        REMOTE_PIXIV_SESSION_REPO: ${{ secrets.REMOTE_PIXIV_SESSION_REPO }}
      shell: bash
      # Pacing options come from the workflow-level PIXIV_* environment values.
      run: |
        mkdir -p ${{ matrix.game }} && \
        python -m zoo.resources.pixiv characters \
          -g ${{ matrix.game }} \
          -i ${{ matrix.game }}/pixiv_names.json \
          -o ${{ matrix.game }}/pixiv_characters.json \
          --interval ${{ env.PIXIV_INTERVAL }} \
          --min_interval ${{ env.PIXIV_MIN_INTERVAL }} \
          --sleep_time ${{ env.PIXIV_SLEEP_TIME }} \
          --sleep_every ${{ env.PIXIV_SLEEP_EVERY }} \
          --ensure_times ${{ env.PIXIV_ENSURE_TIMES }}
    - name: Package The Data
      env:
        CI: 'true'
      shell: bash
      run: |
        mkdir -p ${{ matrix.game }}
        zip -r ${{ matrix.game }}.zip ${{ matrix.game }}
    # NOTE(review): every matrix leg uploads into the same artifact name, which
    # depends on upload-artifact v3 behaviour; v3 is deprecated and v4 rejects
    # repeated names, so upgrading needs a coordinated change in the download jobs.
    - name: Upload the character databases
      uses: actions/upload-artifact@v3
      with:
        name: character-database
        path: ${{ matrix.game }}.zip
# Runs `chtags_export` for every (resource, game) pair of the matrix and uploads
# the game directory as `<resource>_<game>.zip` into the shared
# `character-database` artifact.
tag_match:
  name: Tag Match
  runs-on: ${{ matrix.os }}
  needs:
    - data_crawl_upload
  strategy:
    fail-fast: false
    matrix:
      resource:
        - 'anime_pictures'
        - 'zerochan'
        - 'danbooru'
        - 'safebooru'
        - 'atfbooru'
        - 'sankaku'
        # - 'rule34'
        - 'hypnohub'
        - 'xbooru'
        - 'konachan'
        - 'konachan_net'
        - 'yande'
        - 'lolibooru'
        - 'gelbooru'
        - 'wallhaven'
      game:
        - 'bluearchive'
        - 'arknights'
        - 'fgo'
        - 'azurlane'
        - 'girlsfrontline'
        - 'genshin'
        - 'neuralcloud'
        - 'pathtonowhere'
        - 'nikke'
        - 'starrail'
      os:
        - 'ubuntu-latest'
      python-version:
        - '3.8'
  steps:
    - name: Get system version for Linux
      if: ${{ contains(matrix.os, 'ubuntu') }}
      shell: bash
      run: |
        echo "OS_NAME=Linux" >> $GITHUB_ENV
        echo "IS_WIN=" >> $GITHUB_ENV
        echo "IS_MAC=" >> $GITHUB_ENV
    - name: Set environment for Cpython
      if: ${{ !contains(matrix.python-version, 'pypy') }}
      shell: bash
      run: |
        echo "IS_PYPY=" >> $GITHUB_ENV
    - name: Checkout code
      uses: actions/checkout@v2
      with:
        fetch-depth: 20
        submodules: 'recursive'
    - name: Set up system dependences on Linux
      if: ${{ env.OS_NAME == 'Linux' }}
      shell: bash
      run: |
        sudo apt-get update
        sudo apt-get install -y tree cloc wget curl make zip
    - name: Set up python ${{ matrix.python-version }}
      uses: actions/setup-python@v2
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install dependencies
      shell: bash
      run: |
        python -m pip install --upgrade pip
        pip install --upgrade flake8 setuptools wheel twine
        pip install -r requirements.txt
        pip install -r requirements-test.txt
        pip install -r requirements-crawl.txt
    # Exactly one of the next three steps runs and writes SCHEDULED into the job
    # environment; the crawler below runs only when that value is exactly 'yes'.
    - name: Check if is scheduled when not workflow dispatched
      if: ${{ github.event_name != 'workflow_dispatch' }}
      shell: bash
      run: |
        echo "SCHEDULED=$(python -m zoo schedule -g ${{ matrix.game }})" >> $GITHUB_ENV
    - name: Check if is scheduled when is workflow dispatched
      if: ${{ github.event_name == 'workflow_dispatch' && !github.event.inputs.skip_tag_matches }}
      shell: bash
      env:
        # The free-text dispatch input is handed over through the environment and
        # quoted at the point of use, so it is never expanded into the script text
        # (prevents shell injection and word splitting).
        DISPATCHED_GAMES: ${{ github.event.inputs.games }}
      run: |
        echo "SCHEDULED=$(python -m zoo scheck -g ${{ matrix.game }} -s "$DISPATCHED_GAMES")" >> $GITHUB_ENV
    - name: Check if is scheduled when is workflow dispatched and skipped
      if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.skip_tag_matches }}
      shell: bash
      run: |
        echo "SCHEDULED=no" >> $GITHUB_ENV
    - name: Test the basic environment
      shell: bash
      run: |
        python -V
        pip --version
        pip list
        tree .
    # A failure here (after all retries) does not fail the job: packaging and
    # upload below still run.
    - name: Run Game Crawler
      uses: nick-fields/retry@v2
      if: ${{ env.SCHEDULED == 'yes' }}
      continue-on-error: true
      env:
        CI: 'true'
        HF_TOKEN: ${{ secrets.HF_TOKEN }}
      with:
        shell: bash
        timeout_minutes: 340
        max_attempts: 5
        retry_on: any
        command: |
          python -m zoo.resources.${{ matrix.resource }} chtags_export -g ${{ matrix.game }}
    - name: Package The Data
      env:
        CI: 'true'
      shell: bash
      # `mkdir -p` keeps the zip step working even when the crawler was skipped.
      run: |
        mkdir -p ${{ matrix.game }}
        zip -r ${{ matrix.resource }}_${{ matrix.game }}.zip ${{ matrix.game }}
    # NOTE(review): every matrix leg uploads into the same artifact name, which
    # depends on upload-artifact v3 behaviour; v3 is deprecated and v4 rejects
    # repeated names, so upgrading needs a coordinated change in the download jobs.
    - name: Upload the character databases
      uses: actions/upload-artifact@v3
      with:
        name: character-database
        path: ${{ matrix.resource }}_${{ matrix.game }}.zip
# Final publish: after the names, characters and tag-match jobs have finished,
# unpacks every zip of the `character-database` artifact into a clone of the
# Hugging Face dataset repository and pushes the result.
data_upload:
  name: Data Upload
  runs-on: ${{ matrix.os }}
  needs:
    - data_crawl_upload
    - names_crawl
    - chars_crawl
    - tag_match
  strategy:
    fail-fast: false
    matrix:
      os:
        - 'ubuntu-latest'
      python-version:
        - '3.8'
  steps:
    - name: Prepare the repository
      shell: bash
      # Refresh the package index first (as the crawl jobs do) so the install
      # does not fail on a stale index.
      run: |
        sudo apt-get update
        sudo apt-get install -y tree
        git clone https://huggingface.co/datasets/deepghs/game_characters
        ls -al game_characters
    - name: Download from artifact
      uses: actions/download-artifact@v3
      with:
        name: character-database
        path: game_characters
    - name: See what is in this path
      shell: bash
      # The loop variable is quoted so archive names are passed to unzip intact.
      run: |
        cd game_characters
        for zfile in *.zip; do unzip -o "$zfile"; done
        rm -rf *.zip
        tree .
    # The push URL authenticates with the HF_TOKEN secret exported to this step;
    # the host is the same one the repository was cloned from.
    # NOTE(review): the user.email value below is an e-mail-obfuscation placeholder
    # left by the page capture; restore the real address before use.
    - name: Push models to hugging face repostory
      env:
        HF_TOKEN: ${{ secrets.HF_TOKEN }}
      run: |
        cd game_characters
        git config user.name 'narugo1992'
        git config user.email '[email protected]'
        git add -A
        git diff-index --quiet HEAD || git commit -a -m "dev(narugo): auto sync $(date -R)"
        git push "https://narugo:${HF_TOKEN}@huggingface.co/datasets/deepghs/game_characters" main
# Closing job: once the data upload and the skin crawl are done, refreshes the
# "Data Updated" badge and sends a `data-refreshed` repository-dispatch event to
# narugo1992/games_character_ranking.
final_complete:
  name: Final Complete
  runs-on: ${{ matrix.os }}
  needs:
    - data_upload
    - skin_crawl
  strategy:
    fail-fast: false
    matrix:
      os:
        - 'ubuntu-latest'
      python-version:
        - '3.8'
  steps:
    # Stores the current timestamp in UPDATE_DATE for the badge message below.
    - name: Get the Numbers
      run: |
        echo "UPDATE_DATE=$(date +'%Y-%m-%d %H:%M %Z')" >> $GITHUB_ENV
    # NOTE(review): the action reference was mangled by e-mail obfuscation in the
    # captured source ("schneegans/[email protected]"), which is not a valid
    # `owner/repo@ref`. The repository name is restored from the inputs used here;
    # the v1.0.0 tag is an assumption - confirm against the real workflow file.
    - name: Create Update Time Badge
      uses: schneegans/dynamic-badges-action@v1.0.0
      with:
        auth: ${{ secrets.GIST_SECRET }}
        gistID: ${{ secrets.BADGE_GIST_ID }}
        filename: data_last_update.json
        label: Data Updated
        message: ${{ env.UPDATE_DATE }}
        color: yellowgreen
    - name: Repository Dispatch
      uses: peter-evans/repository-dispatch@v2
      with:
        token: ${{ secrets.PAT }}
        repository: narugo1992/games_character_ranking
        event-type: data-refreshed