# Skip to content
#
# Image Site Tags
#
# Image Site Tags #579
#
# Workflow file for this run

# Workflow identity, token permissions and triggers.
name: Image Site Tags

# Grant the workflow token write access to repository contents.
permissions:
  contents: write

on:
  # push:
  # Allow manual runs from the Actions tab.
  workflow_dispatch:
  # Scheduled run every day at 14:30 (GitHub evaluates cron in UTC).
  schedule:
    - cron: '30 14 * * *'
jobs:
  # One matrix job per site: run that site's tag exporter and publish the
  # result as a zip artifact for the upload job below.
  data_crawl:
    name: Data Crawl
    runs-on: ${{ matrix.os }}
    strategy:
      # Keep crawling the other sites when one of them fails.
      fail-fast: false
      matrix:
        resource:
          - 'anime_pictures'
          # - 'zerochan'  # do not refresh zerochan, it limited the page to 1000
          - 'danbooru'
          - 'safebooru'
          - 'atfbooru'
          - 'sankaku'
          - 'rule34'
          - 'hypnohub'
          - 'xbooru'
          - 'konachan'
          - 'konachan_net'
          - 'yande'
          # - 'lolibooru'  # do not refresh lolibooru, it has been closed
          - 'gelbooru'
          - 'pixiv'
          - 'pixiv_en'
          - 'wallhaven'
        os:
          - 'ubuntu-latest'
        python-version:
          - '3.8'
    steps:
      # Export platform flags that later steps read through `env.*`.
      - name: Get system version for Linux
        if: ${{ contains(matrix.os, 'ubuntu') }}
        shell: bash
        run: |
          echo "OS_NAME=Linux" >> "$GITHUB_ENV"
          echo "IS_WIN=" >> "$GITHUB_ENV"
          echo "IS_MAC=" >> "$GITHUB_ENV"
      - name: Set environment for Cpython
        if: ${{ !contains(matrix.python-version, 'pypy') }}
        shell: bash
        run: |
          echo "IS_PYPY=" >> "$GITHUB_ENV"
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 20
          submodules: 'recursive'
      - name: Set up system dependences on Linux
        if: ${{ env.OS_NAME == 'Linux' }}
        shell: bash
        run: |
          sudo apt-get update
          sudo apt-get install -y tree cloc wget curl make zip
      - name: Set up python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        shell: bash
        run: |
          python -m pip install --upgrade pip
          pip install --upgrade flake8 setuptools wheel twine
          pip install -r requirements.txt
          pip install -r requirements-test.txt
          pip install -r requirements-crawl.txt
      # Diagnostic dump of the interpreter, packages and checkout layout.
      - name: Test the basic environment
        shell: bash
        run: |
          python -V
          pip --version
          pip list
          tree .
      - name: Prepare Data Directory
        shell: bash
        run: |
          mkdir -p tags_db
      - name: Run Tag Crawlers
        uses: nick-fields/retry@v3
        # A site that still fails after all attempts must not fail the job;
        # the packaging steps are gated on CRAWL_SUCCESS instead.
        continue-on-error: true
        env:
          CI: 'true'
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          SANKAKU_USERNAME: ${{ secrets.SANKAKU_USERNAME }}
          SANKAKU_PASSWORD: ${{ secrets.SANKAKU_PASSWORD }}
        with:
          shell: bash
          # Per-attempt limit.
          # NOTE(review): five 180-minute attempts can exceed the runner's
          # overall job time limit - confirm the intended retry budget.
          timeout_minutes: 180
          max_attempts: 5
          retry_on: any
          # The script's exit status is that of its last command, so the
          # crawler's status is captured and returned explicitly; otherwise
          # the trailing `ls` would mask a failed crawl and no retry would
          # ever be triggered.
          command: |
            if python -m zoo.resources.${{ matrix.resource }} tags_export -o tags_db; then
              crawl_status=0
              echo "CRAWL_SUCCESS=yes" >> "$GITHUB_ENV"
            else
              crawl_status=$?
            fi
            ls -al tags_db
            exit "$crawl_status"
      - name: Package Data to Zip Archive
        shell: bash
        if: ${{ env.CRAWL_SUCCESS == 'yes' }}
        run: |
          cd tags_db
          zip -r ${{ matrix.resource }}.zip *
          cd ..
          tree tags_db
      - name: Upload the character databases
        uses: actions/upload-artifact@v4
        if: ${{ env.CRAWL_SUCCESS == 'yes' }}
        with:
          # upload-artifact v4 rejects a second upload to an existing artifact
          # name, so every matrix job publishes under its own suffix.
          name: character-database-${{ matrix.resource }}
          path: tags_db/${{ matrix.resource }}.zip

  # Collect every per-site archive, unpack them side by side and push the
  # merged tree to the Hugging Face repository.
  data_upload:
    name: Data Upload
    runs-on: ${{ matrix.os }}
    needs:
      - data_crawl
    strategy:
      fail-fast: false
      matrix:
        os:
          - 'ubuntu-latest'
        python-version:
          - '3.8'
    steps:
      # Install the interpreter named in the matrix so `pip` below does not
      # depend on whatever Python the runner image ships with.
      - name: Set up python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Prepare the repository
        shell: bash
        run: |
          sudo apt-get update
          sudo apt-get install -y tree
          pip install -U hfutils
      - name: Download from artifact
        uses: actions/download-artifact@v4
        with:
          # Fetch all per-site artifacts and flatten them into one directory,
          # so site_tags/ contains <resource>.zip files directly.
          pattern: character-database-*
          merge-multiple: true
          path: site_tags
      - name: See what is in this path
        shell: bash
        run: |
          mkdir -p site_tags
          cd site_tags
          for zfile in *.zip; do unzip -o "$zfile"; done
          rm -rf *.zip
          tree .
      - name: Push models to hugging face repository
        shell: bash
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          hfutils whoami
          hfutils upload -r deepghs/site_tags -i site_tags -d . -m "dev(narugo): auto sync"