# Image Site Tags #579
# (Web viewer notice) This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# (Web viewer notice) Learn more about bidirectional Unicode characters
# Workflow that crawls tag data from a list of image sites and publishes it.
name: Image Site Tags

# NOTE(review): no step visible in this workflow pushes to the repository;
# `contents: read` may be sufficient -- confirm before tightening.
permissions:
  contents: write

# Triggers: manual dispatch, plus a daily schedule (cron is evaluated in UTC).
on:
  # push:
  workflow_dispatch:
  schedule:
    - cron: '30 14 * * *'
jobs:
  # Runs one crawler per matrix `resource` and publishes the result as a
  # per-resource zip artifact for the `data_upload` job to collect.
  data_crawl:
    name: Data Crawl
    runs-on: ${{ matrix.os }}
    strategy:
      # One failing site must not cancel the crawls of the other sites.
      fail-fast: false
      matrix:
        resource:
          - 'anime_pictures'
          # - 'zerochan'  # do not refresh zerochan, it limits the page to 1000
          - 'danbooru'
          - 'safebooru'
          - 'atfbooru'
          - 'sankaku'
          - 'rule34'
          - 'hypnohub'
          - 'xbooru'
          - 'konachan'
          - 'konachan_net'
          - 'yande'
          # - 'lolibooru'  # do not refresh lolibooru, it has been closed
          - 'gelbooru'
          - 'pixiv'
          - 'pixiv_en'
          - 'wallhaven'
        os:
          - 'ubuntu-latest'
        python-version:
          - '3.8'
    steps:
      # Export platform flags through GITHUB_ENV for the later steps.
      - name: Get system version for Linux
        if: ${{ contains(matrix.os, 'ubuntu') }}
        shell: bash
        run: |
          echo "OS_NAME=Linux" >> "$GITHUB_ENV"
          echo "IS_WIN=" >> "$GITHUB_ENV"
          echo "IS_MAC=" >> "$GITHUB_ENV"
      - name: Set environment for Cpython
        if: ${{ !contains(matrix.python-version, 'pypy') }}
        shell: bash
        run: |
          echo "IS_PYPY=" >> "$GITHUB_ENV"
      # checkout@v2 ran on a retired Node runtime; v4 accepts the same inputs.
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 20
          submodules: 'recursive'
      - name: Set up system dependences on Linux
        if: ${{ env.OS_NAME == 'Linux' }}
        shell: bash
        run: |
          sudo apt-get update
          sudo apt-get install -y tree cloc wget curl make zip
      - name: Set up python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        shell: bash
        run: |
          python -m pip install --upgrade pip
          pip install --upgrade flake8 setuptools wheel twine
          pip install -r requirements.txt
          pip install -r requirements-test.txt
          pip install -r requirements-crawl.txt
      - name: Test the basic environment
        shell: bash
        run: |
          python -V
          pip --version
          pip list
          tree .
      - name: Prepare Data Directory
        shell: bash
        run: |
          mkdir -p tags_db
      # CRAWL_SUCCESS is only exported when the crawler exits successfully;
      # the packaging and upload steps below are gated on it.
      # `continue-on-error` keeps the job green when a site cannot be crawled.
      # NOTE(review): the command's last statement is `ls -al tags_db`, so its
      # exit status may not reflect a crawler failure and the retry may never
      # trigger -- confirm how the retry action runs multi-line commands.
      - name: Run Tag Crawlers
        uses: nick-fields/retry@v2
        continue-on-error: true
        env:
          CI: 'true'
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          SANKAKU_USERNAME: ${{ secrets.SANKAKU_USERNAME }}
          SANKAKU_PASSWORD: ${{ secrets.SANKAKU_PASSWORD }}
        with:
          shell: bash
          timeout_minutes: 180
          max_attempts: 5
          retry_on: any
          command: |
            python -m zoo.resources.${{ matrix.resource }} tags_export -o tags_db && \
            echo "CRAWL_SUCCESS=yes" >> $GITHUB_ENV
            ls -al tags_db
      - name: Package Data to Zip Archive
        shell: bash
        if: ${{ env.CRAWL_SUCCESS == 'yes' }}
        run: |
          cd tags_db
          zip -r "${{ matrix.resource }}.zip" *
          cd ..
          tree tags_db
      # upload-artifact@v4 does not allow several jobs to upload to the same
      # artifact name, so every matrix entry publishes its own artifact.
      - name: Upload the character databases
        uses: actions/upload-artifact@v4
        if: ${{ env.CRAWL_SUCCESS == 'yes' }}
        with:
          name: character-database-${{ matrix.resource }}
          path: tags_db/${{ matrix.resource }}.zip

  # Collects every per-resource archive produced by `data_crawl`, unpacks
  # them into `site_tags/` and uploads that directory with `hfutils`.
  data_upload:
    name: Data Upload
    runs-on: ${{ matrix.os }}
    needs:
      - data_crawl
    strategy:
      fail-fast: false
      matrix:
        os:
          - 'ubuntu-latest'
        python-version:
          - '3.8'
    steps:
      # Install the interpreter declared in the matrix before using pip.
      - name: Set up python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Prepare the repository
        shell: bash
        run: |
          sudo apt-get update
          sudo apt-get install -y tree
          pip install -U hfutils
      # `merge-multiple` places the files of all matching artifacts directly
      # in `site_tags/` instead of one sub-directory per artifact.
      - name: Download from artifact
        uses: actions/download-artifact@v4
        with:
          pattern: 'character-database-*'
          path: site_tags
          merge-multiple: true
      - name: See what is in this path
        shell: bash
        run: |
          mkdir -p site_tags
          cd site_tags
          # nullglob: skip the loop cleanly when no archive was downloaded.
          shopt -s nullglob
          for zfile in *.zip; do unzip -o "$zfile"; done
          rm -rf -- *.zip
          tree .
      - name: Push models to hugging face repository
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          hfutils whoami
          hfutils upload -r deepghs/site_tags -i site_tags -d . -m "dev(narugo): auto sync"