Merge pull request #113 from UPPMAX/main
Add Continuous Integration script to reproduce error `assert q.is_cuda and k.is_cuda and v.is_cuda`
Zhihan1996 authored Dec 14, 2024
2 parents c0e55cc + 105bb1c commit 63a0e88
Showing 3 changed files with 57 additions and 0 deletions.
24 changes: 24 additions & 0 deletions .github/workflows/check_build.yml
@@ -0,0 +1,24 @@
# Checks if the build works,
# by installing the requirements
# and then running the example code

name: Check build

on:
  push:
    branches:
      - main
jobs:
  check_build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: actions/setup-python@v2
        with:
          python-version: 3.8

      - name: install required packages
        run: python3 -m pip install -r requirements.txt

      - name: run example code shown in README
        run: python3 .github/workflows/example_huggingface_v4_28.py
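
The workflow above installs the pinned requirements and then runs the README example on a CPU-only ubuntu-latest runner. A minimal sketch of reproducing the same two steps locally, not part of this commit and assuming the repository root as the working directory:

# Hypothetical local equivalent of the two workflow steps above (not part of this commit).
# Assumes the repository root as the working directory and that requirements.txt exists there.
import subprocess
import sys

# Mirrors "python3 -m pip install -r requirements.txt"
subprocess.run([sys.executable, "-m", "pip", "install", "-r", "requirements.txt"], check=True)

# Mirrors "python3 .github/workflows/example_huggingface_v4_28.py"
subprocess.run([sys.executable, ".github/workflows/example_huggingface_v4_28.py"], check=True)
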
16 changes: 16 additions & 0 deletions .github/workflows/example_huggingface_newer_than_v4_28.py
@@ -0,0 +1,16 @@
import torch
from transformers import AutoTokenizer, AutoModel
from transformers.models.bert.configuration_bert import BertConfig

tokenizer = AutoTokenizer.from_pretrained("zhihan1996/DNABERT-2-117M", trust_remote_code=True)
config = BertConfig.from_pretrained("zhihan1996/DNABERT-2-117M")
model = AutoModel.from_pretrained("zhihan1996/DNABERT-2-117M", trust_remote_code=True, config=config)

dna = "ACGTAGCATCGGATCTATCTATCGACACTTGGTTATCGATCTACGAGCATCTCGTTAGC"
inputs = tokenizer(dna, return_tensors = 'pt')["input_ids"]
hidden_states = model(inputs)[0] # [1, sequence_length, 768]

# embedding with mean pooling
embedding_mean = torch.mean(hidden_states[0], dim=0)
print(embedding_mean.shape) # expect to be 768

# embedding with max pooling
embedding_max = torch.max(hidden_states[0], dim=0)[0]
print(embedding_max.shape) # expect to be 768
17 changes: 17 additions & 0 deletions .github/workflows/example_huggingface_v4_28.py
@@ -0,0 +1,17 @@
import torch
from transformers import AutoTokenizer, AutoModel

tokenizer = AutoTokenizer.from_pretrained("zhihan1996/DNABERT-2-117M", trust_remote_code=True)
model = AutoModel.from_pretrained("zhihan1996/DNABERT-2-117M", trust_remote_code=True)

dna = "ACGTAGCATCGGATCTATCTATCGACACTTGGTTATCGATCTACGAGCATCTCGTTAGC"
inputs = tokenizer(dna, return_tensors = 'pt')["input_ids"]
hidden_states = model(inputs)[0] # [1, sequence_length, 768]

# embedding with mean pooling
embedding_mean = torch.mean(hidden_states[0], dim=0)
print(embedding_mean.shape) # expect to be 768

# embedding with max pooling
embedding_max = torch.max(hidden_states[0], dim=0)[0]
print(embedding_max.shape) # expect to be 768
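
The assertion quoted in the commit title, `assert q.is_cuda and k.is_cuda and v.is_cuda`, appears to be raised by the model's custom attention code when the tensors live on the CPU, which is what happens on the ubuntu-latest runner above. A minimal sketch, assuming a CUDA-capable machine and not part of this commit, of moving both the model and the inputs to the GPU so the assertion holds:

import torch
from transformers import AutoTokenizer, AutoModel

# Sketch only: place the model and the input ids on a CUDA device so that the
# custom attention kernel's `q.is_cuda and k.is_cuda and v.is_cuda` check passes.
device = torch.device("cuda")  # assumes a GPU is available

tokenizer = AutoTokenizer.from_pretrained("zhihan1996/DNABERT-2-117M", trust_remote_code=True)
model = AutoModel.from_pretrained("zhihan1996/DNABERT-2-117M", trust_remote_code=True).to(device)

dna = "ACGTAGCATCGGATCTATCTATCGACACTTGGTTATCGATCTACGAGCATCTCGTTAGC"
inputs = tokenizer(dna, return_tensors="pt")["input_ids"].to(device)
hidden_states = model(inputs)[0]  # [1, sequence_length, 768]
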
