Skip to content

Commit

Permalink
update handler
Browse files Browse the repository at this point in the history
Signed-off-by: pandyamarut <[email protected]>
  • Loading branch information
pandyamarut committed Sep 17, 2024
1 parent 0a34ed8 commit 85815b5
Showing 1 changed file with 14 additions and 2 deletions.
16 changes: 14 additions & 2 deletions src/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,20 @@
import runpod
from typing import List
from tensorrt_llm import LLM, SamplingParams
from huggingface_hub import login

# Enable the TensorRT-LLM high-level-API engine build cache so repeated
# cold starts with the same model can reuse a previously built engine
# instead of rebuilding it from scratch.
os.environ["TLLM_HLAPI_BUILD_CACHE"] = "1"
# Optionally, set a custom cache directory
# os.environ["TLLM_HLAPI_BUILD_CACHE_ROOT"] = "/path/to/custom/cache"

# Authenticate with the Hugging Face Hub only when a token is provided.
# A token is required for gated/private models; public models download
# fine without one, so a missing HF_TOKEN must not crash the worker at
# import time (the original os.environ["HF_TOKEN"] raised a bare KeyError).
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)
class TRTLLMWorker:
def __init__(self, model_path: str):
Expand All @@ -19,12 +28,15 @@ def generate(self, prompts: List[str], max_tokens: int = 100) -> List[str]:
results = []
for output in outputs:
results.append(output.outputs[0].text)

return results

# Initialize the worker at module import time, outside the handler.
# This ensures the (expensive) model download / engine build happens only
# once per serverless cold start, not on every job. Note: exactly one
# TRTLLMWorker is constructed here — instantiating a hard-coded fallback
# model first and then reassigning `worker` would load (and immediately
# discard) an entire model.
#
# MODEL_PATH is a Hugging Face model id of the form "<org_name>/<model_name>",
# e.g. "meta-llama/Meta-Llama-3.1-8B-Instruct".
model_path = os.environ.get("MODEL_PATH")
if not model_path:
    # Fail fast with an actionable message instead of a bare KeyError.
    raise RuntimeError(
        "MODEL_PATH environment variable must be set to a Hugging Face "
        "model id, e.g. 'meta-llama/Meta-Llama-3.1-8B-Instruct'"
    )
worker = TRTLLMWorker(model_path)
def handler(job):
"""Handler function that will be used to process jobs."""
Expand Down

0 comments on commit 85815b5

Please sign in to comment.