[ExecuTorch][Llama] Decouple input sequence length from kv cache context length #7556
trunk.yml (on: pull_request)
gather-models: 6s
test-arm-backend-delegation / linux-job: 20m 40s
test-arm-reference-delegation / linux-job: 26m 43s
test-coreml-delegate / macos-job: 51m 8s
test-apple-model / job
Matrix: test-custom-ops-macos
Matrix: test-demo-backend-delegation
Matrix: test-huggingface-transformers
Matrix: test-llama-runner-mac
Matrix: test-llama-runner-qnn-linux
Matrix: test-pybind-build-macos
Matrix: test-qnn-model
Matrix: test-selective-build-macos
Matrix: test-models-macos