diff --git a/.bruno/LocalAI Test Requests/tts/musicgen.bru b/.bruno/LocalAI Test Requests/tts/musicgen.bru
index a720b8b1c780..900173eb54dd 100644
--- a/.bruno/LocalAI Test Requests/tts/musicgen.bru
+++ b/.bruno/LocalAI Test Requests/tts/musicgen.bru
@@ -16,7 +16,7 @@ headers {
 body:json {
   {
-    "backend": "transformers-musicgen",
+    "backend": "transformers",
     "model": "facebook/musicgen-small",
     "input": "80s Synths playing Jazz"
   }
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 8fa0cca5a664..570ac569e841 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -81,10 +81,6 @@ updates:
     directory: "/backend/python/transformers"
     schedule:
       interval: "weekly"
-  - package-ecosystem: "pip"
-    directory: "/backend/python/transformers-musicgen"
-    schedule:
-      interval: "weekly"
  - package-ecosystem: "pip"
    directory: "/backend/python/vllm"
    schedule:
diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml
index 3c2fee37aff3..eacd3ab0f568 100644
--- a/.github/workflows/test-extra.yml
+++ b/.github/workflows/test-extra.yml
@@ -153,27 +153,27 @@ jobs:
           make --jobs=5 --output-sync=target -C backend/python/openvoice
           make --jobs=5 --output-sync=target -C backend/python/openvoice test
 
-  tests-transformers-musicgen:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-        with:
-          submodules: true
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install build-essential ffmpeg
-          # Install UV
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-          sudo apt-get install -y libopencv-dev
-          pip install --user --no-cache-dir grpcio-tools==1.64.1
+  # tests-transformers-musicgen:
+  #   runs-on: ubuntu-latest
+  #   steps:
+  #     - name: Clone
+  #       uses: actions/checkout@v4
+  #       with:
+  #         submodules: true
+  #     - name: Dependencies
+  #       run: |
+  #         sudo apt-get update
+  #         sudo apt-get install build-essential ffmpeg
+  #         # Install UV
+  #         curl -LsSf https://astral.sh/uv/install.sh | sh
+  #         sudo apt-get install -y ca-certificates cmake curl patch python3-pip
+  #         sudo apt-get install -y libopencv-dev
+  #         pip install --user --no-cache-dir grpcio-tools==1.64.1
 
-      - name: Test transformers-musicgen
-        run: |
-          make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen
-          make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test
+  #     - name: Test transformers-musicgen
+  #       run: |
+  #         make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen
+  #         make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test
 
 # tests-bark:
 #   runs-on: ubuntu-latest
diff --git a/Dockerfile b/Dockerfile
index 354ef2983653..9fb07516ea13 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -15,7 +15,7 @@ ARG TARGETARCH
 ARG TARGETVARIANT
 
 ENV DEBIAN_FRONTEND=noninteractive
-ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
+ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
 
 RUN apt-get update && \
@@ -448,9 +448,6 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
     ; fi && \
     if [[ ( "${EXTRA_BACKENDS}" =~ "diffusers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
         make -C backend/python/diffusers \
-    ; fi && \
-    if [[ ( "${EXTRA_BACKENDS}" =~ "transformers-musicgen" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
-        make -C backend/python/transformers-musicgen \
     ; fi
 
 RUN if [[ ( "${EXTRA_BACKENDS}" =~ "kokoro" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
diff --git a/Makefile b/Makefile
index f08d1a9c615b..03468ffb0a8a 100644
--- a/Makefile
+++ b/Makefile
@@ -583,10 +583,10 @@ protogen-go-clean:
 	$(RM) bin/*
 
 .PHONY: protogen-python
-protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen kokoro-protogen vllm-protogen openvoice-protogen
+protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen kokoro-protogen vllm-protogen openvoice-protogen
 
 .PHONY: protogen-python-clean
-protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean kokoro-protogen-clean vllm-protogen-clean openvoice-protogen-clean
+protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean parler-tts-protogen-clean kokoro-protogen-clean vllm-protogen-clean openvoice-protogen-clean
 
 .PHONY: autogptq-protogen
 autogptq-protogen:
@@ -668,14 +668,6 @@ parler-tts-protogen:
 parler-tts-protogen-clean:
 	$(MAKE) -C backend/python/parler-tts protogen-clean
 
-.PHONY: transformers-musicgen-protogen
-transformers-musicgen-protogen:
-	$(MAKE) -C backend/python/transformers-musicgen protogen
-
-.PHONY: transformers-musicgen-protogen-clean
-transformers-musicgen-protogen-clean:
-	$(MAKE) -C backend/python/transformers-musicgen protogen-clean
-
 .PHONY: kokoro-protogen
 kokoro-protogen:
 	$(MAKE) -C backend/python/kokoro protogen
@@ -712,7 +704,6 @@ prepare-extra-conda-environments: protogen-python
 	$(MAKE) -C backend/python/sentencetransformers
 	$(MAKE) -C backend/python/rerankers
 	$(MAKE) -C backend/python/transformers
-	$(MAKE) -C backend/python/transformers-musicgen
 	$(MAKE) -C backend/python/parler-tts
 	$(MAKE) -C backend/python/kokoro
 	$(MAKE) -C backend/python/openvoice
diff --git a/backend/python/transformers-musicgen/Makefile b/backend/python/transformers-musicgen/Makefile
deleted file mode 100644
index 06badf6d1d51..000000000000
--- a/backend/python/transformers-musicgen/Makefile
+++ /dev/null
@@ -1,29 +0,0 @@
-.PHONY: transformers-musicgen
-transformers-musicgen: protogen
-	bash install.sh
-
-.PHONY: run
-run: protogen
-	@echo "Running transformers..."
-	bash run.sh
-	@echo "transformers run."
-
-.PHONY: test
-test: protogen
-	@echo "Testing transformers..."
-	bash test.sh
-	@echo "transformers tested."
-
-.PHONY: protogen
-protogen: backend_pb2_grpc.py backend_pb2.py
-
-.PHONY: protogen-clean
-protogen-clean:
-	$(RM) backend_pb2_grpc.py backend_pb2.py
-
-backend_pb2_grpc.py backend_pb2.py:
-	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
-
-.PHONY: clean
-clean: protogen-clean
-	rm -rf venv __pycache__
\ No newline at end of file
diff --git a/backend/python/transformers-musicgen/README.md b/backend/python/transformers-musicgen/README.md
deleted file mode 100644
index bf7fef840948..000000000000
--- a/backend/python/transformers-musicgen/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-# Creating a separate environment for the transformers project
-
-```
-make transformers-musicgen
-```
\ No newline at end of file
diff --git a/backend/python/transformers-musicgen/backend.py b/backend/python/transformers-musicgen/backend.py
deleted file mode 100644
index b9f1facf042a..000000000000
--- a/backend/python/transformers-musicgen/backend.py
+++ /dev/null
@@ -1,176 +0,0 @@
-#!/usr/bin/env python3
-"""
-Extra gRPC server for MusicgenForConditionalGeneration models.
-"""
-from concurrent import futures
-
-import argparse
-import signal
-import sys
-import os
-
-import time
-import backend_pb2
-import backend_pb2_grpc
-
-import grpc
-
-from scipy.io import wavfile
-from transformers import AutoProcessor, MusicgenForConditionalGeneration
-
-_ONE_DAY_IN_SECONDS = 60 * 60 * 24
-
-# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
-MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
-
-# Implement the BackendServicer class with the service methods
-class BackendServicer(backend_pb2_grpc.BackendServicer):
-    """
-    A gRPC servicer for the backend service.
-
-    This class implements the gRPC methods for the backend service, including Health, LoadModel, and Embedding.
-    """
-    def Health(self, request, context):
-        """
-        A gRPC method that returns the health status of the backend service.
-
-        Args:
-            request: A HealthRequest object that contains the request parameters.
-            context: A grpc.ServicerContext object that provides information about the RPC.
-
-        Returns:
-            A Reply object that contains the health status of the backend service.
-        """
-        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
-
-    def LoadModel(self, request, context):
-        """
-        A gRPC method that loads a model into memory.
-
-        Args:
-            request: A LoadModelRequest object that contains the request parameters.
-            context: A grpc.ServicerContext object that provides information about the RPC.
-
-        Returns:
-            A Result object that contains the result of the LoadModel operation.
-        """
-        model_name = request.Model
-        try:
-            self.processor = AutoProcessor.from_pretrained(model_name)
-            self.model = MusicgenForConditionalGeneration.from_pretrained(model_name)
-        except Exception as err:
-            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
-
-        return backend_pb2.Result(message="Model loaded successfully", success=True)
-
-    def SoundGeneration(self, request, context):
-        model_name = request.model
-        if model_name == "":
-            return backend_pb2.Result(success=False, message="request.model is required")
-        try:
-            self.processor = AutoProcessor.from_pretrained(model_name)
-            self.model = MusicgenForConditionalGeneration.from_pretrained(model_name)
-            inputs = None
-            if request.text == "":
-                inputs = self.model.get_unconditional_inputs(num_samples=1)
-            elif request.HasField('src'):
-                # TODO SECURITY CODE GOES HERE LOL
-                # WHO KNOWS IF THIS WORKS???
-                sample_rate, wsamples = wavfile.read('path_to_your_file.wav')
-
-                if request.HasField('src_divisor'):
-                    wsamples = wsamples[: len(wsamples) // request.src_divisor]
-
-                inputs = self.processor(
-                    audio=wsamples,
-                    sampling_rate=sample_rate,
-                    text=[request.text],
-                    padding=True,
-                    return_tensors="pt",
-                )
-            else:
-                inputs = self.processor(
-                    text=[request.text],
-                    padding=True,
-                    return_tensors="pt",
-                )
-
-            tokens = 256
-            if request.HasField('duration'):
-                tokens = int(request.duration * 51.2) # 256 tokens = 5 seconds, therefore 51.2 tokens is one second
-            guidance = 3.0
-            if request.HasField('temperature'):
-                guidance = request.temperature
-            dosample = True
-            if request.HasField('sample'):
-                dosample = request.sample
-            audio_values = self.model.generate(**inputs, do_sample=dosample, guidance_scale=guidance, max_new_tokens=tokens)
-            print("[transformers-musicgen] SoundGeneration generated!", file=sys.stderr)
-            sampling_rate = self.model.config.audio_encoder.sampling_rate
-            wavfile.write(request.dst, rate=sampling_rate, data=audio_values[0, 0].numpy())
-            print("[transformers-musicgen] SoundGeneration saved to", request.dst, file=sys.stderr)
-            print("[transformers-musicgen] SoundGeneration for", file=sys.stderr)
-            print("[transformers-musicgen] SoundGeneration requested tokens", tokens, file=sys.stderr)
-            print(request, file=sys.stderr)
-        except Exception as err:
-            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
-        return backend_pb2.Result(success=True)
-
-
-# The TTS endpoint is older, and provides fewer features, but exists for compatibility reasons
-    def TTS(self, request, context):
-        model_name = request.model
-        if model_name == "":
-            return backend_pb2.Result(success=False, message="request.model is required")
-        try:
-            self.processor = AutoProcessor.from_pretrained(model_name)
-            self.model = MusicgenForConditionalGeneration.from_pretrained(model_name)
-            inputs = self.processor(
-                text=[request.text],
-                padding=True,
-                return_tensors="pt",
-            )
-            tokens = 512 # No good place to set the "length" in TTS, so use 10s as a sane default
-            audio_values = self.model.generate(**inputs, max_new_tokens=tokens)
-            print("[transformers-musicgen] TTS generated!", file=sys.stderr)
-            sampling_rate = self.model.config.audio_encoder.sampling_rate
-            write_wav(request.dst, rate=sampling_rate, data=audio_values[0, 0].numpy())
-            print("[transformers-musicgen] TTS saved to", request.dst, file=sys.stderr)
-            print("[transformers-musicgen] TTS for", file=sys.stderr)
-            print(request, file=sys.stderr)
-        except Exception as err:
-            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
-        return backend_pb2.Result(success=True)
-
-
-def serve(address):
-    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
-    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
-    server.add_insecure_port(address)
-    server.start()
-    print("[transformers-musicgen] Server started. Listening on: " + address, file=sys.stderr)
-
-    # Define the signal handler function
-    def signal_handler(sig, frame):
-        print("[transformers-musicgen] Received termination signal. Shutting down...")
-        server.stop(0)
-        sys.exit(0)
-
-    # Set the signal handlers for SIGINT and SIGTERM
-    signal.signal(signal.SIGINT, signal_handler)
-    signal.signal(signal.SIGTERM, signal_handler)
-
-    try:
-        while True:
-            time.sleep(_ONE_DAY_IN_SECONDS)
-    except KeyboardInterrupt:
-        server.stop(0)
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Run the gRPC server.")
-    parser.add_argument(
-        "--addr", default="localhost:50051", help="The address to bind the server to."
-    )
-    args = parser.parse_args()
-    print(f"[transformers-musicgen] startup: {args}", file=sys.stderr)
-    serve(args.addr)
diff --git a/backend/python/transformers-musicgen/install.sh b/backend/python/transformers-musicgen/install.sh
deleted file mode 100755
index 36443ef1c559..000000000000
--- a/backend/python/transformers-musicgen/install.sh
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/bin/bash
-set -e
-
-source $(dirname $0)/../common/libbackend.sh
-
-# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
-# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
-# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
-# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
-if [ "x${BUILD_PROFILE}" == "xintel" ]; then
-    EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
-fi
-
-installRequirements
diff --git a/backend/python/transformers-musicgen/requirements-cpu.txt b/backend/python/transformers-musicgen/requirements-cpu.txt
deleted file mode 100644
index 2021fc201f7e..000000000000
--- a/backend/python/transformers-musicgen/requirements-cpu.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-transformers
-accelerate
-torch==2.4.1
\ No newline at end of file
diff --git a/backend/python/transformers-musicgen/requirements-cublas11.txt b/backend/python/transformers-musicgen/requirements-cublas11.txt
deleted file mode 100644
index cd2c9fdb668b..000000000000
--- a/backend/python/transformers-musicgen/requirements-cublas11.txt
+++ /dev/null
@@ -1,4 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/cu118
-transformers
-accelerate
-torch==2.4.1+cu118
\ No newline at end of file
diff --git a/backend/python/transformers-musicgen/requirements-cublas12.txt b/backend/python/transformers-musicgen/requirements-cublas12.txt
deleted file mode 100644
index 2021fc201f7e..000000000000
--- a/backend/python/transformers-musicgen/requirements-cublas12.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-transformers
-accelerate
-torch==2.4.1
\ No newline at end of file
diff --git a/backend/python/transformers-musicgen/requirements-hipblas.txt b/backend/python/transformers-musicgen/requirements-hipblas.txt
deleted file mode 100644
index 122b20326817..000000000000
--- a/backend/python/transformers-musicgen/requirements-hipblas.txt
+++ /dev/null
@@ -1,4 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/rocm6.0
-transformers
-accelerate
-torch==2.4.1+rocm6.0
\ No newline at end of file
diff --git a/backend/python/transformers-musicgen/requirements-intel.txt b/backend/python/transformers-musicgen/requirements-intel.txt
deleted file mode 100644
index ac2feb420af6..000000000000
--- a/backend/python/transformers-musicgen/requirements-intel.txt
+++ /dev/null
@@ -1,8 +0,0 @@
---extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch==2.3.110+xpu
-transformers
-oneccl_bind_pt==2.3.100+xpu
-accelerate
-torch==2.3.1+cxx11.abi
-optimum[openvino]
-setuptools
\ No newline at end of file
diff --git a/backend/python/transformers-musicgen/requirements.txt b/backend/python/transformers-musicgen/requirements.txt
deleted file mode 100644
index f58e1e80d272..000000000000
--- a/backend/python/transformers-musicgen/requirements.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-grpcio==1.69.0
-protobuf
-scipy==1.14.0
-certifi
\ No newline at end of file
diff --git a/backend/python/transformers-musicgen/run.sh b/backend/python/transformers-musicgen/run.sh
deleted file mode 100755
index 375c07e5f426..000000000000
--- a/backend/python/transformers-musicgen/run.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/bash
-source $(dirname $0)/../common/libbackend.sh
-
-startBackend $@
\ No newline at end of file
diff --git a/backend/python/transformers-musicgen/test.py b/backend/python/transformers-musicgen/test.py
deleted file mode 100644
index 295de65eb2b7..000000000000
--- a/backend/python/transformers-musicgen/test.py
+++ /dev/null
@@ -1,100 +0,0 @@
-"""
-A test script to test the gRPC service
-"""
-import unittest
-import subprocess
-import time
-import backend_pb2
-import backend_pb2_grpc
-
-import grpc
-
-
-class TestBackendServicer(unittest.TestCase):
-    """
-    TestBackendServicer is the class that tests the gRPC service
-    """
-    def setUp(self):
-        """
-        This method sets up the gRPC service by starting the server
-        """
-        self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
-        time.sleep(10)
-
-    def tearDown(self) -> None:
-        """
-        This method tears down the gRPC service by terminating the server
-        """
-        self.service.terminate()
-        self.service.wait()
-
-    def test_server_startup(self):
-        """
-        This method tests if the server starts up successfully
-        """
-        try:
-            self.setUp()
-            with grpc.insecure_channel("localhost:50051") as channel:
-                stub = backend_pb2_grpc.BackendStub(channel)
-                response = stub.Health(backend_pb2.HealthMessage())
-                self.assertEqual(response.message, b'OK')
-        except Exception as err:
-            print(err)
-            self.fail("Server failed to start")
-        finally:
-            self.tearDown()
-
-    def test_load_model(self):
-        """
-        This method tests if the model is loaded successfully
-        """
-        try:
-            self.setUp()
-            with grpc.insecure_channel("localhost:50051") as channel:
-                stub = backend_pb2_grpc.BackendStub(channel)
-                response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small"))
-                self.assertTrue(response.success)
-                self.assertEqual(response.message, "Model loaded successfully")
-        except Exception as err:
-            print(err)
-            self.fail("LoadModel service failed")
-        finally:
-            self.tearDown()
-
-    def test_tts(self):
-        """
-        This method tests if TTS is generated successfully
-        """
-        try:
-            self.setUp()
-            with grpc.insecure_channel("localhost:50051") as channel:
-                stub = backend_pb2_grpc.BackendStub(channel)
-                response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small"))
-                self.assertTrue(response.success)
-                tts_request = backend_pb2.TTSRequest(text="80s TV news production music hit for tonight's biggest story")
-                tts_response = stub.TTS(tts_request)
-                self.assertIsNotNone(tts_response)
-        except Exception as err:
-            print(err)
-            self.fail("TTS service failed")
-        finally:
-            self.tearDown()
-
-    def test_sound_generation(self):
-        """
-        This method tests if SoundGeneration is generated successfully
-        """
-        try:
-            self.setUp()
-            with grpc.insecure_channel("localhost:50051") as channel:
-                stub = backend_pb2_grpc.BackendStub(channel)
-                response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small"))
-                self.assertTrue(response.success)
-                sg_request = backend_pb2.SoundGenerationRequest(text="80s TV news production music hit for tonight's biggest story")
-                sg_response = stub.SoundGeneration(sg_request)
-                self.assertIsNotNone(sg_response)
-        except Exception as err:
-            print(err)
-            self.fail("SoundGeneration service failed")
-        finally:
-            self.tearDown()
\ No newline at end of file
diff --git a/backend/python/transformers-musicgen/test.sh b/backend/python/transformers-musicgen/test.sh
deleted file mode 100755
index 6940b0661df2..000000000000
--- a/backend/python/transformers-musicgen/test.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/bash
-set -e
-
-source $(dirname $0)/../common/libbackend.sh
-
-runUnittests
diff --git a/backend/python/transformers/backend.py b/backend/python/transformers/backend.py
index 2075012ed5cf..3f6838ad1d79 100644
--- a/backend/python/transformers/backend.py
+++ b/backend/python/transformers/backend.py
@@ -22,6 +22,8 @@
 XPU=os.environ.get("XPU", "0") == "1"
 
 from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria
+from transformers import AutoProcessor, MusicgenForConditionalGeneration
+from scipy.io import wavfile
 
 _ONE_DAY_IN_SECONDS = 60 * 60 * 24
 
@@ -191,6 +193,9 @@ def LoadModel(self, request, context):
                                                export=True,
                                                device=device_map)
                 self.OV = True
+            elif request.Type == "MusicgenForConditionalGeneration":
+                self.processor = AutoProcessor.from_pretrained(model_name)
+                self.model = MusicgenForConditionalGeneration.from_pretrained(model_name)
             else:
                 print("Automodel", file=sys.stderr)
                 self.model = AutoModel.from_pretrained(model_name,
@@ -201,19 +206,22 @@ def LoadModel(self, request, context):
                                                      torch_dtype=compute)
             if request.ContextSize > 0:
                 self.max_tokens = request.ContextSize
-            else:
+            elif request.Type != "MusicgenForConditionalGeneration":
                 self.max_tokens = self.model.config.max_position_embeddings
+            else:
+                self.max_tokens = 512
 
-            self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True)
-            self.XPU = False
-
-            if XPU and self.OV == False:
-                self.XPU = True
-                try:
-                    print("Optimizing model", model_name, "to XPU.", file=sys.stderr)
-                    self.model = ipex.optimize_transformers(self.model, inplace=True, dtype=torch.float16, device="xpu")
-                except Exception as err:
-                    print("Not using XPU:", err, file=sys.stderr)
+            if request.Type != "MusicgenForConditionalGeneration":
+                self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True)
+                self.XPU = False
+
+                if XPU and self.OV == False:
+                    self.XPU = True
+                    try:
+                        print("Optimizing model", model_name, "to XPU.", file=sys.stderr)
+                        self.model = ipex.optimize_transformers(self.model, inplace=True, dtype=torch.float16, device="xpu")
+                    except Exception as err:
+                        print("Not using XPU:", err, file=sys.stderr)
 
         except Exception as err:
             print("Error:", err, file=sys.stderr)
@@ -380,6 +388,93 @@ async def PredictStream(self, request, context):
         finally:
             await iterations.aclose()
 
+    def SoundGeneration(self, request, context):
+        model_name = request.model
+        try:
+            if self.processor is None:
+                if model_name == "":
+                    return backend_pb2.Result(success=False, message="request.model is required")
+                self.processor = AutoProcessor.from_pretrained(model_name)
+            if self.model is None:
+                if model_name == "":
+                    return backend_pb2.Result(success=False, message="request.model is required")
+                self.model = MusicgenForConditionalGeneration.from_pretrained(model_name)
+            inputs = None
+            if request.text == "":
+                inputs = self.model.get_unconditional_inputs(num_samples=1)
+            elif request.HasField('src'):
+                # TODO: validate request.src (a caller-supplied path) before reading it
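+                # wavfile.read returns (sample_rate, samples); the samples are passed
+                # to the processor below to condition generation alongside the text prompt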
+                sample_rate, wsamples = wavfile.read(request.src)
+
+                if request.HasField('src_divisor'):
+                    wsamples = wsamples[: len(wsamples) // request.src_divisor]
+
+                inputs = self.processor(
+                    audio=wsamples,
+                    sampling_rate=sample_rate,
+                    text=[request.text],
+                    padding=True,
+                    return_tensors="pt",
+                )
+            else:
+                inputs = self.processor(
+                    text=[request.text],
+                    padding=True,
+                    return_tensors="pt",
+                )
+
+            tokens = 256
+            if request.HasField('duration'):
+                tokens = int(request.duration * 51.2) # 256 tokens = 5 seconds, so ~51.2 tokens per second
+            guidance = 3.0
+            if request.HasField('temperature'):
+                guidance = request.temperature
+            dosample = True
+            if request.HasField('sample'):
+                dosample = request.sample
+            audio_values = self.model.generate(**inputs, do_sample=dosample, guidance_scale=guidance, max_new_tokens=tokens)
+            print("[transformers-musicgen] SoundGeneration generated!", file=sys.stderr)
+            sampling_rate = self.model.config.audio_encoder.sampling_rate
+            wavfile.write(request.dst, rate=sampling_rate, data=audio_values[0, 0].numpy())
+            print("[transformers-musicgen] SoundGeneration saved to", request.dst, file=sys.stderr)
+            print("[transformers-musicgen] SoundGeneration for", file=sys.stderr)
+            print("[transformers-musicgen] SoundGeneration requested tokens", tokens, file=sys.stderr)
+            print(request, file=sys.stderr)
+        except Exception as err:
+            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
+        return backend_pb2.Result(success=True)
+
+
+# The TTS endpoint is older, and provides fewer features, but exists for compatibility reasons
+    def TTS(self, request, context):
+        model_name = request.model
+        try:
+            if self.processor is None:
+                if model_name == "":
+                    return backend_pb2.Result(success=False, message="request.model is required")
+                self.processor = AutoProcessor.from_pretrained(model_name)
+            if self.model is None:
+                if model_name == "":
+                    return backend_pb2.Result(success=False, message="request.model is required")
+                self.model = MusicgenForConditionalGeneration.from_pretrained(model_name)
+            inputs = self.processor(
+                text=[request.text],
+                padding=True,
+                return_tensors="pt",
+            )
+            tokens = 512 # No good place to set the "length" in TTS, so use 10s as a sane default
+            audio_values = self.model.generate(**inputs, max_new_tokens=tokens)
+            print("[transformers-musicgen] TTS generated!", file=sys.stderr)
+            sampling_rate = self.model.config.audio_encoder.sampling_rate
+            wavfile.write(request.dst, rate=sampling_rate, data=audio_values[0, 0].numpy())
+            print("[transformers-musicgen] TTS saved to", request.dst, file=sys.stderr)
+            print("[transformers-musicgen] TTS for", file=sys.stderr)
+            print(request, file=sys.stderr)
+        except Exception as err:
+            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
+        return backend_pb2.Result(success=True)
+
 async def serve(address):
     # Start asyncio gRPC server
     server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
diff --git a/backend/python/transformers/requirements.txt b/backend/python/transformers/requirements.txt
index a1eea776835c..262dd17af74a 100644
--- a/backend/python/transformers/requirements.txt
+++ b/backend/python/transformers/requirements.txt
@@ -1,4 +1,5 @@
 grpcio==1.69.0
 protobuf
 certifi
-setuptools
\ No newline at end of file
+setuptools
+scipy==1.14.0
\ No newline at end of file
diff --git a/backend/python/transformers/test.py b/backend/python/transformers/test.py
index aab3c05e3574..305b0a938c52 100644
--- a/backend/python/transformers/test.py
+++ b/backend/python/transformers/test.py
@@ -19,6 +19,7 @@ def setUp(self):
         This method sets up the gRPC service by starting the server
         """
         self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
+        time.sleep(10)
 
     def tearDown(self) -> None:
         """
@@ -31,7 +32,6 @@ def test_server_startup(self):
         """
         This method tests if the server starts up successfully
         """
-        time.sleep(10)
         try:
             self.setUp()
             with grpc.insecure_channel("localhost:50051") as channel:
@@ -48,7 +48,6 @@ def test_load_model(self):
         """
         This method tests if the model is loaded successfully
         """
-        time.sleep(10)
         try:
             self.setUp()
             with grpc.insecure_channel("localhost:50051") as channel:
@@ -66,7 +65,6 @@ def test_embedding(self):
         """
         This method tests if the embeddings are generated successfully
         """
-        time.sleep(10)
         try:
             self.setUp()
             with grpc.insecure_channel("localhost:50051") as channel:
@@ -80,5 +78,60 @@ def test_embedding(self):
         except Exception as err:
             print(err)
             self.fail("Embedding service failed")
+        finally:
+            self.tearDown()
+
+    def test_audio_load_model(self):
+        """
+        This method tests if the model is loaded successfully
+        """
+        try:
+            self.setUp()
+            with grpc.insecure_channel("localhost:50051") as channel:
+                stub = backend_pb2_grpc.BackendStub(channel)
+                response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small", Type="MusicgenForConditionalGeneration"))
+                self.assertTrue(response.success)
+                self.assertEqual(response.message, "Model loaded successfully")
+        except Exception as err:
+            print(err)
+            self.fail("LoadModel service failed")
+        finally:
+            self.tearDown()
+
+    def test_tts(self):
+        """
+        This method tests if TTS is generated successfully
+        """
+        try:
+            self.setUp()
+            with grpc.insecure_channel("localhost:50051") as channel:
+                stub = backend_pb2_grpc.BackendStub(channel)
+                response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small", Type="MusicgenForConditionalGeneration"))
+                self.assertTrue(response.success)
+                tts_request = backend_pb2.TTSRequest(text="80s TV news production music hit for tonight's biggest story")
+                tts_response = stub.TTS(tts_request)
+                self.assertIsNotNone(tts_response)
+        except Exception as err:
+            print(err)
+            self.fail("TTS service failed")
+        finally:
+            self.tearDown()
+
+    def test_sound_generation(self):
+        """
+        This method tests if SoundGeneration is generated successfully
+        """
+        try:
+            self.setUp()
+            with grpc.insecure_channel("localhost:50051") as channel:
+                stub = backend_pb2_grpc.BackendStub(channel)
+                response = stub.LoadModel(backend_pb2.ModelOptions(Model="facebook/musicgen-small", Type="MusicgenForConditionalGeneration"))
+                self.assertTrue(response.success)
+                sg_request = backend_pb2.SoundGenerationRequest(text="80s TV news production music hit for tonight's biggest story")
+                sg_response = stub.SoundGeneration(sg_request)
+                self.assertIsNotNone(sg_response)
+        except Exception as err:
+            print(err)
+            self.fail("SoundGeneration service failed")
         finally:
             self.tearDown()
\ No newline at end of file
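
After this change MusicGen is served by the generic `transformers` backend, selected via `Type="MusicgenForConditionalGeneration"`. A minimal client sketch mirroring the updated tests; the model name, field names, and prompt come from this diff, while the server address and output path are illustrative assumptions:

    import grpc

    import backend_pb2
    import backend_pb2_grpc

    # Assumes a transformers backend was started locally, e.g.:
    #   python3 backend.py --addr localhost:50051
    with grpc.insecure_channel("localhost:50051") as channel:
        stub = backend_pb2_grpc.BackendStub(channel)
        # Load MusicGen through the merged backend; Type selects the
        # MusicgenForConditionalGeneration path added above.
        res = stub.LoadModel(backend_pb2.ModelOptions(
            Model="facebook/musicgen-small",
            Type="MusicgenForConditionalGeneration"))
        assert res.success, res.message
        # dst (an assumed path) is where the backend writes the generated WAV.
        stub.SoundGeneration(backend_pb2.SoundGenerationRequest(
            text="80s Synths playing Jazz",
            dst="/tmp/musicgen.wav"))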