diff --git a/cookbooks/Gradio/huggingface.aiconfig.json b/cookbooks/Gradio/huggingface.aiconfig.json
index 35567a0cd..073c925b5 100644
--- a/cookbooks/Gradio/huggingface.aiconfig.json
+++ b/cookbooks/Gradio/huggingface.aiconfig.json
@@ -4,17 +4,23 @@
   "metadata": {
     "parameters": {},
     "models": {
-      "TextGeneration": {
-        "model": "stevhliu/my_awesome_billsum_model",
-        "min_length": 10,
-        "max_length": 30
+      "AudioSpeechRecognition": {
+        "model": "openai/whisper-small"
       },
-      "ImageToText": {
+      "Image2Text": {
         "model": "Salesforce/blip-image-captioning-base"
       },
       "Text2Speech": {
         "model": "suno/bark"
       },
+      "Text2Image": {
+        "model": "runwayml/stable-diffusion-v1-5"
+      },
+      "TextGeneration": {
+        "model": "stevhliu/my_awesome_billsum_model",
+        "min_length": 10,
+        "max_length": 30
+      },
       "TextSummarization": {
         "model": "facebook/bart-large-cnn"
       },
@@ -24,16 +30,13 @@
     },
     "default_model": "TextGeneration",
     "model_parsers": {
+      "AudioSpeechRecognition": "HuggingFaceAutomaticSpeechRecognitionTransformer",
       "Image2Text": "HuggingFaceImage2TextTransformer",
-      "Salesforce/blip-image-captioning-base": "HuggingFaceImage2TextTransformer",
       "Text2Speech": "HuggingFaceText2SpeechTransformer",
-      "suno/bark": "HuggingFaceText2SpeechTransformer",
+      "Text2Image": "HuggingFaceText2ImageTransformer",
       "TextGeneration": "HuggingFaceTextGenerationTransformer",
-      // "stevhliu/my_awesome_billsum_model": "HuggingFaceTextGenerationTransformer",
       "TextSummarization": "HuggingFaceTextSummarizationTransformer",
-      "facebook/bart-large-cnn": "HuggingFaceTextSummarizationTransformer",
-      "TextTranslation": "HuggingFaceTextTranslationTransformer",
-      "translation_en_to_fr": "HuggingFaceTextTranslationTransformer"
+      "TextTranslation": "HuggingFaceTextTranslationTransformer"
     }
   },
   "description": "The Tale of the Quick Brown Fox",
@@ -52,22 +55,14 @@
       "parameters": {
         "city": "New York"
       }
-    },
-    "outputs": [
-      {
-        "output_type": "execute_result",
-        "execution_count": 0,
-        "data": " a sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden sudden",
-        "metadata": {}
-      }
-    ]
+    }
   },
   {
     "name": "translate_instruction",
     "input": "Tell the tale of {{topic}}",
     "metadata": {
       "model": {
-        "name": "TextGeneration",
+        "name": "TextTranslation",
         "settings": {
           "min_length": "",
           "max_new_tokens": 100
@@ -76,8 +71,7 @@
       "parameters": {
         "topic": "the quick brown fox"
       }
-    },
-    "outputs": []
+    }
   },
   {
     "name": "summarize_story",
@@ -88,8 +82,7 @@
         "settings": {}
       },
       "parameters": {}
-    },
-    "outputs": []
+    }
   },
   {
     "name": "generate_audio_title",
@@ -97,8 +90,7 @@
     "metadata": {
       "model": "Text2Speech",
       "parameters": {}
-    },
-    "outputs": []
+    }
   },
   {
     "name": "generate_caption",
@@ -113,8 +105,7 @@
     "metadata": {
       "model": "Image2Text",
       "parameters": {}
-    },
-    "outputs": []
+    }
   },
   {
     "name": "openai_gen_itinerary",
@@ -124,8 +115,7 @@
       "parameters": {
         "order_by": "geographic location"
       }
-    },
-    "outputs": []
+    }
   },
   {
     "name": "Audio Speech Recognition",
@@ -138,10 +128,9 @@
       ]
     },
"metadata": { - "model": "openai/whisper-small", + "model": "AudioSpeechRecognition", "parameters": {} - }, - "outputs": [] + } } ], "$schema": "https://json.schemastore.org/aiconfig-1.0" diff --git a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/automatic_speech_recognition.py b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/automatic_speech_recognition.py index c935e333b..5319edc97 100644 --- a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/automatic_speech_recognition.py +++ b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/automatic_speech_recognition.py @@ -94,7 +94,12 @@ async def run_inference(self, prompt: Prompt, aiconfig: "AIConfigRuntime", optio model_settings = self.get_model_settings(prompt, aiconfig) [pipeline_creation_data, _] = refine_pipeline_creation_params(model_settings) - model_name = aiconfig.get_model_name(prompt) + + model_name: str = aiconfig.get_model_name(prompt) + # TODO: Clean this up after we allow people in the AIConfig UI to specify their + # own model name for HuggingFace tasks. This isn't great but it works for now + if (model_name == "TextTranslation"): + model_name = self._get_default_model_name() if isinstance(model_name, str) and model_name not in self.pipelines: device = self._get_device() @@ -139,6 +144,9 @@ def get_output_text( if isinstance(output_data, str): return output_data return "" + + def _get_default_model_name(self) -> str: + return "openai/whisper-small" def validate_attachment_type_is_audio(attachment: Attachment): diff --git a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_2_image.py b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_2_image.py index f61b1f8b8..8f33cd79d 100644 --- a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_2_image.py +++ b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_2_image.py @@ -289,6 +289,11 @@ async def run_inference(self, prompt: Prompt, aiconfig: "AIConfigRuntime", optio print(pipeline_building_disclaimer_message) model_name: str = aiconfig.get_model_name(prompt) + # TODO: Clean this up after we allow people in the AIConfig UI to specify their + # own model name for HuggingFace tasks. This isn't great but it works for now + if (model_name == "Text2Image"): + model_name = self._get_default_model_name() + # TODO (rossdanlm): Figure out a way to save model and re-use checkpoint # Otherwise right now a lot of these models are taking 5 mins to load with 50 # num_inference_steps (default value). 
         # num_inference_steps (default value). See here for more details:
@@ -364,6 +369,9 @@ def _get_device(self) -> str:
             return "mps"
         return "cpu"
+    def _get_default_model_name(self) -> str:
+        return "runwayml/stable-diffusion-v1-5"
+

 def _refine_responses(
     response_images: List[Image.Image],
     nsfw_content_detected: List[bool],
diff --git a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_2_speech.py b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_2_speech.py
index 85dee4add..2e6bf5187 100644
--- a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_2_speech.py
+++ b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_2_speech.py
@@ -192,6 +192,11 @@ async def run_inference(self, prompt: Prompt, aiconfig: "AIConfigRuntime", optio
         [pipeline_creation_data, _] = refine_pipeline_creation_params(model_settings)

         model_name: str = aiconfig.get_model_name(prompt)
+        # TODO: Clean this up after we allow people in the AIConfig UI to specify their
+        # own model name for HuggingFace tasks. This isn't great but it works for now
+        if model_name == "Text2Speech":
+            model_name = self._get_default_model_name()
+
         if isinstance(model_name, str) and model_name not in self.synthesizers:
             self.synthesizers[model_name] = pipeline("text-to-speech", model_name)
         synthesizer = self.synthesizers[model_name]
@@ -229,3 +234,6 @@ def get_output_text(
         elif isinstance(output.data, str):
             return output.data
         return ""
+
+    def _get_default_model_name(self) -> str:
+        return "suno/bark"
diff --git a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_generation.py b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_generation.py
index e4b553af1..91441ee8e 100644
--- a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_generation.py
+++ b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_generation.py
@@ -240,10 +240,13 @@ async def run_inference(
         completion_data = await self.deserialize(prompt, aiconfig, options, parameters)
         completion_data["text_inputs"] = completion_data.pop("prompt", None)

-        model_name : str = aiconfig.get_model_name(prompt)
+        model_name: str = aiconfig.get_model_name(prompt)
+        # TODO: Clean this up after we allow people in the AIConfig UI to specify their
+        # own model name for HuggingFace tasks. This isn't great but it works for now
+        if model_name == "TextGeneration":
+            model_name = self._get_default_model_name()
+
         if isinstance(model_name, str) and model_name not in self.generators:
-            print(f"Rossdan Loading model {prompt.metadata.model}")
-            print(f"Rossdan Loading model {model_name}")
             self.generators[model_name] = pipeline('text-generation', model=model_name)
         generator = self.generators[model_name]
@@ -305,3 +308,6 @@ def get_output_text(
         # calls so shouldn't get here, but just being safe
         return json.dumps(output_data.value, indent=2)
     return ""
+
+    def _get_default_model_name(self) -> str:
+        return "stevhliu/my_awesome_billsum_model"
diff --git a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_summarization.py b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_summarization.py
index bba735b4f..7ed79e5cb 100644
--- a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_summarization.py
+++ b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_summarization.py
@@ -239,6 +239,11 @@ async def run_inference(self, prompt: Prompt, aiconfig: "AIConfigRuntime", optio
         inputs = completion_data.pop("prompt", None)

         model_name: str = aiconfig.get_model_name(prompt)
+        # TODO: Clean this up after we allow people in the AIConfig UI to specify their
+        # own model name for HuggingFace tasks. This isn't great but it works for now
+        if model_name == "TextSummarization":
+            model_name = self._get_default_model_name()
+
         if isinstance(model_name, str) and model_name not in self.summarizers:
             self.summarizers[model_name] = pipeline("summarization", model=model_name)
         summarizer = self.summarizers[model_name]
@@ -303,3 +308,6 @@ def get_output_text(
         # calls so shouldn't get here, but just being safe
         return json.dumps(output_data.value, indent=2)
     return ""
+
+    def _get_default_model_name(self) -> str:
+        return "facebook/bart-large-cnn"
diff --git a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_translation.py b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_translation.py
index 9ee8bb357..fcfb5c0ae 100644
--- a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_translation.py
+++ b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_translation.py
@@ -239,6 +239,11 @@ async def run_inference(self, prompt: Prompt, aiconfig: "AIConfigRuntime", optio
         inputs = completion_data.pop("prompt", None)

         model_name: str = aiconfig.get_model_name(prompt)
+        # TODO: Clean this up after we allow people in the AIConfig UI to specify their
+        # own model name for HuggingFace tasks. This isn't great but it works for now
+        if model_name == "TextTranslation":
+            model_name = self._get_default_model_name()
+
         if isinstance(model_name, str) and model_name not in self.translators:
             self.translators[model_name] = pipeline(model_name)
         translator = self.translators[model_name]
@@ -297,3 +302,6 @@ def get_output_text(
         # calls so shouldn't get here, but just being safe
         return json.dumps(output_data.value, indent=2)
     return ""
+
+    def _get_default_model_name(self) -> str:
+        return "translation_en_to_fr"
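Note: every parser in this patch applies the same fallback: when the aiconfig records a task-level parser ID (e.g. "TextGeneration") instead of a concrete Hugging Face checkpoint, the parser substitutes a known-good default before building the transformers pipeline. Below is a minimal standalone sketch of that pattern; the DEFAULT_MODELS table and resolve_model_name helper are illustrative names invented here, not code from this diff.

    from transformers import pipeline

    # Illustrative fallback table mirroring the per-parser
    # _get_default_model_name() methods added in this patch.
    # N.B. "translation_en_to_fr" is a pipeline task string rather than a
    # model ID; text_translation.py accordingly calls pipeline(model_name)
    # with it as the first positional argument.
    DEFAULT_MODELS = {
        "AudioSpeechRecognition": "openai/whisper-small",
        "Image2Text": "Salesforce/blip-image-captioning-base",
        "Text2Speech": "suno/bark",
        "Text2Image": "runwayml/stable-diffusion-v1-5",
        "TextGeneration": "stevhliu/my_awesome_billsum_model",
        "TextSummarization": "facebook/bart-large-cnn",
        "TextTranslation": "translation_en_to_fr",
    }

    def resolve_model_name(model_name: str) -> str:
        # Swap a task-level parser ID for a loadable checkpoint; pass
        # concrete model IDs (e.g. "gpt2") through untouched.
        return DEFAULT_MODELS.get(model_name, model_name)

    # Usage, mirroring text_generation.py: the resolved name is what gets
    # cached (self.generators in the real code) and handed to the pipeline
    # factory. This downloads the checkpoint on first run.
    generator = pipeline("text-generation", model=resolve_model_name("TextGeneration"))

The table keys deliberately match the model_parsers registry in huggingface.aiconfig.json, which is why each run_inference can compare aiconfig.get_model_name(prompt) against its own task ID before consulting its pipeline cache.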