From 199467f50fbc9ce912c42131450aafc3550075e9 Mon Sep 17 00:00:00 2001
From: Jonathan Lessinger <jonathan@lastmileai.dev>
Date: Tue, 9 Jan 2024 14:55:02 -0500
Subject: [PATCH] [AIC-py] hf example

Idea for this config:

* translate instructions from French to English: "racontez l'histoire du vif renard brun"
  -> translate_fr_to_en prompt
    -> generate story
      -> summarize story
        -> generate audio title saying the summary
        -> generate image of the summary as well

We could probably connect image-to-text and ASR (speech recognition) as well; not sure how yet.


This PR is a starting point for the idea above. For now the translation prompt runs in the opposite direction (English to French).
Added example prompts for machine translation (MT), summarization, and text-to-speech (TTS).
---
 .../hf_local_example.aiconfig.json            | 68 ++++++++++++++
 .../local_inference/run_hf_example.py         | 90 +++++++++++++++++++
 .../local_inference/text_2_speech.py          |  1 +
 .../local_inference/text_summarization.py     |  1 +
 4 files changed, 160 insertions(+)
 create mode 100644 extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/hf_local_example.aiconfig.json
 create mode 100644 extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/run_hf_example.py

diff --git a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/hf_local_example.aiconfig.json b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/hf_local_example.aiconfig.json
new file mode 100644
index 000000000..877ab6da7
--- /dev/null
+++ b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/hf_local_example.aiconfig.json
@@ -0,0 +1,68 @@
+{
+  "name": "The Tale of the Quick Brown Fox",
+  "schema_version": "latest",
+  "metadata": {
+    "parameters": {},
+    "models": {
+      "stevhliu/my_awesome_billsum_model": {
+        "model": "stevhliu/my_awesome_billsum_model",
+        "min_length": 10,
+        "max_length": 30
+      },
+      "Salesforce/blip-image-captioning-base": {
+        "model": "Salesforce/blip-image-captioning-base"
+      }
+    },
+    "model_parsers": {
+      "suno/bark": "HuggingFaceText2SpeechTransformer"
+    },
+    "default_model": "stevhliu/my_awesome_billsum_model"
+  },
+  "description": "The Tale of the Quick Brown Fox",
+  "prompts": [
+    {
+      "name": "translate_instruction",
+      "input": "Tell the tale of {{topic}}",
+      "outputs": [],
+      "metadata": {
+        "model": "translation_en_to_fr",
+        "parameters": {
+          "topic": "the quick brown fox"
+        }
+      }
+    },
+    {
+      "name": "summarize_story",
+      "input": "Once upon a time, in a lush and vibrant forest, there lived a magnificent creature known as the Quick Brown Fox. This fox was unlike any other, possessing incredible speed and agility that awed all the animals in the forest. With its fur as golden as the sun and its eyes as sharp as emeralds, the Quick Brown Fox was admired by everyone, from the tiniest hummingbird to the mightiest bear. The fox had a kind heart and would often lend a helping paw to those in need. The Quick Brown Fox had a particular fondness for games and challenges. It loved to test its skills against others, always seeking new adventures to satisfy its boundless curiosity. Its favorite game was called \"The Great Word Hunt,\" where it would embark on a quest to find hidden words scattered across the forest.",
+      "outputs": [],
+      "metadata": {
+        "model": "stevhliu/my_awesome_billsum_model"
+      }
+    },
+    {
+      "name": "generate_audio_title",
+      "input": "The Quick Brown Fox was admired by all the animals in the forest.",
+      "metadata": {
+        "model": {
+          "name": "suno/bark",
+          "settings": {}
+        }
+      }
+    },
+    {
+      "name": "generate_caption",
+      "input": {
+        "attachments": [
+          {
+            "mime_type": "image/png",
+            "data": "/Users/jonathan/Desktop/pic.png"
+          }
+        ]
+      },
+      "metadata": {
+        "model": "Salesforce/blip-image-captioning-base"
+      }
+    }
+  ],
+  "$schema": "https://json.schemastore.org/aiconfig-1.0"
+}
diff --git a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/run_hf_example.py b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/run_hf_example.py
new file mode 100644
index 000000000..6e5c344a7
--- /dev/null
+++ b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/run_hf_example.py
@@ -0,0 +1,90 @@
+"""Example script: run Hugging Face local-inference prompts from an aiconfig file."""
+
+import asyncio
+import base64
+import sys
+from aiconfig.registry import ModelParserRegistry
+from aiconfig_extension_hugging_face.local_inference.text_2_speech import HuggingFaceText2SpeechTransformer
+from aiconfig_extension_hugging_face.local_inference.text_generation import HuggingFaceTextGenerationTransformer
+from aiconfig_extension_hugging_face.local_inference.text_summarization import HuggingFaceTextSummarizationTransformer
+from aiconfig_extension_hugging_face.local_inference.text_translation import HuggingFaceTextTranslationTransformer
+from aiconfig_extension_hugging_face.local_inference.image_2_text import HuggingFaceImage2TextTransformer
+from aiconfig import AIConfigRuntime, InferenceOptions, CallbackManager
+
+
+async def run(hf_aiconfig_path: str):
+    """Register the Hugging Face model parsers, load the aiconfig and run one prompt.
+
+    Args:
+        hf_aiconfig_path: Path of the aiconfig JSON file to load.
+    """
+    # No model id is passed here; the example config maps "suno/bark" to this
+    # parser by class name in its "model_parsers" section.
+    for model_parser in [
+        HuggingFaceText2SpeechTransformer(),
+        # HuggingFaceTextGenerationTransformer(),
+    ]:
+        ModelParserRegistry.register_model_parser(model_parser)
+
+    # NOTE(review): the ids below are bare strings; confirm register_model_parser
+    # accepts a str as well as a list of ids.
+    AIConfigRuntime.register_model_parser(HuggingFaceTextTranslationTransformer(), "translation_en_to_fr")
+    AIConfigRuntime.register_model_parser(HuggingFaceTextSummarizationTransformer(), "stevhliu/my_awesome_billsum_model")
+    AIConfigRuntime.register_model_parser(HuggingFaceImage2TextTransformer(), "Salesforce/blip-image-captioning-base")
+    # AIConfigRuntime.register_model_parser(mp, "text_2_speech")
+    # AIConfigRuntime.register_model_parser(mp, "suno/bark")
+
+    config = AIConfigRuntime.load(hf_aiconfig_path)
+    config.callback_manager = CallbackManager([])
+
+    options = InferenceOptions(stream=False)
+
+    # The other prompts of the example config, kept for reference; uncomment to try them.
+    # out1 = await config.run(
+    #     "translate_instruction",
+    #     options=options,
+    # )
+    # print(f"{out1=}")
+
+    # out3 = await config.run(
+    #     "summarize_story",
+    #     options=options,
+    # )
+    # print(f"{out3=}")
+
+    # out4 = await config.run(
+    #     "generate_audio_title",
+    #     options=options,
+    # )
+    # print(f"{out4=}")
+    # with open("story_title.wav", "wb") as f:
+    #     encoded = out4[0].data.value
+    #     decoded_binary = base64.b64decode(encoded.encode("utf-8"))
+    #     f.write(decoded_binary)
+
+    out5 = await config.run(
+        "generate_caption",
+        options=options,
+    )
+    print(f"{out5=}")
+
+
+async def main(argv: list[str]):
+    """Run the example for the aiconfig path given on the command line.
+
+    Args:
+        argv: Full argument vector (e.g. ``sys.argv``); ``argv[1]`` is the path
+            of the aiconfig file to load.
+    """
+    if len(argv) < 2:
+        # Exit with a usage message rather than an IndexError from argv[1].
+        sys.exit(f"Usage: python {argv[0] if argv else 'run_hf_example.py'} <path-to-aiconfig.json>")
+    print("Starting!")
+    path = argv[1]
+    print(f"Loading aiconfig from {path}")
+    await run(path)
+    print("Done!")
+
+
+if __name__ == "__main__":
+    asyncio.run(main(sys.argv))
diff --git a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_2_speech.py b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_2_speech.py
index 85dee4add..0b97e3433 100644
--- a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_2_speech.py
+++ b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_2_speech.py
@@ -198,6 +198,7 @@ async def run_inference(self, prompt: Prompt, aiconfig: "AIConfigRuntime", optio
 
         completion_data = await self.deserialize(prompt, aiconfig, options, parameters)
         inputs = completion_data.pop("prompt", None)
+        print("Running text to speech model. This might take a while, please be patient...")
         response = synthesizer(inputs, **completion_data)
 
         outputs: List[Output] = []
diff --git a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_summarization.py b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_summarization.py
index bba735b4f..04bea7c26 100644
--- a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_summarization.py
+++ b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_summarization.py
@@ -255,6 +255,7 @@ async def run_inference(self, prompt: Prompt, aiconfig: "AIConfigRuntime", optio
         output = None
 
         def _summarize():
+            print(f"{inputs=}, {completion_data=}")
             return summarizer(inputs, **completion_data)
 
         if not should_stream: