From 66cea797f9ab96255f921554aa5eb5e07d27d3ad Mon Sep 17 00:00:00 2001 From: kaushal07wick Date: Tue, 26 Dec 2023 23:08:25 +0530 Subject: [PATCH 1/3] contextual_compression_in_RAG --- .../Contextual-Compression-with-RAG/README.md | 15 + .../main.ipynb | 672 ++++++++++++++++++ 2 files changed, 687 insertions(+) create mode 100644 examples/Contextual-Compression-with-RAG/README.md create mode 100644 examples/Contextual-Compression-with-RAG/main.ipynb diff --git a/examples/Contextual-Compression-with-RAG/README.md b/examples/Contextual-Compression-with-RAG/README.md new file mode 100644 index 00000000..fe11eca4 --- /dev/null +++ b/examples/Contextual-Compression-with-RAG/README.md @@ -0,0 +1,15 @@ +# 馃攳Search engine using SAM & CLIP + +Open In Colab +[![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/context-aware-chatbot-using-llama-2-lancedb-as-vector-database-4d771d95c755) + + +### 馃帿 Enhance RAG: Integrate Contextual Compression and Filtering for Precision. +Follow the Colab Notebook for full code. + +## Interface 馃専 + +1. Load the Model from Huggingface. +2. Instantiate Contextual Compressor from Langchain. +3. Create a pipeline with different retrieving filters. +4. Get the answer using QA chain. \ No newline at end of file diff --git a/examples/Contextual-Compression-with-RAG/main.ipynb b/examples/Contextual-Compression-with-RAG/main.ipynb new file mode 100644 index 00000000..4eaf21e7 --- /dev/null +++ b/examples/Contextual-Compression-with-RAG/main.ipynb @@ -0,0 +1,672 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 11, + "id": "e8b87ed8-7b08-484a-a7a3-6ea54b0dce99", + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "!pip install -qU langchain huggingface_hub lancedb pypdf python-dotenv transformers sentence-transformers" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "id": "96b1065d-f22c-45f3-8286-2d93a54086b8", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.llms import HuggingFaceHub\n", + "from langchain.document_loaders import PyPDFLoader\n", + "from langchain.vectorstores import Chroma\n", + "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", + "from langchain.embeddings import SentenceTransformerEmbeddings\n", + "from langchain.prompts import PromptTemplate\n", + "from dotenv import load_dotenv\n", + "from langchain.llms import OpenAI\n", + "import lancedb" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "id": "6a6c70ab-a66e-4913-a063-70f059f46699", + "metadata": {}, + "outputs": [ + { + "name": "stdin", + "output_type": "stream", + "text": [ + "Enter HuggingFace Hub Token: 路路路路路路路路\n" + ] + } + ], + "source": [ + "import os\n", + "from getpass import getpass\n", + "os.environ[\"HUGGINGFACEHUB_API_TOKEN\"] = getpass(\"Enter HuggingFace Hub Token:\")" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "id": "627249f2-3861-4ad9-8dc3-f0a53cc9336d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "7\n", + "SCIENCE GLOSSARY \n", + " \n", + "Abiotic: A nonliving factor or element (e.g., light, w ater, heat, rock, energy, mineral). \n", + " \n", + "Acid deposition: Precipitation w ith a pH less than 5.6 that form s in the atmosphere w hen certain pollutants mix \n", + "with water vapor. \n", + " \n", + "Allele: Any of a set of possible forms of a gene. \n", + " \n", + "Biochemical conversion: The changing of organic matter into other chemical forms. \n", + " \n", + "Biological diversity: The variety and complexity of species present a nd interacting in an ecosystem and the relative \n", + "abundance of each. \n", + " \n", + "Biomass conversion: The changing of organic matter that has been produced by photosynthesis into useful liquid, gas \n", + "or fuel. \n", + " \n", + "Biomedical technology: The application of health care theories to develop methods, products and tools to maintain or \n", + "improve homeostasis. \n", + " \n", + "Biomes: A community of living organisms of a single major ecological region. \n", + " \n", + "Biotechnology: The w ays that humans apply biological concepts to produce products and provide services. \n", + " \n", + "Biotic: An environmental factor related to or produced by living organisms. \n", + " \n", + "Carbon chemistry: The science of the composition, structure, pr operties and reactions of carbon based matter, \n", + "especially of atomic and molecular systems; sometimes referred to as organic chemistry. \n", + " \n", + "Closing the loop: A link in the circular chain of recycling events th at promotes the use of products made w ith \n", + "recycled materials. \n", + " \n", + "Commodities: Economic goods or products before they are processed and/or given a brand name, such as a \n", + "product of agriculture. \n", + " 1\n" + ] + } + ], + "source": [ + "loader = PyPDFLoader(\"Science_Glossary.pdf\")\n", + "documents = loader.load()\n", + "print(len(documents))\n", + "print(documents[0].page_content)" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "id": "a0935ff9-ec5e-4af3-8194-694d6e09f14b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "22\n", + "page_content='SCIENCE GLOSSARY \\n \\nAbiotic: A nonliving factor or element (e.g., light, w ater, heat, rock, energy, mineral). \\n \\nAcid deposition: Precipitation w ith a pH less than 5.6 that form s in the atmosphere w hen certain pollutants mix \\nwith water vapor. \\n \\nAllele: Any of a set of possible forms of a gene. \\n \\nBiochemical conversion: The changing of organic matter into other chemical forms. \\n \\nBiological diversity: The variety and complexity of species present a nd interacting in an ecosystem and the relative \\nabundance of each. \\n \\nBiomass conversion: The changing of organic matter that has been produced by photosynthesis into useful liquid, gas \\nor fuel.' metadata={'source': 'Science_Glossary.pdf', 'page': 0}\n" + ] + } + ], + "source": [ + "text_splitter = RecursiveCharacterTextSplitter(chunk_size=700,chunk_overlap=70)\n", + "final_doc = text_splitter.split_documents(documents)\n", + "print(len(final_doc))\n", + "print(final_doc[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "id": "f8934f57-45d0-4e50-89ad-0cb2fc0d5b25", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "No sentence-transformers model found with name C:\\Users\\Lenovo/.cache\\torch\\sentence_transformers\\llmware_industry-bert-insurance-v0.1. Creating a new one with MEAN pooling.\n" + ] + } + ], + "source": [ + "embeddings = SentenceTransformerEmbeddings(model_name=\"llmware/industry-bert-insurance-v0.1\")" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "id": "387d5d94-b66d-455b-9bb1-f62a9aaf1a7a", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Lenovo\\Documents\\workspace\\lim\\Lib\\site-packages\\huggingface_hub\\utils\\_deprecation.py:131: FutureWarning: 'InferenceApi' (from 'huggingface_hub.inference_api') is deprecated and will be removed from version '1.0'. `InferenceApi` client is deprecated in favor of the more feature-complete `InferenceClient`. Check out this guide to learn how to convert your script to use it: https://huggingface.co/docs/huggingface_hub/guides/inference#legacy-inferenceapi-client.\n", + " warnings.warn(warning_message, FutureWarning)\n" + ] + } + ], + "source": [ + "repo_id =\"llmware/bling-sheared-llama-1.3b-0.1\"\n", + "llm = HuggingFaceHub(repo_id=repo_id,\n", + " model_kwargs={\"temperature\":0.3,\"max_length\":500})" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "id": "92e8431c-03e0-4a22-b934-2f3a076f25b2", + "metadata": {}, + "outputs": [], + "source": [ + "def pretty_print_docs(docs):\n", + " print(f\"\\n{'-'* 100}\\n\".join([F\"Document{i+1}:\\n\\n\" + d.page_content for i,d in enumerate(docs)]))" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "id": "09ce605a-93ab-4734-8784-3b57dd330722", + "metadata": {}, + "outputs": [], + "source": [ + "import lancedb\n", + "context_data = lancedb.connect(\"./.lancedb\")\n", + "table = context_data.create_table(\"context\", data=[\n", + " {\"vector\": embeddings.embed_query(\"Hello World\"), \"text\": \"Hello World\", \"id\": \"1\"}\n", + "], mode=\"overwrite\")" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "id": "9f600c90-86b1-4f56-9741-372f8c1e3146", + "metadata": {}, + "outputs": [], + "source": [ + "#initialize the retriever\n", + "database = LanceDB.from_documents(final_doc, embeddings, connection=table)" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "id": "24279bd0-0bc6-4a80-b297-0ec2f8717642", + "metadata": {}, + "outputs": [], + "source": [ + "retriever_d = database.as_retriever(search_kwargs={\"k\": 3})" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "id": "3f3b0406-f2ff-4ca4-bfaa-6063d3cfd249", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Document1:\n", + "\n", + "body of w ater; also called a drainage basin. \n", + " \n", + "Wetlands: Lands w here w ater saturation is the domi nant factor determining the nature of the soil \n", + "development and the plant and animal communities (e.g., sloughs, estuaries, marshes). \n", + " \n", + " \n", + " 7\n", + "----------------------------------------------------------------------------------------------------\n", + "Document2:\n", + "\n", + "developed state. \n", + " \n", + "Endangered species: A species that is in danger of extinction throughout all or a significant portion of its range. \n", + " \n", + "Engineering: The application of scientific, physical, mechan ical and mathematical principles to design \n", + "processes, products and structures that improve the quality of life. \n", + " \n", + "Environment: The total of the surroundings (air, w ater, soil, vege tation, people, w ildlife) influencing each living \n", + "being鈥檚 existence, including physical, biological a nd all other factors; the surroundings of a plant \n", + "or animals including other plants or animals, climate and location. \n", + " 2\n", + "----------------------------------------------------------------------------------------------------\n", + "Document3:\n", + "\n", + "Scale: Relates concepts and ideas to one another by some measurement (e.g., quantitative, numeral, \n", + "abstract, ideological); provides a measure of siz e and/or incremental change. \n", + " \n", + "Science: Search for understanding the natural w orld using inquiry and experimentation. \n", + " \n", + "Shredder: Through chew ing and/or grinding, microorganisms feed on non-w oody coarse particulate matter, \n", + "primarily leaves. \n", + " \n", + "Stream order: Energy and nutrient flow that increases as w ater moves tow ard the oceans (e.g., the smallest \n", + "stream (primary) that ends w hen rivers flow into oceans). \n", + " \n", + "Succession: The series of changes that occur in an ecosystem w ith the passing of time.\n" + ] + } + ], + "source": [ + "docs = retriever_d.get_relevant_documents(query=\"What is Wetlands?\")\n", + "pretty_print_docs(docs)" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "id": "fe7799d1-5f00-4b99-8fc2-f1054cbb9c23", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.retrievers import ContextualCompressionRetriever\n", + "from langchain.retrievers.document_compressors import LLMChainExtractor\n", + "\n", + "#creating the compressor\n", + "compressor = LLMChainExtractor.from_llm(llm=llm)\n", + "\n", + "#compressor retriver = base retriever + compressor\n", + "compression_retriever = ContextualCompressionRetriever(base_retriever=retriever_d,\n", + " base_compressor=compressor)" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "id": "545a5c95-4676-44e0-8878-29c3be3c970d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Given the following question and context, extract any part of the context *AS IS* that is relevant to answer the question. If none of the context is relevant return NO_OUTPUT. \n", + "\n", + "Remember, *DO NOT* edit the extracted parts of the context.\n", + "\n", + "> Question: {question}\n", + "> Context:\n", + ">>>\n", + "{context}\n", + ">>>\n", + "Extracted relevant parts:\n" + ] + } + ], + "source": [ + "print(compressor.llm_chain.prompt.template)" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "id": "287004b8-9d32-4c44-a038-889d826e5799", + "metadata": {}, + "outputs": [ + { + "name": "stdin", + "output_type": "stream", + "text": [ + " 路路路路路路路路\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Document1:\n", + "\n", + "developed state. \n", + " \n", + "Endangered species: A species that is in danger of extinction throughout all or a significant portion of its range. \n", + " \n", + "Engineering: The application of scientific, physical, mechan ical and mathematical principles to design \n", + "processes, products and structures that improve the quality of life. \n", + " \n", + "Environment: The total of the surroundings (air, w ater, soil, vege tation, people, w ildlife) influencing each living \n", + "being鈥檚 existence, including physical, biological a nd all other factors; the surroundings of a plant \n", + "or animals including other plants or animals, climate and location. \n", + " 2\n", + "----------------------------------------------------------------------------------------------------\n", + "Document2:\n", + "\n", + "Niche (ecological) : The role played by an organism in an ecosystem; its food preferences, requirements for shelter, \n", + "special behaviors and the timing of its activities (e.g., nocturnal, diurnal), interaction w ith other \n", + "organisms and its habitat. \n", + " \n", + "Nonpoint source pollution: Contamination that originates from ma ny locations that all discharge into a location (e.g., a lake, \n", + "stream, land area). \n", + " \n", + "Nonrenew able resources: Substances (e.g., oil, gas, coal, copper, gol d) that, once used, cannot be replaced in this geological \n", + "age. \n", + " \n", + "Nova: A variable star that suddenly increases in bright ness to several times its normal magnitude and\n", + "----------------------------------------------------------------------------------------------------\n", + "Document3:\n", + "\n", + "and age relationships of rock units and the occu rrences of structural features, mineral deposits \n", + "and fossil localities). \n", + " \n", + "Groundw ater: Water that infiltrates the soil and is located in underground reservoirs called aquifers. \n", + " \n", + "Hazardous w aste: A solid that, because of its quantity or concentr ation or its physical, chemical or infectious \n", + "characteristics, may cause or pose a substantial present or potential haz ard to human health or \n", + "the environment w hen improperly treated, stored , transported or disposed of, or otherw ise \n", + "managed. \n", + " \n", + "Homeostasis: The tendency for a system to remain in a state of equilibrium by resisting change. \n", + " \n", + " 3\n" + ] + } + ], + "source": [ + "from getpass import getpass\n", + "import os\n", + "from langchain.embeddings import OpenAIEmbeddings\n", + "from langchain.retrievers.document_compressors import EmbeddingsFilter\n", + "\n", + "os.environ[\"OPENAI_API_KEY \"] = getpass()\n", + "embdeddings_filter = EmbeddingsFilter(embeddings=embeddings)\n", + "compression_retriever_filter = ContextualCompressionRetriever(base_retriever=retriever_d,\n", + " base_compressor=embdeddings_filter)\n", + "\n", + "compressed_docs = compression_retriever_filter.get_relevant_documents(query=\"What is the Environment?\")\n", + "pretty_print_docs(compressed_docs)" + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "id": "d010a946-e53a-467c-aac6-69c454d48d13", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "{'query': 'What is Environment?', 'result': ' Environment'}" + ] + }, + "execution_count": 113, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain.chains import RetrievalQA\n", + "qa = RetrievalQA.from_chain_type(llm=llm,\n", + " chain_type=\"stuff\",\n", + " retriever=compression_retriever_filter,\n", + " verbose=True)\n", + "#Ask Question\n", + "qa(\"What is Environment?\")" + ] + }, + { + "cell_type": "markdown", + "id": "2052e24a-1f50-408d-bf10-df1c2a4b5af6", + "metadata": {}, + "source": [ + "# Pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "id": "cb8d3941-fd91-47ad-80ef-863df0242b38", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "base_compressor=DocumentCompressorPipeline(transformers=[EmbeddingsRedundantFilter(embeddings=HuggingFaceEmbeddings(client=SentenceTransformer(\n", + " (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel \n", + " (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})\n", + "), model_name='llmware/industry-bert-insurance-v0.1', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False), similarity_fn=, similarity_threshold=0.95), EmbeddingsFilter(embeddings=HuggingFaceEmbeddings(client=SentenceTransformer(\n", + " (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel \n", + " (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})\n", + "), model_name='llmware/industry-bert-insurance-v0.1', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False), similarity_fn=, k=5, similarity_threshold=None)]) base_retriever=VectorStoreRetriever(tags=['LanceDB', 'HuggingFaceEmbeddings'], vectorstore=, search_kwargs={'k': 3})\n", + "Document1:\n", + "\n", + "developed state. \n", + " \n", + "Endangered species: A species that is in danger of extinction throughout all or a significant portion of its range. \n", + " \n", + "Engineering: The application of scientific, physical, mechan ical and mathematical principles to design \n", + "processes, products and structures that improve the quality of life. \n", + " \n", + "Environment: The total of the surroundings (air, w ater, soil, vege tation, people, w ildlife) influencing each living \n", + "being鈥檚 existence, including physical, biological a nd all other factors; the surroundings of a plant \n", + "or animals including other plants or animals, climate and location. \n", + " 2\n", + "----------------------------------------------------------------------------------------------------\n", + "Document2:\n", + "\n", + "Niche (ecological) : The role played by an organism in an ecosystem; its food preferences, requirements for shelter, \n", + "special behaviors and the timing of its activities (e.g., nocturnal, diurnal), interaction w ith other \n", + "organisms and its habitat. \n", + " \n", + "Nonpoint source pollution: Contamination that originates from ma ny locations that all discharge into a location (e.g., a lake, \n", + "stream, land area). \n", + " \n", + "Nonrenew able resources: Substances (e.g., oil, gas, coal, copper, gol d) that, once used, cannot be replaced in this geological \n", + "age. \n", + " \n", + "Nova: A variable star that suddenly increases in bright ness to several times its normal magnitude and\n", + "----------------------------------------------------------------------------------------------------\n", + "Document3:\n", + "\n", + "and age relationships of rock units and the occu rrences of structural features, mineral deposits \n", + "and fossil localities). \n", + " \n", + "Groundw ater: Water that infiltrates the soil and is located in underground reservoirs called aquifers. \n", + " \n", + "Hazardous w aste: A solid that, because of its quantity or concentr ation or its physical, chemical or infectious \n", + "characteristics, may cause or pose a substantial present or potential haz ard to human health or \n", + "the environment w hen improperly treated, stored , transported or disposed of, or otherw ise \n", + "managed. \n", + " \n", + "Homeostasis: The tendency for a system to remain in a state of equilibrium by resisting change. \n", + " \n", + " 3\n" + ] + } + ], + "source": [ + "from langchain.document_transformers import EmbeddingsRedundantFilter\n", + "from langchain.retrievers.document_compressors import DocumentCompressorPipeline\n", + "\n", + "redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)\n", + "relevant_filter = EmbeddingsFilter(embeddings=embeddings,k=5)\n", + "\n", + "#creating the pipeline\n", + "compressed_pipeline = DocumentCompressorPipeline(transformers=[redundant_filter,relevant_filter])\n", + "\n", + "# compressor retriever\n", + "comp_pipe_retrieve = ContextualCompressionRetriever(base_retriever=retriever_d,\n", + " base_compressor=compressed_pipeline)\n", + "\n", + "# print the prompt\n", + "print(comp_pipe_retrieve)\n", + "\n", + "# Get relevant documents\n", + "compressed_docs = comp_pipe_retrieve.get_relevant_documents(query=\"What is Environment?\")\n", + "pretty_print_docs(compressed_docs)" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "id": "49da9c3f-c7ed-4a2c-821c-05c9c4d6c561", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Document1:\n", + "\n", + "developed state. \n", + " \n", + "Endangered species: A species that is in danger of extinction throughout all or a significant portion of its range. \n", + " \n", + "Engineering: The application of scientific, physical, mechan ical and mathematical principles to design \n", + "processes, products and structures that improve the quality of life. \n", + " \n", + "Environment: The total of the surroundings (air, w ater, soil, vege tation, people, w ildlife) influencing each living \n", + "being鈥檚 existence, including physical, biological a nd all other factors; the surroundings of a plant \n", + "or animals including other plants or animals, climate and location. \n", + " 2\n", + "----------------------------------------------------------------------------------------------------\n", + "Document2:\n", + "\n", + "Niche (ecological) : The role played by an organism in an ecosystem; its food preferences, requirements for shelter, \n", + "special behaviors and the timing of its activities (e.g., nocturnal, diurnal), interaction w ith other \n", + "organisms and its habitat. \n", + " \n", + "Nonpoint source pollution: Contamination that originates from ma ny locations that all discharge into a location (e.g., a lake, \n", + "stream, land area). \n", + " \n", + "Nonrenew able resources: Substances (e.g., oil, gas, coal, copper, gol d) that, once used, cannot be replaced in this geological \n", + "age. \n", + " \n", + "Nova: A variable star that suddenly increases in bright ness to several times its normal magnitude and\n", + "----------------------------------------------------------------------------------------------------\n", + "Document3:\n", + "\n", + "and age relationships of rock units and the occu rrences of structural features, mineral deposits \n", + "and fossil localities). \n", + " \n", + "Groundw ater: Water that infiltrates the soil and is located in underground reservoirs called aquifers. \n", + " \n", + "Hazardous w aste: A solid that, because of its quantity or concentr ation or its physical, chemical or infectious \n", + "characteristics, may cause or pose a substantial present or potential haz ard to human health or \n", + "the environment w hen improperly treated, stored , transported or disposed of, or otherw ise \n", + "managed. \n", + " \n", + "Homeostasis: The tendency for a system to remain in a state of equilibrium by resisting change. \n", + " \n", + " 3\n" + ] + } + ], + "source": [ + "compressed_docs = compression_retriever_pipeline.get_relevant_documents(query=\"What is Environment?\")\n", + "pretty_print_docs(compressed_docs)" + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "id": "9afff673-d096-4bd5-b4fb-867cec1c9bda", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Document1:\n", + "\n", + "and age relationships of rock units and the occu rrences of structural features, mineral deposits \n", + "and fossil localities). \n", + " \n", + "Groundw ater: Water that infiltrates the soil and is located in underground reservoirs called aquifers. \n", + " \n", + "Hazardous w aste: A solid that, because of its quantity or concentr ation or its physical, chemical or infectious \n", + "characteristics, may cause or pose a substantial present or potential haz ard to human health or \n", + "the environment w hen improperly treated, stored , transported or disposed of, or otherw ise \n", + "managed. \n", + " \n", + "Homeostasis: The tendency for a system to remain in a state of equilibrium by resisting change. \n", + " \n", + " 3\n", + "----------------------------------------------------------------------------------------------------\n", + "Document2:\n", + "\n", + "tunnels, containers of various types). \n", + " \n", + "Radioactive isotope: An atom that gives off nuclear radiation and has the same number of protons (atomic number) as \n", + "another atom but a different number of neutrons. \n", + " \n", + "Recycling : Collecting and reprocessing a resource or product to make into new products. \n", + " \n", + "Regulation: A rule or order issued by an executive authorit y or regulatory agency of a government and having \n", + "the force of law . \n", + " \n", + "Renew able: A naturally occurring raw material or form of energy that w ill be replenished through natural \n", + "ecological cycles or sound management practices (e.g., the sun, w ind, w ater, trees).\n", + "----------------------------------------------------------------------------------------------------\n", + "Document3:\n", + "\n", + "Hydrology: The scientific study of the properties, distribution and effects of w ater on the earth鈥檚 surface, in \n", + "the soil and underlying rocks and in the atmosphere. \n", + " \n", + "Hypothesis: An assertion subject to verification or proof as a premise from w hich a conclusion is draw n. \n", + " \n", + "Incinerating: Burning to ashes; reducing to ashes. \n", + " \n", + "Information technology: The technical means that humans create to store and transmit information. \n", + " \n", + "Inquiry: A systematic process for using know ledge and skills to acquire and apply new know ledge. \n", + " \n", + "Instructional technology: Any mechanical aid (including com puter technology) used to assist in or enhance the process of \n", + "teaching and learning.\n" + ] + } + ], + "source": [ + "compressed_docs = compression_retriever_pipeline.get_relevant_documents(query=\"What is Hazardous waste?\")\n", + "pretty_print_docs(compressed_docs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 6016350b39390e17049244cf92254c4ec90258c1 Mon Sep 17 00:00:00 2001 From: kaushal07wick Date: Tue, 26 Dec 2023 23:12:42 +0530 Subject: [PATCH 2/3] update main Readme --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 253d9c17..2efdc700 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,8 @@ If you're looking for in-depth tutorial-like examples, checkout the [tutorials]( | [Facial Recognition](./examples/facial_recognition) | Open In Colab | | [Accelerate Vector Search Applications Using OpenVINO](./tutorials/Sentiment-Analysis-using-LanceDB) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/tutorials/Accelerate-Vector-Search-Applications-Using-OpenVINO/clip_text_image_search.ipynb) | [![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/accelerate-vector-search-applications-using-openvino-51366eabf866)| | [Search Within Images](./examples/search-within-images-with-sam-and-clip) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/examples/search-within-images-with-sam-and-clip/main.ipynb) |[![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/search-within-an-image-331b54e4285e)| +| [Contextual-Compression-with-RAG](.examples/Contextual-Compression-with-RAG) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/examples/Contextual-Compression-with-RAG/main.ipynb) |[![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/search-within-an-image-331b54e4285e)| + From c7a5b904f3cd53374316227d059383f72c79eaa5 Mon Sep 17 00:00:00 2001 From: kaushal07wick Date: Tue, 9 Jan 2024 21:31:28 +0530 Subject: [PATCH 3/3] new_changes --- README.md | 3 +-- examples/Contextual-Compression-with-RAG/README.md | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 2efdc700..5e22e99a 100644 --- a/README.md +++ b/README.md @@ -56,8 +56,7 @@ If you're looking for in-depth tutorial-like examples, checkout the [tutorials]( | [Facial Recognition](./examples/facial_recognition) | Open In Colab | | [Accelerate Vector Search Applications Using OpenVINO](./tutorials/Sentiment-Analysis-using-LanceDB) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/tutorials/Accelerate-Vector-Search-Applications-Using-OpenVINO/clip_text_image_search.ipynb) | [![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/accelerate-vector-search-applications-using-openvino-51366eabf866)| | [Search Within Images](./examples/search-within-images-with-sam-and-clip) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/examples/search-within-images-with-sam-and-clip/main.ipynb) |[![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/search-within-an-image-331b54e4285e)| -| [Contextual-Compression-with-RAG](.examples/Contextual-Compression-with-RAG) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/examples/Contextual-Compression-with-RAG/main.ipynb) |[![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/search-within-an-image-331b54e4285e)| - +| [Contextual-Compression-with-RAG](.examples/Contextual-Compression-with-RAG) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/examples/Contextual-Compression-with-RAG/main.ipynb) |[![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://medium.com/etoai/enhance-rag-integrate-contextual-compression-and-filtering-for-precision-a29d4a810301) diff --git a/examples/Contextual-Compression-with-RAG/README.md b/examples/Contextual-Compression-with-RAG/README.md index fe11eca4..bf0b2d95 100644 --- a/examples/Contextual-Compression-with-RAG/README.md +++ b/examples/Contextual-Compression-with-RAG/README.md @@ -1,7 +1,7 @@ # 馃攳Search engine using SAM & CLIP -Open In Colab -[![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/context-aware-chatbot-using-llama-2-lancedb-as-vector-database-4d771d95c755) +Open In Colab +[![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://medium.com/etoai/enhance-rag-integrate-contextual-compression-and-filtering-for-precision-a29d4a810301) ### 馃帿 Enhance RAG: Integrate Contextual Compression and Filtering for Precision.