Skip to content

Commit

Permalink
Merge pull request #1057 from MichaelDecent/pkg2
Browse files Browse the repository at this point in the history
Add Swarmauri Keyword Extractor Plugin
  • Loading branch information
cobycloud authored Jan 9, 2025
2 parents a51d05f + 5510ea5 commit ce48530
Show file tree
Hide file tree
Showing 8 changed files with 155 additions and 2 deletions.
1 change: 1 addition & 0 deletions pkgs/standards/swarmauri_parser_keywordextractor/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Swarmauri Keyword Extractor Plugin
53 changes: 53 additions & 0 deletions pkgs/standards/swarmauri_parser_keywordextractor/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
[tool.poetry]
name = "swarmauri_parser_keywordextractor"
version = "0.6.0.dev1"
description = "Swarmauri parser plugin that extracts keywords from text using the YAKE library."
authors = ["Jacob Stewart <[email protected]>"]
license = "Apache-2.0"
readme = "README.md"
repository = "http://github.com/swarmauri/swarmauri-sdk"
classifiers = [
"License :: OSI Approved :: Apache Software License",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12"
]

[tool.poetry.dependencies]
python = ">=3.10,<3.13"

# Swarmauri
swarmauri_core = { path = "../../core" }
swarmauri_base = { path = "../../base" }
# KeywordExtractorParser imports swarmauri_standard.documents.Document.
swarmauri_standard = { path = "../swarmauri_standard" }

# Third-party
# KeywordExtractorParser imports yake directly, so it must be declared here
# rather than relying on another package's extras to pull it in.
yake = "^0.4.8"

[tool.poetry.group.dev.dependencies]
flake8 = "^7.0"
pytest = "^8.0"
pytest-asyncio = ">=0.24.0"
pytest-xdist = "^3.6.1"
pytest-json-report = "^1.5.0"
python-dotenv = "*"
requests = "^2.32.3"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

[tool.pytest.ini_options]
norecursedirs = ["combined", "scripts"]

markers = [
"test: standard test",
"unit: Unit tests",
"integration: Integration tests",
"acceptance: Acceptance tests",
"experimental: Experimental tests"
]
log_cli = true
log_cli_level = "INFO"
log_cli_format = "%(asctime)s [%(levelname)s] %(message)s"
log_cli_date_format = "%Y-%m-%d %H:%M:%S"
asyncio_default_fixture_loop_scope = "function"

[tool.poetry.plugins."swarmauri.parsers"]
KeywordExtractorParser = "swarmauri_parser_keywordextractor:KeywordExtractorParser"
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import yake
from typing import List, Union, Any, Literal
from pydantic import ConfigDict, PrivateAttr
from swarmauri_standard.documents.Document import Document
from swarmauri_base.parsers.ParserBase import ParserBase
from swarmauri_core.ComponentBase import ComponentBase


@ComponentBase.register_type(ParserBase, "KeywordExtractorParser")
class KeywordExtractorParser(ParserBase):
    """
    Extracts keywords from text using the YAKE keyword extraction library.

    Fields:
    - lang (str): Language code passed to YAKE as ``lan``. Defaults to "en".
    - num_keywords (int): Passed to YAKE as ``top``. Defaults to 10.
    """

    lang: str = "en"
    num_keywords: int = 10
    # Built once in __init__ from the field values above; changing ``lang`` or
    # ``num_keywords`` on an existing instance does not rebuild it.
    _kw_extractor: yake.KeywordExtractor = PrivateAttr(default=None)
    model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True)
    type: Literal["KeywordExtractorParser"] = "KeywordExtractorParser"

    def __init__(self, **data):
        """
        Validate the model fields, then build the YAKE extractor from them.

        Parameters:
        - **data: Field values forwarded unchanged to the base-class initializer.
        """
        super().__init__(**data)
        # Only ``lan`` and ``top`` come from model fields; the remaining
        # arguments are fixed. Per YAKE's README, ``n`` is the maximum n-gram
        # size and ``dedupLim`` / ``dedupFunc`` control keyword de-duplication.
        self._kw_extractor = yake.KeywordExtractor(
            lan=self.lang,
            n=3,
            dedupLim=0.9,
            dedupFunc="seqm",
            windowsSize=1,
            top=self.num_keywords,
            features=None,
        )

    def parse(self, data: Union[str, Any]) -> List[Document]:
        """
        Extract keywords from input text and return them as Document instances.

        Parameters:
        - data (Union[str, Any]): The input text from which to extract keywords.
          Non-string input is converted with ``str()`` first.

        Returns:
        - List[Document]: One Document per extracted keyword, in the order YAKE
          returned them. ``content`` holds the keyword and ``metadata["score"]``
          holds the score YAKE paired with it.
        """
        # Ensure data is in string format for analysis
        text = data if isinstance(data, str) else str(data)

        # YAKE yields (keyword, score) pairs
        keywords = self._kw_extractor.extract_keywords(text)

        return [
            Document(content=keyword, metadata={"score": score})
            for keyword, score in keywords
        ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from .KeywordExtractorParser import KeywordExtractorParser

# Version string exposed by the package at import time.
# NOTE(review): the accompanying pyproject.toml declares version "0.6.0.dev1";
# confirm which of the two values is intended and keep them in sync.
__version__ = "0.6.0.dev26"
# Long-form, Markdown-formatted description of the package.
__long_desc__ = """
# Swarmauri Keyword Extractor Plugin
This repository includes an Keyword Extractor of a Swarmauri Plugin.
Visit us at: https://swarmauri.com
Follow us at: https://github.com/swarmauri
Star us at: https://github.com/swarmauri/swarmauri-sdk
"""
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import pytest
from swarmauri_parser_keywordextractor.KeywordExtractorParser import (
KeywordExtractorParser as Parser,
)


@pytest.mark.unit
def test_ubc_resource():
    """A default-constructed parser reports "Parser" as its resource."""
    assert Parser().resource == "Parser"


@pytest.mark.unit
def test_ubc_type():
    """A default-constructed parser reports its own class name as its type."""
    assert Parser().type == "KeywordExtractorParser"


@pytest.mark.unit
def test_serialization():
    """The parser id survives a JSON dump / validate round trip."""
    original = Parser()
    restored = Parser.model_validate_json(original.model_dump_json())
    assert original.id == restored.id


@pytest.mark.unit
def test_parse():
    """The third Document parsed from the sample text is the keyword "burgers"."""
    # Parse once and reuse the result instead of building a second parser
    # and re-running extraction for each assertion.
    third = Parser().parse("test two burgers")[2]
    assert third.resource == "Document"
    assert third.content == "burgers"
2 changes: 1 addition & 1 deletion pkgs/standards/swarmauri_standard/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ full = [
"aiofiles", "aiohttp",
#"cohere", "mistralai", "fal-client", "google-generativeai", "openai",
#"nltk", "textblob",
"yake",
#"yake",
"beautifulsoup4",
"scikit-learn",
#"gensim", "scipy", "scikit-learn",
Expand Down
1 change: 1 addition & 0 deletions pkgs/swarmauri/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ full = [
]
doc2vecvectorstore = ["swarmauri_doc2vec_vectorstore"]
matplotlib_tool = ["swarmauri_tool_matplotlib"]
keywordextractor_parser = ["swarmauri_parser_keywordextractor"]

[tool.setuptools]
namespace_packages = ["swarmauri"]
Expand Down
3 changes: 2 additions & 1 deletion pkgs/swarmauri/swarmauri/plugin_citizenship_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ class PluginCitizenshipRegistry:
"swarmauri.parsers.BeautifulSoupElementParser": "swarmauri_standard.parsers.BeautifulSoupElementParser",
"swarmauri.parsers.CSVParser": "swarmauri_standard.parsers.CSVParser",
"swarmauri.parsers.HTMLTagStripParser": "swarmauri_standard.parsers.HTMLTagStripParser",
"swarmauri.parsers.KeywordExtractorParser": "swarmauri_standard.parsers.KeywordExtractorParser",
# "swarmauri.parsers.KeywordExtractorParser": "swarmauri_standard.parsers.KeywordExtractorParser",
"swarmauri.parsers.Md2HtmlParser": "swarmauri_standard.parsers.Md2HtmlParser",
"swarmauri.parsers.OpenAPISpecParser": "swarmauri_standard.parsers.OpenAPISpecParser",
"swarmauri.parsers.PhoneNumberExtractorParser": "swarmauri_standard.parsers.PhoneNumberExtractorParser",
Expand Down Expand Up @@ -226,6 +226,7 @@ class PluginCitizenshipRegistry:
"swarmauri.vector_stores.Doc2VecEmbedding": "swarmauri_vectorstore_doc2vec.Doc2VecEmbedding",
"swarmauri.tools.MatplotlibCsvTool": "swarmauri_tool_matplotlib.MatplotlibCsvTool",
"swarmauri.tools.MatplotlibTool": "swarmauri_tool_matplotlib.MatplotlibTool",
"swarmauri.parsers.KeywordExtractorParser": "swarmauri_parser_keywordextractor.KeywordExtractorParser",
}
SECOND_CLASS_REGISTRY: Dict[str, str] = {}
THIRD_CLASS_REGISTRY: Dict[str, str] = {}
Expand Down

0 comments on commit ce48530

Please sign in to comment.