-
-
Notifications
You must be signed in to change notification settings - Fork 6
/
config.yaml
110 lines (89 loc) · 4.11 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# Configuration file for UglyFeed
# GitHub Repository: https://github.com/fabriziosalmi/UglyFeed
# Documentation: https://github.com/fabriziosalmi/UglyFeed/tree/main/docs
# Location of the source feeds to process
input_feeds_path: "input/feeds.txt"

# Pre-processing of source feeds
# Minimum similarity score required to consider two items a match
similarity_threshold: 0.50
# Text pre-processing options.
# The options below are children of `preprocessing`; they must stay indented,
# otherwise `preprocessing` becomes null and the options turn into top-level keys.
preprocessing:
  remove_html: true  # Remove HTML tags from the text
  lowercase: true  # Convert text to lowercase
  remove_punctuation: true  # Remove punctuation from the text
  lemmatization: true  # Apply lemmatization
  use_stemming: false  # Do not use stemming
  additional_stopwords: []  # List of additional stopwords to remove
# Text vectorization options.
# `method` here is scoped to `vectorization`; keep it indented so it does not
# collide with the `method` key of other sections.
vectorization:
  method: 'tfidf'  # Method for text vectorization; only 'tfidf' is available at the moment
  ngram_range: [1, 2]  # Consider unigrams and bigrams
  max_df: 0.85  # Max document frequency for terms
  min_df: 0.01  # Min document frequency for terms
  max_features: 5000  # Maximum number of features to extract
# Similarity / clustering options.
# `method` here is scoped to `similarity_options`; keep it indented so it does
# not collide with the `method` key of other sections.
similarity_options:
  # Clustering method: 'dbscan', 'kmeans', 'agglomerative'
  method: 'dbscan'
  # DBSCAN: the maximum distance between two samples for one to be considered
  # as in the neighborhood of the other
  eps: 0.5
  # DBSCAN: the number of samples in a neighborhood for a point to be
  # considered as a core point
  min_samples: 2
  # KMeans and Agglomerative: number of clusters
  n_clusters: 5
  # Agglomerative: linkage criterion
  linkage: 'average'
# API configuration settings (llm_processor.py)
# Only one API configuration should be active at any time.
# All keys below are children of `api_config`; the commented-out examples are
# indented so that removing the leading "# " yields a correctly nested key.
api_config:
  selected_api: "Groq"  # Active API: "OpenAI", "Groq", "Anthropic", "Ollama"

  # Example configurations for different APIs:

  # OpenAI API configuration
  # openai_api_url: "https://api.openai.com/v1/chat/completions"
  # openai_api_key: "your_openai_api_key"
  # openai_model: "gpt-3.5-turbo"

  # Groq API configuration
  # NOTE: the key below is a placeholder; avoid committing a real key to
  # version control.
  groq_api_url: "https://api.groq.com/openai/v1/chat/completions"
  groq_api_key: "your_groq_api_key"
  groq_model: "llama3-8b-8192"

  # Anthropic API configuration
  # anthropic_api_key: "your_anthropic_api_key"
  # anthropic_api_url: "https://api.anthropic.com/v1/messages"
  # anthropic_model: "claude-3-haiku-20240307"

  # Ollama API configuration
  # ollama_api_url: "http://localhost:11434/api/chat"
  # ollama_model: "phi3"
# Folder configuration settings
# Both directories are children of `folders` and must stay indented.
folders:
  output_folder: "output"  # Directory for storing aggregated feeds
  rewritten_folder: "rewritten"  # Directory for storing rewritten content
# Prompt selection: content_prefix has moved to an external file, chosen with
# the prompt_file option. More prompts can be found in the prompts folder.
prompt_file: "prompt_IT.txt"
# prompt_file: "prompt_EN.txt"
# Limit settings for content retrieval and processing (json2rss.py)
# Maximum number of items to process
max_items: 50
# Maximum age, in days, for an item to be considered
max_age_days: 10

# Feed metadata
feed_title: "UglyFeed RSS"
feed_link: "https://github.com/fabriziosalmi/UglyFeed"
feed_description: "A dynamically generated feed using UglyFeed."
feed_language: "it"
feed_self_link: "https://raw.githubusercontent.com/fabriziosalmi/UglyFeed/main/examples/uglyfeed-source-1.xml"
author: "UglyFeed"
category: "Technology"
copyright: "UglyFeed"
# Moderation settings (replace bad words listed in the words_file with *****)
# The options below are children of `moderation` and must stay indented,
# otherwise `moderation` becomes null and `enabled` turns into a top-level key.
moderation:
  enabled: false  # Enable or disable content moderation
  words_file: 'moderation/IT.txt'  # File containing words to be moderated
  # NOTE(review): grouped under `moderation` because it precedes the next
  # section header; confirm against the code that reads this key.
  allow_duplicates: false  # Remove source links repetitions
# Scheduler settings
# Enable or disable scheduling
scheduling_enabled: false
# Interval for scheduling, counted in units of scheduling_period
scheduling_interval: 4
# Period unit for scheduling (e.g., hours, minutes)
scheduling_period: "hours"
# Custom HTTP server port
# Port for the HTTP server
http_server_port: 8001
# Deployment settings (deploy_xml.py)
# The token and repository values below are placeholders.

# GitHub deployment
enable_github: false  # Enable or disable GitHub deployment
github_token: "your_github_token"
github_repo: "your_github_username/uglyfeed-cdn"

# GitLab deployment
enable_gitlab: false  # Enable or disable GitLab deployment
gitlab_token: "your_gitlab_token"
gitlab_repo: "your_gitlab_username/uglyfeed-cdn"
# TTS (text-to-speech) settings
enable_tts: false
tts_input: "rewritten"
tts_output: "media"
tts_language: "it"
speed_factor: 1.25