From f67ca5c03c7d39af2d6dee144e7fd78e27603159 Mon Sep 17 00:00:00 2001
From: Keith Gaughan <k@stereochro.me>
Date: Tue, 17 Dec 2024 21:25:45 +0000
Subject: [PATCH] Allow for the configuration of the size of the fetch queue
 and workers

The defaults should be fine in 99% of cases, but some may want to tune
them down on more powerful machines.
---
 cmd/mercury/main.go           |  2 +-
 internal/config.go            | 40 +++++++++++++++++++++++------------
 internal/manifest/manifest.go |  9 +++-----
 3 files changed, 31 insertions(+), 20 deletions(-)

diff --git a/cmd/mercury/main.go b/cmd/mercury/main.go
index 0333ccf..587545e 100644
--- a/cmd/mercury/main.go
+++ b/cmd/mercury/main.go
@@ -67,7 +67,7 @@ func main() {
 	// Populate the manifest with the contents of the config file
 	manifest.Populate(config.Feeds)
 	if !*flags.NoFetch {
-		manifest.Prime(config.Cache, config.Timeout.Duration)
+		manifest.Prime(config.Cache, config.Timeout.Duration, config.Parallelism, config.JobQueueDepth)
 	}
 	if err := manifest.Save(manifestPath); err != nil {
 		log.Fatal(err)
diff --git a/internal/config.go b/internal/config.go
index 60b6a33..0436da8 100644
--- a/internal/config.go
+++ b/internal/config.go
@@ -5,6 +5,7 @@ import (
 	"io/fs"
 	"os"
 	"path/filepath"
+	"runtime"
 
 	"github.com/BurntSushi/toml"
 	"github.com/kgaughan/mercury/internal/manifest"
@@ -14,19 +15,21 @@ import (
 
 // Config describes our configuration.
 type Config struct {
-	Name         string          `toml:"name"`
-	URL          string          `toml:"url"`
-	Owner        string          `toml:"owner"`
-	Email        string          `toml:"email"`
-	FeedID       string          `toml:"feed_id"`
-	Cache        string          `toml:"cache"`
-	Timeout      utils.Duration  `toml:"timeout"`
-	themePath    string          `toml:"theme"`
-	Theme        fs.FS           `toml:"-"`
-	Output       string          `toml:"output"`
-	Feeds        []manifest.Feed `toml:"feed"`
-	ItemsPerPage int             `toml:"items"`
-	MaxPages     int             `toml:"max_pages"`
+	Name          string          `toml:"name"`
+	URL           string          `toml:"url"`
+	Owner         string          `toml:"owner"`
+	Email         string          `toml:"email"`
+	FeedID        string          `toml:"feed_id"`
+	Cache         string          `toml:"cache"`
+	Timeout       utils.Duration  `toml:"timeout"`
+	themePath     string          `toml:"theme"`
+	Theme         fs.FS           `toml:"-"`
+	Output        string          `toml:"output"`
+	Feeds         []manifest.Feed `toml:"feed"`
+	ItemsPerPage  int             `toml:"items"`
+	MaxPages      int             `toml:"max_pages"`
+	JobQueueDepth int             `toml:"job_queue_depth"`
+	Parallelism   int             `toml:"parallelism"`
 }
 
 // Load loads our configuration file.
@@ -37,6 +40,9 @@ func (c *Config) Load(path string) error {
 	c.Output = "./output"
 	c.ItemsPerPage = 10
 	c.MaxPages = 5
+	// These are both somewhat arbitrary
+	c.JobQueueDepth = 2 * runtime.NumCPU()
+	c.Parallelism = runtime.NumCPU()
 
 	if _, err := toml.DecodeFile(path, c); err != nil {
 		return fmt.Errorf("cannot load configuration: %w", err)
@@ -47,6 +53,14 @@ func (c *Config) Load(path string) error {
 		return fmt.Errorf("cannot normalize configuration path: %w", err)
 	}
 
+	// Enforce some sensible lower bounds on feed fetching parallelism
+	if c.Parallelism < 1 {
+		c.Parallelism = 1
+	}
+	if c.JobQueueDepth < c.Parallelism {
+		c.JobQueueDepth = c.Parallelism
+	}
+
 	c.Cache = filepath.Join(configDir, c.Cache)
 	if c.themePath == "" {
 		c.Theme = dflt.Theme
diff --git a/internal/manifest/manifest.go b/internal/manifest/manifest.go
index 01b34fe..7501092 100644
--- a/internal/manifest/manifest.go
+++ b/internal/manifest/manifest.go
@@ -5,7 +5,6 @@ import (
 	"fmt"
 	"log"
 	"os"
-	"runtime"
 	"sync"
 	"time"
 
@@ -56,13 +55,11 @@ func (m *Manifest) Save(path string) error {
 	return nil
 }
 
-func (m *Manifest) Prime(cache string, timeout time.Duration) {
+func (m *Manifest) Prime(cache string, timeout time.Duration, parallelism, jobQueueDepth int) {
 	var wg sync.WaitGroup
 
-	// The channel depth is kind of arbitrary.
-	jobs := make(chan *fetchJob, 2*runtime.NumCPU())
-
-	for i := 0; i < runtime.NumCPU(); i++ {
+	jobs := make(chan *fetchJob, jobQueueDepth)
+	for i := 0; i < parallelism; i++ {
 		wg.Add(1)
 		go func() {
 			defer wg.Done()