-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
ae3ae71
commit 54f31f8
Showing
6 changed files
with
256 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# CI workflow: run the Go test suite on every push and pull request.
name: Go

on: [ push, pull_request ]
jobs:

  build:
    name: Build
    runs-on: ubuntu-latest
    steps:

      # Check out first so that setup-go can see go.mod/go.sum when it
      # computes its module cache key.
      - name: Check out code into the Go module directory
        uses: actions/checkout@v4

      # The v2 releases of these actions run on a deprecated Node runtime;
      # use the current major versions.
      - name: Set up Go 1.x
        uses: actions/setup-go@v5
        with:
          go-version: ^1.21
        id: go

      - name: Get dependencies
        run: go mod download

      - name: Test
        run: go test -v ./...
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,13 @@ | ||
# pdfcopy | ||
Quickie script to take screenshots of URLs for copyright submissions | ||
# pdfcopy [![GoDoc](https://godoc.org/github.com/spotlightpa/pdfcopy?status.svg)](https://godoc.org/github.com/spotlightpa/pdfcopy) [![Go Report Card](https://goreportcard.com/badge/github.com/spotlightpa/pdfcopy)](https://goreportcard.com/report/github.com/spotlightpa/pdfcopy) | ||
|
||
Quickie script to take screenshots of URLs for copyright submissions. | ||
|
||
## Installation | ||
|
||
First install [Go](https://go.dev). | ||
|
||
If you just want to install the binary to your current directory and don't care about the source code, run: | ||
|
||
```bash | ||
GOBIN="$(pwd)" go install github.com/spotlightpa/pdfcopy@latest | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,180 @@ | ||
package app | ||
|
||
import ( | ||
"context" | ||
"crypto/md5" | ||
"errors" | ||
"flag" | ||
"fmt" | ||
"io" | ||
"io/fs" | ||
"log" | ||
"os" | ||
"os/exec" | ||
"os/signal" | ||
"path/filepath" | ||
|
||
"github.com/carlmjohnson/csv" | ||
"github.com/carlmjohnson/flagx" | ||
"github.com/carlmjohnson/flagx/lazyio" | ||
"github.com/carlmjohnson/flowmatic" | ||
"github.com/carlmjohnson/versioninfo" | ||
) | ||
|
||
const AppName = "pdfcopy" | ||
|
||
func CLI(args []string) error { | ||
var app appEnv | ||
err := app.ParseArgs(args) | ||
if err != nil { | ||
return err | ||
} | ||
if err = app.Exec(); err != nil { | ||
fmt.Fprintf(os.Stderr, "Error: %v\n", err) | ||
} | ||
return err | ||
} | ||
|
||
func (app *appEnv) ParseArgs(args []string) error { | ||
fl := flag.NewFlagSet(AppName, flag.ContinueOnError) | ||
src := lazyio.FileOrURL(lazyio.StdIO, nil) | ||
app.src = src | ||
fl.Var(src, "src", "source file or URL") | ||
fl.StringVar(&app.dst, "dst", "output.pdf", "destination `filepath`") | ||
fl.StringVar(&app.temp, "temp", "", "temporary `filepath` for downloads and intermediate PDFs") | ||
fl.IntVar(&app.maxProcs, "workers", 10, "number of workers") | ||
app.Logger = log.New(os.Stderr, AppName+" ", log.LstdFlags) | ||
flagx.BoolFunc(fl, "silent", "log debug output", func() error { | ||
app.Logger.SetOutput(io.Discard) | ||
return nil | ||
}) | ||
fl.Usage = func() { | ||
fmt.Fprintf(fl.Output(), `copyrightpdfs - %s | ||
Download stuff and screenshot it | ||
Usage: | ||
copyrightpdfs [options] | ||
Options: | ||
`, versioninfo.Version) | ||
fl.PrintDefaults() | ||
} | ||
versioninfo.AddFlag(fl) | ||
if err := fl.Parse(args); err != nil { | ||
return err | ||
} | ||
if err := flagx.ParseEnv(fl, AppName); err != nil { | ||
return err | ||
} | ||
return nil | ||
} | ||
|
||
// appEnv carries the configuration and shared state for one run of the
// program. ParseArgs fills it in and Exec consumes it.
type appEnv struct {
	// src is the input from which the list of URLs is read.
	src io.ReadCloser
	// temp is the directory holding screenshots and intermediate PDFs.
	temp string
	// dst is the path of the merged output PDF.
	dst string
	// maxProcs is the number of workers used to process URLs.
	maxProcs int
	// Logger receives progress messages.
	*log.Logger
}
|
||
func (app *appEnv) Exec() (err error) { | ||
// Open list of URLs | ||
urls, err := app.readURLs() | ||
if err != nil { | ||
return err | ||
} | ||
if app.temp == "" { | ||
// Make temp directory | ||
tempdir, err := os.MkdirTemp("", "") | ||
if err != nil { | ||
return err | ||
} | ||
app.temp = tempdir | ||
} | ||
app.Logger.Printf("tempdir %q", app.temp) | ||
|
||
// Start some Flowmatic groups | ||
ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt) | ||
defer stop() | ||
|
||
err = flowmatic.Each(app.maxProcs, urls, func(url string) error { | ||
return app.buildPDF(ctx, url) | ||
}) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
// Once they're all done' | ||
// pdftk ./*.pdf cat output merged.pdf | ||
// TODO: The order of PDFs is random. Fix that somehow | ||
args, err := filepath.Glob(filepath.Join(app.temp, "*.pdf")) | ||
if err != nil { | ||
return err | ||
} | ||
args = append(args, "cat", "output", app.dst) | ||
cmd := exec.CommandContext(ctx, "pdftk", args...) | ||
cmd.Stdout = os.Stdout | ||
cmd.Stderr = os.Stderr | ||
app.Logger.Printf("pdftk cat %s", app.dst) | ||
if err := cmd.Run(); err != nil { | ||
return err | ||
} | ||
|
||
return err | ||
} | ||
|
||
func (app *appEnv) readURLs() ([]string, error) { | ||
var urls []string | ||
fr := csv.NewFieldReader(app.src) | ||
for fr.Scan() { | ||
urls = append(urls, fr.Field("url")) | ||
} | ||
return urls, fr.Err() | ||
} | ||
|
||
func (app *appEnv) buildPDF(ctx context.Context, url string) error { | ||
hash := md5.Sum([]byte(url)) | ||
png := fmt.Sprintf("%0x.png", hash) | ||
pdf := fmt.Sprintf("%0x.pdf", hash) | ||
|
||
// Skip if stat file | ||
_, err := os.Stat(filepath.Join(app.temp, png)) | ||
switch { | ||
case err == nil: | ||
app.Logger.Printf("have %s", png) | ||
case !errors.Is(err, fs.ErrNotExist): | ||
return err | ||
default: | ||
app.Logger.Printf("start %0x from %q", hash, url) | ||
// TODO retry in loop | ||
cmd := exec.CommandContext(ctx, "shot-scraper", "--reduced-motion", | ||
// TODO figure out whether to use #content or not | ||
"-s", "#content", | ||
"-p", "16", "--output", png, url) | ||
cmd.Dir = app.temp | ||
cmd.Stderr = os.Stderr | ||
cmd.Stdout = os.Stdout | ||
if err := cmd.Run(); err != nil { | ||
// mark this up | ||
return fmt.Errorf("problem with %q from %q: %w", png, url, err) | ||
} | ||
} | ||
// Skip if stat file | ||
_, err = os.Stat(filepath.Join(app.temp, pdf)) | ||
switch { | ||
case err == nil: | ||
app.Logger.Printf("have %s", pdf) | ||
return nil | ||
case !errors.Is(err, fs.ErrNotExist): | ||
return err | ||
default: | ||
cmd := exec.CommandContext(ctx, "convert", png, pdf) | ||
cmd.Dir = app.temp | ||
if err := cmd.Run(); err != nil { | ||
return err | ||
} | ||
} | ||
app.Logger.Printf("done %0x from %q", hash, url) | ||
return nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
module github.com/spotlightpa/pdfcopy | ||
|
||
go 1.21 | ||
|
||
require ( | ||
github.com/carlmjohnson/csv v1.20.0 | ||
github.com/carlmjohnson/exitcode v0.20.2 | ||
github.com/carlmjohnson/flagx v0.22.2 | ||
github.com/carlmjohnson/flowmatic v0.23.4 | ||
github.com/carlmjohnson/versioninfo v0.22.5 | ||
) | ||
|
||
require github.com/carlmjohnson/deque v0.23.1 // indirect |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
github.com/carlmjohnson/be v0.22.3 h1:XwpxXz+wHvZ6O+i/IxcVQaGinsDkF99bpq0VXno6Voc= | ||
github.com/carlmjohnson/be v0.22.3/go.mod h1:KAgPUh0HpzWYZZI+IABdo80wTgY43YhbdsiLYAaSI/Q= | ||
github.com/carlmjohnson/csv v1.20.0 h1:/QBKcZdJY0FMerK6m+uMgZRwmQ33Uj5NBqkqcitstcY= | ||
github.com/carlmjohnson/csv v1.20.0/go.mod h1:I2Bsj5YPd8aGaLfXelcf4ANJpFlrPYkgpJnwCsTdNbI= | ||
github.com/carlmjohnson/deque v0.23.1 h1:X2HOJM9xcglY03deMZ0oZ1V2xtbqYV7dJDnZiSZN4Ak= | ||
github.com/carlmjohnson/deque v0.23.1/go.mod h1:LF5NJjICBrEOPx84pxPL4nCimy5n9NQjxKi5cXkh+8U= | ||
github.com/carlmjohnson/exitcode v0.20.2 h1:vE6rmkCGNA4kO4m1qwWIa77PKlUBVg46cNjs22eAOXE= | ||
github.com/carlmjohnson/exitcode v0.20.2/go.mod h1:MZ6ThCDx517DQcrpYnnns1pLh8onjFl+B/AsrOrdmpc= | ||
github.com/carlmjohnson/flagx v0.22.2 h1:UXf7gL4Ffv5RIH/HKp8CGNzDyopgezFLrDO1m4F8jWc= | ||
github.com/carlmjohnson/flagx v0.22.2/go.mod h1:obobISvBnxgEXPLBITVXhRUOlSlzza1SGt34M64CPJc= | ||
github.com/carlmjohnson/flowmatic v0.23.4 h1:SfK6f+zKUlw4aga1ph+7/csqVeUAWnBxfqKN5gvQzzs= | ||
github.com/carlmjohnson/flowmatic v0.23.4/go.mod h1:Jpvyl591Dvkt9chYpnVupjxlKvqkZ9CtCmqL4wfQD7U= | ||
github.com/carlmjohnson/versioninfo v0.22.5 h1:O00sjOLUAFxYQjlN/bzYTuZiS0y6fWDQjMRvwtKgwwc= | ||
github.com/carlmjohnson/versioninfo v0.22.5/go.mod h1:QT9mph3wcVfISUKd0i9sZfVrPviHuSF+cUtLjm2WSf8= |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
package main | ||
|
||
import ( | ||
"os" | ||
|
||
"github.com/carlmjohnson/exitcode" | ||
"github.com/spotlightpa/pdfcopy/app" | ||
) | ||
|
||
func main() { | ||
exitcode.Exit(app.CLI(os.Args[1:])) | ||
} |