Commit

Solutions for challenge01
Matthias Fasching authored and fasmat committed Mar 1, 2024
1 parent 8dba011 commit 48cc814
Showing 4 changed files with 209 additions and 3 deletions.
53 changes: 53 additions & 0 deletions challenge01/challenge01_solution_01.go
@@ -0,0 +1,53 @@
package challenge01

import (
	"fmt"

	"dojo/challenge01/fetcher"
)

type Crawler01 struct {
	fetched map[string]bool
}

func New01() *Crawler01 {
	return &Crawler01{
		fetched: make(map[string]bool),
	}
}

func NewWithRateLimit01(...interface{}) Crawler {
	// Needed for TODO3; returning nil keeps the placeholder compiling.
	return nil
}

// Crawl uses fetcher to recursively crawl
// pages starting with url, to a maximum of depth.
func (c *Crawler01) Crawl(url string, depth int, fetcher fetcher.Fetcher) {
	if depth <= 0 {
		return
	}
	if c.checkFetched(url) {
		fmt.Printf("skipping: %s\n", url)
		return
	}

	body, urls, err := fetcher.Fetch(url)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Printf("found: %s %q\n", url, body)
	for _, u := range urls {
		c.Crawl(u, depth-1, fetcher)
	}
}

// checkFetched reports whether url has been seen before and marks it
// as fetched on first sight.
func (c *Crawler01) checkFetched(url string) bool {
	if c.fetched[url] {
		return true
	}
	c.fetched[url] = true
	return false
}
71 changes: 71 additions & 0 deletions challenge01/challenge01_solution_02.go
@@ -0,0 +1,71 @@
package challenge01

import (
	"fmt"
	"sync"

	"dojo/challenge01/fetcher"
)

type Crawler02 struct {
	fetched map[string]bool
	wg      *sync.WaitGroup
	mu      sync.Mutex
}

func New02() *Crawler02 {
	c := &Crawler02{
		fetched: make(map[string]bool),
		wg:      new(sync.WaitGroup),
	}
	c.wg.Add(1)
	return c
}

func NewWithRateLimit02(...interface{}) Crawler {
	// Needed for TODO3; returning nil keeps the placeholder compiling.
	return nil
}

// Crawl uses fetcher to recursively crawl
// pages starting with url, to a maximum of depth.
func (c *Crawler02) Crawl(url string, depth int, fetcher fetcher.Fetcher) {
	go c.crawlHandler(url, depth, fetcher)

	c.wg.Wait()
}

func (c *Crawler02) crawlHandler(url string, depth int, fetcher fetcher.Fetcher) {
	defer c.wg.Done()
	if depth <= 0 {
		return
	}

	if c.checkFetched(url) {
		fmt.Printf("skipping: %s\n", url)
		return
	}

	body, urls, err := fetcher.Fetch(url)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Printf("found: %s %q\n", url, body)

	// Add before spawning so the counter cannot drop to zero while
	// children are still being created.
	c.wg.Add(len(urls))
	for _, u := range urls {
		go c.crawlHandler(u, depth-1, fetcher)
	}
}

// checkFetched reports whether url has been seen before and marks it
// as fetched on first sight. The mutex guards the map against
// concurrent access from the handler goroutines.
func (c *Crawler02) checkFetched(url string) bool {
	c.mu.Lock()
	defer c.mu.Unlock()

	if c.fetched[url] {
		return true
	}
	c.fetched[url] = true
	return false
}
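The WaitGroup bookkeeping in Crawler02 follows an add-before-spawn discipline: New02 adds one for the root handler, every handler retires exactly one count via the deferred Done, and a handler adds len(urls) before launching any children, so the counter can never hit zero while work is still being created. A standalone sketch of the same pattern, with hypothetical task names unrelated to the crawler:

package main

import (
	"fmt"
	"sync"
)

func main() {
	var wg sync.WaitGroup
	wg.Add(1) // account for the root task before it starts
	go func() {
		defer wg.Done()
		children := []string{"a", "b", "c"}
		wg.Add(len(children)) // add before spawning, never inside the child
		for _, name := range children {
			go func(n string) {
				defer wg.Done()
				fmt.Println("visit", n)
			}(name)
		}
	}()
	wg.Wait() // returns only once the root and all children have called Done
}

Adding inside the spawned children instead would race: the root handler could finish and drop the counter to zero before the children register themselves, releasing Wait too early.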
82 changes: 82 additions & 0 deletions challenge01/challenge01_solution_03.go
@@ -0,0 +1,82 @@
package challenge01

import (
	"fmt"
	"sync"
	"time"

	"dojo/challenge01/fetcher"
)

type Crawler03 struct {
	fetched map[string]bool
	wg      *sync.WaitGroup
	mu      sync.Mutex
	ticker  *time.Ticker
}

func New03() *Crawler03 {
	c := &Crawler03{
		fetched: make(map[string]bool),
		wg:      new(sync.WaitGroup),
	}
	c.wg.Add(1)
	return c
}

func NewWithRateLimit03(rate time.Duration) *Crawler03 {
	c := &Crawler03{
		fetched: make(map[string]bool),
		wg:      new(sync.WaitGroup),
		ticker:  time.NewTicker(rate),
	}
	c.wg.Add(1)
	return c
}

// Crawl uses fetcher to recursively crawl
// pages starting with url, to a maximum of depth.
func (c *Crawler03) Crawl(url string, depth int, fetcher fetcher.Fetcher) {
	go c.crawlHandler(url, depth, fetcher)

	c.wg.Wait()
}

func (c *Crawler03) crawlHandler(url string, depth int, fetcher fetcher.Fetcher) {
	defer c.wg.Done()
	if depth <= 0 {
		return
	}

	if c.checkFetched(url) {
		fmt.Printf("skipping: %s\n", url)
		return
	}

	// With a rate limit configured, block until the next tick so that
	// at most one fetch starts per interval across all goroutines.
	if c.ticker != nil {
		<-c.ticker.C
	}

	body, urls, err := fetcher.Fetch(url)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Printf("found: %s %q\n", url, body)

	// Add before spawning so the counter cannot drop to zero while
	// children are still being created.
	c.wg.Add(len(urls))
	for _, u := range urls {
		go c.crawlHandler(u, depth-1, fetcher)
	}
}

// checkFetched reports whether url has been seen before and marks it
// as fetched on first sight. The mutex guards the map against
// concurrent access from the handler goroutines.
func (c *Crawler03) checkFetched(url string) bool {
	c.mu.Lock()
	defer c.mu.Unlock()

	if c.fetched[url] {
		return true
	}
	c.fetched[url] = true
	return false
}
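Crawler03 serializes fetches through a single shared time.Ticker: every handler goroutine blocks on <-c.ticker.C before calling Fetch, and because the ticker emits one value per interval, at most one handler proceeds per tick no matter how many are waiting. One caveat the solution leaves open is that the ticker is never stopped, which is harmless in a short-lived test run but would leak in a long-lived program. A standalone sketch of the mechanism, with an arbitrary interval:

package main

import (
	"fmt"
	"time"
)

func main() {
	ticker := time.NewTicker(500 * time.Millisecond)
	defer ticker.Stop() // unlike the crawler, release the ticker when done

	for i := 0; i < 3; i++ {
		<-ticker.C // blocks until the next tick: one pass per interval
		fmt.Println("fetch", i, "at", time.Now().Format("15:04:05.000"))
	}
}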
6 changes: 3 additions & 3 deletions challenge01/challenge01_test.go
@@ -22,7 +22,7 @@ func Test_Challenge01_00_Basic_Functionality(t *testing.T) {
func Test_Challenge01_01_Do_not_Fetch_URLs_Twice(t *testing.T) {
	f := fetcher.Distinct()

-	c := New()
+	c := New01()
	c.Crawl("https://golang.org/", 4, f)

	assert.True(t, f.Completed(), "Not all URLs fetched")
@@ -36,7 +36,7 @@ func Test_Challenge01_02_Be_More_Efficient(t *testing.T) {

	done := make(chan bool)
	go func() {
-		c := New()
+		c := New02()
		c.Crawl("https://golang.org/", 4, f)
		close(done)
	}()
@@ -56,7 +56,7 @@ func Test_Challenge01_03_RateLimit_Requests(t *testing.T) {
func Test_Challenge01_03_RateLimit_Requests(t *testing.T) {
	f := fetcher.RateLimited()

-	c := NewWithRateLimit(2 * time.Second)
+	c := NewWithRateLimit03(2 * time.Second)
	c.Crawl("https://golang.org/", 4, f)

	assert.True(t, f.Completed(), "Not all URLs fetched")
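With the constructors numbered per solution, each test now pins exactly the crawler variant it exercises. Assuming the module layout implied by the dojo/challenge01/fetcher import path, the suite would be run from the repository root with something like:

go test -v ./challenge01/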
