Skip to content

Commit

Permalink
Merge branch 'master' into dependecies-update
Browse files Browse the repository at this point in the history
  • Loading branch information
nappex committed May 16, 2023
2 parents b80bb73 + f2c4aa4 commit 8b2e7d1
Show file tree
Hide file tree
Showing 11 changed files with 65 additions and 20 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,5 @@ onigumo-*.tar
# Temporary files, for example, from tests.
/tmp/

onigumo
# Ignore onigumo escript file
/onigumo
7 changes: 7 additions & 0 deletions lib/cli.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
defmodule Onigumo.CLI do
  @moduledoc """
  Command-line entry point that dispatches to a single Onigumo component.
  """

  @doc """
  Runs the component named by the only command-line argument.

  The argument is joined onto the `Onigumo` namespace to resolve the target
  module, and that module's `main/1` is called with the current working
  directory as the root path. The return value of the component's `main/1`
  is returned unchanged.

  Only a one-element argument list is accepted. `Module.safe_concat/1`
  raises `ArgumentError` when the resulting module name is not an existing
  atom, so unknown component names fail instead of creating new atoms.
  """
  def main([component]) do
    target = Module.safe_concat(["Onigumo", component])
    target.main(File.cwd!())
  end
end
6 changes: 3 additions & 3 deletions lib/onigumo_downloader.ex → lib/onigumo/downloader.ex
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@ defmodule Onigumo.Downloader do
def main(root_path) do
http_client().start()

download_urls_from_file(root_path)
create_download_stream(root_path)
|> Stream.run()
end

def download_urls_from_file(root_path) do
def create_download_stream(root_path) do
root_path
|> load_urls()
|> Stream.map(&download_url(&1, root_path))
Expand Down Expand Up @@ -50,7 +50,7 @@ defmodule Onigumo.Downloader do
end

def create_file_name(url) do
Hash.md5(url, :hex)
Onigumo.Utilities.Hash.md5(url, :hex)
end

defp http_client() do
Expand Down
6 changes: 0 additions & 6 deletions lib/onigumo_cli.ex

This file was deleted.

7 changes: 7 additions & 0 deletions lib/spider/html.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
defmodule Onigumo.Spider.HTML do
  @moduledoc """
  HTML helpers for the spider, built on Floki.
  """

  @doc """
  Returns the `href` attribute values of every `a` element in `document`.

  `document` is HTML source handed to `Floki.parse_document!/1`, which raises
  when parsing fails. Only `a` elements are inspected, and an `a` element
  without an `href` attribute contributes nothing to the result.
  """
  def find_links(document) do
    parsed = Floki.parse_document!(document)
    anchors = Floki.find(parsed, "a")
    Floki.attribute(anchors, "href")
  end
end
2 changes: 1 addition & 1 deletion lib/hash.ex → lib/utilities/hash.ex
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
defmodule Hash do
defmodule Onigumo.Utilities.Hash do
def md5(data, fmt) do
hash(:md5, data)
|> format(fmt)
Expand Down
5 changes: 4 additions & 1 deletion mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@ defmodule Onigumo.MixProject do
# {:dep_from_hexpm, "~> 0.3.0"},
# {:dep_from_git, git: "https://github.com/elixir-lang/my_dep.git", tag: "0.1.0"},
{:httpoison, "~> 1.8"},
{:mox, "~> 1.0", only: :test}
{:mox, "~> 1.0", only: :test},

# Spider toolbox dependencies
{:floki, "~> 0.32"}
]
end

Expand Down
3 changes: 3 additions & 0 deletions mix.lock
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
%{
"certifi": {:hex, :certifi, "2.9.0", "6f2a475689dd47f19fb74334859d460a2dc4e3252a3324bd2111b8f0429e7e21", [:rebar3], [], "hexpm", "266da46bdb06d6c6d35fde799bcb28d36d985d424ad7c08b5bb48f5b5cdd4641"},
"hackney": {:hex, :hackney, "1.18.1", "f48bf88f521f2a229fc7bae88cf4f85adc9cd9bcf23b5dc8eb6a1788c662c4f6", [:rebar3], [{:certifi, "~>2.9.0", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "~>6.1.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "~>1.0.0", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:parse_trans, "3.3.1", [hex: :parse_trans, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "~>1.1.0", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}, {:unicode_util_compat, "~>0.7.0", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "a4ecdaff44297e9b5894ae499e9a070ea1888c84afdd1fd9b7b2bc384950128e"},
"httpoison": {:hex, :httpoison, "1.8.2", "9eb9c63ae289296a544842ef816a85d881d4a31f518a0fec089aaa744beae290", [:mix], [{:hackney, "~> 1.17", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm", "2bb350d26972e30c96e2ca74a1aaf8293d61d0742ff17f01e0279fef11599921"},
"floki": {:hex, :floki, "0.32.1", "dfe3b8db3b793939c264e6f785bca01753d17318d144bd44b407fb3493acaa87", [:mix], [{:html_entities, "~> 0.5.0", [hex: :html_entities, repo: "hexpm", optional: false]}], "hexpm", "d4b91c713e4a784a3f7b1e3cc016eefc619f6b1c3898464222867cafd3c681a3"},
"html_entities": {:hex, :html_entities, "0.5.2", "9e47e70598da7de2a9ff6af8758399251db6dbb7eebe2b013f2bbd2515895c3c", [:mix], [], "hexpm", "c53ba390403485615623b9531e97696f076ed415e8d8058b1dbaa28181f4fdcc"},
"idna": {:hex, :idna, "6.1.1", "8a63070e9f7d0c62eb9d9fcb360a7de382448200fbbd1b106cc96d3d8099df8d", [:rebar3], [{:unicode_util_compat, "~>0.7.0", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "92376eb7894412ed19ac475e4a86f7b413c1b9fbb5bd16dccd57934157944cea"},
"metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm", "69b09adddc4f74a40716ae54d140f93beb0fb8978d8636eaded0c31b6f099f16"},
"mimerl": {:hex, :mimerl, "1.2.0", "67e2d3f571088d5cfd3e550c383094b47159f3eee8ffa08e64106cdf5e981be3", [:rebar3], [], "hexpm", "f278585650aa581986264638ebf698f8bb19df297f66ad91b18910dfc6e19323"},
Expand Down
8 changes: 4 additions & 4 deletions test/hash_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -53,28 +53,28 @@ defmodule HashTest do

for {data, hash_hex, _} <- @known_md5s do
test("hash MD5 #{inspect(data)} in hexadecimal") do
hash = Hash.md5(unquote(data), :hex)
hash = Onigumo.Utilities.Hash.md5(unquote(data), :hex)
assert(hash == unquote(hash_hex))
end
end

for {data, _, hash_bin} <- @known_md5s do
test("hash MD5 #{inspect(data)} in binary") do
hash = Hash.md5(unquote(data), :bin)
hash = Onigumo.Utilities.Hash.md5(unquote(data), :bin)
assert(hash == unquote(hash_bin))
end
end

for {format, hash} <- @formatted_hashes do
test("format #{inspect(@binary_hash)} in #{inspect(format)}") do
formatted = Hash.format(@binary_hash, unquote(format))
formatted = Onigumo.Utilities.Hash.format(@binary_hash, unquote(format))
assert(formatted == unquote(hash))
end
end

for {func, known_hash} <- @known_hashes do
test("hash #{inspect(@known_hash_data)} with #{inspect(func)}") do
computed_hash = Hash.hash(unquote(func), @known_hash_data)
computed_hash = Onigumo.Utilities.Hash.hash(unquote(func), @known_hash_data)
assert(computed_hash == unquote(known_hash))
end
end
Expand Down
8 changes: 4 additions & 4 deletions test/onigumo_downloader_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,17 @@ defmodule OnigumoDownloaderTest do
end
end

describe("Onigumo.Downloader.download_urls_from_file/1") do
describe("Onigumo.Downloader.create_download_stream/1") do
@tag :tmp_dir
test("download URLs from the input file", %{tmp_dir: tmp_dir}) do
test("download URLs from the input file with a created stream", %{tmp_dir: tmp_dir}) do
expect(HTTPoisonMock, :get!, length(@urls), &prepare_response/1)

input_path_env = Application.get_env(:onigumo, :input_path)
input_path_tmp = Path.join(tmp_dir, input_path_env)
input_file_content = prepare_input(@urls)
File.write!(input_path_tmp, input_file_content)

Onigumo.Downloader.download_urls_from_file(tmp_dir) |> Stream.run()
Onigumo.Downloader.create_download_stream(tmp_dir) |> Stream.run()

Enum.map(@urls, &assert_downloaded(&1, tmp_dir))
end
Expand Down Expand Up @@ -116,7 +116,7 @@ defmodule OnigumoDownloaderTest do
input_url = "https://onigumo.local/hello.html"
created_file_name = Onigumo.Downloader.create_file_name(input_url)

expected_file_name = Hash.md5(input_url, :hex)
expected_file_name = Onigumo.Utilities.Hash.md5(input_url, :hex)
assert(created_file_name == expected_file_name)
end
end
Expand Down
30 changes: 30 additions & 0 deletions test/spider_html_test.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
defmodule SpiderHtmlTest do
  use ExUnit.Case

  alias Onigumo.Spider.HTML

  # Links the fixture page is expected to yield, in document order.
  @expected_links [
    "http://onigumo.local/hello.html",
    "http://onigumo.local/bye.html"
  ]

  # Fixture page. Besides the two real links it contains a stylesheet `link`
  # element with an href and an `a` element without one; neither may show up
  # in the result.
  @page ~s(<!doctype html>
<html>
<head>
<link href="/media/examples/link-element-example.css" rel="stylesheet">
</head>
<body>
<section id="content">
<p class="headline">Floki</p>
<a href="http://onigumo.local/hello.html">Hello</a>
<a href="http://onigumo.local/bye.html">Bye</a>
<a id="nothing"></a>
<span data-model="user">onigumo</span>
</section>
</body>
</html>)

  describe "Onigumo.Spider.HTML.find_links/1" do
    test "find links in href attributes of 'a' tags" do
      assert HTML.find_links(@page) == @expected_links
    end
  end
end

0 comments on commit 8b2e7d1

Please sign in to comment.