Floki
+ Hello + Bye + + onigumo +diff --git a/lib/spider_html.ex b/lib/spider_html.ex new file mode 100644 index 0000000..c3d3557 --- /dev/null +++ b/lib/spider_html.ex @@ -0,0 +1,7 @@ +defmodule Spider.HTML do + def find_links(document) do + Floki.parse_document!(document) + |> Floki.find("a") + |> Floki.attribute("href") + end +end diff --git a/mix.exs b/mix.exs index 6bfe9ac..77e7906 100644 --- a/mix.exs +++ b/mix.exs @@ -25,7 +25,10 @@ defmodule Onigumo.MixProject do # {:dep_from_hexpm, "~> 0.3.0"}, # {:dep_from_git, git: "https://github.com/elixir-lang/my_dep.git", tag: "0.1.0"}, {:httpoison, "~> 1.8"}, - {:mox, "~> 1.0", only: :test} + {:mox, "~> 1.0", only: :test}, + + # Spider toolbox dependencies + {:floki, "~> 0.32"} ] end diff --git a/mix.lock b/mix.lock index e7e0922..e318bc3 100644 --- a/mix.lock +++ b/mix.lock @@ -1,6 +1,8 @@ %{ "certifi": {:hex, :certifi, "2.8.0", "d4fb0a6bb20b7c9c3643e22507e42f356ac090a1dcea9ab99e27e0376d695eba", [:rebar3], [], "hexpm", "6ac7efc1c6f8600b08d625292d4bbf584e14847ce1b6b5c44d983d273e1097ea"}, + "floki": {:hex, :floki, "0.32.1", "dfe3b8db3b793939c264e6f785bca01753d17318d144bd44b407fb3493acaa87", [:mix], [{:html_entities, "~> 0.5.0", [hex: :html_entities, repo: "hexpm", optional: false]}], "hexpm", "d4b91c713e4a784a3f7b1e3cc016eefc619f6b1c3898464222867cafd3c681a3"}, "hackney": {:hex, :hackney, "1.18.0", "c4443d960bb9fba6d01161d01cd81173089686717d9490e5d3606644c48d121f", [:rebar3], [{:certifi, "~>2.8.0", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "~>6.1.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "~>1.0.0", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:parse_trans, "3.3.1", [hex: :parse_trans, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "~>1.1.0", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}, {:unicode_util_compat, "~>0.7.0", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "9afcda620704d720db8c6a3123e9848d09c87586dc1c10479c42627b905b5c5e"}, + "html_entities": {:hex, :html_entities, "0.5.2", "9e47e70598da7de2a9ff6af8758399251db6dbb7eebe2b013f2bbd2515895c3c", [:mix], [], "hexpm", "c53ba390403485615623b9531e97696f076ed415e8d8058b1dbaa28181f4fdcc"}, "httpoison": {:hex, :httpoison, "1.8.0", "6b85dea15820b7804ef607ff78406ab449dd78bed923a49c7160e1886e987a3d", [:mix], [{:hackney, "~> 1.17", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm", "28089eaa98cf90c66265b6b5ad87c59a3729bea2e74e9d08f9b51eb9729b3c3a"}, "idna": {:hex, :idna, "6.1.1", "8a63070e9f7d0c62eb9d9fcb360a7de382448200fbbd1b106cc96d3d8099df8d", [:rebar3], [{:unicode_util_compat, "~>0.7.0", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "92376eb7894412ed19ac475e4a86f7b413c1b9fbb5bd16dccd57934157944cea"}, "metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm", "69b09adddc4f74a40716ae54d140f93beb0fb8978d8636eaded0c31b6f099f16"}, diff --git a/test/spider_html_test.exs b/test/spider_html_test.exs new file mode 100644 index 0000000..fb18136 --- /dev/null +++ b/test/spider_html_test.exs @@ -0,0 +1,30 @@ +defmodule SpiderHtmlTest do + use ExUnit.Case + + @urls [ + "http://onigumo.local/hello.html", + "http://onigumo.local/bye.html" + ] + @html ~s( + +
+ + + +Floki
+ Hello + Bye + + onigumo +