From 4d43f5d947c73ef013f13969fdd2ba697eddb136 Mon Sep 17 00:00:00 2001 From: Neil Berkman Date: Wed, 11 Dec 2019 10:02:01 -0800 Subject: [PATCH 1/6] format code --- .formatter.exs | 5 ++ benchmark.exs | 2 +- lib/furlex.ex | 94 +++++++++++++++------------- lib/furlex/fetcher.ex | 15 +++-- lib/furlex/oembed.ex | 33 +++++----- lib/furlex/parser.ex | 48 ++++++++------ lib/furlex/parser/facebook.ex | 6 +- lib/furlex/parser/html.ex | 32 +++++----- lib/furlex/parser/json_ld.ex | 4 +- lib/furlex/parser/twitter.ex | 6 +- mix.exs | 10 ++- test/furlex/fetcher_test.exs | 10 +-- test/furlex/oembed_test.exs | 24 +++---- test/furlex/parser/facebook_test.exs | 15 +++-- test/furlex/parser/html_test.exs | 14 +++-- test/furlex/parser/json_ld_test.exs | 27 ++++---- test/furlex/parser/twitter_test.exs | 15 +++-- test/furlex/parser_test.exs | 45 ++++++------- test/furlex_test.exs | 19 +++--- test/test_helper.exs | 2 +- 20 files changed, 228 insertions(+), 198 deletions(-) create mode 100644 .formatter.exs diff --git a/.formatter.exs b/.formatter.exs new file mode 100644 index 0000000..3b4db1a --- /dev/null +++ b/.formatter.exs @@ -0,0 +1,5 @@ +[ + import_deps: [], + inputs: ["*.{ex,exs}", "{config,lib,test}/**/*.{ex,exs}"], + subdirectories: [] +] diff --git a/benchmark.exs b/benchmark.exs index 34c842c..28f77a5 100644 --- a/benchmark.exs +++ b/benchmark.exs @@ -1,7 +1,7 @@ ## Parsers ### Vimeo -vimeo = File.read! "./test/fixtures/vimeo.html" +vimeo = File.read!("./test/fixtures/vimeo.html") Benchee.run(%{ "facebook" => fn -> Furlex.Parser.Facebook.parse(vimeo) end, diff --git a/lib/furlex.ex b/lib/furlex.ex index 8d74e2e..134fb83 100644 --- a/lib/furlex.ex +++ b/lib/furlex.ex @@ -12,26 +12,33 @@ defmodule Furlex do alias Furlex.Parser.{Facebook, HTML, JsonLD, Twitter} defstruct [ - :canonical_url, :oembed, :facebook, :twitter, :json_ld, :other, :status_code + :canonical_url, + :oembed, + :facebook, + :twitter, + :json_ld, + :other, + :status_code ] @type t :: %__MODULE__{ - canonical_url: String.t, - oembed: nil | Map.t, - facebook: Map.t, - twitter: Map.t, - json_ld: List.t, - other: Map.t, - status_code: Integer.t, - } + canonical_url: String.t(), + oembed: nil | Map.t(), + facebook: Map.t(), + twitter: Map.t(), + json_ld: List.t(), + other: Map.t(), + status_code: Integer.t() + } @doc false def start(_type, _args) do import Supervisor.Spec - opts = [strategy: :one_for_one, name: Furlex.Supervisor] + opts = [strategy: :one_for_one, name: Furlex.Supervisor] + children = [ - worker(Furlex.Oembed, [[name: Furlex.Oembed]]), + worker(Furlex.Oembed, [[name: Furlex.Oembed]]) ] Supervisor.start_link(children, opts) @@ -45,32 +52,31 @@ defmodule Furlex do unfurl/2 also accepts a keyword list that will be passed to HTTPoison. """ - @spec unfurl(String.t, Keyword.t) :: {:ok, __MODULE__.t} | {:error, Atom.t} + @spec unfurl(String.t(), Keyword.t()) :: {:ok, __MODULE__.t()} | {:error, Atom.t()} def unfurl(url, opts \\ []) do with {:ok, {body, status_code}, oembed} <- fetch(url, opts), - {:ok, results} <- parse(body) - do - {:ok, %__MODULE__{ - canonical_url: Parser.extract_canonical(body), - oembed: oembed, - facebook: results.facebook, - twitter: results.twitter, - json_ld: results.json_ld, - other: results.other, - status_code: status_code, - }} + {:ok, results} <- parse(body) do + {:ok, + %__MODULE__{ + canonical_url: Parser.extract_canonical(body), + oembed: oembed, + facebook: results.facebook, + twitter: results.twitter, + json_ld: results.json_ld, + other: results.other, + status_code: status_code + }} end end defp fetch(url, opts) do - fetch = Task.async Fetcher, :fetch, [ url, opts ] - fetch_oembed = Task.async Fetcher, :fetch_oembed, [ url, opts ] - yield = Task.yield_many [fetch, fetch_oembed] - - with [ fetch, fetch_oembed ] <- yield, - {_fetch, {:ok, {:ok, body, status_code}}} <- fetch, - {_fetch_oembed, {:ok, {:ok, oembed}}} <- fetch_oembed - do + fetch = Task.async(Fetcher, :fetch, [url, opts]) + fetch_oembed = Task.async(Fetcher, :fetch_oembed, [url, opts]) + yield = Task.yield_many([fetch, fetch_oembed]) + + with [fetch, fetch_oembed] <- yield, + {_fetch, {:ok, {:ok, body, status_code}}} <- fetch, + {_fetch_oembed, {:ok, {:ok, oembed}}} <- fetch_oembed do {:ok, {body, status_code}, oembed} else _ -> {:error, :fetch_error} @@ -78,21 +84,21 @@ defmodule Furlex do end defp parse(body) do - parse = &Task.async(&1, :parse, [ body ]) + parse = &Task.async(&1, :parse, [body]) tasks = Enum.map([Facebook, Twitter, JsonLD, HTML], parse) - with [ facebook, twitter, json_ld, other ] <- Task.yield_many(tasks), - {_facebook, {:ok, {:ok, facebook}}} <- facebook, - {_twitter, {:ok, {:ok, twitter}}} <- twitter, - {_json_ld, {:ok, {:ok, json_ld}}} <- json_ld, - {_other, {:ok, {:ok, other}}} <- other - do - {:ok, %{ - facebook: facebook, - twitter: twitter, - json_ld: json_ld, - other: other - }} + with [facebook, twitter, json_ld, other] <- Task.yield_many(tasks), + {_facebook, {:ok, {:ok, facebook}}} <- facebook, + {_twitter, {:ok, {:ok, twitter}}} <- twitter, + {_json_ld, {:ok, {:ok, json_ld}}} <- json_ld, + {_other, {:ok, {:ok, other}}} <- other do + {:ok, + %{ + facebook: facebook, + twitter: twitter, + json_ld: json_ld, + other: other + }} else _ -> {:error, :parse_error} end diff --git a/lib/furlex/fetcher.ex b/lib/furlex/fetcher.ex index 540f9d6..58e90b6 100644 --- a/lib/furlex/fetcher.ex +++ b/lib/furlex/fetcher.ex @@ -12,32 +12,31 @@ defmodule Furlex.Fetcher do @doc """ Fetches a url and extracts the body """ - @spec fetch(String.t, List.t) :: {:ok, String.t, Integer.t} | {:error, Atom.t} + @spec fetch(String.t(), List.t()) :: {:ok, String.t(), Integer.t()} | {:error, Atom.t()} def fetch(url, opts \\ []) do case HTTPoison.get(url, [], opts) do {:ok, %{body: body, status_code: status_code}} -> {:ok, body, status_code} - other -> other + other -> other end end @doc """ Fetches oembed data for the given url """ - @spec fetch_oembed(String.t, List.t) :: {:ok, String.t} | {:ok, nil} | {:error, Atom.t} + @spec fetch_oembed(String.t(), List.t()) :: {:ok, String.t()} | {:ok, nil} | {:error, Atom.t()} def fetch_oembed(url, opts \\ []) do with {:ok, endpoint} <- Oembed.endpoint_from_url(url), - params = %{"url" => url}, - opts = Keyword.put(opts, :params, params), + params = %{"url" => url}, + opts = Keyword.put(opts, :params, params), {:ok, response} <- HTTPoison.get(endpoint, [], opts), - {:ok, body} <- @json_library.decode(response.body) - do + {:ok, body} <- @json_library.decode(response.body) do {:ok, body} else {:error, :no_oembed_provider} -> {:ok, nil} other -> - "Could not fetch oembed for #{inspect url}: #{inspect other}" + "Could not fetch oembed for #{inspect(url)}: #{inspect(other)}" |> Logger.error() {:ok, nil} diff --git a/lib/furlex/oembed.ex b/lib/furlex/oembed.ex index ab185be..2a01ae1 100644 --- a/lib/furlex/oembed.ex +++ b/lib/furlex/oembed.ex @@ -16,22 +16,24 @@ defmodule Furlex.Oembed do Soft fetch will fetch cached providers. Hard fetch requests providers from oembed.com and purges the cache. """ - @spec fetch_providers(Atom.t) :: {:ok, List.t} | {:error, Atom.t} + @spec fetch_providers(Atom.t()) :: {:ok, List.t()} | {:error, Atom.t()} def fetch_providers(type \\ :soft) + def fetch_providers(:hard) do case get("/providers.json") do {:ok, %{body: providers}} -> - GenServer.cast __MODULE__, {:providers, providers} + GenServer.cast(__MODULE__, {:providers, providers}) {:ok, providers} - other -> - Logger.error "Could not fetch providers: #{inspect other}" + other -> + Logger.error("Could not fetch providers: #{inspect(other)}") {:error, :fetch_error} end end + def fetch_providers(_soft) do case GenServer.call(__MODULE__, :providers) do - nil -> fetch_providers(:hard) + nil -> fetch_providers(:hard) providers -> {:ok, providers} end end @@ -47,10 +49,10 @@ defmodule Furlex.Oembed do iex> Oembed.endpoint_from_url "https://vimeo.com/88856141", %{"format" => "xml"} {:ok, "https://vimeo.com/api/oembed.xml"} """ - @spec endpoint_from_url(String.t, Map.t) :: {:ok, String.t} | {:error, Atom.t} + @spec endpoint_from_url(String.t(), Map.t()) :: {:ok, String.t()} | {:error, Atom.t()} def endpoint_from_url(url, params \\ %{"format" => "json"}, opts \\ []) do case provider_from_url(url, opts) do - nil -> + nil -> {:error, :no_oembed_provider} provider -> @@ -60,39 +62,38 @@ defmodule Furlex.Oembed do # Maps a url to a provider, or returns nil if no such provider exists defp provider_from_url(url, opts) do - fetch_type = - if Keyword.get(opts, :skip_cache?, false), do: :hard, else: :soft + fetch_type = if Keyword.get(opts, :skip_cache?, false), do: :hard, else: :soft {:ok, providers} = fetch_providers(fetch_type) case URI.parse(url) do - %URI{host: nil} -> + %URI{host: nil} -> nil %URI{host: host} -> - Enum.find providers, &host_matches?(host, &1) + Enum.find(providers, &host_matches?(host, &1)) end end defp endpoint_from_provider(provider, params) do - [ endpoint | _] = provider["endpoints"] + [endpoint | _] = provider["endpoints"] - url = endpoint["url"] + url = endpoint["url"] regex = ~r/{(.*?)}/ - url = Regex.replace regex, url, fn _, key -> params[key] end + url = Regex.replace(regex, url, fn _, key -> params[key] end) {:ok, url} end defp host_matches?(host, %{"provider_url" => provider_url}) do - Regex.match? ~r/https?:\/\/#{host}/, provider_url + Regex.match?(~r/https?:\/\/#{host}/, provider_url) end ## GenServer callbacks @doc false def start_link(opts \\ []) do - GenServer.start_link __MODULE__, nil, opts + GenServer.start_link(__MODULE__, nil, opts) end def init(state) do diff --git a/lib/furlex/parser.ex b/lib/furlex/parser.ex index e93f10c..362b310 100644 --- a/lib/furlex/parser.ex +++ b/lib/furlex/parser.ex @@ -1,16 +1,15 @@ defmodule Furlex.Parser do - @doc """ Parses the given HTML, returning a map structure of structured data keys mapping to their respective values, or an error. """ - @callback parse(html :: String.t) :: {:ok, Map.t} | {:error, Atom.t} + @callback parse(html :: String.t()) :: {:ok, Map.t()} | {:error, Atom.t()} @doc """ Extracts the given tags from the given raw html according to the given match function """ - @spec extract(List.t | String.t, String.t, Function.t) :: Map.t + @spec extract(List.t() | String.t(), String.t(), Function.t()) :: Map.t() def extract(tags, html, match) when is_list(tags) do tags |> Stream.map(&extract(&1, html, match)) @@ -18,15 +17,18 @@ defmodule Furlex.Parser do |> Map.new() |> group_keys() end + def extract(tag, html, match) do case Floki.find(html, match.(tag)) do - nil -> nil + nil -> + nil + elements -> content = case do_extract_content(elements) do - [] -> nil - [ element ] -> element - content -> content + [] -> nil + [element] -> element + content -> content end {tag, content} @@ -34,10 +36,12 @@ defmodule Furlex.Parser do end @doc "Extracts a canonical url from the given raw HTML" - @spec extract_canonical(String.t) :: nil | String.t + @spec extract_canonical(String.t()) :: nil | String.t() def extract_canonical(html) do case Floki.find(html, "link[rel=\"canonical\"]") do - [] -> nil + [] -> + nil + elements -> elements |> Floki.attribute("href") @@ -65,25 +69,28 @@ defmodule Furlex.Parser do } } """ - @spec group_keys(Map.t) :: Map.t + @spec group_keys(Map.t()) :: Map.t() def group_keys(map) + def group_keys(map) do if Application.get_env(:furlex, :group_keys?) do - Enum.reduce map, %{}, fn + Enum.reduce(map, %{}, fn {_, v}, _acc when is_map(v) -> group_keys(v) - {k, v}, acc -> do_group_keys(k, v, acc) - end + {k, v}, acc -> do_group_keys(k, v, acc) + end) else map end end defp do_group_keys(key, value, acc) do - [ h | t ] = key |> String.split(":") |> Enum.reverse() - base = Map.new [{h, value}] - result = Enum.reduce t, base, fn key, sub_acc -> - Map.new([{key, sub_acc}]) - end + [h | t] = key |> String.split(":") |> Enum.reverse() + base = Map.new([{h, value}]) + + result = + Enum.reduce(t, base, fn key, sub_acc -> + Map.new([{key, sub_acc}]) + end) deep_merge(acc, result) end @@ -95,15 +102,16 @@ defmodule Furlex.Parser do defp deep_resolve(_key, left = %{}, right = %{}) do deep_merge(left, right) end + defp deep_resolve(_key, _left, right) do right end defp do_extract_content(elements) do - Enum.map elements, fn element -> + Enum.map(elements, fn element -> element |> Floki.attribute("content") |> Enum.at(0) - end + end) end end diff --git a/lib/furlex/parser/facebook.ex b/lib/furlex/parser/facebook.ex index 8379371..c91e511 100644 --- a/lib/furlex/parser/facebook.ex +++ b/lib/furlex/parser/facebook.ex @@ -27,10 +27,10 @@ defmodule Furlex.Parser.Facebook do video:release_date video:tag video:writer video:series ) - @spec parse(String.t) :: {:ok, Map.t} + @spec parse(String.t()) :: {:ok, Map.t()} def parse(html) do - meta = &("meta[property=\"#{&1}\"]") - map = Parser.extract tags(), html, meta + meta = &"meta[property=\"#{&1}\"]" + map = Parser.extract(tags(), html, meta) {:ok, map} end diff --git a/lib/furlex/parser/html.ex b/lib/furlex/parser/html.ex index 286fb6f..9a3ccbc 100644 --- a/lib/furlex/parser/html.ex +++ b/lib/furlex/parser/html.ex @@ -3,10 +3,12 @@ defmodule Furlex.Parser.HTML do alias Furlex.Parser.{Facebook, Twitter} - @spec parse(String.t) :: nil | {:ok, Map.t} + @spec parse(String.t()) :: nil | {:ok, Map.t()} def parse(html) do case Floki.find(html, "meta[name]") do - nil -> {:ok, %{}} + nil -> + {:ok, %{}} + elements -> content = elements @@ -19,16 +21,18 @@ defmodule Furlex.Parser.HTML do # Filter out plain meta elements from Twitter, Facebook, etc. defp filter_other(elements) do - Enum.reject elements, fn element -> - extract_attribute(element, "name") in (Facebook.tags ++ Twitter.tags) - end + Enum.reject(elements, fn element -> + extract_attribute(element, "name") in (Facebook.tags() ++ Twitter.tags()) + end) end defp to_map(element, acc) do - key = extract_attribute(element, "name") + key = extract_attribute(element, "name") existing = Map.get(acc, key) - to_add = extract_attribute(element, "content") || - extract_attribute(element, "property") + + to_add = + extract_attribute(element, "content") || + extract_attribute(element, "property") if is_nil(existing) do Map.put(acc, key, to_add) @@ -38,8 +42,8 @@ defmodule Furlex.Parser.HTML do |> prepend(existing) |> Enum.uniq() |> case do - [ element ] -> element - list -> list + [element] -> element + list -> list end Map.put(acc, key, value) @@ -48,11 +52,11 @@ defmodule Furlex.Parser.HTML do defp extract_attribute(element, key) do case Floki.attribute(element, key) do - [ attribute ] -> attribute - _ -> nil + [attribute] -> attribute + _ -> nil end end - defp prepend(value, list) when is_list(list), do: [ value | list ] - defp prepend(value, element), do: [ value | [ element ]] + defp prepend(value, list) when is_list(list), do: [value | list] + defp prepend(value, element), do: [value | [element]] end diff --git a/lib/furlex/parser/json_ld.ex b/lib/furlex/parser/json_ld.ex index a4d561b..7000dbe 100644 --- a/lib/furlex/parser/json_ld.ex +++ b/lib/furlex/parser/json_ld.ex @@ -3,12 +3,12 @@ defmodule Furlex.Parser.JsonLD do @json_library Application.get_env(:furlex, :json_library, Jason) - @spec parse(String.t) :: nil | {:ok, List.t} + @spec parse(String.t()) :: nil | {:ok, List.t()} def parse(html) do meta = "script[type=\"application/ld+json\"]" case Floki.find(html, meta) do - nil -> + nil -> {:ok, []} elements -> diff --git a/lib/furlex/parser/twitter.ex b/lib/furlex/parser/twitter.ex index 8ca25b9..91c57d2 100644 --- a/lib/furlex/parser/twitter.ex +++ b/lib/furlex/parser/twitter.ex @@ -13,10 +13,10 @@ defmodule Furlex.Parser.Twitter do twitter:app:url:googleplay twitter:app:id:googleplay ) - @spec parse(String.t) :: {:ok, Map.t} + @spec parse(String.t()) :: {:ok, Map.t()} def parse(html) do - meta = &("meta[name=\"#{&1}\"]") - map = Parser.extract tags(), html, meta + meta = &"meta[name=\"#{&1}\"]" + map = Parser.extract(tags(), html, meta) {:ok, map} end diff --git a/mix.exs b/mix.exs index 3500e40..97024d8 100644 --- a/mix.exs +++ b/mix.exs @@ -6,8 +6,8 @@ defmodule Furlex.Mixfile do app: :furlex, version: "0.4.3", elixir: "~> 1.4", - build_embedded: Mix.env == :prod, - start_permanent: Mix.env == :prod, + build_embedded: Mix.env() == :prod, + start_permanent: Mix.env() == :prod, description: description(), package: package(), deps: deps(), @@ -37,11 +37,9 @@ defmodule Furlex.Mixfile do {:httpoison, "~> 1.5"}, {:jason, "~> 1.0", optional: true}, {:plug_cowboy, "~> 1.0 or ~> 2.0"}, - {:benchee, "~> 0.13", only: :dev}, {:ex_doc, "~> 0.19", only: :dev, runtime: false}, - - {:bypass, "~> 0.8", only: :test}, + {:bypass, "~> 0.8", only: :test} ] end @@ -62,7 +60,7 @@ defmodule Furlex.Mixfile do licenses: ["Apache 2.0"], links: %{ "Github" => "http://github.com/claytongentry/furlex", - "Docs" => "http://hexdocs.pm/furlex", + "Docs" => "http://hexdocs.pm/furlex" } ] end diff --git a/test/furlex/fetcher_test.exs b/test/furlex/fetcher_test.exs index 84542c3..f53a6d6 100644 --- a/test/furlex/fetcher_test.exs +++ b/test/furlex/fetcher_test.exs @@ -7,21 +7,21 @@ defmodule Furlex.FetcherTest do setup do bypass = Bypass.open() - url = "http://localhost:#{bypass.port}" + url = "http://localhost:#{bypass.port}" {:ok, bypass: bypass, url: url} end test "fetches url", %{bypass: bypass, url: url} do - Bypass.expect_once bypass, &handle/1 + Bypass.expect_once(bypass, &handle/1) assert {:ok, body, 200} = Fetcher.fetch(url) - assert body =~ "Test HTML" + assert body =~ "Test HTML" end test "fetches url with options", %{url: url} do assert {:error, %HTTPoison.Error{reason: :checkout_timeout}} == - Fetcher.fetch(url, timeout: 0) + Fetcher.fetch(url, timeout: 0) end def handle(conn) do @@ -30,6 +30,6 @@ defmodule Furlex.FetcherTest do |> Path.join() |> File.read!() - Plug.Conn.resp conn, 200, body + Plug.Conn.resp(conn, 200, body) end end diff --git a/test/furlex/oembed_test.exs b/test/furlex/oembed_test.exs index 70176bb..a89a41f 100644 --- a/test/furlex/oembed_test.exs +++ b/test/furlex/oembed_test.exs @@ -5,31 +5,31 @@ defmodule Furlex.OembedTest do setup do bypass = Bypass.open() - url = "http://localhost:#{bypass.port}" - config = Application.get_env :furlex, Oembed, [] + url = "http://localhost:#{bypass.port}" + config = Application.get_env(:furlex, Oembed, []) - new_config = Keyword.put config, :oembed_host, url - Application.put_env :furlex, Oembed, new_config + new_config = Keyword.put(config, :oembed_host, url) + Application.put_env(:furlex, Oembed, new_config) - on_exit fn -> - Application.put_env :furlex, Oembed, config + on_exit(fn -> + Application.put_env(:furlex, Oembed, config) :ok - end + end) {:ok, bypass: bypass} end test "returns endpoint from url", %{bypass: bypass} do - Bypass.expect bypass, &handle/1 + Bypass.expect(bypass, &handle/1) assert {:error, :no_oembed_provider} == - Oembed.endpoint_from_url("foobar") + Oembed.endpoint_from_url("foobar") - url = "https://vimeo.com/88856141" + url = "https://vimeo.com/88856141" params = %{"format" => "json"} - {:ok, endpoint} = Oembed.endpoint_from_url(url, params, [skip_cache?: true]) + {:ok, endpoint} = Oembed.endpoint_from_url(url, params, skip_cache?: true) assert endpoint == "https://vimeo.com/api/oembed.json" end @@ -42,6 +42,6 @@ defmodule Furlex.OembedTest do |> Path.join() |> File.read!() - Plug.Conn.resp conn, 200, providers + Plug.Conn.resp(conn, 200, providers) end end diff --git a/test/furlex/parser/facebook_test.exs b/test/furlex/parser/facebook_test.exs index 913d9ee..0c283cb 100644 --- a/test/furlex/parser/facebook_test.exs +++ b/test/furlex/parser/facebook_test.exs @@ -6,12 +6,15 @@ defmodule Furlex.Parser.FacebookTest do doctest Facebook test "parses Facebook Open Graph" do - html = " - "content=\"www.example.com\"/>" + html = + " + "content=\"www.example.com\"/>" - assert {:ok, %{ - "og" => %{ - "url" => "www.example.com" - }}} == Facebook.parse(html) + assert {:ok, + %{ + "og" => %{ + "url" => "www.example.com" + } + }} == Facebook.parse(html) end end diff --git a/test/furlex/parser/html_test.exs b/test/furlex/parser/html_test.exs index 909f3b2..d8399f7 100644 --- a/test/furlex/parser/html_test.exs +++ b/test/furlex/parser/html_test.exs @@ -9,7 +9,7 @@ defmodule Furlex.Parser.HTMLTest do test "parses HTML meta data" do html = - [ __DIR__ | ~w(.. .. fixtures test.html) ] + [__DIR__ | ~w(.. .. fixtures test.html)] |> Path.join() |> File.read!() @@ -19,18 +19,20 @@ defmodule Furlex.Parser.HTMLTest do test "dedupes meta data" do html = - [ __DIR__ | ~w(.. .. fixtures duplicate_meta.html) ] + [__DIR__ | ~w(.. .. fixtures duplicate_meta.html)] |> Path.join() |> File.read!() assert {:ok, meta} = HTML.parse(html) assert meta["generator"] == "Loja Integrada" + assert meta["google-site-verification"] == [ - "GbnYBmQLHGrgQRVEi4b2fzcrAA81TMh86T3Z1kDDW-c", - "og5Ef6ntOLY0CrU0H8mURx_WwrlZc9Hz2HDXQGWOdAg", - "66Kpz8sWyMtS35U7Eodir6sXoV5gJe7a9kNN9xQQnYE" - ] + "GbnYBmQLHGrgQRVEi4b2fzcrAA81TMh86T3Z1kDDW-c", + "og5Ef6ntOLY0CrU0H8mURx_WwrlZc9Hz2HDXQGWOdAg", + "66Kpz8sWyMtS35U7Eodir6sXoV5gJe7a9kNN9xQQnYE" + ] + assert meta["robots"] == "index, follow" # Ensure resultant meta is encodable diff --git a/test/furlex/parser/json_ld_test.exs b/test/furlex/parser/json_ld_test.exs index d999a35..9c2cf84 100644 --- a/test/furlex/parser/json_ld_test.exs +++ b/test/furlex/parser/json_ld_test.exs @@ -6,21 +6,20 @@ defmodule Furlex.Parser.JsonLDTest do doctest JsonLD test "parses JSON-LD" do - html = - """ - - """ + html = """ + + """ - assert {:ok, [json_ld]} = JsonLD.parse(html) + assert {:ok, [json_ld]} = JsonLD.parse(html) - assert Map.get(json_ld, "@context") == "http://schema.org" - assert Map.get(json_ld, "name") == "Example" - assert Map.get(json_ld, "@type") == "WebSite" - assert Map.get(json_ld, "url") == "https://www.example.com" + assert Map.get(json_ld, "@context") == "http://schema.org" + assert Map.get(json_ld, "name") == "Example" + assert Map.get(json_ld, "@type") == "WebSite" + assert Map.get(json_ld, "url") == "https://www.example.com" end end diff --git a/test/furlex/parser/twitter_test.exs b/test/furlex/parser/twitter_test.exs index b60ce75..adf2768 100644 --- a/test/furlex/parser/twitter_test.exs +++ b/test/furlex/parser/twitter_test.exs @@ -6,12 +6,15 @@ defmodule Furlex.Parser.TwitterTest do doctest Twitter test "parses Twitter Cards" do - html = " - "content=\"www.example.com\"/>" + html = + " + "content=\"www.example.com\"/>" - assert {:ok, %{ - "twitter" => %{ - "image" => "www.example.com" - }}} == Twitter.parse(html) + assert {:ok, + %{ + "twitter" => %{ + "image" => "www.example.com" + } + }} == Twitter.parse(html) end end diff --git a/test/furlex/parser_test.exs b/test/furlex/parser_test.exs index 8e6b267..74cbacf 100644 --- a/test/furlex/parser_test.exs +++ b/test/furlex/parser_test.exs @@ -6,24 +6,25 @@ defmodule Furlex.ParserTest do doctest Parser setup do - Application.put_env :furlex, :group_keys?, true + Application.put_env(:furlex, :group_keys?, true) end test "extracts tags from html" do html = """ - - """ + + """ - tags = Parser.extract ["foobar"], html, &("meta[name=\"#{&1}\"]") + tags = Parser.extract(["foobar"], html, &"meta[name=\"#{&1}\"]") - assert tags["foobar"] == "foobaz" + assert tags["foobar"] == "foobaz" end test "extracts canonical url from html" do - html = " - "href=\"www.example.com\"/>" + html = + " + "href=\"www.example.com\"/>" - assert is_nil Parser.extract_canonical("foobar") + assert is_nil(Parser.extract_canonical("foobar")) assert Parser.extract_canonical(html) == "www.example.com" end @@ -39,19 +40,19 @@ defmodule Furlex.ParserTest do result = Parser.group_keys(map) assert result == %{ - "twitter" => %{ - "app" => %{ - "id" => %{ - "googleplay" => "com.google.android.youtube", - "ipad" => "544007664" - }, - "name" => %{ - "googleplay" => "YouTube", - "iphone" => "YouTube" - } - }, - "card" => "player" - } - } + "twitter" => %{ + "app" => %{ + "id" => %{ + "googleplay" => "com.google.android.youtube", + "ipad" => "544007664" + }, + "name" => %{ + "googleplay" => "YouTube", + "iphone" => "YouTube" + } + }, + "card" => "player" + } + } end end diff --git a/test/furlex_test.exs b/test/furlex_test.exs index 27e0e2d..a89c91a 100644 --- a/test/furlex_test.exs +++ b/test/furlex_test.exs @@ -3,7 +3,7 @@ defmodule FurlexTest do setup do bypass = Bypass.open() - url = "http://localhost:#{bypass.port}" + url = "http://localhost:#{bypass.port}" oembed = Furlex.Oembed oembed_config = Application.get_env(:furlex, oembed, []) @@ -11,23 +11,23 @@ defmodule FurlexTest do Application.put_env(:furlex, oembed, new_config) - on_exit fn -> + on_exit(fn -> Application.put_env(:furlex, oembed, oembed_config) :ok - end + end) {:ok, bypass: bypass, url: url} end test "unfurls a url", %{bypass: bypass, url: url} do - Bypass.expect bypass, &handle/1 + Bypass.expect(bypass, &handle/1) assert {:ok, %Furlex{} = furlex} = Furlex.unfurl(url) - assert furlex.status_code == 200 - assert furlex.facebook["og"]["site_name"] == "Vimeo" - assert furlex.twitter["twitter"]["title"] == "FIDLAR - Cocaine (Feat. Nick Offerman)" + assert furlex.status_code == 200 + assert furlex.facebook["og"]["site_name"] == "Vimeo" + assert furlex.twitter["twitter"]["title"] == "FIDLAR - Cocaine (Feat. Nick Offerman)" assert Enum.at(furlex.json_ld, 0)["@type"] == "VideoObject" end @@ -39,14 +39,15 @@ defmodule FurlexTest do |> Path.join() |> File.read!() - Plug.Conn.resp conn, 200, providers + Plug.Conn.resp(conn, 200, providers) end + def handle(conn) do html = [__DIR__ | ~w(fixtures vimeo.html)] |> Path.join() |> File.read!() - Plug.Conn.resp conn, 200, html + Plug.Conn.resp(conn, 200, html) end end diff --git a/test/test_helper.exs b/test/test_helper.exs index 1fa1576..1a33fff 100644 --- a/test/test_helper.exs +++ b/test/test_helper.exs @@ -1,2 +1,2 @@ ExUnit.start() -{:ok, _ } = Application.ensure_all_started(:bypass) +{:ok, _} = Application.ensure_all_started(:bypass) From ef8a81cc6565451621804a40668ee3198b9109cd Mon Sep 17 00:00:00 2001 From: Neil Berkman Date: Tue, 10 Dec 2019 13:13:51 -0800 Subject: [PATCH 2/6] Handle providers listed with subdomains host_matches? previously would fail to match providers with URLs such as http://www.twitter.com/. Generalized the regex to handle subdomains. --- lib/furlex/oembed.ex | 2 +- test/fixtures/providers.json | 15 ++++++++++++++- test/furlex/oembed_test.exs | 14 ++++++++++++++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/lib/furlex/oembed.ex b/lib/furlex/oembed.ex index 2a01ae1..dcf7a5e 100644 --- a/lib/furlex/oembed.ex +++ b/lib/furlex/oembed.ex @@ -86,7 +86,7 @@ defmodule Furlex.Oembed do end defp host_matches?(host, %{"provider_url" => provider_url}) do - Regex.match?(~r/https?:\/\/#{host}/, provider_url) + Regex.match?(~r/https?:\/\/([a-zA-Z0-9]+\.)?#{host}/, provider_url) end ## GenServer callbacks diff --git a/test/fixtures/providers.json b/test/fixtures/providers.json index 73ed851..5627b32 100644 --- a/test/fixtures/providers.json +++ b/test/fixtures/providers.json @@ -1,4 +1,17 @@ [ + { + "provider_name": "Twitter", + "provider_url": "http:\/\/www.twitter.com/", + "endpoints": [ + { + "schemes": [ + "https:\/\/twitter.com\/*\/status\/*", + "https:\/\/*.twitter.com\/*\/status\/*" + ], + "url": "https:\/\/publish.twitter.com\/oembed" + } + ] + }, { "provider_name": "VideoJug", "provider_url": "http:\/\/www.videojug.com", @@ -147,4 +160,4 @@ } ] } -] +] \ No newline at end of file diff --git a/test/furlex/oembed_test.exs b/test/furlex/oembed_test.exs index a89a41f..8964f81 100644 --- a/test/furlex/oembed_test.exs +++ b/test/furlex/oembed_test.exs @@ -34,6 +34,20 @@ defmodule Furlex.OembedTest do assert endpoint == "https://vimeo.com/api/oembed.json" end + test "returns endpoint from url with subdomain", %{bypass: bypass} do + Bypass.expect(bypass, &handle/1) + + assert {:error, :no_oembed_provider} == + Oembed.endpoint_from_url("foobar") + + url = "https://twitter.com/arshia__/status/1204481088422178817?s=20" + params = %{"format" => "json"} + + {:ok, endpoint} = Oembed.endpoint_from_url(url, params, skip_cache?: true) + + assert endpoint == "https://publish.twitter.com/oembed" + end + def handle(%{request_path: "/providers.json"} = conn) do assert conn.method == "GET" From 4f786c60e18daf5e82b781b2b68db2e8aa884a42 Mon Sep 17 00:00:00 2001 From: technicalcapt Date: Thu, 12 Dec 2019 20:32:11 +0700 Subject: [PATCH 3/6] increase Task timeout and add test async --- lib/furlex.ex | 4 ++-- test/furlex/parser/facebook_test.exs | 6 +++++- test/furlex/parser/twitter_test.exs | 6 +++++- test/furlex_test.exs | 2 +- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/lib/furlex.ex b/lib/furlex.ex index 134fb83..56c9392 100644 --- a/lib/furlex.ex +++ b/lib/furlex.ex @@ -72,7 +72,7 @@ defmodule Furlex do defp fetch(url, opts) do fetch = Task.async(Fetcher, :fetch, [url, opts]) fetch_oembed = Task.async(Fetcher, :fetch_oembed, [url, opts]) - yield = Task.yield_many([fetch, fetch_oembed]) + yield = Task.yield_many([fetch, fetch_oembed], 10_000) with [fetch, fetch_oembed] <- yield, {_fetch, {:ok, {:ok, body, status_code}}} <- fetch, @@ -87,7 +87,7 @@ defmodule Furlex do parse = &Task.async(&1, :parse, [body]) tasks = Enum.map([Facebook, Twitter, JsonLD, HTML], parse) - with [facebook, twitter, json_ld, other] <- Task.yield_many(tasks), + with [facebook, twitter, json_ld, other] <- Task.yield_many(tasks, 18_000), {_facebook, {:ok, {:ok, facebook}}} <- facebook, {_twitter, {:ok, {:ok, twitter}}} <- twitter, {_json_ld, {:ok, {:ok, json_ld}}} <- json_ld, diff --git a/test/furlex/parser/facebook_test.exs b/test/furlex/parser/facebook_test.exs index 0c283cb..403abc8 100644 --- a/test/furlex/parser/facebook_test.exs +++ b/test/furlex/parser/facebook_test.exs @@ -1,10 +1,14 @@ defmodule Furlex.Parser.FacebookTest do - use ExUnit.Case + use ExUnit.Case, async: true alias Furlex.Parser.Facebook doctest Facebook + setup do + Application.put_env(:furlex, :group_keys?, true) + end + test "parses Facebook Open Graph" do html = " diff --git a/test/furlex/parser/twitter_test.exs b/test/furlex/parser/twitter_test.exs index adf2768..e770957 100644 --- a/test/furlex/parser/twitter_test.exs +++ b/test/furlex/parser/twitter_test.exs @@ -1,10 +1,14 @@ defmodule Furlex.Parser.TwitterTest do - use ExUnit.Case + use ExUnit.Case, async: true alias Furlex.Parser.Twitter doctest Twitter + setup do + Application.put_env(:furlex, :group_keys?, true) + end + test "parses Twitter Cards" do html = " diff --git a/test/furlex_test.exs b/test/furlex_test.exs index a89c91a..6666471 100644 --- a/test/furlex_test.exs +++ b/test/furlex_test.exs @@ -1,5 +1,5 @@ defmodule FurlexTest do - use ExUnit.Case + use ExUnit.Case, async: true setup do bypass = Bypass.open() From a1ae86dff10c920cd686f6ae1108ae5999ac6047 Mon Sep 17 00:00:00 2001 From: Neil Berkman Date: Sun, 29 Nov 2020 23:25:34 -0800 Subject: [PATCH 4/6] Update some dependencies --- mix.exs | 8 ++++---- mix.lock | 58 ++++++++++++++++++++++++++++++-------------------------- 2 files changed, 35 insertions(+), 31 deletions(-) diff --git a/mix.exs b/mix.exs index 97024d8..900b200 100644 --- a/mix.exs +++ b/mix.exs @@ -33,13 +33,13 @@ defmodule Furlex.Mixfile do defp deps do [ - {:floki, "~> 0.17.0"}, + {:floki, "~> 0.29.0"}, {:httpoison, "~> 1.5"}, {:jason, "~> 1.0", optional: true}, - {:plug_cowboy, "~> 1.0 or ~> 2.0"}, - {:benchee, "~> 0.13", only: :dev}, + {:plug_cowboy, "~> 2.4.1"}, + {:benchee, "~> 1.0.1", only: :dev}, {:ex_doc, "~> 0.19", only: :dev, runtime: false}, - {:bypass, "~> 0.8", only: :test} + {:bypass, "~> 2.1.0", only: :test} ] end diff --git a/mix.lock b/mix.lock index 101f81c..f1f2ab1 100644 --- a/mix.lock +++ b/mix.lock @@ -1,29 +1,33 @@ %{ - "benchee": {:hex, :benchee, "0.99.0", "0efbfc31045ad2f75a48673bd1befa8a6a5855e93b8c3117aed7d7da8de65b71", [:mix], [{:deep_merge, "~> 1.0", [hex: :deep_merge, repo: "hexpm", optional: false]}], "hexpm"}, - "bypass": {:hex, :bypass, "0.9.0", "4cedcd326eeec497e0090a73d351cbd0f11e39329ddf9095931b03da9b6dc417", [:mix], [{:cowboy, "~> 1.0 or ~> 2.0", [hex: :cowboy, repo: "hexpm", optional: false]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm"}, - "certifi": {:hex, :certifi, "2.5.1", "867ce347f7c7d78563450a18a6a28a8090331e77fa02380b4a21962a65d36ee5", [:rebar3], [{:parse_trans, "~>3.3", [hex: :parse_trans, repo: "hexpm", optional: false]}], "hexpm"}, - "cowboy": {:hex, :cowboy, "1.1.2", "61ac29ea970389a88eca5a65601460162d370a70018afe6f949a29dca91f3bb0", [:rebar3], [{:cowlib, "~> 1.0.2", [hex: :cowlib, repo: "hexpm", optional: false]}, {:ranch, "~> 1.3.2", [hex: :ranch, repo: "hexpm", optional: false]}], "hexpm"}, - "cowlib": {:hex, :cowlib, "1.0.2", "9d769a1d062c9c3ac753096f868ca121e2730b9a377de23dec0f7e08b1df84ee", [:make], [], "hexpm"}, - "deep_merge": {:hex, :deep_merge, "1.0.0", "b4aa1a0d1acac393bdf38b2291af38cb1d4a52806cf7a4906f718e1feb5ee961", [:mix], [], "hexpm"}, - "earmark": {:hex, :earmark, "1.3.2", "b840562ea3d67795ffbb5bd88940b1bed0ed9fa32834915125ea7d02e35888a5", [:mix], [], "hexpm"}, - "ex_doc": {:hex, :ex_doc, "0.20.2", "1bd0dfb0304bade58beb77f20f21ee3558cc3c753743ae0ddbb0fd7ba2912331", [:mix], [{:earmark, "~> 1.3", [hex: :earmark, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.10", [hex: :makeup_elixir, repo: "hexpm", optional: false]}], "hexpm"}, - "floki": {:hex, :floki, "0.17.2", "81b3a39d85f5cae39c8da16236ce152f7f8f50faf84b480ba53351d7e96ca6ca", [:mix], [{:mochiweb, "~> 2.15", [hex: :mochiweb, repo: "hexpm", optional: false]}], "hexpm"}, - "hackney": {:hex, :hackney, "1.15.1", "9f8f471c844b8ce395f7b6d8398139e26ddca9ebc171a8b91342ee15a19963f4", [:rebar3], [{:certifi, "2.5.1", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "6.0.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "1.0.1", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "1.1.4", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm"}, - "httpoison": {:hex, :httpoison, "1.5.1", "0f55b5b673b03c5c327dac7015a67cb571b99b631acc0bc1b0b98dcd6b9f2104", [:mix], [{:hackney, "~> 1.8", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm"}, - "idna": {:hex, :idna, "6.0.0", "689c46cbcdf3524c44d5f3dde8001f364cd7608a99556d8fbd8239a5798d4c10", [:rebar3], [{:unicode_util_compat, "0.4.1", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm"}, - "jason": {:hex, :jason, "1.1.2", "b03dedea67a99223a2eaf9f1264ce37154564de899fd3d8b9a21b1a6fd64afe7", [:mix], [{:decimal, "~> 1.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm"}, - "makeup": {:hex, :makeup, "0.8.0", "9cf32aea71c7fe0a4b2e9246c2c4978f9070257e5c9ce6d4a28ec450a839b55f", [:mix], [{:nimble_parsec, "~> 0.5.0", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm"}, - "makeup_elixir": {:hex, :makeup_elixir, "0.13.0", "be7a477997dcac2e48a9d695ec730b2d22418292675c75aa2d34ba0909dcdeda", [:mix], [{:makeup, "~> 0.8", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm"}, - "metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm"}, - "mime": {:hex, :mime, "1.3.1", "30ce04ab3175b6ad0bdce0035cba77bba68b813d523d1aac73d9781b4d193cf8", [:mix], [], "hexpm"}, - "mimerl": {:hex, :mimerl, "1.2.0", "67e2d3f571088d5cfd3e550c383094b47159f3eee8ffa08e64106cdf5e981be3", [:rebar3], [], "hexpm"}, - "mochiweb": {:hex, :mochiweb, "2.18.0", "eb55f1db3e6e960fac4e6db4e2db9ec3602cc9f30b86cd1481d56545c3145d2e", [:rebar3], [], "hexpm"}, - "nimble_parsec": {:hex, :nimble_parsec, "0.5.0", "90e2eca3d0266e5c53f8fbe0079694740b9c91b6747f2b7e3c5d21966bba8300", [:mix], [], "hexpm"}, - "parse_trans": {:hex, :parse_trans, "3.3.0", "09765507a3c7590a784615cfd421d101aec25098d50b89d7aa1d66646bc571c1", [:rebar3], [], "hexpm"}, - "plug": {:hex, :plug, "1.8.2", "0bcce1daa420f189a6491f3940cc77ea7fb1919761175c9c3b59800d897440fc", [:mix], [{:mime, "~> 1.0", [hex: :mime, repo: "hexpm", optional: false]}, {:plug_crypto, "~> 1.0", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: true]}], "hexpm"}, - "plug_cowboy": {:hex, :plug_cowboy, "1.0.0", "2e2a7d3409746d335f451218b8bb0858301c3de6d668c3052716c909936eb57a", [:mix], [{:cowboy, "~> 1.0", [hex: :cowboy, repo: "hexpm", optional: false]}, {:plug, "~> 1.7", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm"}, - "plug_crypto": {:hex, :plug_crypto, "1.0.0", "18e49317d3fa343f24620ed22795ec29d4a5e602d52d1513ccea0b07d8ea7d4d", [:mix], [], "hexpm"}, - "ranch": {:hex, :ranch, "1.3.2", "e4965a144dc9fbe70e5c077c65e73c57165416a901bd02ea899cfd95aa890986", [:rebar3], [], "hexpm"}, - "ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.4", "f0eafff810d2041e93f915ef59899c923f4568f4585904d010387ed74988e77b", [:make, :mix, :rebar3], [], "hexpm"}, - "unicode_util_compat": {:hex, :unicode_util_compat, "0.4.1", "d869e4c68901dd9531385bb0c8c40444ebf624e60b6962d95952775cac5e90cd", [:rebar3], [], "hexpm"}, + "benchee": {:hex, :benchee, "1.0.1", "66b211f9bfd84bd97e6d1beaddf8fc2312aaabe192f776e8931cb0c16f53a521", [:mix], [{:deep_merge, "~> 1.0", [hex: :deep_merge, repo: "hexpm", optional: false]}], "hexpm", "3ad58ae787e9c7c94dd7ceda3b587ec2c64604563e049b2a0e8baafae832addb"}, + "bypass": {:hex, :bypass, "2.1.0", "909782781bf8e20ee86a9cabde36b259d44af8b9f38756173e8f5e2e1fabb9b1", [:mix], [{:plug, "~> 1.7", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 2.0", [hex: :plug_cowboy, repo: "hexpm", optional: false]}, {:ranch, "~> 1.3", [hex: :ranch, repo: "hexpm", optional: false]}], "hexpm", "d9b5df8fa5b7a6efa08384e9bbecfe4ce61c77d28a4282f79e02f1ef78d96b80"}, + "certifi": {:hex, :certifi, "2.5.2", "b7cfeae9d2ed395695dd8201c57a2d019c0c43ecaf8b8bcb9320b40d6662f340", [:rebar3], [{:parse_trans, "~>3.3", [hex: :parse_trans, repo: "hexpm", optional: false]}], "hexpm", "3b3b5f36493004ac3455966991eaf6e768ce9884693d9968055aeeeb1e575040"}, + "cowboy": {:hex, :cowboy, "2.8.0", "f3dc62e35797ecd9ac1b50db74611193c29815401e53bac9a5c0577bd7bc667d", [:rebar3], [{:cowlib, "~> 2.9.1", [hex: :cowlib, repo: "hexpm", optional: false]}, {:ranch, "~> 1.7.1", [hex: :ranch, repo: "hexpm", optional: false]}], "hexpm", "4643e4fba74ac96d4d152c75803de6fad0b3fa5df354c71afdd6cbeeb15fac8a"}, + "cowboy_telemetry": {:hex, :cowboy_telemetry, "0.3.1", "ebd1a1d7aff97f27c66654e78ece187abdc646992714164380d8a041eda16754", [:rebar3], [{:cowboy, "~> 2.7", [hex: :cowboy, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "3a6efd3366130eab84ca372cbd4a7d3c3a97bdfcfb4911233b035d117063f0af"}, + "cowlib": {:hex, :cowlib, "2.9.1", "61a6c7c50cf07fdd24b2f45b89500bb93b6686579b069a89f88cb211e1125c78", [:rebar3], [], "hexpm", "e4175dc240a70d996156160891e1c62238ede1729e45740bdd38064dad476170"}, + "deep_merge": {:hex, :deep_merge, "1.0.0", "b4aa1a0d1acac393bdf38b2291af38cb1d4a52806cf7a4906f718e1feb5ee961", [:mix], [], "hexpm", "ce708e5f094b9cd4e8f2be4f00d2f4250c4095be93f8cd6d018c753894885430"}, + "earmark": {:hex, :earmark, "1.3.2", "b840562ea3d67795ffbb5bd88940b1bed0ed9fa32834915125ea7d02e35888a5", [:mix], [], "hexpm", "e3be2bc3ae67781db529b80aa7e7c49904a988596e2dbff897425b48b3581161"}, + "earmark_parser": {:hex, :earmark_parser, "1.4.12", "b245e875ec0a311a342320da0551da407d9d2b65d98f7a9597ae078615af3449", [:mix], [], "hexpm", "711e2cc4d64abb7d566d43f54b78f7dc129308a63bc103fbd88550d2174b3160"}, + "ex_doc": {:hex, :ex_doc, "0.23.0", "a069bc9b0bf8efe323ecde8c0d62afc13d308b1fa3d228b65bca5cf8703a529d", [:mix], [{:earmark_parser, "~> 1.4.0", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}], "hexpm", "f5e2c4702468b2fd11b10d39416ddadd2fcdd173ba2a0285ebd92c39827a5a16"}, + "floki": {:hex, :floki, "0.29.0", "b1710d8c93a2f860dc2d7adc390dd808dc2fb8f78ee562304457b75f4c640881", [:mix], [{:html_entities, "~> 0.5.0", [hex: :html_entities, repo: "hexpm", optional: false]}], "hexpm", "008585ce64b9f74c07d32958ec9866f4b8a124bf4da1e2941b28e41384edaaad"}, + "hackney": {:hex, :hackney, "1.16.0", "5096ac8e823e3a441477b2d187e30dd3fff1a82991a806b2003845ce72ce2d84", [:rebar3], [{:certifi, "2.5.2", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "6.0.1", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "1.0.1", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:parse_trans, "3.3.0", [hex: :parse_trans, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "1.1.6", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm", "3bf0bebbd5d3092a3543b783bf065165fa5d3ad4b899b836810e513064134e18"}, + "html_entities": {:hex, :html_entities, "0.5.1", "1c9715058b42c35a2ab65edc5b36d0ea66dd083767bef6e3edb57870ef556549", [:mix], [], "hexpm", "30efab070904eb897ff05cd52fa61c1025d7f8ef3a9ca250bc4e6513d16c32de"}, + "httpoison": {:hex, :httpoison, "1.7.0", "abba7d086233c2d8574726227b6c2c4f6e53c4deae7fe5f6de531162ce9929a0", [:mix], [{:hackney, "~> 1.16", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm", "975cc87c845a103d3d1ea1ccfd68a2700c211a434d8428b10c323dc95dc5b980"}, + "idna": {:hex, :idna, "6.0.1", "1d038fb2e7668ce41fbf681d2c45902e52b3cb9e9c77b55334353b222c2ee50c", [:rebar3], [{:unicode_util_compat, "0.5.0", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "a02c8a1c4fd601215bb0b0324c8a6986749f807ce35f25449ec9e69758708122"}, + "jason": {:hex, :jason, "1.2.2", "ba43e3f2709fd1aa1dce90aaabfd039d000469c05c56f0b8e31978e03fa39052", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "18a228f5f0058ee183f29f9eae0805c6e59d61c3b006760668d8d18ff0d12179"}, + "makeup": {:hex, :makeup, "1.0.5", "d5a830bc42c9800ce07dd97fa94669dfb93d3bf5fcf6ea7a0c67b2e0e4a7f26c", [:mix], [{:nimble_parsec, "~> 0.5 or ~> 1.0", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "cfa158c02d3f5c0c665d0af11512fed3fba0144cf1aadee0f2ce17747fba2ca9"}, + "makeup_elixir": {:hex, :makeup_elixir, "0.15.0", "98312c9f0d3730fde4049985a1105da5155bfe5c11e47bdc7406d88e01e4219b", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.1", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "75ffa34ab1056b7e24844c90bfc62aaf6f3a37a15faa76b07bc5eba27e4a8b4a"}, + "metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm", "69b09adddc4f74a40716ae54d140f93beb0fb8978d8636eaded0c31b6f099f16"}, + "mime": {:hex, :mime, "1.5.0", "203ef35ef3389aae6d361918bf3f952fa17a09e8e43b5aa592b93eba05d0fb8d", [:mix], [], "hexpm", "55a94c0f552249fc1a3dd9cd2d3ab9de9d3c89b559c2bd01121f824834f24746"}, + "mimerl": {:hex, :mimerl, "1.2.0", "67e2d3f571088d5cfd3e550c383094b47159f3eee8ffa08e64106cdf5e981be3", [:rebar3], [], "hexpm", "f278585650aa581986264638ebf698f8bb19df297f66ad91b18910dfc6e19323"}, + "mochiweb": {:hex, :mochiweb, "2.20.1", "e4dbd0ed716f076366ecf62ada5755a844e1d95c781e8c77df1d4114be868cdf", [:rebar3], [], "hexpm", "d1aeee7870470d2fa9eae0b3d5ab6c33801aa2d82b10e9dade885c5c921b36aa"}, + "nimble_parsec": {:hex, :nimble_parsec, "1.1.0", "3a6fca1550363552e54c216debb6a9e95bd8d32348938e13de5eda962c0d7f89", [:mix], [], "hexpm", "08eb32d66b706e913ff748f11694b17981c0b04a33ef470e33e11b3d3ac8f54b"}, + "parse_trans": {:hex, :parse_trans, "3.3.0", "09765507a3c7590a784615cfd421d101aec25098d50b89d7aa1d66646bc571c1", [:rebar3], [], "hexpm", "17ef63abde837ad30680ea7f857dd9e7ced9476cdd7b0394432af4bfc241b960"}, + "plug": {:hex, :plug, "1.11.0", "f17217525597628298998bc3baed9f8ea1fa3f1160aa9871aee6df47a6e4d38e", [:mix], [{:mime, "~> 1.0", [hex: :mime, repo: "hexpm", optional: false]}, {:plug_crypto, "~> 1.1.1 or ~> 1.2", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "2d9c633f0499f9dc5c2fd069161af4e2e7756890b81adcbb2ceaa074e8308876"}, + "plug_cowboy": {:hex, :plug_cowboy, "2.4.1", "779ba386c0915027f22e14a48919a9545714f849505fa15af2631a0d298abf0f", [:mix], [{:cowboy, "~> 2.7", [hex: :cowboy, repo: "hexpm", optional: false]}, {:cowboy_telemetry, "~> 0.3", [hex: :cowboy_telemetry, repo: "hexpm", optional: false]}, {:plug, "~> 1.7", [hex: :plug, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "d72113b6dff7b37a7d9b2a5b68892808e3a9a752f2bf7e503240945385b70507"}, + "plug_crypto": {:hex, :plug_crypto, "1.2.0", "1cb20793aa63a6c619dd18bb33d7a3aa94818e5fd39ad357051a67f26dfa2df6", [:mix], [], "hexpm", "a48b538ae8bf381ffac344520755f3007cc10bd8e90b240af98ea29b69683fc2"}, + "ranch": {:hex, :ranch, "1.7.1", "6b1fab51b49196860b733a49c07604465a47bdb78aa10c1c16a3d199f7f8c881", [:rebar3], [], "hexpm", "451d8527787df716d99dc36162fca05934915db0b6141bbdac2ea8d3c7afc7d7"}, + "ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.6", "cf344f5692c82d2cd7554f5ec8fd961548d4fd09e7d22f5b62482e5aeaebd4b0", [:make, :mix, :rebar3], [], "hexpm", "bdb0d2471f453c88ff3908e7686f86f9be327d065cc1ec16fa4540197ea04680"}, + "telemetry": {:hex, :telemetry, "0.4.2", "2808c992455e08d6177322f14d3bdb6b625fbcfd233a73505870d8738a2f4599", [:rebar3], [], "hexpm", "2d1419bd9dda6a206d7b5852179511722e2b18812310d304620c7bd92a13fcef"}, + "unicode_util_compat": {:hex, :unicode_util_compat, "0.5.0", "8516502659002cec19e244ebd90d312183064be95025a319a6c7e89f4bccd65b", [:rebar3], [], "hexpm", "d48d002e15f5cc105a696cf2f1bbb3fc72b4b770a184d8420c8db20da2674b38"}, } From 53e731d409828c1c303ca447cc2e758f9dcaa180 Mon Sep 17 00:00:00 2001 From: Neil Berkman Date: Sun, 29 Nov 2020 23:52:23 -0800 Subject: [PATCH 5/6] Fix Floki warning --- lib/furlex/parser.ex | 46 ++++++++++++++++++++---------------- lib/furlex/parser/html.ex | 20 +++++++++------- lib/furlex/parser/json_ld.ex | 20 +++++++++------- 3 files changed, 47 insertions(+), 39 deletions(-) diff --git a/lib/furlex/parser.ex b/lib/furlex/parser.ex index 362b310..be24534 100644 --- a/lib/furlex/parser.ex +++ b/lib/furlex/parser.ex @@ -19,33 +19,37 @@ defmodule Furlex.Parser do end def extract(tag, html, match) do - case Floki.find(html, match.(tag)) do - nil -> - nil - - elements -> - content = - case do_extract_content(elements) do - [] -> nil - [element] -> element - content -> content - end - - {tag, content} + with {:ok, document} <- Floki.parse_document(html) do + case Floki.find(document, match.(tag)) do + nil -> + nil + + elements -> + content = + case do_extract_content(elements) do + [] -> nil + [element] -> element + content -> content + end + + {tag, content} + end end end @doc "Extracts a canonical url from the given raw HTML" @spec extract_canonical(String.t()) :: nil | String.t() def extract_canonical(html) do - case Floki.find(html, "link[rel=\"canonical\"]") do - [] -> - nil - - elements -> - elements - |> Floki.attribute("href") - |> Enum.at(0) + with {:ok, document} <- Floki.parse_document(html) do + case Floki.find(document, "link[rel=\"canonical\"]") do + [] -> + nil + + elements -> + elements + |> Floki.attribute("href") + |> Enum.at(0) + end end end diff --git a/lib/furlex/parser/html.ex b/lib/furlex/parser/html.ex index 9a3ccbc..f6f45f9 100644 --- a/lib/furlex/parser/html.ex +++ b/lib/furlex/parser/html.ex @@ -5,17 +5,19 @@ defmodule Furlex.Parser.HTML do @spec parse(String.t()) :: nil | {:ok, Map.t()} def parse(html) do - case Floki.find(html, "meta[name]") do - nil -> - {:ok, %{}} + with {:ok, document} <- Floki.parse_document(html) do + case Floki.find(document, "meta[name]") do + nil -> + {:ok, %{}} - elements -> - content = - elements - |> filter_other() - |> Enum.reduce(%{}, &to_map/2) + elements -> + content = + elements + |> filter_other() + |> Enum.reduce(%{}, &to_map/2) - {:ok, content} + {:ok, content} + end end end diff --git a/lib/furlex/parser/json_ld.ex b/lib/furlex/parser/json_ld.ex index 7000dbe..d136f4a 100644 --- a/lib/furlex/parser/json_ld.ex +++ b/lib/furlex/parser/json_ld.ex @@ -7,17 +7,19 @@ defmodule Furlex.Parser.JsonLD do def parse(html) do meta = "script[type=\"application/ld+json\"]" - case Floki.find(html, meta) do - nil -> - {:ok, []} + with {:ok, document} <- Floki.parse_document(html) do + case Floki.find(document, meta) do + nil -> + {:ok, []} - elements -> - json_ld = - elements - |> Enum.map(&decode/1) - |> List.flatten() + elements -> + json_ld = + elements + |> Enum.map(&decode/1) + |> List.flatten() - {:ok, json_ld} + {:ok, json_ld} + end end end From 57b7978c7b7aafa449bc2f1a1034f1b6fe92c801 Mon Sep 17 00:00:00 2001 From: Neil Berkman Date: Mon, 30 Nov 2020 00:12:53 -0800 Subject: [PATCH 6/6] Fix deprecation warning --- lib/furlex.ex | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/lib/furlex.ex b/lib/furlex.ex index 56c9392..d1b4d65 100644 --- a/lib/furlex.ex +++ b/lib/furlex.ex @@ -33,14 +33,12 @@ defmodule Furlex do @doc false def start(_type, _args) do - import Supervisor.Spec - - opts = [strategy: :one_for_one, name: Furlex.Supervisor] - children = [ - worker(Furlex.Oembed, [[name: Furlex.Oembed]]) + {Furlex.Oembed, [name: Furlex.Oembed]} ] + opts = [strategy: :one_for_one, name: Furlex.Supervisor] + Supervisor.start_link(children, opts) end