Skip to content

Commit

Permalink
ny cites (#7)
Browse files Browse the repository at this point in the history
* failing test

* test passes

* refactor

* fix

* refactor

* scoping

* docs

* narrower specs

* narrower specs

* refactor

* refactor

* refactor: style

* refactor: remove maybe unneeded guards
  • Loading branch information
dogweather authored Nov 13, 2023
1 parent c8d5bb4 commit cb8ba80
Show file tree
Hide file tree
Showing 3 changed files with 592 additions and 20 deletions.
37 changes: 19 additions & 18 deletions lib/news_util.ex
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ defmodule NewsUtil do
@doc """
Find citations in a string of HTML or from a URL.
"""
@spec find_citations(URI.t()) :: list()
@spec find_citations(URI.t) :: [binary]
def find_citations(%URI{} = uri) do
url = URI.to_string(uri)
temp_file = FileUtil.tmp_file!(url)
Expand All @@ -21,7 +21,7 @@ defmodule NewsUtil do
end


@spec find_citations_in_file(binary()) :: list()
@spec find_citations_in_file(binary) :: [binary]
def find_citations_in_file(path) do
case Path.extname(path) do
".pdf" -> find_citations_in_html(FileUtil.read_pdf_as_html!(path))
Expand All @@ -30,8 +30,8 @@ defmodule NewsUtil do
end


@spec find_citations_in_html(binary()) :: list()
defp find_citations_in_html(html) when is_binary(html) do
@spec find_citations_in_html(binary) :: [binary]
defp find_citations_in_html(html) do
cites_from_hrefs =
html
|> uri_list()
Expand All @@ -48,8 +48,8 @@ defmodule NewsUtil do
end


@spec uri_list(binary()) :: list()
def uri_list(html) when is_binary(html) do
@spec uri_list(binary) :: [URI.t]
defp uri_list(html) do
{:ok, document} = Floki.parse_document(html)

document
Expand All @@ -59,47 +59,48 @@ defmodule NewsUtil do
end


@spec transform(URI.t()) :: nil | binary()
def transform(%URI{} = url) do
@spec transform(URI.t) :: nil | binary
defp transform(%URI{} = url) do
case url do
%{host: "leginfo.legislature.ca.gov"} -> leginfo_url_to_cite(url)
%{host: "texas.public.law"} -> texas_public_law_url_to_cite(url)

%{host: "newyork.public.law"} -> public_law_url_to_cite(url)
%{host: "texas.public.law"} -> public_law_url_to_cite(url)
_ -> nil
end
end


@spec cleanup_list(any()) :: list()
def cleanup_list(list) do
@spec cleanup_list(list) :: list
defp cleanup_list(list) do
list
|> sort()
|> uniq()
end


@spec texas_public_law_url_to_cite(URI.t()) :: binary()
def texas_public_law_url_to_cite(%URI{path: path}) do
@spec public_law_url_to_cite(URI.t) :: binary
defp public_law_url_to_cite(%URI{path: path}) do
path
|> String.split("/")
|> last()
|> String.replace("_", " ")
|> String.split(" ")
|> map(&String.capitalize/1)
|> join(" ")
|> String.replace("N.y.", "N.Y.")
end


@spec leginfo_url_to_cite(URI.t()) :: binary()
def leginfo_url_to_cite(%URI{query: query}) do
@spec leginfo_url_to_cite(URI.t) :: binary
defp leginfo_url_to_cite(%URI{query: query}) do
query
|> URI.decode_query()
|> make_cite_to_cal_codes()
end


@spec make_cite_to_cal_codes(map()) :: binary()
def make_cite_to_cal_codes(%{"lawCode" => code, "sectionNum" => section}) do
@spec make_cite_to_cal_codes(map) :: binary
defp make_cite_to_cal_codes(%{"lawCode" => code, "sectionNum" => section}) do
"CA #{code_to_abbrev(code)} Section #{section}"
|> String.replace_suffix(".", "")
end
Expand Down

Large diffs are not rendered by default.

17 changes: 15 additions & 2 deletions test/news_util_test.exs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import NewsUtil

defmodule NewsUtilTest do
@moduledoc false
use ExUnit.Case
doctest NewsUtil

Expand All @@ -14,12 +15,14 @@ defmodule NewsUtilTest do
assert find_citations_in_file(filename) == ["CA Educ Code Section 47605", "CA Educ Code Section 47605.6"]
end


test "California citations when the leginfo links have reversed params" do
filename = fixture("qandasec6.asp")
assert find_citations_in_file(filename) == ["CA Educ Code Section 47605"]
end

test "Texas citations when they're in public.law links" do

test "Texas links to public.law" do
filename = fixture("Formal Marriage License | Fort Bend County.html")

assert find_citations_in_file(filename) == [
Expand All @@ -29,12 +32,22 @@ defmodule NewsUtilTest do
]
end

test "Colorado CRS citations in a PDF" do

test "Colorado plain-text CRS citations in a PDF" do
filename = fixture("JDF432.pdf")

assert find_citations_in_file(filename) == [
"C.R.S. 13-15-101",
"C.R.S. 13-15-102",
]
end


test "NY links to public.law" do
filename = fixture("Potential expulsions for SUNY and CUNY students convicted of hate crimes, amidst surge in antisemitic incidents _ WRGB.html")

assert find_citations_in_file(filename) == [
"N.Y. Penal Law Section 485.05"
]
end
end

0 comments on commit cb8ba80

Please sign in to comment.