Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 75 additions & 0 deletions pythonbible/fuzzy_match_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from __future__ import annotations

import re

NUMBER_WORD_DIGIT_MAP = {
"zero": "0",
"one": "1",
"two": "2",
"three": "3",
"four": "4",
"five": "5",
"six": "6",
"seven": "7",
"eight": "8",
"nine": "9",
"ten": "10",
"eleven": "11",
"twelve": "12",
"thirteen": "13",
"fourteen": "14",
"fifteen": "15",
"sixteen": "16",
"seventeen": "17",
"eighteen": "18",
"nineteen": "19",
"twenty": "20",
"thirty": "30",
"forty": "40",
"fifty": "50",
"sixty": "60",
"seventy": "70",
"eighty": "80",
"ninety": "90",
"hundred": "100",
"thousand": "1000",
"million": "1000000",
"billion": "1000000000",
}


def words_to_digits(text: str) -> str:
words = re.findall(r"[\w]+|[.,!?:;-]", text)
clean_text = []
current_number = []

for word in words:
if word.lower() in NUMBER_WORD_DIGIT_MAP:
current_number.append(NUMBER_WORD_DIGIT_MAP.get(word.lower(), ""))
continue

if word.lower() == "and":
continue

if current_number:
clean_text.append("".join(current_number))
current_number = []
clean_text.append(word)

if current_number:
clean_text.append("".join(current_number))

return " ".join(clean_text)


def clean_text_for_fuzzy_matching(text: str) -> str:
clean_text = text.replace(" chapter ", " ")
clean_text = clean_text.replace(", verses ", ": ")
clean_text = clean_text.replace(", verse ", ": ")
clean_text = clean_text.replace(" verses ", ": ")
clean_text = clean_text.replace(" verse ", ": ")
clean_text = clean_text.replace(" and ", ", ")
clean_text = clean_text.replace(" & ", ", ")
clean_text = clean_text.replace(" through ", "- ")
clean_text = clean_text.replace(" number ", " ")
return words_to_digits(clean_text)
6 changes: 6 additions & 0 deletions pythonbible/parser.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from __future__ import annotations

import re
from typing import Any
from typing import Match
from typing import Pattern

from pythonbible.books import Book
from pythonbible.fuzzy_match_util import clean_text_for_fuzzy_matching
from pythonbible.normalized_reference import NormalizedReference
from pythonbible.regular_expressions import SCRIPTURE_REFERENCE_REGULAR_EXPRESSION
from pythonbible.roman_numeral_util import convert_all_roman_numerals_to_integers
Expand All @@ -24,6 +26,7 @@
def get_references(
text: str,
book_groups: dict[str, tuple[Book, ...]] | None = None,
**kwargs: dict[str, Any],
) -> list[NormalizedReference]:
"""Search the text for scripture references.

Expand All @@ -43,6 +46,9 @@ def get_references(
clean_text: str = convert_all_roman_numerals_to_integers(text)
clean_text = clean_text.replace(HTML_NDASH, DASH).replace(HTML_MDASH, DASH)

if kwargs.get("fuzzy", False):
clean_text = clean_text_for_fuzzy_matching(clean_text)

for reference_match in re.finditer(
SCRIPTURE_REFERENCE_REGULAR_EXPRESSION,
clean_text,
Expand Down
6 changes: 5 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ per-file-ignores =
pythonbible/book_groups.py:WPS110,WPS115,WPS120,WPS437
pythonbible/books.py:WPS110,WPS114,WPS115,WPS120,WPS226,WPS317,WPS437
pythonbible/formatter.py:WPS118,WPS201,WPS204,WPS226,WPS336
pythonbible/parser.py:WPS232
pythonbible/fuzzy_match_util.py:WPS226
pythonbible/parser.py:WPS201,WPS232
pythonbible/roman_numeral_util.py:E741,WPS111,WPS121,WPS115
pythonbible/regular_expressions.py:WPS226
pythonbible/versions.py:WPS110,WPS114,WPS115,WPS120,WPS437
Expand All @@ -68,3 +69,6 @@ per-file-ignores =

exclude =
venv

[radon]
exclude = pythonbible/bible/asv/*.py,pythonbible/bible/kjv/*.py,venv/*
94 changes: 74 additions & 20 deletions tests/fuzzy_match_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,38 +6,50 @@
import pythonbible as bible


@pytest.mark.xfail(reason="fuzzy matching isn't fully supported yet")
def test_fuzzy_match_1() -> None:
"""Test fuzzy matching of references."""
fuzzy_match_input = (
"Second Timothy chapter two verses three and four says endure hardship"
)
expected = [bible.NormalizedReference(bible.Book.TIMOTHY_2, 2, 3, 2, 4, None)]
expected = [
bible.NormalizedReference(bible.Book.TIMOTHY_2, 2, 3, 2, 3, None),
bible.NormalizedReference(bible.Book.TIMOTHY_2, 2, 4, 2, 4, None),
]
actual = bible.get_references(
fuzzy_match_input,
fuzzy=True, # type: ignore[arg-type]
)

assert bible.get_references(fuzzy_match_input) == expected
assert actual == expected


@pytest.mark.xfail(reason="fuzzy matching isn't fully supported yet")
def test_fuzzy_match_2() -> None:
"""Test fuzzy matching of references."""
fuzzy_match_input = "If you read Ephesians four 17 through 32 all the ammunition"
expected = [bible.NormalizedReference(bible.Book.EPHESIANS, 4, 17, 4, 32, None)]
actual = bible.get_references(
fuzzy_match_input,
fuzzy=True, # type: ignore[arg-type]
)

assert bible.get_references(fuzzy_match_input) == expected
assert actual == expected


@pytest.mark.xfail(reason="fuzzy matching isn't fully supported yet")
def test_fuzzy_match_3() -> None:
"""Test fuzzy matching of references."""
fuzzy_match_input = (
"remember that powerful message of Paul in first Corinthians nine"
)
expected = [bible.NormalizedReference(bible.Book.CORINTHIANS_1, 9, 1, 9, 27, None)]
actual = bible.get_references(
fuzzy_match_input,
fuzzy=True, # type: ignore[arg-type]
)

assert bible.get_references(fuzzy_match_input) == expected
assert actual == expected


@pytest.mark.xfail(reason="fuzzy matching isn't fully supported yet")
def test_fuzzy_match_4() -> None:
"""Test fuzzy matching of references."""
fuzzy_match_input = (
Expand All @@ -46,38 +58,52 @@ def test_fuzzy_match_4() -> None:
)

expected = [bible.NormalizedReference(bible.Book.MATTHEW, 5, 1, 7, 29, None)]
actual = bible.get_references(
fuzzy_match_input,
fuzzy=True, # type: ignore[arg-type]
)

assert bible.get_references(fuzzy_match_input) == expected
assert actual == expected


@pytest.mark.xfail(reason="fuzzy matching isn't fully supported yet")
def test_fuzzy_match_5() -> None:
"""Test fuzzy matching of references."""
fuzzy_match_input = "Jesus said over in Matthew chapter six, verse number 12"
expected = [bible.NormalizedReference(bible.Book.MATTHEW, 6, 12, 6, 12, None)]
actual = bible.get_references(
fuzzy_match_input,
fuzzy=True, # type: ignore[arg-type]
)

assert bible.get_references(fuzzy_match_input) == expected
assert actual == expected


@pytest.mark.xfail(reason="fuzzy matching isn't fully supported yet")
def test_fuzzy_match_6() -> None:
"""Test fuzzy matching of references."""
fuzzy_match_input = "Genesis four, 25."
expected = [bible.NormalizedReference(bible.Book.GENESIS, 4, 25, 4, 25, None)]
actual = bible.get_references(
fuzzy_match_input,
fuzzy=True, # type: ignore[arg-type]
)

assert bible.get_references(fuzzy_match_input) == expected
assert actual == expected


@pytest.mark.xfail(reason="fuzzy matching isn't fully supported yet")
def test_fuzzy_match_7() -> None:
"""Test fuzzy matching of references."""
fuzzy_match_input = "and forth between Haggai two and Ezra three."
expected = [
bible.NormalizedReference(bible.Book.HAGGAI, 2, 1, 2, 23, None),
bible.NormalizedReference(bible.Book.EZRA, 3, 1, 3, 13, None),
]
actual = bible.get_references(
fuzzy_match_input,
fuzzy=True, # type: ignore[arg-type]
)

assert bible.get_references(fuzzy_match_input) == expected
assert actual == expected


@pytest.mark.xfail(reason="fuzzy matching isn't fully supported yet")
Expand All @@ -88,8 +114,12 @@ def test_fuzzy_match_8() -> None:
bible.NormalizedReference(bible.Book.JOHN, 1, 15, 1, 15, None),
bible.NormalizedReference(bible.Book.JOHN, 1, 30, 1, 30, None),
]
actual = bible.get_references(
fuzzy_match_input,
fuzzy=True, # type: ignore[arg-type]
)

assert bible.get_references(fuzzy_match_input) == expected
assert actual == expected


@pytest.mark.xfail(reason="fuzzy matching isn't fully supported yet")
Expand All @@ -100,8 +130,12 @@ def test_fuzzy_match_9() -> None:
"verses through chapter four, verse one."
)
expected = [bible.NormalizedReference(bible.Book.COLOSSIANS, 3, 22, 4, 1, None)]
actual = bible.get_references(
fuzzy_match_input,
fuzzy=True, # type: ignore[arg-type]
)

assert bible.get_references(fuzzy_match_input) == expected
assert actual == expected


def test_fuzzy_match_10() -> None:
Expand All @@ -117,8 +151,12 @@ def test_fuzzy_match_11() -> None:
"""Test fuzzy matching of references."""
fuzzy_match_input = "says in Mark 16 10 that the disciples were"
expected = [bible.NormalizedReference(bible.Book.MARK, 16, 10, 16, 10, None)]
actual = bible.get_references(
fuzzy_match_input,
fuzzy=True, # type: ignore[arg-type]
)

assert bible.get_references(fuzzy_match_input) == expected
assert actual == expected


@pytest.mark.xfail(reason="fuzzy matching isn't fully supported yet")
Expand All @@ -145,8 +183,12 @@ def test_fuzzy_match_13() -> None:
expected = [
bible.NormalizedReference(bible.Book.CORINTHIANS_1, 14, 34, 14, 35, None),
]
actual = bible.get_references(
fuzzy_match_input,
fuzzy=True, # type: ignore[arg-type]
)

assert bible.get_references(fuzzy_match_input) == expected
assert actual == expected


@pytest.mark.xfail(reason="fuzzy matching isn't fully supported yet")
Expand All @@ -158,8 +200,12 @@ def test_fuzzy_match_14() -> None:
bible.NormalizedReference(bible.Book.GENESIS, 2, 7, 2, 7, None),
bible.NormalizedReference(bible.Book.GENESIS, 21, 22, 21, 22, None),
]
actual = bible.get_references(
fuzzy_match_input,
fuzzy=True, # type: ignore[arg-type]
)

assert bible.get_references(fuzzy_match_input) == expected
assert actual == expected


@pytest.mark.xfail(reason="fuzzy matching isn't fully supported yet")
Expand All @@ -169,8 +215,12 @@ def test_fuzzy_match_15() -> None:
"look in Revelations 21, 1 through 7, you can start reading all about"
)
expected = [bible.NormalizedReference(bible.Book.REVELATION, 21, 1, 21, 7, None)]
actual = bible.get_references(
fuzzy_match_input,
fuzzy=True, # type: ignore[arg-type]
)

assert bible.get_references(fuzzy_match_input) == expected
assert actual == expected


def test_fuzzy_match_16() -> None:
Expand All @@ -186,5 +236,9 @@ def test_fuzzy_match_17() -> None:
"""Test fuzzy matching of references."""
fuzzy_match_input = "for one another Galatians 6 1 & 2 clearly gives us"
expected = [bible.NormalizedReference(bible.Book.GALATIANS, 6, 1, 6, 2, None)]
actual = bible.get_references(
fuzzy_match_input,
fuzzy=True, # type: ignore[arg-type]
)

assert bible.get_references(fuzzy_match_input) == expected
assert actual == expected