dh-tech · rlskoeser · Dec 19, 2025 · Feb 6, 2025 · Dec 18, 2025 · Dec 18, 2025
diff --git a/docs/undate/converters.rst b/docs/undate/converters.rst
@@ -4,10 +4,20 @@ Converters
 Overview
 --------
 
+
+.. automodule:: undate.converters
+
+-----
+
 .. automodule:: undate.converters.base
    :members:
    :undoc-members:
 
+
+.. autoclass:: undate.converters.combined.OmnibusDateConverter
+   :members:
+
+
 Formats
 --------
 
@@ -33,6 +43,8 @@ Extended Date-Time Format (EDTF)
 Calendars
 ---------
 
+.. automodule:: undate.converters.calendars
+
 Gregorian
 ^^^^^^^^^
 

diff --git a/src/undate/converters/__init__.py b/src/undate/converters/__init__.py
@@ -1 +1,29 @@
-from undate.converters.base import BaseDateConverter as BaseDateConverter
+"""
+Converter classes add support for parsing and serializing dates
+in a variety of formats. A subset of these are calendar converters
+(:mod:`undate.converters.calendars`), which means they support both parsing
+and conversion from an alternate calendar to a common Gregorian calendar
+for comparison across dates.
+
+To parse a date with a supported converter, use the ``Undate`` class method
+:meth:`~undate.undate.Undate.parse` and specify the date as a string
+with the desired format or calendar, e.g.
+
+.. code-block::
+
+  Undate.parse("2001-05", "EDTF")
+  Undate.parse("7 Heshvan 5425", "Hebrew")
+
+For converters that support it, you can also serialize a date in a specified
+format with the ``Undate`` instance method :meth:`~undate.undate.Undate.format`:
+
+.. code-block::
+
+  Undate.parse("Rabīʿ ath-Thānī 343", "Islamic").format("EDTF")
+
+
+"""
+
+from undate.converters.base import BaseDateConverter, GRAMMAR_FILE_PATH
+
+__all__ = ["BaseDateConverter", "GRAMMAR_FILE_PATH"]
diff --git a/src/undate/converters/base.py b/src/undate/converters/base.py
@@ -44,6 +44,7 @@
 
 import importlib
 import logging
+import pathlib
 import pkgutil
 from functools import cache
 from typing import Dict, Type
@@ -53,6 +54,10 @@
 logger = logging.getLogger(__name__)
 
 
+#: Path to parser grammar files
+GRAMMAR_FILE_PATH = pathlib.Path(__file__).parent / "grammars"
+
+
 class BaseDateConverter:
     """Base class for parsing, formatting, and converting dates to handle
     specific formats and different calendars."""

diff --git a/src/undate/converters/calendars/hebrew/parser.py b/src/undate/converters/calendars/hebrew/parser.py
@@ -1,8 +1,8 @@
-import pathlib
-
 from lark import Lark
 
-grammar_path = pathlib.Path(__file__).parent / "hebrew.lark"
+from undate.converters import GRAMMAR_FILE_PATH
+
+grammar_path = GRAMMAR_FILE_PATH / "hebrew.lark"
 
 with open(grammar_path) as grammar:
     # NOTE: LALR parser is faster but can't be used due to ambiguity between years and days

diff --git a/src/undate/converters/calendars/hebrew/transformer.py b/src/undate/converters/calendars/hebrew/transformer.py
@@ -26,12 +26,13 @@ def hebrew_date(self, items):
 
         # initialize and return an undate with year, month, day and
         # configured calendar (hebrew by default)
+        # NOTE: use self.calendar so Seleucid can extend more easily
         return Undate(**parts, calendar=self.calendar)
 
-    # year translation is not needed since we want a tree with name year
-    # this is equivalent to a no-op
-    # def year(self, items):
-    #     return Tree(data="year", children=[items[0]])
+    def year(self, items):
+        # combine multiple parts into a single string
+        value = "".join([str(i) for i in items])
+        return Tree(data="year", children=[value])
 
     def month(self, items):
         # month has a nested tree for the rule and the value

diff --git a/src/undate/converters/calendars/islamic/parser.py b/src/undate/converters/calendars/islamic/parser.py
@@ -1,8 +1,8 @@
-import pathlib
-
 from lark import Lark
 
-grammar_path = pathlib.Path(__file__).parent / "islamic.lark"
+from undate.converters import GRAMMAR_FILE_PATH
+
+grammar_path = GRAMMAR_FILE_PATH / "islamic.lark"
 
 with open(grammar_path) as grammar:
     # NOTE: LALR parser is faster but can't be used due to ambiguity between years and days

diff --git a/src/undate/converters/calendars/islamic/transformer.py b/src/undate/converters/calendars/islamic/transformer.py
@@ -28,8 +28,17 @@ def islamic_date(self, items):
 
     # year and day values are combined into a single string, since they
     # may be parsed as multiple tokens (e.g. in the combined parser)
-    # def year(self, items):
-    #     return Tree(data="year", children=[items[0]])
+    def year(self, items):
+        # combine multiple parts into a single string
+        # (for some reason we're getting an anonymous token in combined parser)
+        value = "".join([str(i) for i in items])
+        return Tree(data="year", children=[value])
+
+    def day(self, items):
+        # combine multiple parts into a single string
+        # (for some reason we're getting an anonymous token in combined parser)
+        value = "".join([str(i) for i in items])
+        return Tree(data="day", children=[value])
 
     def month(self, items):
         # month has a nested tree for the rule and the value

diff --git a/src/undate/converters/combined.py b/src/undate/converters/combined.py
@@ -0,0 +1,85 @@
+"""
+**Experimental** combined parser. Supports EDTF, Hebrew, and Hijri
+where dates are unambiguous. (Year-only dates are parsed as EDTF in
+Gregorian calendar.)
+"""
+
+from typing import Union
+
+from lark import Lark
+from lark.exceptions import UnexpectedCharacters
+from lark.visitors import Transformer, merge_transformers
+
+from undate import Undate, UndateInterval
+from undate.converters import BaseDateConverter, GRAMMAR_FILE_PATH
+from undate.converters.edtf.transformer import EDTFTransformer
+from undate.converters.calendars.hebrew.transformer import HebrewDateTransformer
+from undate.converters.calendars.islamic.transformer import IslamicDateTransformer
+
+
+class CombinedDateTransformer(Transformer):
+    def start(self, children):
+        # trigger the transformer for the appropriate part of the grammar
+        return children
+
+
+# NOTE: currently year-only dates in combined parser are interpreted as
+# EDTF and use Gregorian calendar.
+# In future, we could refine by adding calendar names & abbreviations
+# to the parser in order to recognize years from other calendars.
+
+combined_transformer = merge_transformers(
+    CombinedDateTransformer(),
+    edtf=EDTFTransformer(),
+    hebrew=HebrewDateTransformer(),
+    islamic=IslamicDateTransformer(),
+)
+
+
+# open based on filename so we can specify relative import path based on grammar file
+parser = Lark.open(
+    str(GRAMMAR_FILE_PATH / "combined.lark"), rel_to=__file__, strict=True
+)
+
+
+class OmnibusDateConverter(BaseDateConverter):
+    """
+    Combination parser that aggregates existing parser grammars.
+    Currently supports EDTF, Hebrew, and Hijri where dates are unambiguous.
+    (Year-only dates are parsed as EDTF in Gregorian calendar.)
+
+    Does not support serialization.
+
+    Example usage::
+
+        Undate.parse("Tammuz 4816", "omnibus")
+
+    """
+
+    #: converter name: omnibus
+    name: str = "omnibus"
+
+    def __init__(self):
+        self.transformer = combined_transformer
+
+    def parse(self, value: str) -> Union[Undate, UndateInterval]:
+        """
+        Parse a string in a supported format and return an :class:`~undate.undate.Undate`
+        or :class:`~undate.undate.UndateInterval`.
+        """
+        if not value:
+            raise ValueError("Parsing empty/unset string is not supported")
+
+        # parse the input string, then transform to undate object
+        try:
+            parsetree = parser.parse(value)
+            # transform returns a list; we want the first item in the list
+            return self.transformer.transform(parsetree)[0]
+        except UnexpectedCharacters:
+            raise ValueError(
+                "Parsing failed: '%s' is not in a recognized date format" % value
+            )
+
+    def to_string(self, undate: Union[Undate, UndateInterval]) -> str:
+        "Not supported by this converter. Will raise :class:`ValueError`"
+        raise ValueError("Omnibus converter does not support serialization")
diff --git a/src/undate/converters/edtf/parser.py b/src/undate/converters/edtf/parser.py
@@ -1,8 +1,8 @@
-import pathlib
-
 from lark import Lark
 
-grammar_path = pathlib.Path(__file__).parent / "edtf.lark"
+from undate.converters import GRAMMAR_FILE_PATH
+
+grammar_path = GRAMMAR_FILE_PATH / "edtf.lark"
 
 with open(grammar_path) as grammar:
     edtf_parser = Lark(grammar.read(), start="edtf")
diff --git a/src/undate/converters/edtf/transformer.py b/src/undate/converters/edtf/transformer.py
@@ -66,7 +66,10 @@ def day_unspecified(self, items):
     def date_level1(self, items):
         return self.date(items)
 
-    # year (including negative years) use default transformation
+    def year(self, items):
+        # combine parts (numeric & unknown) into a single string
+        value = "".join(self.get_values(items))
+        return Tree(data="year", children=[value])
 
     def year_fivedigitsplus(self, items):
         # strip off the leading Y and convert to integer

diff --git a/src/undate/converters/grammars/combined.lark b/src/undate/converters/grammars/combined.lark
@@ -0,0 +1,32 @@
+%import common.WS
+%ignore WS
+
+start: (edtf__start | hebrew__hebrew_date  | islamic__islamic_date )
+
+// Renaming of the import variables is required, as they receive the namespace of this file.
+// See: https://github.com/lark-parser/lark/pull/973#issuecomment-907287565
+
+// All grammars are in the same directory, so we can use relative imports
+
+// relative import from edtf.lark
+%import .edtf.edtf -> edtf__start
+
+// relative import from hebrew.lark
+%import .hebrew.hebrew_date -> hebrew__hebrew_date
+%import .hebrew.day  -> hebrew__day
+%import .hebrew.month -> hebrew__month
+%import .hebrew.year -> hebrew__year
+
+// relative import from islamic.lark
+%import .islamic.islamic_date -> islamic__islamic_date
+%import .islamic.day  -> islamic__day
+%import .islamic.month -> islamic__month
+%import .islamic.year -> islamic__year
+
+
+// override hebrew date to omit year-only, since year without calendar is ambiguous
+// NOTE: potentially support year with calendar label
+%override hebrew__hebrew_date: hebrew__day hebrew__month hebrew__year | hebrew__month hebrew__year 
+
+// same for islamic date, year alone is ambiguous
+%override islamic__islamic_date: islamic__day islamic__month islamic__year | islamic__month islamic__year 
diff --git a/src/undate/converters/edtf/edtf.lark → src/undate/converters/grammars/edtf.lark b/src/undate/converters/edtf/edtf.lark → src/undate/converters/grammars/edtf.lark
diff --git a/...e/converters/calendars/hebrew/hebrew.lark → src/undate/converters/grammars/hebrew.lark b/...e/converters/calendars/hebrew/hebrew.lark → src/undate/converters/grammars/hebrew.lark
@@ -11,23 +11,23 @@ hebrew_date: weekday? day month comma? year | month year | year
 // PGP dates use qualifiers like "first decade of" (for beginning of month)
 // "first third of", seasons (can look for more examples)
 
-// Hebrew calendar starts with year 1  in 3761 BCE
+// Hebrew calendar starts with year 1 in 3761 BCE
 year: /\d+/
 
 // months
 month: month_1
      | month_2
- 	 | month_3
- 	 | month_4
- 	 | month_5
- 	 | month_6
- 	 | month_7
- 	 | month_8
- 	 | month_9
- 	 | month_10
-  	 | month_11
-      | month_12
-      | month_13
+     | month_3
+     | month_4
+     | month_5
+     | month_6
+     | month_7
+     | month_8
+     | month_9
+     | month_10
+     | month_11
+     | month_12
+     | month_13
 // months have 29 or 30 days; we do not expect leading zeroes
 day: /[1-9]/ | /[12][0-9]/ | /30/
 

diff --git a/...converters/calendars/islamic/islamic.lark → src/undate/converters/grammars/islamic.lark b/...converters/calendars/islamic/islamic.lark → src/undate/converters/grammars/islamic.lark
diff --git a/tests/test_converters/test_combined_parser.py b/tests/test_converters/test_combined_parser.py
@@ -0,0 +1,54 @@
+import pytest
+
+from undate.converters.combined import parser, combined_transformer
+
+from undate import Undate, UndateInterval
+
+# test that valid dates can be parsed
+
+testcases = [
+    # EDTF
+    ("1984", Undate(1984)),
+    ("201X", Undate("201X")),
+    ("20XX", Undate("20XX")),
+    ("2004-XX", Undate(2004, "XX")),
+    ("1000/2000", UndateInterval(Undate(1000), Undate(2000))),
+    # Hebrew / Anno Mundi calendar
+    ("Tammuz 4816", Undate(4816, 4, calendar="Hebrew")),
+    # Islamic / Hijri calendar
+    ("Jumādā I 1243", Undate(1243, 5, calendar="Islamic")),
+    ("7 Jumādā I 1243", Undate(1243, 5, 7, calendar="Islamic")),
+    ("14 Rabīʿ I 901", Undate(901, 3, 14, calendar="Islamic")),
+]
+
+
+@pytest.mark.parametrize("date_string,expected", testcases)
+def test_transform(date_string, expected):
+    # test the transformer directly
+    transformer = combined_transformer
+    # parse the input string, then transform to undate object
+    parsetree = parser.parse(date_string)
+    # since the same unknown date is not considered strictly equal,
+    # compare object representations
+    transformed_date = transformer.transform(parsetree)
+    assert repr(transformed_date[0]) == repr(expected)
+
+
+@pytest.mark.parametrize("date_string,expected", testcases)
+def test_converter(date_string, expected):
+    # should work the same way when called through the converter class
+    assert repr(Undate.parse(date_string, "omnibus")) == repr(expected)
+
+
+def test_parse_errors():
+    # empty string not supported
+    with pytest.raises(ValueError, match="not supported"):
+        Undate.parse("", "omnibus")
+
+    with pytest.raises(ValueError, match="not in a recognized date format"):
+        Undate.parse("Monday 2023", "omnibus")
+
+
+def test_no_serialize():
+    with pytest.raises(ValueError, match="does not support"):
+        Undate("2022").format("omnibus")