# Source listing for module momoa.engines.pydantic

"""Pydantic v2 engine: compiles JSON Schema into pydantic.BaseModel subclasses.

Uses datamodel-code-generator to handle the full JSON Schema → Pydantic mapping,
including $ref, $defs, nested objects, arrays, enums, and composition keywords.

Known limitation: dynamically generated models are not pickle-able. Use
model_dump() / model_validate() for cross-process serialisation instead of pickle.
"""

from typing import Any

from datamodel_code_generator import GenerateConfig
from datamodel_code_generator.dynamic import generate_dynamic_models
from pydantic import BaseModel

from momoa.engines import EngineResult
from momoa.exceptions import SchemaCompileError

# Shared generator configuration, passed to generate_dynamic_models() on every
# PydanticEngine.compile() call.
# NOTE(review): an empty formatter list is supplied; presumably this skips
# code-formatting of the generated source (it is only executed, never shown) --
# confirm against the datamodel-code-generator documentation.
_CONFIG = GenerateConfig(formatters=[])


class PydanticEngine:
    """Engine that turns a JSON Schema document into pydantic.BaseModel subclasses."""

    @property
    def output_format(self) -> str:
        """Name identifying the output format of this engine."""
        return "pydantic"

    def context_labeller(self) -> None:
        """Return None: URI labelling is not needed here.

        Class naming is left entirely to datamodel-code-generator.
        """
        return None

    def compile(self, spec: dict[str, Any], *, root_name: str | None = None) -> EngineResult:
        """Build BaseModel subclasses (each with .serialize()) from a JSON Schema spec.

        Args:
            spec: The JSON Schema document. Its "title" entry, when present,
                is used to label any SchemaCompileError raised here.
            root_name: Optional class name of the model to treat as the root.
                When given, that model is moved to the end of the result tuple.

        Returns:
            An EngineResult whose ``models`` tuple holds the wrapped classes.

        Raises:
            SchemaCompileError: If model generation fails, if no BaseModel
                subclasses come back, or if ``root_name`` matches none of them.
        """
        schema_title = spec.get("title", "<schema>")

        try:
            generated = generate_dynamic_models(spec, config=_CONFIG, cache_size=1024)
        except Exception as err:
            # Surface every generator failure under the package's own error type.
            raise SchemaCompileError(schema_title, str(err)) from err

        # Keep only the pydantic models among the generated objects.
        model_classes = [
            candidate for candidate in generated.values() if issubclass(candidate, BaseModel)
        ]
        if not model_classes:
            raise SchemaCompileError(schema_title, "produced no model classes")

        wrapped = tuple(map(_with_serialize, model_classes))

        if root_name is None:
            return EngineResult(models=wrapped)

        # Split the models into those carrying the requested name and the rest.
        roots: list[type[BaseModel]] = []
        others: list[type[BaseModel]] = []
        for model in wrapped:
            bucket = roots if model.__name__ == root_name else others
            bucket.append(model)

        if not roots:
            available = ", ".join(model.__name__ for model in wrapped)
            raise SchemaCompileError(
                schema_title, f"no model named {root_name!r}; available: {available}"
            )

        # The named root goes last so that EngineResult.model returns it.
        return EngineResult(models=(*others, roots[0]))


def _with_serialize(cls: type[BaseModel]) -> type[BaseModel]:
    """Derive a same-named subclass of cls that gains a .serialize() method."""

    def serialize(self) -> dict[str, Any]:
        """Return model_dump() output in JSON mode, leaving out unset fields."""
        return self.model_dump(mode="json", exclude_unset=True)

    # Built via type() so the subclass keeps the original class name.
    namespace = {"serialize": serialize}
    bases = (cls,)
    return type(cls.__name__, bases, namespace)