Serde: add unit tests for all serialization and deserialization logic

This commit is contained in:
Johannes Kirschbauer
2024-07-26 14:22:07 +02:00
parent 76e192bd49
commit d7b6fc16a4
6 changed files with 372 additions and 216 deletions

View File

@@ -1,141 +1,22 @@
import dataclasses
import json
from collections.abc import Callable
from dataclasses import dataclass, fields, is_dataclass
from dataclasses import dataclass
from functools import wraps
from inspect import Parameter, Signature, signature
from pathlib import Path
from types import UnionType
from typing import (
Annotated,
Any,
Generic,
Literal,
TypeVar,
get_args,
get_origin,
get_type_hints,
)
from .serde import dataclass_to_dict, from_dict, sanitize_string
__all__ = ["from_dict", "dataclass_to_dict", "sanitize_string"]
from clan_cli.errors import ClanError
def sanitize_string(s: str) -> str:
    """Return *s* escaped the way it appears inside a JSON string literal."""
    # json.dumps handles every escaping edge case for us. Its result is
    # wrapped in double quotes ('"string"'), which are stripped again here.
    encoded = json.dumps(s)
    return encoded[1:-1]
def dataclass_to_dict(obj: Any) -> Any:
    """
    Recursively convert a dataclass into plain dictionaries and lists.

    Nested dataclasses, lists, tuples and dictionaries are converted as well;
    tuples come back as lists. Strings, paths and dictionary keys are passed
    through sanitize_string. Anything else is returned unchanged.
    Member functions are NOT converted, only dataclass fields are visited.
    """
    if is_dataclass(obj):
        converted = {}
        for field in fields(obj):  # type: ignore
            # Prefer the "original_name" stored in the field metadata and
            # fall back to the attribute name.
            key = sanitize_string(field.metadata.get("original_name", field.name))
            converted[key] = dataclass_to_dict(getattr(obj, field.name))
        return converted

    if isinstance(obj, list | tuple):
        return [dataclass_to_dict(item) for item in obj]

    if isinstance(obj, dict):
        return {sanitize_string(k): dataclass_to_dict(v) for k, v in obj.items()}

    if isinstance(obj, Path):
        return sanitize_string(str(obj))

    if isinstance(obj, str):
        return sanitize_string(obj)

    return obj
def is_union_type(type_hint: type) -> bool:
return type(type_hint) is UnionType
def get_inner_type(type_hint: type) -> type:
    """
    Unwrap a union type hint to its first member that is not NoneType.

    Type hints that are not unions are returned unchanged.
    """
    if not is_union_type(type_hint):
        return type_hint

    none_type = type(None)
    non_none_members = (
        member for member in get_args(type_hint) if member is not none_type
    )
    return next(non_none_members)
def get_second_type(type_hint: type[dict]) -> type:
    """
    Get the value type of a dictionary type hint.

    Raises ValueError when the hint does not carry exactly two type arguments.
    """
    type_args = get_args(type_hint)
    if len(type_args) != 2:
        raise ValueError(f"Invalid type hint for dict: {type_hint}")

    # For dict[K, V] the second argument is the value type (e.g. Machine)
    _key_type, value_type = type_args
    return value_type
def from_dict(t: type, data: dict[str, Any] | None) -> Any:
    """
    Dynamically instantiate a data class from a dictionary, handling nested data classes.

    Each field is looked up in *data* under its "original_name" metadata entry
    (falling back to the field name). Values are converted based on the field's
    type, after unwrapping unions via get_inner_type:

    - dataclass fields are instantiated recursively
    - Path fields given as strings are turned into Path objects
    - dict[..., V] / list[V] fields have their items instantiated recursively
      when V is a dataclass; other items are kept as they are

    Returns None when *data* is None, or when instantiation fails with a
    TypeError / ValueError (the failure is printed).
    """
    if data is None:
        return None

    def _convert_item(item_type: Any, item: Any) -> Any:
        # Only dataclass items need construction. Recursing into a primitive
        # type would fail inside fields() and replace the item with None.
        return from_dict(item_type, item) if is_dataclass(item_type) else item

    try:
        # Attempt to create an instance of the data_class
        field_values = {}
        for field in fields(t):
            original_name = field.metadata.get("original_name", field.name)
            present = original_name in data
            field_value = data.get(original_name)
            field_type = get_inner_type(field.type)  # type: ignore

            if present:
                if is_dataclass(field_type):
                    # The field is another dataclass: instantiate it recursively
                    field_value = from_dict(field_type, field_value)
                elif field_type is Path and isinstance(field_value, str):
                    # field_type is a type object, so it is compared by
                    # identity; isinstance() on it never matched.
                    field_value = Path(field_value)
                elif get_origin(field_type) is dict and isinstance(field_value, dict):
                    # The field is a dictionary with a specific value type
                    inner_type = get_second_type(field_type)
                    field_value = {
                        k: _convert_item(inner_type, v) for k, v in field_value.items()
                    }
                elif get_origin(field_type) is list and isinstance(field_value, list):
                    # The field is a list with a specific item type
                    inner_type = get_args(field_type)[0]
                    field_value = [_convert_item(inner_type, v) for v in field_value]

            has_default = (
                field.default is not dataclasses.MISSING
                or field.default_factory is not dataclasses.MISSING
            )
            # Fields with a default (a: int = 1, b: list = field(default_factory=list))
            # are only set when the data provides them, so the default applies otherwise.
            # Fields without a default are always passed on, as None when absent.
            if present or not has_default:
                field_values[field.name] = field_value

        return t(**field_values)

    except (TypeError, ValueError) as e:
        print(f"Failed to instantiate {t.__name__}: {e} {data}")
        return None
# Unconstrained type variables for generic annotations in this module.
# NOTE(review): their usages are outside the visible part of the file - confirm before renaming.
T = TypeVar("T")
ResponseDataType = TypeVar("ResponseDataType")

View File

@@ -0,0 +1,101 @@
"""
This module provides utility functions for serialization and deserialization of data classes.
Functions:
- sanitize_string(s: str) -> str: Ensures a string is properly escaped for JSON serialization.
- dataclass_to_dict(obj: Any) -> Any: Converts a data class and its nested data classes, lists, tuples, and dictionaries to dictionaries.
- from_dict(t: type[T], data: Any) -> T: Dynamically instantiates a data class from a dictionary, constructing nested data classes and validating that all required fields exist and have the expected type.
Classes:
- TypeAdapter: A Pydantic type adapter for data classes.
Exceptions:
- ValidationError: Raised when there is a validation error during deserialization.
- ClanError: Raised when there is an error during serialization or deserialization.
Dependencies:
- dataclasses: Provides the @dataclass decorator and related functions for creating data classes.
- json: Provides functions for working with JSON data.
- collections.abc: Provides abstract base classes for collections.
- functools: Provides functions for working with higher-order functions and decorators.
- inspect: Provides functions for inspecting live objects.
- operator: Provides functions for working with operators.
- pathlib: Provides classes for working with filesystem paths.
- types: Provides functions for working with types.
- typing: Provides support for type hints.
- pydantic: A library for data validation and settings management.
- pydantic_core: Core functionality for Pydantic.
Note: This module assumes the presence of other modules and classes such as `ClanError` and `ErrorDetails` from the `clan_cli.errors` module.
"""
import json
from dataclasses import dataclass, fields, is_dataclass
from pathlib import Path
from typing import (
Any,
TypeVar,
)
from pydantic import TypeAdapter, ValidationError
from pydantic_core import ErrorDetails
from clan_cli.errors import ClanError
def sanitize_string(s: str) -> str:
    """Escape *s* for embedding in JSON, without the surrounding quotes."""
    # The native JSON encoder covers all escaping edge cases. It yields
    # '"string"', so the first and last character (the quotes) are cut off.
    quoted = json.dumps(s)
    return quoted[1 : len(quoted) - 1]
def dataclass_to_dict(obj: Any) -> Any:
    """
    Recursively convert a dataclass into plain dictionaries and lists.

    Nested dataclasses, lists, tuples and dictionaries are converted as well;
    tuples come back as lists. Dataclass fields whose name starts with an
    underscore are left out. Strings, paths and dictionary keys are passed
    through sanitize_string. Anything else is returned unchanged.
    Member functions are NOT converted, only dataclass fields are visited.
    """
    if is_dataclass(obj):
        result = {}
        for field in fields(obj):  # type: ignore
            if field.name.startswith("_"):
                # Underscore-prefixed fields are not serialized
                continue
            # Use either the original name from the metadata or the field name
            name = field.metadata.get("original_name", field.name)
            result[sanitize_string(name)] = dataclass_to_dict(getattr(obj, field.name))
        return result

    if isinstance(obj, list | tuple):
        return [dataclass_to_dict(item) for item in obj]

    if isinstance(obj, dict):
        return {sanitize_string(k): dataclass_to_dict(v) for k, v in obj.items()}

    if isinstance(obj, Path):
        return sanitize_string(str(obj))

    if isinstance(obj, str):
        return sanitize_string(obj)

    return obj
# Type variable tying from_dict's return type to the type passed in as `t`.
# NOTE(review): `dataclass` is a decorator function rather than a class, which is
# presumably why the bound needs the `type: ignore` - confirm with the type checker.
T = TypeVar("T", bound=dataclass)  # type: ignore
def from_dict(t: type[T], data: Any) -> T:
    """
    Dynamically instantiate a data class from a dictionary, handling nested data classes.

    The models are plain dataclasses; the deserialization and validation logic
    is delegated to pydantic's TypeAdapter instead of being implemented here.

    Returns the instance of *t* that pydantic builds from *data*.

    Raises ClanError when validation fails. The message and location are taken
    from the first reported validation error, the description holds the full
    pydantic error text, and the ValidationError is chained as the cause.
    """
    adapter = TypeAdapter(t)
    try:
        return adapter.validate_python(data)
    except ValidationError as e:
        errors = e.errors()
        # Guard before indexing: without usable error details only the
        # stringified validation error can be reported.
        if not errors or not errors[0]:
            raise ClanError(msg=str(e)) from e

        fst_error: ErrorDetails = errors[0]
        msg = fst_error.get("msg")
        loc = fst_error.get("loc")
        field_path = str(loc) if loc else "Unknown"
        raise ClanError(
            msg=msg, location=f"{t!s}: {field_path}", description=str(e)
        ) from e