diff --git a/docs/advanced/index.md b/docs/advanced/index.md index f6178249ce..6b009b44c2 100644 --- a/docs/advanced/index.md +++ b/docs/advanced/index.md @@ -1,10 +1,16 @@ # Advanced User Guide -The **Advanced User Guide** is gradually growing, you can already read about some advanced topics. +The **Advanced User Guide** covers advanced topics and features of SQLModel. -At some point it will include: +Current topics include: -* How to use `async` and `await` with the async session. -* How to run migrations. -* How to combine **SQLModel** models with SQLAlchemy. +* [Working with Decimal Fields](decimal.md) - How to handle decimal numbers in SQLModel +* [Working with UUID Fields](uuid.md) - How to use UUID fields in your models +* [Storing Pydantic Models in JSONB Columns](pydantic-jsonb.md) - How to store and work with Pydantic models in JSONB columns + +Coming soon: + +* How to use `async` and `await` with the async session +* How to run migrations +* How to combine **SQLModel** models with SQLAlchemy * ...and more. 🤓 diff --git a/docs/advanced/pydantic-jsonb.md b/docs/advanced/pydantic-jsonb.md new file mode 100644 index 0000000000..9b89db23df --- /dev/null +++ b/docs/advanced/pydantic-jsonb.md @@ -0,0 +1,111 @@ +# Storing Pydantic Models in JSONB Columns + +You can store Pydantic models (and lists or dicts of them) in JSON or JSONB database columns using the `PydanticJSONB` utility. + +This is especially useful when: + +- You want to persist flexible, nested data structures in your models. +- You prefer to avoid separate relational tables for structured fields like metadata, config, or address. +- You want automatic serialization and deserialization using Pydantic. 
+ +## Usage + +You can use it with SQLModel like this: + +```python +from typing import Optional +from pydantic import BaseModel +from sqlmodel import SQLModel, Field, Column +from sqlmodel.sql.sqltypes import PydanticJSONB + +class Address(BaseModel): + street: str + city: str + +class User(SQLModel, table=True): + id: Optional[int] = Field(default=None, primary_key=True) + name: str + address: Address = Field(sa_column=Column(PydanticJSONB(Address))) +``` + +This will store the `address` field as a `JSONB` column in PostgreSQL and automatically serialize/deserialize to and from the `Address` Pydantic model. + +If you're using a list or dict of models, `PydanticJSONB` supports that too: + +```python +Field(sa_column=Column(PydanticJSONB(List[SomeModel]))) +Field(sa_column=Column(PydanticJSONB(Dict[str, SomeModel]))) +``` + +## Create & Store Data + +Here's how to create and store data with Pydantic models in JSONB columns: + +```python +from sqlmodel import Session, create_engine + +engine = create_engine("postgresql://user:password@localhost/db") + +# Insert a User with an Address +with Session(engine) as session: + user = User( + name="John Doe", + address=Address(street="123 Main St", city="New York") + ) + session.add(user) + session.commit() +``` + +## Retrieve & Use Data + +When you retrieve the data, it's automatically converted back to a Pydantic model: + +```python +with Session(engine) as session: + user = session.query(User).first() + print(user.address.street) # "123 Main St" + print(user.address.city) # "New York" + print(type(user.address)) # <class '__main__.Address'> +``` + +Result: +✅ No need for `Address(**user.address)` – it's already an `Address` instance! +✅ Automatic conversion between JSONB and Pydantic models. + +This simplifies handling structured data in SQLModel, making JSONB storage seamless and ergonomic. 🚀 + +## Requirements + +* PostgreSQL (for full `JSONB` support). +* Pydantic v2. +* SQLAlchemy 2.x. 
+ +## Limitations + +### Nested Model Updates + +Currently, updating attributes inside a nested Pydantic model doesn't automatically trigger a database update. This is similar to how plain dictionaries work in SQLAlchemy. For example: + +```python +# This won't trigger a database update +row = select(...) # some MyTable row +row.data.x = 1 +db.add(row) # no effect, change isn't detected +``` + +To update nested model attributes, you need to reassign the entire model: + +```python +# Workaround: Create a new instance and reassign +updated = ExtraData(**row.data.model_dump()) +updated.x = 1 +row.data = updated +db.add(row) +``` + +This limitation will be addressed in a future update using `MutableDict` to enable change tracking for nested fields. The `MutableDict` implementation will emit change events when the contents of the dictionary are altered, including when values are added or removed. + +## Notes + +* Falls back to `JSON` if `JSONB` is not available. +* Only tested with PostgreSQL at the moment. 
diff --git a/mkdocs.yml b/mkdocs.yml index c59ccd245a..8e036fbfaf 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -128,6 +128,7 @@ nav: - advanced/index.md - advanced/decimal.md - advanced/uuid.md + - advanced/pydantic-jsonb.md - Resources: - resources/index.md - help.md diff --git a/sqlmodel/sql/sqltypes.py b/sqlmodel/sql/sqltypes.py index 512daacbab..01d828b919 100644 --- a/sqlmodel/sql/sqltypes.py +++ b/sqlmodel/sql/sqltypes.py @@ -1,16 +1,106 @@ -from typing import Any, cast +from typing import ( + Any, + Dict, + List, + Optional, + Type, + TypeVar, + Union, + cast, + get_args, + get_origin, +) +from pydantic import BaseModel +from pydantic_core import to_jsonable_python from sqlalchemy import types +from sqlalchemy.dialects.postgresql import JSONB # for Postgres JSONB from sqlalchemy.engine.interfaces import Dialect +BaseModelType = TypeVar("BaseModelType", bound=BaseModel) + +# Define a type alias for JSON-serializable values +JSONValue = Union[Dict[str, Any], List[Any], str, int, float, bool, None] + class AutoString(types.TypeDecorator): # type: ignore impl = types.String cache_ok = True mysql_default_length = 255 - def load_dialect_impl(self, dialect: Dialect) -> "types.TypeEngine[Any]": + def load_dialect_impl(self, dialect: Dialect) -> types.TypeEngine[Any]: impl = cast(types.String, self.impl) if impl.length is None and dialect.name == "mysql": return dialect.type_descriptor(types.String(self.mysql_default_length)) return super().load_dialect_impl(dialect) + + +class PydanticJSONB(types.TypeDecorator): # type: ignore + """Custom type to automatically handle Pydantic model serialization.""" + + impl = JSONB # use JSONB type in Postgres (fallback to JSON for others) + cache_ok = True # allow SQLAlchemy to cache results + + def __init__( + self, + model_class: Union[ + Type[BaseModelType], + Type[List[BaseModelType]], + Type[Dict[str, BaseModelType]], + ], + *args: Any, + **kwargs: Any, + ): + super().__init__(*args, **kwargs) + self.model_class = model_class 
# Pydantic model class to use + + def process_bind_param(self, value: Any, dialect: Any) -> JSONValue: # noqa: ANN401, ARG002, ANN001 + if value is None: + return None + if isinstance(value, BaseModel): + return value.model_dump(mode="json") + if isinstance(value, list): + return [ + m.model_dump(mode="json") + if isinstance(m, BaseModel) + else to_jsonable_python(m) + for m in value + ] + if isinstance(value, dict): + return { + k: v.model_dump(mode="json") + if isinstance(v, BaseModel) + else to_jsonable_python(v) + for k, v in value.items() + } + + # We know to_jsonable_python returns a JSON-serializable value, but mypy sees it as an Any type + return to_jsonable_python(value) # type: ignore[no-any-return] + + def process_result_value( + self, value: Any, dialect: Any + ) -> Optional[Union[BaseModelType, List[BaseModelType], Dict[str, BaseModelType]]]: # noqa: ANN401, ARG002, ANN001 + if value is None: + return None + if isinstance(value, dict): + # If model_class is a Dict type hint, handle key-value pairs + origin = get_origin(self.model_class) + if origin is dict: + model_class = get_args(self.model_class)[ + 1 + ] # Get the value type (the model) + return {k: model_class.model_validate(v) for k, v in value.items()} + # Regular case: the whole dict represents a single model + return self.model_class.model_validate(value) # type: ignore + if isinstance(value, list): + # If model_class is a List type hint + origin = get_origin(self.model_class) + if origin is list: + model_class = get_args(self.model_class)[0] + return [model_class.model_validate(v) for v in value] + # Fallback case (though this shouldn't happen given our __init__ types) + return [self.model_class.model_validate(v) for v in value] # type: ignore + + raise TypeError( + f"Unsupported type for PydanticJSONB from database: {type(value)}. Expected a dictionary or list." + )