Superduper encoding
Any Superduper class which derives from superduper.base.Base (including superduper.Component) is serializable
in a database friendly way.
Here is a representative example of how this works for all such classes:
from superduper import Component
import typing as t
import pprint
class MyClass(Component):
a: str
b: int
c: t.Dict
d: t.Callable
e: Component | None = None
my_instance_1 = MyClass('my_class', a='test', b=2, c={'testing': '123'}, d=lambda x: x + 1)
my_instance_2 = MyClass('my_class', a='test', b=2, c={'testing': '123'}, d=lambda x: x + 1, e=my_instance_1)
r = my_instance_2.encode()
print(r)
# {'identifier': 'my_class',
# 'upstream': None,
# 'a': 'test',
# 'b': 2,
# 'c': {'testing': '123'},
# 'd': '&:blob:c6fb8b5d832ad2dc83c2eb7c4ac8ab5899414929125a9f63cd06a800deaa1edf',
# 'e': '?MyClass:my_class',
# '_path': '__main__.MyClass',
# 'version': None,
# 'status': None,
# 'uuid': 'dbe131726b2b2fb896eb832b3fde10df',
# '_builds': {'MyClass:my_class': {'upstream': None,
# 'a': 'test',
# 'b': 2,
# 'c': {'testing': '123'},
# 'd': '&:blob:75e739227d94585271ae83c710bfdcf43993755d7be9d08cf4462b8a1bff9242',
# 'e': None,
# '_path': '__main__.MyClass',
# 'version': None,
# 'status': None,
# 'uuid': 'dbe131726b2b2fb896eb832b3fde10df'}},
# '_blobs': {'c6fb8b5d832ad2dc83c2eb7c4ac8ab5899414929125a9f63cd06a800deaa1edf': b'\x80\x04\x95\x04\x01\x00\x00\x00\x00\x00\x00\x8c\ndill._dill\x94\x8c\x10_create_function\x94\x93\x94(h\x00\x8c\x0c_create_code\x94\x93\x94(C\x00\x94K\x01K\x00K\x00K\x01K\x02KCC\x08|\x00d\x01\x17\x00S\x00\x94NK\x01\x86\x94)\x8c\x01x\x94\x85\x94\x8cN/var/folders/3h/p6qzszds1c7gtbmt_2qq0tvm0000gn/T/ipykernel_10359/1592532502.py\x94\x8c\x08<lambda>\x94K\x0eC\x02\x08\x00\x94))t\x94R\x94}\x94\x8c\x08__name__\x94\x8c\x08__main__\x94sh\x0bNNt\x94R\x94}\x94}\x94\x8c\x0f__annotations__\x94}\x94s\x86\x94b.',
# '75e739227d94585271ae83c710bfdcf43993755d7be9d08cf4462b8a1bff9242': b'\x80\x04\x95\x04\x01\x00\x00\x00\x00\x00\x00\x8c\ndill._dill\x94\x8c\x10_create_function\x94\x93\x94(h\x00\x8c\x0c_create_code\x94\x93\x94(C\x00\x94K\x01K\x00K\x00K\x01K\x02KCC\x08|\x00d\x01\x17\x00S\x00\x94NK\x01\x86\x94)\x8c\x01x\x94\x85\x94\x8cN/var/folders/3h/p6qzszds1c7gtbmt_2qq0tvm0000gn/T/ipykernel_10359/1592532502.py\x94\x8c\x08<lambda>\x94K\rC\x02\x08\x00\x94))t\x94R\x94}\x94\x8c\x08__name__\x94\x8c\x08__main__\x94sh\x0bNNt\x94R\x94}\x94}\x94\x8c\x0f__annotations__\x94}\x94s\x86\x94b.'},
# '_files': {}}
In this example you can see that superduper serializes the two nested components using their parameters and class path, and
unwraps the nested structure into a flat structure. Items which are not serializable using JSON are serialized as bytes
and saved separately in the output.
The developer can control this behaviour using the parameter annotations. To check the schema which superduper infers, developers can use:
print(MyClass.class_schema)
# identifier | upstream | a | b | c | d | e | version | status
# -----------+---------------+-----+-----+------+------+---------------+---------+-------
# str | ComponentList | str | int | JSON | Dill | ComponentType | int | str