Multimodal vector search - images
APPLY = False
from superduper import superduper
db = superduper('mongomock:///test_db')
Get useful sample data​
def getter():
import subprocess
subprocess.run([
'curl', '-O', 'https://superduperdb-public-demo.s3.amazonaws.com/images_classification.zip',
])
subprocess.run(['rm', '-rf', 'images'])
subprocess.run(['rm', '-rf', '__MACOSX'])
subprocess.run(['unzip', 'images_classification.zip'])
subprocess.run(['rm', 'images_classification.zip'])
import json
from PIL import Image
with open('images/images.json', 'r') as f:
data = json.load(f)
data = data[:100]
data = [{'img': Image.open(r['image_path'])} for r in data]
subprocess.run(['rm', '-rf', '__MACOSX'])
subprocess.run(['rm', '-rf', 'images'])
return data
if APPLY:
data = getter()
Build multimodal embedding models​
We define the output data type of a model as a vector for vector transformation.
from superduper.components.vector_index import sqlvector
output_datatype = sqlvector(shape=(1024,))
Then define two models, one for text embedding and one for image embedding.
import clip
from superduper import vector, imported
from superduper_torch import TorchModel
rn50 = imported(clip.load)('RN50', device='cpu')
compatible_model = TorchModel(
identifier='clip_text',
object=rn50[0],
preprocess=lambda x: clip.tokenize(x)[0],
postprocess=lambda x: x.tolist(),
datatype=output_datatype,
forward_method='encode_text',
)
embedding_model = TorchModel(
identifier='clip_image',
object=rn50[0].visual,
preprocess=rn50[1],
postprocess=lambda x: x.tolist(),
datatype=output_datatype,
)
Because we use multimodal models, we define different keys to specify which model to use for embedding calculations in the vector_index.
indexing_key = 'img'
compatible_key = 'text'
Create vector-index​
vector_index_name = 'my-vector-index'
from superduper import VectorIndex, Listener
vector_index = VectorIndex(
vector_index_name,
indexing_listener=Listener(
key=indexing_key,
select=db['docs'].select(),
model=embedding_model,
identifier='indexing-listener',
),
compatible_listener=Listener(
key=compatible_key,
model=compatible_model,
select=None,
identifier='compatible-listener',
)
)
from superduper import Application
application = Application(
'image-vector-search',
components=[vector_index],
)
if APPLY:
db.apply(application, force=True)
Add the data​
The order in which data is added is not important. However if your data requires a custom Schema
in order to work, it's easier to add the Application
first, and the data later. The advantage of this flexibility, is that once the Application
is installed, it's waiting for incoming data, so that the Application
is always up-to-date. This comes in particular handy with AI scenarios which need to respond to changing news.
if APPLY:
from superduper import Document
table_or_collection = db['docs']
ids = db.execute(table_or_collection.insert([Document(r) for r in data]))
Perform a vector search​
We can perform the vector searches using two types of data:
- Text: By text description, we can find images similar to the text description.
- Image: By using an image, we can find images similar to the provided image.
if APPLY:
item = Document({compatible_key: "Find a black dog."})
if APPLY:
from IPython.display import display
search_image = data[0]
display(search_image)
item = Document(search_image)
Once we have this search target, we can execute a search as follows.
if APPLY:
select = db['docs'].like(item, vector_index=vector_index_name, n=5).select()
results = list(db.execute(select))
from IPython.display import display
for result in results:
display(result[indexing_key])
Create a Template
​
from superduper import Template, Table, Schema
from superduper.components.dataset import RemoteData
from superduper_pillow import pil_image
template = Template(
'multimodal_image_search',
template=application,
default_table=Table(
'sample_multimodal_image_search',
schema=Schema(
'sample_multimodal_image_search/schema',
fields={'img': pil_image},
),
data=RemoteData('sample_images', getter=getter),
),
substitutions={'docs': 'table_name', 'cpu': 'device'},
types={
'device': {
'type': 'str',
'default': 'cpu',
},
'table_name': {
'type': 'str',
'default': 'sample_multimodal_image_search',
},
}
)
template.export('.')
template.template
vector_index.indexing_listener.select