Skip to main content

Build image embedding model

Construct a neural network architecture to project high-dimensional image data into a lower-dimensional, dense vector representation (embedding) that preserves relevant semantic and visual information within a learned latent space.

!wget https://raw.githubusercontent.com/openai/CLIP/main/CLIP.png
image_path = "CLIP.png"

import torchvision.models as models
from torchvision import transforms
from superduper_torch import TorchModel

class TorchVisionEmbedding:
    """Pre-trained ResNet-18 wrapped as an image-embedding backbone.

    Exposes `self.resnet` (the torchvision model, set to eval mode) and
    `preprocess`, which converts a PIL image into the normalized tensor
    the model expects.
    """

    # ImageNet normalization constants used by torchvision's pretrained models.
    _MEAN = [0.485, 0.456, 0.406]
    _STD = [0.229, 0.224, 0.225]

    def __init__(self):
        # Load the pre-trained ResNet-18 model.
        # NOTE(review): `pretrained=True` is deprecated in newer torchvision;
        # `weights=models.ResNet18_Weights.DEFAULT` is the modern spelling.
        self.resnet = models.resnet18(pretrained=True)

        # Inference only: freeze dropout / batch-norm in eval behavior.
        self.resnet.eval()

        # Build the transform pipeline ONCE here instead of on every call.
        # (The original also had a duplicated `preprocess = preprocess = ...`
        # assignment — an obvious copy/paste bug, removed.)
        self._transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=self._MEAN, std=self._STD),
        ])

    def preprocess(self, image):
        """Convert a PIL image to a normalized (3, 224, 224) float tensor."""
        return self._transform(image)

# Wire the embedding backbone into superduper as a TorchModel.
embedding_model = TorchVisionEmbedding()
superdupermodel = TorchModel(
    identifier='my-vision-model-torch',
    object=embedding_model.resnet,
    preprocess=embedding_model.preprocess,
)

# Bug fix: the original line was `embedding_model.predict(Image.open(image_path))`,
# but TorchVisionEmbedding defines no `predict` method (AttributeError) and
# `Image` was never imported (NameError). Run the pipeline the class actually
# provides: preprocess -> add batch dim -> ResNet forward pass.
from PIL import Image  # local import: PIL was used but never imported in this file
import torch

with torch.no_grad():  # inference only — no gradient bookkeeping needed
    # .convert("RGB") guards against RGBA/grayscale inputs, which would give
    # the wrong channel count for ResNet's 3-channel first conv layer.
    tensor_image = embedding_model.preprocess(Image.open(image_path).convert("RGB"))
    embedding = embedding_model.resnet(tensor_image.unsqueeze(0))