Get useful sample data
- Text
- Text-Classification
- PDF
- Image
- Image-Classification
- Video
- Audio
!curl -O https://superduperdb-public-demo.s3.amazonaws.com/text.json
import json
with open('text.json', 'r') as f:
data = json.load(f)
!curl -O https://superduperdb-public-demo.s3.amazonaws.com/text_classification.json
import json
with open("text_classification.json", "r") as f:
data = json.load(f)
num_classes = 2
!curl -O https://superduperdb-public-demo.s3.amazonaws.com/pdfs.zip && unzip -o pdfs.zip
import os
data = [f'pdfs/{x}' for x in os.listdir('./pdfs') if x.endswith('.pdf')]
!curl -O https://superduperdb-public-demo.s3.amazonaws.com/images.zip && unzip images.zip
import os
from PIL import Image
data = [f'images/{x}' for x in os.listdir('./images') if x.endswith(".png")][:200]
data = [ Image.open(path) for path in data]
!curl -O https://superduperdb-public-demo.s3.amazonaws.com/images_classification.zip && unzip images_classification.zip
import json
from PIL import Image
with open('images/images.json', 'r') as f:
data = json.load(f)
data = [{'x': Image.open(d['image_path']), 'y': d['label']} for d in data]
num_classes = 2
!curl -O https://superduperdb-public-demo.s3.amazonaws.com/videos.zip && unzip videos.zip
import os
data = [f'videos/{x}' for x in os.listdir('./videos')]
sample_datapoint = data[-1]
from superduper.ext.pillow import pil_image
chunked_model_datatype = pil_image
# NOTE(review): the download command below is commented out, so ./audio must
# already exist when this cell runs — confirm whether that is intentional.
# !curl -O https://superduperdb-public-demo.s3.amazonaws.com/audio.zip && unzip audio.zip
import os

# The path prefix must match the directory being listed; otherwise every entry
# in `data` points at a location that does not exist.
data = [f'audio/{x}' for x in os.listdir('./audio')]
# The last listed entry serves as the example datapoint.
sample_datapoint = data[-1]