Label Data with Teacher
Use the teacher model to automatically label your training data.
Prepare the Dataset
from seeme import Client

client = Client()

# Create dataset for distillation
dataset = client.create_dataset(
    name="Distillation: Product Classifier",
    description="Teacher-labeled data for model distillation",
    task_type="image_classification",
)
version = client.create_dataset_version(dataset_id=dataset.id, name="v1")

# Create splits
train_split = client.create_split(version_id=version.id, name="train")
val_split = client.create_split(version_id=version.id, name="validation")

# Create labels (matching what teacher will predict)
label_names = ["good", "scratch", "dent", "discoloration", "crack"]
for label_name in label_names:
    client.create_label(version_id=version.id, name=label_name)
print(f"Dataset ready: {dataset.id}")

Upload Unlabeled Data
import os
import glob
import random


def _upload_split(client, version_id, split_id, paths, progress_every=100):
    """Upload image files into one dataset split.

    Args:
        client: SeeMe client used to create dataset items.
        version_id: Dataset version that receives the items.
        split_id: Split (train/validation) the items belong to.
        paths: Image file paths to upload.
        progress_every: Print a progress line after this many uploads.
    """
    for i, path in enumerate(paths):
        client.create_dataset_item(
            version_id=version_id,
            split_id=split_id,
            file_path=path,
        )
        if (i + 1) % progress_every == 0:
            print(f" Uploaded {i + 1}/{len(paths)}")


# Get all unlabeled images
image_paths = glob.glob("./unlabeled_images/**/*.jpg", recursive=True)
# Shuffle so the 80/20 split below is a random partition, not directory order.
random.shuffle(image_paths)
print(f"Found {len(image_paths)} images to label")

# Split 80/20 for train/val
val_count = int(len(image_paths) * 0.2)
val_paths = image_paths[:val_count]
train_paths = image_paths[val_count:]
print(f"Training: {len(train_paths)}, Validation: {len(val_paths)}")

# Upload training images
print("Uploading training images...")
_upload_split(client, version.id, train_split.id, train_paths)

# Upload validation images
print("Uploading validation images...")
_upload_split(client, version.id, val_split.id, val_paths)
print("Upload complete!")

Configure the Post-Processor
Set up the teacher model to automatically label uploads:
Monitor Labeling Progress
import time


def monitor_labeling(client, dataset_id, poll_interval=30):
    """Poll post-processor jobs until no work is pending or in flight.

    Prints a progress line on every poll and a summary when done.

    Args:
        client: SeeMe client used to query post-processor jobs.
        dataset_id: Dataset whose labeling jobs are being monitored.
        poll_interval: Seconds to sleep between polls.

    Returns:
        Tuple of (completed_count, failed_count).
    """
    print("Monitoring labeling progress...")
    print("-" * 50)

    while True:
        # One query per job status, in a fixed order; len() gives the count.
        counts = {
            status: len(client.get_post_processor_jobs(dataset_id=dataset_id, status=status))
            for status in ("pending", "processing", "completed", "failed")
        }

        done = counts["completed"] + counts["failed"]
        total = sum(counts.values())
        print(f"Progress: {done}/{total} "
              f"(Completed: {counts['completed']}, Failed: {counts['failed']}, "
              f"Processing: {counts['processing']}, Pending: {counts['pending']})")

        # Finished once nothing is queued or currently running.
        if counts["pending"] == 0 and counts["processing"] == 0:
            print("-" * 50)
            print("Labeling complete!")
            print(f" Successfully labeled: {counts['completed']}")
            print(f" Failed: {counts['failed']}")
            return counts["completed"], counts["failed"]

        time.sleep(poll_interval)
# Monitor
completed, failed = monitor_labeling(client, dataset.id)

Handle Failed Items
# Get failed jobs
failed_jobs = client.get_post_processor_jobs(
    dataset_id=dataset.id,
    status="failed",
)

if failed_jobs:
    print(f"\n{len(failed_jobs)} items failed labeling:")
    # Show at most the first ten failures for a quick look.
    for job in failed_jobs[:10]:
        print(f" Item {job.item_id}: {job.error}")

    # Option 1: Retry failed jobs
    print("\nRetrying failed jobs...")
    for job in failed_jobs:
        client.retry_post_processor_job(
            dataset_id=dataset.id,
            item_id=job.item_id,
            job_id=job.id,
        )

# Option 2: Remove failed items
# for job in failed_jobs:
#     client.delete_dataset_item(item_id=job.item_id)

Check Label Distribution
# Get labeling statistics
stats = client.get_dataset_stats(version_id=version.id)

print("\nLabel Distribution:")
print("-" * 40)

label_counts = stats['label_counts']
total_labeled = sum(label_counts.values())
# Largest classes first; the bar is a rough visual share (1 char per ~2%).
for label, count in sorted(label_counts.items(), key=lambda item: item[1], reverse=True):
    pct = count / total_labeled * 100
    bar = "█" * int(pct / 2)
    print(f"{label:<15} {count:>6} ({pct:>5.1f}%) {bar}")

# Check for severe imbalance
counts = list(label_counts.values())
if max(counts) > 10 * min(counts):
    print("\n⚠️ Warning: Severe class imbalance detected")
    print(" Consider: weighted loss, oversampling, or more data")

Review a Sample
Before training, review a sample of labels to catch teacher errors:
import random

# Pull a batch of training items and pick a random sample to spot-check.
items = client.get_dataset_items(
    version_id=version.id,
    split_id=train_split.id,
    limit=100,
)
random.shuffle(items)
sample = items[:20]

print("\nSample items for review:")
print("Open these in the web UI to verify labels:")
print("-" * 60)
for item in sample:
    annotations = client.get_annotations(item_id=item.id)
    # Unlabeled items are shown explicitly rather than skipped.
    if annotations:
        label = annotations[0].label_name
        confidence = annotations[0].confidence
    else:
        label = "UNLABELED"
        confidence = 0
    print(f" {item.name}: {label} ({confidence:.0%})")
    print(f" → Review: https://app.seeme.ai/datasets/{dataset.id}/items/{item.id}")

What to Look For
| Issue | Action |
|---|---|
| Wrong label | Correct it manually |
| Low confidence but correct | OK, keep it |
| Low confidence and wrong | Delete or correct |
| Ambiguous image | Consider removing |
| Pattern of errors | Adjust teacher prompt or threshold |
Confidence Analysis
# Analyze prediction confidence distribution
items = client.get_dataset_items(version_id=version.id, limit=1000)

# Collect the top-annotation confidence of every item that has annotations.
confidences = []
for item in items:
    item_annotations = client.get_annotations(item_id=item.id)
    if not item_annotations:
        continue
    confidences.append(item_annotations[0].confidence)

# Distribution
import statistics
print("\nConfidence Distribution:")
print(f" Min: {min(confidences):.2%}")
print(f" Max: {max(confidences):.2%}")
print(f" Mean: {statistics.mean(confidences):.2%}")
print(f" Median: {statistics.median(confidences):.2%}")

# Count by confidence bucket
buckets = {"90-100%": 0, "80-90%": 0, "70-80%": 0, "<70%": 0}
for c in confidences:
    # Map each confidence to its bucket name, then tally.
    if c >= 0.9:
        bucket = "90-100%"
    elif c >= 0.8:
        bucket = "80-90%"
    elif c >= 0.7:
        bucket = "70-80%"
    else:
        bucket = "<70%"
    buckets[bucket] += 1

print("\nConfidence Buckets:")
for bucket, count in buckets.items():
    print(f" {bucket}: {count} ({count/len(confidences):.1%})")

Adjust Confidence Threshold
If too many items have low confidence:
# Two remedies when too many items fall below the teacher's threshold.
# NOTE(review): `processor` and `llm_model` are assumed to come from the
# post-processor configuration step (not shown in this section) — confirm
# they are in scope before running.
# Option 1: Lower threshold (more data, but potentially noisier)
client.update_post_processor(
processor_id=processor.id,
confidence_threshold=0.7 # Was 0.8
)
# Option 2: Keep high threshold, but re-run failed items with different prompt
# Create a second processor for low-confidence items
# The backup processor starts disabled; enable it manually only for the
# specific low-confidence items it should re-label.
backup_processor = client.create_post_processor(
dataset_id=dataset.id,
name="Backup Teacher (lower threshold)",
model_type="llm",
model_id=llm_model.id,
prompt="[More detailed prompt with examples]",
confidence_threshold=0.6,
enabled=False # Enable manually for specific items
)

Best Practices
- Start with high confidence threshold (0.8-0.9), lower if needed
- Review 5-10% of labels manually, especially edge cases
- Check class balance - severe imbalance affects student training
- Log teacher confidence - useful for debugging later
- Keep original images - don’t delete even if labeling fails
- Version your dataset - create a new version if you change labeling approach
Next Step
With your data labeled, proceed to Train Student to train the small model.