Label Data with Teacher

Use the teacher model to automatically label your training data.

Prepare the Dataset

# Set up a distillation dataset: one version, train/validation splits,
# and the label set the teacher model is expected to predict.
from seeme import Client

client = Client()

# Dataset that will hold the teacher-labeled examples
dataset = client.create_dataset(
    name="Distillation: Product Classifier",
    description="Teacher-labeled data for model distillation",
    task_type="image_classification"
)

# First version of the dataset
version = client.create_dataset_version(dataset_id=dataset.id, name="v1")

# Train/validation splits
train_split = client.create_split(version_id=version.id, name="train")
val_split = client.create_split(version_id=version.id, name="validation")

# Label set must match the classes the teacher will predict
for label_name in ["good", "scratch", "dent", "discoloration", "crack"]:
    client.create_label(version_id=version.id, name=label_name)

print(f"Dataset ready: {dataset.id}")

Upload Unlabeled Data

import os
import glob
import random

# Get all unlabeled images; shuffle so the 80/20 split below is random
image_paths = glob.glob("./unlabeled_images/**/*.jpg", recursive=True)
random.shuffle(image_paths)

print(f"Found {len(image_paths)} images to label")

# Split 80/20 for train/val
val_count = int(len(image_paths) * 0.2)
val_paths = image_paths[:val_count]
train_paths = image_paths[val_count:]

print(f"Training: {len(train_paths)}, Validation: {len(val_paths)}")

def _upload_images(paths, split_id):
    """Upload *paths* as items of the given split, logging every 100 files.

    Uses the module-level ``client`` and ``version`` created in the
    dataset-preparation step.
    """
    for i, path in enumerate(paths):
        client.create_dataset_item(
            version_id=version.id,
            split_id=split_id,
            file_path=path
        )
        if (i + 1) % 100 == 0:
            print(f"  Uploaded {i + 1}/{len(paths)}")

# Previously the two upload loops were copy-pasted; they now share one helper.
print("Uploading training images...")
_upload_images(train_paths, train_split.id)

print("Uploading validation images...")
_upload_images(val_paths, val_split.id)

print("Upload complete!")

Configure the Post-Processor

Set up the teacher model to automatically label uploads. (The post-processor creation call is shown under "Adjust Confidence Threshold" below; see the post-processor configuration guide for details.)

Monitor Labeling Progress

import time

def monitor_labeling(client, dataset_id, poll_interval=30, max_polls=None):
    """Poll post-processor jobs until no pending or processing work remains.

    Args:
        client: SeeMe client exposing ``get_post_processor_jobs``.
        dataset_id: Dataset whose labeling jobs are monitored.
        poll_interval: Seconds to sleep between polls.
        max_polls: Optional safety cap on the number of sleep/poll cycles;
            ``None`` (the default) keeps the original poll-forever behavior.

    Returns:
        Tuple ``(completed_count, failed_count)`` from the final poll.
    """
    print("Monitoring labeling progress...")
    print("-" * 50)

    polls = 0
    while True:
        # One query per job status instead of four copy-pasted calls.
        counts = {
            status: len(client.get_post_processor_jobs(
                dataset_id=dataset_id, status=status
            ))
            for status in ("pending", "processing", "completed", "failed")
        }

        total = sum(counts.values())
        done = counts["completed"] + counts["failed"]

        print(f"Progress: {done}/{total} "
              f"(Completed: {counts['completed']}, Failed: {counts['failed']}, "
              f"Processing: {counts['processing']}, Pending: {counts['pending']})")

        if counts["pending"] == 0 and counts["processing"] == 0:
            print("-" * 50)
            print("Labeling complete!")
            print(f"  Successfully labeled: {counts['completed']}")
            print(f"  Failed: {counts['failed']}")
            break

        polls += 1
        if max_polls is not None and polls >= max_polls:
            # Safety cap reached: stop polling even though work remains.
            break

        time.sleep(poll_interval)

    return counts["completed"], counts["failed"]

# Monitor: blocks until no pending/processing jobs remain, then returns counts
completed, failed = monitor_labeling(client, dataset.id)

Handle Failed Items

# Collect the items the post-processor could not label
failed_jobs = client.get_post_processor_jobs(
    dataset_id=dataset.id,
    status="failed"
)

if failed_jobs:
    print(f"\n{len(failed_jobs)} items failed labeling:")

    # Show the first few errors for a quick diagnosis
    for job in failed_jobs[:10]:
        print(f"  Item {job.item_id}: {job.error}")

    # Option 1: Retry failed jobs
    print("\nRetrying failed jobs...")
    for job in failed_jobs:
        client.retry_post_processor_job(
            dataset_id=dataset.id,
            item_id=job.item_id,
            job_id=job.id
        )

    # Option 2: Remove failed items
    # for job in failed_jobs:
    #     client.delete_dataset_item(item_id=job.item_id)

Check Label Distribution

# Summarize how the teacher's labels are distributed across classes
stats = client.get_dataset_stats(version_id=version.id)
label_counts = stats['label_counts']

print("\nLabel Distribution:")
print("-" * 40)

total_labeled = sum(label_counts.values())

# Print largest classes first, with a simple text bar chart
for label, count in sorted(label_counts.items(), key=lambda kv: kv[1], reverse=True):
    pct = count / total_labeled * 100
    bar = "█" * int(pct / 2)
    print(f"{label:<15} {count:>6} ({pct:>5.1f}%) {bar}")

# Flag a greater-than-10x gap between the biggest and smallest class
counts = list(label_counts.values())
if max(counts) > 10 * min(counts):
    print("\n⚠️  Warning: Severe class imbalance detected")
    print("   Consider: weighted loss, oversampling, or more data")

Review a Sample

Before training, review a sample of labels to catch teacher errors:

import random

# Fetch a batch of training items and pick 20 at random for manual review
items = client.get_dataset_items(
    version_id=version.id,
    split_id=train_split.id,
    limit=100
)

random.shuffle(items)
sample = items[:20]

print("\nSample items for review:")
print("Open these in the web UI to verify labels:")
print("-" * 60)

for item in sample:
    annotations = client.get_annotations(item_id=item.id)
    if annotations:
        # Report the top annotation's label and confidence
        label = annotations[0].label_name
        confidence = annotations[0].confidence
    else:
        label = "UNLABELED"
        confidence = 0

    print(f"  {item.name}: {label} ({confidence:.0%})")
    print(f"    → Review: https://app.seeme.ai/datasets/{dataset.id}/items/{item.id}")

What to Look For

| Issue                      | Action                             |
|----------------------------|------------------------------------|
| Wrong label                | Correct it manually                |
| Low confidence but correct | OK, keep it                        |
| Low confidence and wrong   | Delete or correct                  |
| Ambiguous image            | Consider removing                  |
| Pattern of errors          | Adjust teacher prompt or threshold |

Confidence Analysis

import statistics

# Analyze prediction confidence distribution
items = client.get_dataset_items(version_id=version.id, limit=1000)

# Collect the top annotation's confidence for every labeled item
confidences = []
for item in items:
    annotations = client.get_annotations(item_id=item.id)
    if annotations:
        confidences.append(annotations[0].confidence)

if not confidences:
    # Guard: min()/statistics.mean() raise on empty sequences, and the
    # bucket percentages below would divide by zero.
    print("\nNo annotated items found — nothing to analyze.")
else:
    print("\nConfidence Distribution:")
    print(f"  Min: {min(confidences):.2%}")
    print(f"  Max: {max(confidences):.2%}")
    print(f"  Mean: {statistics.mean(confidences):.2%}")
    print(f"  Median: {statistics.median(confidences):.2%}")

    # Count by confidence bucket
    buckets = {"90-100%": 0, "80-90%": 0, "70-80%": 0, "<70%": 0}
    for c in confidences:
        if c >= 0.9:
            buckets["90-100%"] += 1
        elif c >= 0.8:
            buckets["80-90%"] += 1
        elif c >= 0.7:
            buckets["70-80%"] += 1
        else:
            buckets["<70%"] += 1

    print("\nConfidence Buckets:")
    for bucket, count in buckets.items():
        print(f"  {bucket}: {count} ({count/len(confidences):.1%})")

Adjust Confidence Threshold

If too many items have low confidence:

# Option 1: Lower threshold (more data, but potentially noisier)
# NOTE(review): `processor` is assumed to come from the post-processor
# configuration step — confirm it is in scope before running.
client.update_post_processor(
    processor_id=processor.id,
    confidence_threshold=0.7  # Was 0.8
)

# Option 2: Keep high threshold, but re-run failed items with different prompt
# Create a second processor for low-confidence items
backup_config = {
    "dataset_id": dataset.id,
    "name": "Backup Teacher (lower threshold)",
    "model_type": "llm",
    "model_id": llm_model.id,
    "prompt": "[More detailed prompt with examples]",
    "confidence_threshold": 0.6,
    "enabled": False,  # Enable manually for specific items
}
backup_processor = client.create_post_processor(**backup_config)

Best Practices

  1. Start with high confidence threshold (0.8-0.9), lower if needed
  2. Review 5-10% of labels manually, especially edge cases
  3. Check class balance - severe imbalance affects student training
  4. Log teacher confidence - useful for debugging later
  5. Keep original images - don’t delete even if labeling fails
  6. Version your dataset - create a new version if you change labeling approach

Next Step

With your data labeled, proceed to Train Student to train the small model.