Label Data with Teacher
Use the teacher model to automatically label your training data.
Prepare the Dataset
from seeme import Client

client = Client()

# Create dataset for distillation
dataset = client.create_dataset(
    name="Distillation: Product Classifier",
    description="Teacher-labeled data for model distillation",
    task_type="image_classification",
)
version = client.create_dataset_version(dataset_id=dataset.id, name="v1")

# Create splits
train_split = client.create_split(version_id=version.id, name="train")
val_split = client.create_split(version_id=version.id, name="validation")

# Create labels (matching what teacher will predict)
label_names = ["good", "scratch", "dent", "discoloration", "crack"]
for label_name in label_names:
    client.create_label(version_id=version.id, name=label_name)
print(f"Dataset ready: {dataset.id}")

Upload Unlabeled Data
import os
import glob
import random


def _upload_split(client, version_id, split_id, paths, progress_every=100):
    """Upload image files into one dataset split.

    Args:
        client: SeeMe client used to create dataset items.
        version_id: Dataset version that receives the items.
        split_id: Split (train/validation) the items belong to.
        paths: Image file paths to upload.
        progress_every: Print a progress line after this many uploads.
    """
    for i, path in enumerate(paths):
        client.create_dataset_item(
            version_id=version_id,
            split_id=split_id,
            file_path=path,
        )
        if (i + 1) % progress_every == 0:
            print(f" Uploaded {i + 1}/{len(paths)}")


# Get all unlabeled images
image_paths = glob.glob("./unlabeled_images/**/*.jpg", recursive=True)
# Shuffle so the 80/20 split below is a random partition, not directory order.
random.shuffle(image_paths)
print(f"Found {len(image_paths)} images to label")

# Split 80/20 for train/val
val_count = int(len(image_paths) * 0.2)
val_paths = image_paths[:val_count]
train_paths = image_paths[val_count:]
print(f"Training: {len(train_paths)}, Validation: {len(val_paths)}")

# Upload training images
print("Uploading training images...")
_upload_split(client, version.id, train_split.id, train_paths)

# Upload validation images
print("Uploading validation images...")
_upload_split(client, version.id, val_split.id, val_paths)
print("Upload complete!")

Configure the Post-Processor
Set up the teacher model to automatically label uploads:
Monitor Labeling Progress
import time


def monitor_labeling(client, dataset_id, poll_interval=30):
    """Poll post-processor jobs until no work is pending or in flight.

    Prints a progress line on every poll and a summary when done.

    Args:
        client: SeeMe client used to query post-processor jobs.
        dataset_id: Dataset whose labeling jobs are being monitored.
        poll_interval: Seconds to sleep between polls.

    Returns:
        Tuple of (completed_count, failed_count).
    """
    print("Monitoring labeling progress...")
    print("-" * 50)

    while True:
        # One query per job status, in a fixed order; len() gives the count.
        counts = {
            status: len(client.get_post_processor_jobs(dataset_id=dataset_id, status=status))
            for status in ("pending", "processing", "completed", "failed")
        }

        done = counts["completed"] + counts["failed"]
        total = sum(counts.values())
        print(f"Progress: {done}/{total} "
              f"(Completed: {counts['completed']}, Failed: {counts['failed']}, "
              f"Processing: {counts['processing']}, Pending: {counts['pending']})")

        # Finished once nothing is queued or currently running.
        if counts["pending"] == 0 and counts["processing"] == 0:
            print("-" * 50)
            print("Labeling complete!")
            print(f" Successfully labeled: {counts['completed']}")
            print(f" Failed: {counts['failed']}")
            return counts["completed"], counts["failed"]

        time.sleep(poll_interval)
# Monitor
completed, failed = monitor_labeling(client, dataset.id)

Handle Failed Items
# Get failed jobs
failed_jobs = client.get_post_processor_jobs(
    dataset_id=dataset.id,
    status="failed",
)

if failed_jobs:
    print(f"\n{len(failed_jobs)} items failed labeling:")
    # Show at most the first ten failures for a quick look.
    for job in failed_jobs[:10]:
        print(f" Item {job.item_id}: {job.error}")

    # Option 1: Retry failed jobs
    print("\nRetrying failed jobs...")
    for job in failed_jobs:
        client.retry_post_processor_job(
            dataset_id=dataset.id,
            item_id=job.item_id,
            job_id=job.id,
        )

# Option 2: Remove failed items
# for job in failed_jobs:
#     client.delete_dataset_item(item_id=job.item_id)

Check Label Distribution
# Get labeling statistics
stats = client.get_dataset_stats(version_id=version.id)

print("\nLabel Distribution:")
print("-" * 40)

label_counts = stats['label_counts']
total_labeled = sum(label_counts.values())
# Largest classes first; the bar is a rough visual share (1 char per ~2%).
for label, count in sorted(label_counts.items(), key=lambda item: item[1], reverse=True):
    pct = count / total_labeled * 100
    bar = "█" * int(pct / 2)
    print(f"{label:<15} {count:>6} ({pct:>5.1f}%) {bar}")

# Check for severe imbalance
counts = list(label_counts.values())
if max(counts) > 10 * min(counts):
    print("\n⚠️ Warning: Severe class imbalance detected")
    print(" Consider: weighted loss, oversampling, or more data")

Review a Sample
Before training, review a sample of labels to catch teacher errors:
import random

# Pull a batch of training items and pick a random sample to spot-check.
items = client.get_dataset_items(
    version_id=version.id,
    split_id=train_split.id,
    limit=100,
)
random.shuffle(items)
sample = items[:20]

print("\nSample items for review:")
print("Open these in the web UI to verify labels:")
print("-" * 60)
for item in sample:
    annotations = client.get_annotations(item_id=item.id)
    # Unlabeled items are shown explicitly rather than skipped.
    if annotations:
        label = annotations[0].label_name
        confidence = annotations[0].confidence
    else:
        label = "UNLABELED"
        confidence = 0
    print(f" {item.name}: {label} ({confidence:.0%})")
    print(f" → Review: https://app.seeme.ai/datasets/{dataset.id}/items/{item.id}")

What to Look For
| Issue | Action |
|---|---|
| Wrong label | Correct it manually |
| Low confidence but correct | OK, keep it |
| Low confidence and wrong | Delete or correct |
| Ambiguous image | Consider removing |
| Pattern of errors | Adjust teacher prompt or threshold |
Confidence Analysis
# Analyze prediction confidence distribution
items = client.get_dataset_items(version_id=version.id, limit=1000)

# Collect the top-annotation confidence of every item that has annotations.
confidences = []
for item in items:
    item_annotations = client.get_annotations(item_id=item.id)
    if not item_annotations:
        continue
    confidences.append(item_annotations[0].confidence)

# Distribution
import statistics
print("\nConfidence Distribution:")
print(f" Min: {min(confidences):.2%}")
print(f" Max: {max(confidences):.2%}")
print(f" Mean: {statistics.mean(confidences):.2%}")
print(f" Median: {statistics.median(confidences):.2%}")

# Count by confidence bucket
buckets = {"90-100%": 0, "80-90%": 0, "70-80%": 0, "<70%": 0}
for c in confidences:
    # Map each confidence to its bucket name, then tally.
    if c >= 0.9:
        bucket = "90-100%"
    elif c >= 0.8:
        bucket = "80-90%"
    elif c >= 0.7:
        bucket = "70-80%"
    else:
        bucket = "<70%"
    buckets[bucket] += 1

print("\nConfidence Buckets:")
for bucket, count in buckets.items():
    print(f" {bucket}: {count} ({count/len(confidences):.1%})")

Adjust Confidence Threshold
If too many items have low confidence:
# Two remedies when too many items fall below the teacher's threshold.
# NOTE(review): `processor` and `llm_model` are assumed to come from the
# post-processor configuration step (not shown in this section) — confirm
# they are in scope before running.
# Option 1: Lower threshold (more data, but potentially noisier)
client.update_post_processor(
processor_id=processor.id,
confidence_threshold=0.7 # Was 0.8
)
# Option 2: Keep high threshold, but re-run failed items with different prompt
# Create a second processor for low-confidence items
# The backup processor starts disabled; enable it manually only for the
# specific low-confidence items it should re-label.
backup_processor = client.create_post_processor(
dataset_id=dataset.id,
name="Backup Teacher (lower threshold)",
model_type="llm",
model_id=llm_model.id,
prompt="[More detailed prompt with examples]",
confidence_threshold=0.6,
enabled=False # Enable manually for specific items
)

Best Practices
- Start with high confidence threshold (0.8-0.9), lower if needed
- Review 5-10% of labels manually, especially edge cases
- Check class balance - severe imbalance affects student training
- Log teacher confidence - useful for debugging later
- Keep original images - don’t delete even if labeling fails
- Version your dataset - create a new version if you change labeling approach
Next Step
With your data labeled, proceed to Train Student to train the small model.