cURL
# POST a new evaluation for a training job (generic request template).
curl --request POST \
  --url https://api.example.com/training/jobs/{jobId}/evaluate \
  --header 'Content-Type: application/json' \
  --data '{
    "checkpointId": "<string>",
    "evaluationName": "<string>",
    "testDataset": {
      "datasetId": "<string>",
      "datasetUrl": "<string>",
      "format": "<string>",
      "sampleSize": 123
    },
    "metrics": [
      {}
    ],
    "evaluationConfig": {
      "batchSize": 123,
      "numWorkers": 123,
      "generateReports": true,
      "saveResults": true
    },
    "comparisonBaselines": [
      {
        "baselineType": "<string>",
        "baselineConfig": {}
      }
    ]
  }'
{ "evaluationId": "eval_abc123_001", "status": "queued", "estimatedDuration": "25m", "progress": { "samplesProcessed": 0, "totalSamples": 10000, "percentComplete": 0.0, "currentMetrics": {} }, "createdAt": "2024-01-16T10:00:00Z" }
Run comprehensive evaluation of trained models using custom datasets and metrics
Show Test Dataset
json
csv
parquet
hdf5
Show Available Metrics
accuracy
precision
recall
f1_score
auc_roc
confusion_matrix
mse
mae
rmse
r2_score
mean_absolute_percentage_error
perplexity
bleu_score
rouge_score
bertscore
Show Evaluation Config
Show Baseline Comparison
random
majority_class
pretrained_model
previous_version
queued
running
completed
failed
Show Progress Object
# Start a final evaluation of the best checkpoint with classification metrics
# and a random baseline for comparison.
curl -X POST "https://api.tensorone.ai/v2/training/jobs/job_train_abc123/evaluate" \
  -H "Authorization: Bearer YOUR_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "evaluationName": "Final Model Evaluation",
    "checkpointId": "ckpt_best_abc123",
    "testDataset": {
      "datasetId": "ds_test_456",
      "format": "json"
    },
    "metrics": ["accuracy", "precision", "recall", "f1_score", "confusion_matrix"],
    "evaluationConfig": {
      "batchSize": 32,
      "generateReports": true
    },
    "comparisonBaselines": [
      {
        "baselineType": "random",
        "baselineConfig": {}
      }
    ]
  }'
import time

import requests

# Start a comprehensive evaluation of the best checkpoint against a
# sampled S3 test dataset, comparing against a pretrained-model baseline.
response = requests.post(
    "https://api.tensorone.ai/v2/training/jobs/job_train_abc123/evaluate",
    headers={
        "Authorization": "Bearer YOUR_API_KEY",
        "Content-Type": "application/json"
    },
    json={
        "evaluationName": "Production Readiness Test",
        "checkpointId": "ckpt_best_abc123",
        "testDataset": {
            "datasetUrl": "s3://my-bucket/test-data/",
            "format": "parquet",
            "sampleSize": 10000
        },
        "metrics": [
            "accuracy", "precision", "recall",
            "f1_score", "auc_roc", "confusion_matrix"
        ],
        "evaluationConfig": {
            "batchSize": 64,
            "generateReports": True,
            "saveResults": True
        },
        "comparisonBaselines": [
            {
                "baselineType": "pretrained_model",
                "baselineConfig": {"modelName": "bert-base-uncased"}
            }
        ]
    }
)
# Fail fast on HTTP errors instead of trying to read fields from an error body.
response.raise_for_status()

evaluation = response.json()
print(f"Started evaluation: {evaluation['evaluationId']}")
print(f"Estimated duration: {evaluation['estimatedDuration']}")

# Poll every 30s until the evaluation reaches a terminal state.
while True:
    progress_response = requests.get(
        f"https://api.tensorone.ai/v2/training/evaluations/{evaluation['evaluationId']}",
        headers={"Authorization": "Bearer YOUR_API_KEY"}
    )
    eval_data = progress_response.json()

    if eval_data['status'] == 'completed':
        print("Evaluation completed!")
        print(f"Final accuracy: {eval_data['results']['metrics']['accuracy']:.3f}")
        break
    elif eval_data['status'] == 'failed':
        # Without this branch the loop would poll a failed run forever.
        print("Evaluation failed!")
        break
    elif eval_data['status'] == 'running':
        progress = eval_data['progress']['percentComplete']
        print(f"Progress: {progress:.1f}%")

    time.sleep(30)
// Start a performance benchmark evaluation against a benchmark dataset.
const response = await fetch('https://api.tensorone.ai/v2/training/jobs/job_train_abc123/evaluate', {
  method: 'POST',
  headers: {
    'Authorization': 'Bearer YOUR_API_KEY',
    'Content-Type': 'application/json'
  },
  body: JSON.stringify({
    evaluationName: 'Model Performance Benchmark',
    testDataset: {
      datasetId: 'ds_benchmark_789',
      format: 'json'
    },
    metrics: ['accuracy', 'f1_score', 'perplexity'],
    evaluationConfig: {
      batchSize: 16,
      generateReports: true
    }
  })
});

// Surface HTTP-level failures instead of parsing an error body as a result.
if (!response.ok) {
  throw new Error(`Evaluation request failed with status ${response.status}`);
}

const evaluation = await response.json();
console.log('Evaluation started:', evaluation);
# Retrieve the status and results of a specific evaluation run.
curl -X GET "https://api.tensorone.ai/v2/training/evaluations/eval_abc123_001" \
  -H "Authorization: Bearer YOUR_API_KEY"
# Get detailed evaluation results for a finished run.
response = requests.get(
    "https://api.tensorone.ai/v2/training/evaluations/eval_abc123_001",
    headers={"Authorization": "Bearer YOUR_API_KEY"}
)
results = response.json()

if results['status'] == 'completed':
    metrics = results['results']['metrics']
    print("=== Evaluation Results ===")
    print(f"Accuracy: {metrics['accuracy']:.3f}")
    print(f"Precision: {metrics['precision']:.3f}")
    print(f"Recall: {metrics['recall']:.3f}")
    print(f"F1 Score: {metrics['f1_score']:.3f}")

    # Download the generated PDF report when one was produced.
    if 'reportUrl' in results['results']:
        report_response = requests.get(results['results']['reportUrl'])
        with open('evaluation_report.pdf', 'wb') as f:
            f.write(report_response.content)
        print("Downloaded detailed evaluation report")
else:
    # Previously this script silently did nothing for queued/running/failed runs.
    print(f"Evaluation not completed yet (status: {results['status']})")
{ "evaluationId": "eval_abc123_001", "status": "completed", "evaluationName": "Final Model Evaluation", "jobId": "job_train_abc123", "checkpointId": "ckpt_best_abc123", "results": { "metrics": { "accuracy": 0.8756, "precision": 0.8623, "recall": 0.8891, "f1_score": 0.8755, "auc_roc": 0.9234, "confusion_matrix": [ [850, 23, 45, 12], [18, 892, 34, 21], [32, 28, 876, 19], [15, 19, 22, 901] ] }, "classMetrics": { "class_0": {"precision": 0.89, "recall": 0.91, "f1": 0.90}, "class_1": {"precision": 0.87, "recall": 0.88, "f1": 0.875}, "class_2": {"precision": 0.84, "recall": 0.86, "f1": 0.85}, "class_3": {"precision": 0.91, "recall": 0.89, "f1": 0.90} }, "baselineComparison": { "random_baseline": { "accuracy": 0.2489, "improvement": "+250.7%" } }, "reportUrl": "https://reports.tensorone.ai/evaluations/eval_abc123_001.pdf", "rawResultsUrl": "https://results.tensorone.ai/eval_abc123_001.json" }, "executionDetails": { "samplesEvaluated": 10000, "evaluationTime": "23m 45s", "averageInferenceTime": "12.3ms", "resourceUsage": { "gpuUtilization": 85.2, "memoryUsage": "8.2GB" } }, "createdAt": "2024-01-16T10:00:00Z", "completedAt": "2024-01-16T10:23:45Z" }
{ "metrics": ["accuracy", "loss"], "testDataset": {"datasetId": "ds_test_123"} }
{ "metrics": ["accuracy", "precision", "recall", "f1_score", "confusion_matrix"], "comparisonBaselines": [ {"baselineType": "random"}, {"baselineType": "majority_class"} ] }
{ "evaluationName": "Model A vs Model B", "checkpointComparison": [ "ckpt_model_a_123", "ckpt_model_b_456" ], "metrics": ["accuracy", "latency", "memory_usage"] }
{ "metrics": ["bleu_score", "rouge_score", "bertscore"], "evaluationConfig": { "customMetrics": [ { "name": "semantic_similarity", "function": "cosine_similarity", "parameters": {"threshold": 0.8} } ] } }
{ "evaluationConfig": { "errorAnalysis": { "enabled": true, "sampleFailureCases": 100, "categorizeErrors": true } } }
{ "evaluationConfig": { "fairnessMetrics": { "enabled": true, "protectedAttributes": ["gender", "age_group"], "metrics": ["demographic_parity", "equalized_odds"] } } }
{ "evaluationConfig": { "robustnessTests": { "enabled": true, "testTypes": ["adversarial_examples", "data_drift", "noise_injection"], "severityLevels": ["low", "medium", "high"] } } }
def comprehensive_evaluation(job_id, test_dataset_id, api_key="YOUR_API_KEY"):
    """Run a comprehensive model evaluation and block until it finishes.

    Args:
        job_id: Training job whose checkpoint should be evaluated.
        test_dataset_id: Dataset to evaluate the model against.
        api_key: Bearer token for the API. Defaults to a placeholder so
            existing two-argument callers keep working.

    Returns:
        The ``results`` object of the completed evaluation.

    Raises:
        Exception: If the evaluation ends in the ``failed`` state.
    """
    # Every endpoint in this API requires a bearer token; the original
    # version omitted it and both requests would be rejected.
    headers = {"Authorization": f"Bearer {api_key}"}

    # Start evaluation
    eval_response = requests.post(
        f"https://api.tensorone.ai/v2/training/jobs/{job_id}/evaluate",
        headers=headers,
        json={
            "evaluationName": f"Comprehensive Eval {datetime.now()}",
            "testDataset": {"datasetId": test_dataset_id},
            "metrics": [
                "accuracy", "precision", "recall",
                "f1_score", "auc_roc", "confusion_matrix"
            ],
            "comparisonBaselines": [
                {"baselineType": "random"},
                {"baselineType": "majority_class"}
            ],
            "evaluationConfig": {
                "generateReports": True,
                "errorAnalysis": {"enabled": True}
            }
        }
    )
    eval_id = eval_response.json()['evaluationId']

    # Poll every 30s until the run reaches a terminal state.
    while True:
        status_response = requests.get(
            f"https://api.tensorone.ai/v2/training/evaluations/{eval_id}",
            headers=headers
        )
        eval_data = status_response.json()

        if eval_data['status'] == 'completed':
            return eval_data['results']
        elif eval_data['status'] == 'failed':
            raise Exception("Evaluation failed")

        time.sleep(30)