All TensorOne API errors follow a consistent JSON format:
{
"error": "ERROR_CODE",
"message": "Human-readable error description",
"code": 400,
"details": {
"field": "parameter_name",
"reason": "Specific validation failure"
},
"request_id": "req_1234567890abcdef",
"timestamp": "2024-01-15T10:30:00Z"
}
HTTP Status Codes
4xx Client Errors
400 Bad Request
The request was invalid or missing required parameters.
{
"error": "VALIDATION_ERROR",
"message": "Request validation failed",
"code": 400,
"details": {
"field": "gpu_type",
"reason": "Invalid GPU type specified"
}
}
401 Unauthorized
Authentication failed or API key is invalid.
{
"error": "UNAUTHORIZED",
"message": "Invalid or missing API key",
"code": 401
}
403 Forbidden
API key lacks required permissions for the requested resource.
{
"error": "FORBIDDEN",
"message": "Insufficient permissions",
"code": 403,
"details": {
"required_permission": "clusters:write",
"current_permissions": ["clusters:read"]
}
}
404 Not Found
The requested resource doesn’t exist.
{
"error": "RESOURCE_NOT_FOUND",
"message": "Cluster not found",
"code": 404,
"details": {
"resource": "cluster",
"id": "cls_nonexistent"
}
}
409 Conflict
Request conflicts with current resource state.
{
"error": "STATE_CONFLICT",
"message": "Cannot delete running cluster",
"code": 409,
"details": {
"current_state": "running",
"required_state": "stopped"
}
}
422 Unprocessable Entity
Request is well-formed but contains semantic errors.
{
"error": "SEMANTIC_ERROR",
"message": "Insufficient resources for requested configuration",
"code": 422,
"details": {
"requested": "8x NVIDIA A100",
"available": "2x NVIDIA A100"
}
}
429 Too Many Requests
Rate limit exceeded.
{
"error": "RATE_LIMIT_EXCEEDED",
"message": "API rate limit exceeded",
"code": 429,
"details": {
"limit": 100,
"remaining": 0,
"reset_at": "2024-01-15T11:00:00Z",
"retry_after": 1800
}
}
5xx Server Errors
500 Internal Server Error
Unexpected server error occurred.
{
"error": "INTERNAL_ERROR",
"message": "An unexpected error occurred",
"code": 500,
"details": {
"incident_id": "inc_1234567890"
}
}
502 Bad Gateway
Upstream service is unavailable.
{
"error": "SERVICE_UNAVAILABLE",
"message": "AI service temporarily unavailable",
"code": 502,
"details": {
"service": "text-to-image",
"estimated_recovery": "2024-01-15T10:45:00Z"
}
}
503 Service Unavailable
Service is temporarily overloaded or under maintenance.
{
"error": "SERVICE_UNAVAILABLE",
"message": "Service temporarily unavailable",
"code": 503,
"details": {
"retry_after": 300,
"maintenance_window": "2024-01-15T10:30:00Z to 2024-01-15T11:00:00Z"
}
}
Domain-Specific Errors
Cluster Errors
CLUSTER_CREATION_FAILED
{
"error": "CLUSTER_CREATION_FAILED",
"message": "Failed to create cluster",
"code": 422,
"details": {
"reason": "Insufficient GPU availability",
"alternative_gpu_types": ["NVIDIA RTX 4090", "NVIDIA A40"]
}
}
CLUSTER_START_TIMEOUT
{
"error": "CLUSTER_START_TIMEOUT",
"message": "Cluster failed to start within timeout",
"code": 408,
"details": {
"timeout": "300s",
"current_state": "starting"
}
}
Endpoint Errors
ENDPOINT_EXECUTION_FAILED
{
"error": "ENDPOINT_EXECUTION_FAILED",
"message": "Model execution failed",
"code": 500,
"details": {
"reason": "Out of memory",
"suggestions": ["Reduce batch size", "Use smaller model variant"]
}
}
EXECUTION_TIMEOUT
{
"error": "EXECUTION_TIMEOUT",
"message": "Endpoint execution timed out",
"code": 408,
"details": {
"timeout": "300s",
"partial_results": false
}
}
Training Errors
TRAINING_DATA_INVALID
{
"error": "TRAINING_DATA_INVALID",
"message": "Training dataset validation failed",
"code": 422,
"details": {
"invalid_samples": 15,
"total_samples": 1000,
"errors": [
{ "line": 42, "reason": "Missing label" },
{ "line": 156, "reason": "Invalid image format" }
]
}
}
Payment Errors
INSUFFICIENT_CREDITS
{
"error": "INSUFFICIENT_CREDITS",
"message": "Insufficient account credits",
"code": 402,
"details": {
"required": 50.0,
"available": 12.5,
"currency": "USD"
}
}
PAYMENT_DECLINED
{
"error": "PAYMENT_DECLINED",
"message": "Payment method was declined",
"code": 402,
"details": {
"decline_reason": "insufficient_funds",
"payment_method": "card_*1234"
}
}
Error Handling Best Practices
1. Implement Retry Logic
import time
import random
from typing import Optional
def make_request_with_retry(
    func,
    max_retries: int = 3,
    backoff_factor: float = 1.0,
    retryable_errors: Optional[list] = None,
) -> Optional[dict]:
    """Call *func* and retry transient HTTP failures with exponential backoff.

    Args:
        func: Zero-argument callable returning a requests-style response
            (must expose ``status_code``, ``headers``, and ``json()``).
        max_retries: Number of retries after the initial attempt.
        backoff_factor: Multiplier applied to the exponential backoff delay.
        retryable_errors: Status codes that trigger a retry. Defaults to
            ``[500, 502, 503, 504, 429]``. Declared as ``None`` to avoid a
            shared mutable default argument.

    Returns:
        The decoded JSON body on success, or ``None`` when all attempts hit
        a retryable status.

    Raises:
        Exception: re-raises the last error from *func* once retries are
            exhausted.
    """
    if retryable_errors is None:
        retryable_errors = [500, 502, 503, 504, 429]
    for attempt in range(max_retries + 1):
        try:
            response = func()
            if response.status_code == 429:
                # Rate limited: honor the server's Retry-After (seconds),
                # but don't sleep when no attempts remain.
                if attempt == max_retries:
                    break
                retry_after = int(response.headers.get('Retry-After', 60))
                time.sleep(retry_after)
                continue
            if response.status_code not in retryable_errors:
                return response.json()
            if attempt == max_retries:
                break
            # Exponential backoff with jitter to avoid thundering herds.
            delay = backoff_factor * (2 ** attempt) + random.uniform(0, 1)
            time.sleep(delay)
        except Exception:
            # Bare `raise` preserves the original traceback.
            if attempt == max_retries:
                raise
    return None
2. Graceful Error Handling
async function handleAPICall(apiFunction) {
try {
const result = await apiFunction();
return { success: true, data: result };
} catch (error) {
const errorData = error.response?.data || {};
switch (errorData.error) {
case "RATE_LIMIT_EXCEEDED":
return {
success: false,
error: "rate_limit",
retryAfter: errorData.details?.retry_after,
message: "Please wait before making more requests",
};
case "INSUFFICIENT_CREDITS":
return {
success: false,
error: "payment_required",
message: "Please add credits to your account",
required: errorData.details?.required,
};
case "RESOURCE_NOT_FOUND":
return {
success: false,
error: "not_found",
message: "The requested resource was not found",
};
default:
return {
success: false,
error: "unknown",
message: errorData.message || "An unexpected error occurred",
requestId: errorData.request_id,
};
}
}
}
3. Validation Before Requests
def validate_cluster_config(config):
    """Validate a cluster configuration dict before it is sent to the API.

    Collects every failed check and raises a single ValueError listing them
    all; returns None when the configuration passes.
    """
    problems = []

    if not config.get('name'):
        problems.append("Cluster name is required")

    disk_gb = config.get('container_disk_gb', 0)
    if disk_gb < 10:
        problems.append("Container disk must be at least 10GB")

    allowed_gpus = ['NVIDIA A100', 'NVIDIA RTX 4090', 'NVIDIA A40']
    if config.get('gpu_type') not in allowed_gpus:
        problems.append(f"GPU type must be one of: {allowed_gpus}")

    if problems:
        raise ValueError(f"Validation failed: {', '.join(problems)}")
4. User-Friendly Error Messages
# Maps API error codes to short messages safe to show directly to end users.
ERROR_MESSAGES = {
    'UNAUTHORIZED': 'Please check your API key and try again.',
    'FORBIDDEN': "You don't have permission to perform this action.",
    'RATE_LIMIT_EXCEEDED': "You've made too many requests. Please wait a moment.",
    'INSUFFICIENT_CREDITS': 'Please add credits to your account to continue.',
    'CLUSTER_CREATION_FAILED': 'Unable to create cluster. Please try a different configuration.',
    'ENDPOINT_EXECUTION_FAILED': 'Model execution failed. Please check your input parameters.',
}


def get_user_friendly_message(error_code, fallback_message):
    """Return the friendly text for *error_code*, or *fallback_message* if unknown."""
    if error_code in ERROR_MESSAGES:
        return ERROR_MESSAGES[error_code]
    return fallback_message
Debugging Tips
1. Log Request IDs
Always log the request_id from error responses for support inquiries:
import logging
def log_api_error(error_response):
    """Log an API error payload, including the request_id needed by support.

    Args:
        error_response: Decoded error dict with ``error`` and ``request_id``
            keys (either may be absent; ``None`` is logged in that case).
    """
    # Lazy %-style args: the message is only formatted if the record is
    # actually emitted (logging best practice vs. eager f-strings).
    logging.error(
        "API Error: %s (Request ID: %s)",
        error_response.get('error'),
        error_response.get('request_id'),
    )
2. Monitor Error Patterns
Track error frequencies to identify patterns:
from collections import defaultdict
import json
import logging  # was missing: logging.warning below would raise NameError

# Running tally of how often each error code has been observed.
error_counts = defaultdict(int)


def track_error(error_response):
    """Record one occurrence of an API error code.

    Emits a warning on every 10th occurrence so hot error codes surface in
    logs without flooding them on every call.
    """
    error_code = error_response.get('error')
    error_counts[error_code] += 1
    # Log if error frequency is high
    if error_counts[error_code] % 10 == 0:
        logging.warning(
            "Error %s occurred %d times", error_code, error_counts[error_code]
        )
3. Validate Responses
Always validate API responses:
def validate_response(response):
    """Ensure an API response is present and is not an error payload.

    Returns the response unchanged when it is valid.

    Raises:
        ValueError: when the response is empty or falsy.
        APIError: when the payload carries an ``error`` field.
    """
    if not response:
        raise ValueError("Empty response received")
    if 'error' not in response:
        return response
    raise APIError(
        response['error'],
        response.get('message'),
        response.get('details'),
    )
Getting Help
When contacting support, include:
- Request ID from the error response
- Timestamp of the error
- Full error response (sanitized of sensitive data)
- Code snippet that produced the error
- Expected behavior vs actual behavior
Never include API keys or sensitive data in support requests or logs.