Documentation Index
Fetch the complete documentation index at: https://runcrate.ai/docs/llms.txt
Use this file to discover all available pages before exploring further.
Python SDK
The official Python SDK for Runcrate. Supports both synchronous and asynchronous usage with full type safety via Pydantic.
Installation
Requires Python 3.9+.
Quick Start
from runcrate import Runcrate
client = Runcrate(api_key="rc_live_YOUR_API_KEY")
# Chat completion
response = client.models.chat_completion(
model="deepseek-ai/DeepSeek-V3",
messages=[{"role": "user", "content": "Hello!"}],
)
print(response.choices[0].message.content)
# List GPU instances
instances = client.instances.list()
for inst in instances:
print(f"{inst.name} — {inst.status}")
client.close()
Or use as a context manager:
with Runcrate(api_key="rc_live_YOUR_API_KEY") as client:
balance = client.billing.get_balance()
print(f"Credits: ${balance.credits_balance}")
Configuration
client = Runcrate(
api_key="rc_live_...", # or RUNCRATE_API_KEY env var
base_url="https://runcrate.ai", # infrastructure API
inference_url="https://api.runcrate.ai", # model inference API
timeout=30, # seconds
max_retries=3, # retry on 429/5xx
environment="production", # optional — target a specific environment
)
Environments
API keys are workspace-scoped. By default, requests target the workspace’s default environment (usually main). To target a different environment, pass environment at client construction:
# Default environment (e.g., main)
client = Runcrate(api_key="rc_live_...")
# Specific environment
staging = Runcrate(api_key="rc_live_...", environment="staging")
prod = Runcrate(api_key="rc_live_...", environment="production")
# Each client only sees resources in its own environment
staging.instances.list() # only staging instances
prod.instances.list() # only production instances
What’s environment-scoped: instances, crates, storage volumes.
What’s workspace-wide: SSH keys, billing, API keys, templates.
Async Support
from runcrate import AsyncRuncrate
async with AsyncRuncrate(api_key="rc_live_...") as client:
instances = await client.instances.list()
balance = await client.billing.get_balance()
Every method available on Runcrate has an async equivalent on AsyncRuncrate.
Model Inference
All inference methods hit api.runcrate.ai.
Chat Completions
response = client.models.chat_completion(
model="deepseek-ai/DeepSeek-V3",
messages=[{"role": "user", "content": "Explain quantum computing"}],
max_tokens=500,
temperature=0.7,
)
print(response.choices[0].message.content)
Streaming
stream = client.models.chat_completion(
model="deepseek-ai/DeepSeek-V3",
messages=[{"role": "user", "content": "Tell me a story"}],
stream=True,
)
for chunk in stream:
delta = chunk["choices"][0]["delta"]
print(delta.get("content", ""), end="", flush=True)
Image Generation
image = client.models.generate_image(
model="black-forest-labs/FLUX.1-schnell",
prompt="A futuristic cityscape at sunset",
width=1024,
height=768,
)
# Save directly
image.data[0].save("output.png")
Model-Specific Parameters
All image methods accept extra keyword arguments that get passed through to the provider. Different models support different parameters:
# Seed for reproducibility
image = client.models.generate_image(
model="black-forest-labs/FLUX.1-schnell",
prompt="A cat in space",
seed=42,
num_inference_steps=4,
guidance=3.5,
)
# Image editing — pass a file path, URL, or base64 string
image = client.models.generate_image(
model="black-forest-labs/FLUX.1-kontext-pro",
prompt="Make the sky purple",
image="./photo.png", # file path (auto base64-encoded)
)
# Image editing with URL
image = client.models.generate_image(
model="Wan-AI/Wan2.6-Image-Edit",
prompt="Remove the background",
image="https://example.com/photo.png", # URL (passed as-is)
)
# ControlNet / Canny
image = client.models.generate_image(
model="some-controlnet-model",
prompt="A building",
control_image="./edges.png",
strength=0.8,
)
The image, start_image, mask, and control_image fields accept three formats:
- File path — "./photo.png" (auto-detected, read and base64-encoded)
- URL — "https://..." (passed through as-is)
- Base64 string — raw base64 data (passed through as-is)
Video Generation
# Submit, poll, and save in one call
job = client.models.generate_video_and_save(
"output.mp4",
model="google/veo-3.0",
prompt="A drone flying over mountains",
duration=8,
on_status=lambda j: print(f"Status: {j.status}"),
)
Or manage the lifecycle manually:
job = client.models.generate_video(
model="google/veo-3.0",
prompt="Ocean waves at sunset",
)
# Poll until done
import time
while job.status not in ("completed", "failed"):
time.sleep(5)
job = client.models.get_video_status(job.id)
# Download
video_bytes = client.models.download_video(job.id)
with open("video.mp4", "wb") as f:
f.write(video_bytes)
Extra parameters (e.g., seed, negative_prompt, image for image-to-video models) are passed through:
job = client.models.generate_video(
model="some-img2vid-model",
prompt="Animate this scene",
image="./first_frame.png", # image-to-video
seed=42,
negative_prompt="blurry",
)
Text-to-Speech
audio = client.models.text_to_speech(
model="hexgrad/Kokoro-82M",
input="Hello from Runcrate!",
voice="af_heart",
)
with open("speech.mp3", "wb") as f:
f.write(audio)
Extra parameters like speed or language are passed through:
audio = client.models.text_to_speech(
model="hexgrad/Kokoro-82M",
input="Hello!",
voice="af_heart",
speed=1.5, # model-specific
language="en", # model-specific
)
Transcription
with open("recording.wav", "rb") as f:
result = client.models.transcribe(
model="openai/whisper-1",
file=f,
filename="recording.wav",
language="en", # hint language
response_format="srt", # text, json, srt, vtt
)
print(result.text)
Infrastructure Management
GPU Instances
# List instances
instances = client.instances.list()
instances = client.instances.list(search="training")
# Browse available GPU types
types = client.instances.list_types(gpu_type="A100")
for t in types:
print(f"{t.id} — {t.gpu_type} x{t.gpu_count} — ${t.hourly_rate}/hr")
# Create an instance
instance = client.instances.create(
name="training-run",
ssh_key_id="your-key-id",
gpu_type="A100",
gpu_count=1,
startup_commands=["pip install torch"],
)
print(f"Created: {instance.id} — {instance.status}")
# Check status
status = client.instances.get_status(instance.id)
print(f"Status: {status.status}, IP: {status.ip}")
# Terminate
client.instances.terminate(instance.id)
SSH Keys
# List keys
keys = client.ssh_keys.list()
# Add a key
key = client.ssh_keys.create(
name="my-laptop",
public_key="ssh-ed25519 AAAA...",
)
# Delete a key
client.ssh_keys.delete(key.id)
Storage
Storage volumes are environment-scoped. Your workspace’s storage provider (AWS S3, Wasabi, or Backblaze B2) must be configured in the dashboard first — the SDK picks it up automatically.
# List available regions (with friendly names)
regions = client.storage.list_regions()
for r in regions:
print(f"{r.name} ({r.provider})")
# List volumes in the current environment
volumes = client.storage.list()
# Get a specific volume
volume = client.storage.get("volume-id")
# Create a 100GB volume
volume = client.storage.create(
name="datasets",
size_gb=100,
region="us-east-1",
)
# Resize (increase capacity only)
client.storage.resize(volume.id, size_gb=200)
# Delete (refunds unused prepaid days pro-rata)
result = client.storage.delete(volume.id)
print(f"Refunded ${result.refund_amount}")
Billing: $0.03/GB/month, charged weekly in advance. Deletion refunds the unused portion of the current billing week.
Billing
# Check balance
balance = client.billing.get_balance()
print(f"Credits: ${balance.credits_balance}")
# List transactions
txns = client.billing.list_transactions(limit=20)
for t in txns:
print(f"{t.type}: ${t.amount}")
print(f"Has more: {txns.has_more}")
# Usage summary
usage = client.billing.usage(from_date="2025-01-01", to_date="2025-01-31")
print(f"Total cost: ${usage.total_cost}")
Templates
# List templates with search
templates = client.templates.list(search="pytorch", category="ml")
for t in templates:
print(f"{t.name} — {t.category}")
print(f"Total: {templates.total}")
Error Handling
All API errors include the actual error message from the server — never a generic fallback.
from runcrate import (
NotFoundError,
AuthenticationError,
RateLimitError,
InsufficientCreditsError,
BadRequestError,
UnprocessableEntityError,
)
try:
client.instances.get("nonexistent")
except NotFoundError as e:
print(f"Not found: {e.message}")
except AuthenticationError:
print("Invalid API key")
except RateLimitError:
print("Rate limited — retry later")
except InsufficientCreditsError as e:
print(f"Not enough credits: {e.message}")
except UnprocessableEntityError as e:
print(f"Validation error: {e.message}") # e.g. invalid model params
except BadRequestError as e:
print(f"Bad request: {e.message}")
Every error exposes:
- e.message — human-readable error description from the API
- e.status_code — HTTP status code
- e.code — machine-readable error code (e.g. not_found, rate_limited)
- e.details — additional details (when available)
Error Hierarchy
| Exception | Status Code | Description |
|---|---|---|
| BadRequestError | 400 | Invalid parameters |
| AuthenticationError | 401 | Invalid or missing API key |
| InsufficientCreditsError | 402 | Not enough credits |
| PermissionDeniedError | 403 | Insufficient permissions |
| NotFoundError | 404 | Resource not found |
| ConflictError | 409 | Resource conflict |
| UnprocessableEntityError | 422 | Validation error (e.g. invalid model params) |
| RateLimitError | 429 | Rate limit exceeded |
| InternalServerError | 500 | Server error |
| ConnectionError | — | Network failure |
| TimeoutError | — | Request timed out |