> ## Documentation Index
> Fetch the complete documentation index at: https://runcrate.ai/docs/llms.txt
> Use this file to discover all available pages before exploring further.

# Run Whisper on a Cloud GPU

> Deploy an RTX 4090, install faster-whisper, and transcribe audio files on a dedicated GPU.

export const RuncrateStyles = () => {
  // Renders nothing; its only job is to add the Runcrate CSS overrides to
  // <head> the first time it runs in a document.
  const styleId = 'runcrate-overrides';
  // No `document` global (non-browser environment): nothing to inject.
  if (typeof document === 'undefined') {
    return null;
  }
  // A <style> with this id already exists, so the overrides were injected before.
  if (document.getElementById(styleId)) {
    return null;
  }
  const styleEl = document.createElement('style');
  styleEl.id = styleId;
  styleEl.textContent = `
      /* Match Runcrate's rounding scale (--radius: 0.75rem) */
      .rounded-sm { border-radius: 0.5rem !important; }   /* 8px */
      .rounded-md { border-radius: 0.625rem !important; } /* 10px */
      .rounded-lg { border-radius: 0.75rem !important; }  /* 12px */
      .rounded-l-sm { border-top-left-radius: 0.5rem !important; border-bottom-left-radius: 0.5rem !important; }
      .rounded-r-sm { border-top-right-radius: 0.5rem !important; border-bottom-right-radius: 0.5rem !important; }
      .rounded-l-md { border-top-left-radius: 0.625rem !important; border-bottom-left-radius: 0.625rem !important; }
      .rounded-r-md { border-top-right-radius: 0.625rem !important; border-bottom-right-radius: 0.625rem !important; }
      .rounded-l-lg { border-top-left-radius: 0.75rem !important; border-bottom-left-radius: 0.75rem !important; }
      .rounded-r-lg { border-top-right-radius: 0.75rem !important; border-bottom-right-radius: 0.75rem !important; }

      /* Cards: never pure white in light mode */
      .card { background-color: #fcfcfc !important; border-radius: 0.75rem !important; }
      html.dark .card { background-color: #141414 !important; }

      /* Docs hero box */
      .rc-hero { background-color: #fcfcfc; border: 1px solid #e0e0e0; }
      html.dark .rc-hero { background-color: #141414; border-color: #242424; }
      html.dark .rc-hero h1 { color: #f5f5f5; }

      /* Runcrate scrollbar — thin, transparent track, hide-until-hover thumb */
      ::-webkit-scrollbar { width: 6px; height: 6px; background-color: transparent; }
      ::-webkit-scrollbar-track { background-color: transparent; }
      ::-webkit-scrollbar-thumb { background-color: rgba(155, 155, 155, 0.5); border-radius: 10px; transition: opacity 0.3s ease; opacity: 0; }
      ::-webkit-scrollbar-thumb:hover { background-color: rgba(155, 155, 155, 0.7); }
      *:hover::-webkit-scrollbar-thumb,
      *:focus::-webkit-scrollbar-thumb,
      *:active::-webkit-scrollbar-thumb { opacity: 1; }
      * { scrollbar-width: thin; scrollbar-color: rgba(155, 155, 155, 0.5) transparent; }
    `;
  document.head.appendChild(styleEl);
  return null;
};

<RuncrateStyles />

Transcribe audio files on a dedicated GPU using faster-whisper. An RTX 4090 transcribes 1 hour of audio in \~2 minutes. No rate limits, no data leaving your instance.

## 1. Deploy an instance

```bash theme={"theme":"github-dark"}
runcrate instances create --name whisper --gpu RTX4090
runcrate instances status whisper
```

## 2. Install faster-whisper

```bash theme={"theme":"github-dark"}
runcrate ssh whisper -- "pip install faster-whisper"
```

## 3. Upload audio files

```bash theme={"theme":"github-dark"}
runcrate cp ./audio/ whisper:/workspace/audio/
```

## 4. Transcribe a single file

```bash theme={"theme":"github-dark"}
runcrate ssh whisper -- "python -c \"
from faster_whisper import WhisperModel
model = WhisperModel('large-v3', device='cuda', compute_type='float16')
segments, info = model.transcribe('/workspace/audio/interview.mp3', beam_size=5)
print(f'Language: {info.language} (prob: {info.language_probability:.2f})')
for s in segments:
    print(f'[{s.start:.1f}s -> {s.end:.1f}s] {s.text}')
\""
```

## 5. Batch transcribe a directory

```python theme={"theme":"github-dark"}
# transcribe_batch.py
# Transcribes every supported audio file in audio_dir with faster-whisper and
# writes one JSON transcript per input file into out_dir.
import json
import os

from faster_whisper import WhisperModel

# File extensions treated as audio (compared case-insensitively below).
AUDIO_EXTENSIONS = (".mp3", ".wav", ".m4a", ".flac")

model = WhisperModel("large-v3", device="cuda", compute_type="float16")
audio_dir, out_dir = "/workspace/audio", "/workspace/transcripts"
os.makedirs(out_dir, exist_ok=True)

# sorted() keeps the processing order stable from run to run.
for f in sorted(os.listdir(audio_dir)):
    # Lower-case the name first so files such as "CALL.MP3" are not skipped.
    if not f.lower().endswith(AUDIO_EXTENSIONS):
        continue
    print(f"Transcribing {f}...")
    segments, info = model.transcribe(os.path.join(audio_dir, f), beam_size=5)
    # `segments` is consumed here; each entry keeps its start/end and text.
    result = {"file": f, "language": info.language,
              "segments": [{"start": s.start, "end": s.end, "text": s.text} for s in segments]}
    # Output is named after the input without its extension, so inputs that
    # differ only by extension (a.mp3 / a.wav) write to the same JSON file.
    stem = os.path.splitext(f)[0]
    with open(os.path.join(out_dir, stem + ".json"), "w") as out:
        json.dump(result, out, indent=2)
print("Done.")
```

```bash theme={"theme":"github-dark"}
runcrate cp ./transcribe_batch.py whisper:/workspace/transcribe_batch.py
runcrate ssh whisper -- "cd /workspace && python transcribe_batch.py"
```

## 6. Download results

```bash theme={"theme":"github-dark"}
runcrate cp whisper:/workspace/transcripts/ ./transcripts/
```

## Model sizes

| Model       | VRAM    | Time (1 hr audio) |
| ----------- | ------- | ----------------- |
| tiny / base | \~1 GB  | \~10-15 sec       |
| small       | \~2 GB  | \~25 sec          |
| medium      | \~5 GB  | \~50 sec          |
| large-v3    | \~10 GB | \~2 min           |

## Tips

* Use `large-v3` for production accuracy. Use `small` for fast iteration.
* Pass `word_timestamps=True` to `model.transcribe()` to get word-level timestamps from faster-whisper.
* The model downloads on first use (\~3 GB for large-v3). Attach a volume to cache it.

## Cleanup

```bash theme={"theme":"github-dark"}
runcrate instances delete whisper
```
