Reuse the fetchWithPayment helper so Coinbase x402 payments replay automatically when Horizon
queues the extraction job.
1. Submit an audio URL
// API root; the HORIZON_BASE_URL environment variable overrides the default.
const baseUrl = process.env.HORIZON_BASE_URL ?? 'https://api.horizon.new/v1';

// Submit the extraction request, pointing at a remotely hosted audio file.
const audioResponse = await fetchWithPayment(`${baseUrl}/extract/audio`, {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    sourceUrl: 'https://cdn.example.com/audio/office-hours-42.mp3',
    sourceName: 'Office Hours Episode 42',
    options: {
      transcriptionModel: 'whisper-large-v3',
      speakerLabels: true,
      segmentLength: 700,
    },
    webhookUrl: 'https://example.com/webhooks/horizon/extraction',
  }),
});

// Fail loudly on a non-2xx reply instead of treating an error payload as a job.
if (!audioResponse.ok) {
  throw new Error(`Audio extraction request failed with HTTP ${audioResponse.status}`);
}

const audioJob = await audioResponse.json();
console.log('audio job', audioJob.jobId, audioJob.statusUrl);
2. Handle synchronous completion
// When the job comes back already completed with a result attached, the
// transcript is available inline and polling can be skipped.
const hasInlineTranscript = audioJob.status === 'completed' && Boolean(audioJob.result);
if (hasInlineTranscript) {
  const { segments } = audioJob.result.transcript;
  console.log('Transcript ready', segments.length);
}
3. Upload raw audio instead of linking
import { readFileSync } from 'node:fs';
// Read the local recording and base64-encode it so the bytes can be sent inside the JSON body.
const file = readFileSync('./assets/office-hours-42.mp3').toString('base64');

const uploadResponse = await fetchWithPayment(`${baseUrl}/extract/audio`, {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    file,
    sourceName: 'Office Hours Episode 42',
    options: { transcriptionModel: 'whisper-large-v3' },
  }),
});

// Surface a rejected upload instead of silently discarding the response.
if (!uploadResponse.ok) {
  throw new Error(`Audio upload failed with HTTP ${uploadResponse.status}`);
}
4. Poll for long recordings
// Poll the job's status URL until it leaves the `processing` state.
// NOTE(review): the submit response on this page is checked via
// `status === 'completed'`, while this endpoint is read via `state` /
// 'succeeded' — confirm both field names against the API reference.
const POLL_INTERVAL_MS = 5000;
// 360 attempts × 5 s ≈ 30 minutes; raise this for very long recordings.
const MAX_POLL_ATTEMPTS = 360;

let status;
let attempts = 0;
while (true) {
  const statusResponse = await fetchWithPayment(audioJob.statusUrl);
  // A non-2xx poll reply is not a job status; stop rather than parse it.
  if (!statusResponse.ok) {
    throw new Error(`Audio status request failed with HTTP ${statusResponse.status}`);
  }
  status = await statusResponse.json();
  attempts += 1;

  if (status.state !== 'processing') {
    break;
  }
  // Bound the loop so a stuck job cannot keep this script polling forever.
  if (attempts >= MAX_POLL_ATTEMPTS) {
    throw new Error(`Audio extraction still processing after ${attempts} status checks`);
  }
  await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
}

// Any state other than `succeeded` is treated as a failure.
if (status.state !== 'succeeded') {
  throw new Error(`Audio extraction failed: ${status.error?.code ?? 'unknown'}`);
}
console.log('Segments', status.result.transcript.segments.length);
- Store transcript segments with timestamps to power search and recap workflows.
- Use speaker labels to attribute dialogue to hosts or guests.
- Combine with the webhook verification example (/examples/webhooks/verify-webhooks) to validate completion callbacks for long audio.