The Speech Synthesis API (also known as Text-to-Speech or TTS) allows web applications to convert text into spoken words. This guide explores how to use this powerful browser API with TypeScript for type-safe voice synthesis.
The Speech Synthesis API provides a simple yet powerful interface for text-to-speech functionality directly in the browser. It's part of the Web Speech API and is supported in all modern browsers without requiring external libraries or API keys.
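First, let's understand the core interfaces you'll work with. They already ship with TypeScript's built-in DOM typings (`lib.dom.d.ts`), so you don't need to declare them yourself; the simplified shapes below are shown for reference only: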
// Core interfaces in TypeScript
/**
 * Simplified view of a single speech request: the text to speak plus the
 * settings that control how it is spoken.
 */
interface SpeechSynthesisUtterance {
  /** The text that will be spoken. */
  text: string;

  /** Language of the utterance. */
  lang: string;

  /** Voice used to speak the text, or `null` when none has been assigned. */
  voice: SpeechSynthesisVoice | null;

  /** Loudness, from 0 to 1. */
  volume: number;

  /** Speaking speed, from 0.1 to 10. */
  rate: number;

  /** Pitch, from 0 to 2. */
  pitch: number;
}
/**
 * Simplified view of one voice offered by the browser's speech engine.
 */
interface SpeechSynthesisVoice {
  /** Name of the voice; the lookups in this guide match on this value. */
  name: string;

  /** Language of the voice. */
  lang: string;

  /** Whether the voice is provided by a local speech service. */
  localService: boolean;

  /** Whether this is the default voice. */
  default: boolean;
}
Here's the simplest way to make your browser speak:
/**
 * Queues `text` for speech without overriding any voice, rate, pitch,
 * or volume settings.
 *
 * @param text - The text to be spoken.
 */
function speak(text: string): void {
  window.speechSynthesis.speak(new SpeechSynthesisUtterance(text));
}
// Usage: queue one utterance with no voice, rate, pitch, or volume overrides
speak("Hello, World!");
You can customize pitch, rate, and volume:
/**
 * Speaks `text` with explicit delivery settings.
 *
 * @param text - The text to be spoken.
 * @param rate - Speed, 0.1 to 10 (defaults to 1).
 * @param pitch - Pitch, 0 to 2 (defaults to 1).
 * @param volume - Volume, 0 to 1 (defaults to 1).
 */
function speakCustom(
  text: string,
  rate: number = 1,
  pitch: number = 1,
  volume: number = 1
): void {
  // Object.assign writes through the utterance's property setters in the
  // listed order: rate, then pitch, then volume.
  const message = Object.assign(new SpeechSynthesisUtterance(text), {
    rate,
    pitch,
    volume,
  });
  window.speechSynthesis.speak(message);
}
Different voices are available depending on the user's system:
/**
 * Resolves with the voices currently offered by the speech engine.
 *
 * If the list is already populated it is returned immediately. Otherwise the
 * promise waits for the next `voiceschanged` event, because some browsers load
 * voices asynchronously. The promise stays pending if that event never fires.
 *
 * @returns A promise for the voice list reported by `speechSynthesis.getVoices()`.
 */
function getVoices(): Promise<SpeechSynthesisVoice[]> {
  return new Promise((resolve) => {
    const synthesis = window.speechSynthesis;
    const loaded = synthesis.getVoices();
    if (loaded.length) {
      resolve(loaded);
      return;
    }
    // Use a one-shot listener instead of assigning `onvoiceschanged`: the
    // property holds a single shared handler, so assigning it would overwrite
    // handlers installed elsewhere (including by a concurrent getVoices()
    // call, whose promise would then never settle).
    synthesis.addEventListener(
      'voiceschanged',
      () => resolve(synthesis.getVoices()),
      { once: true }
    );
  });
}
/**
 * Speaks `text` using the voice whose name equals `voiceName`.
 *
 * When no voice with that name is found, the utterance is spoken without
 * assigning a voice.
 *
 * @param text - The text to be spoken.
 * @param voiceName - Exact `name` of the desired voice.
 */
async function speakWithVoice(text: string, voiceName: string): Promise<void> {
  const available = await getVoices();
  const utterance = new SpeechSynthesisUtterance(text);

  const chosen = available.find(({ name }) => name === voiceName);
  if (chosen !== undefined) {
    utterance.voice = chosen;
  }

  window.speechSynthesis.speak(utterance);
}
Monitor speech progress with event listeners:
/**
 * Speaks `text` and logs each lifecycle event of the utterance to the console:
 * start, end, pause, and resume via `console.log`, errors via `console.error`.
 *
 * @param text - The text to be spoken.
 */
function speakWithEvents(text: string): void {
  // Builds a handler that logs a fixed message when its event fires.
  const announce = (message: string) => (): void => {
    console.log(message);
  };

  const utterance = new SpeechSynthesisUtterance(text);

  utterance.onstart = announce('Speech started');
  utterance.onend = announce('Speech ended');
  utterance.onpause = announce('Speech paused');
  utterance.onresume = announce('Speech resumed');
  utterance.onerror = ({ error }: SpeechSynthesisErrorEvent): void => {
    console.error('Speech error:', error);
  };

  window.speechSynthesis.speak(utterance);
}
Here's a TypeScript class that wraps all functionality:
/**
 * Promise-based wrapper around `window.speechSynthesis`.
 *
 * Keeps a cached list of voices that is refreshed on every `voiceschanged`
 * event, and exposes `speak()` as a promise that settles when the utterance
 * ends or errors.
 */
class SpeechController {
  private readonly synthesis: SpeechSynthesis;
  private voices: SpeechSynthesisVoice[] = [];

  /**
   * Resolves once a voice list has been obtained: immediately if voices are
   * available at construction, otherwise on the first `voiceschanged` event.
   * Await this before calling `speak()` with a named voice right after
   * construction. It stays pending if the browser never reports voices.
   */
  readonly ready: Promise<void>;

  constructor() {
    this.synthesis = window.speechSynthesis;
    this.ready = this.loadVoices();
  }

  /** Populates the voice cache and keeps it in sync with the engine. */
  private loadVoices(): Promise<void> {
    return new Promise((resolve) => {
      // addEventListener, not `onvoiceschanged = ...`: the property holds a
      // single shared handler, so assigning it would disconnect other
      // controllers or any other code listening for voice changes. The
      // listener stays registered so later voice-list changes update the cache.
      this.synthesis.addEventListener('voiceschanged', () => {
        this.voices = this.synthesis.getVoices();
        resolve();
      });

      this.voices = this.synthesis.getVoices();
      if (this.voices.length) {
        resolve();
      }
    });
  }

  /**
   * Returns the cached voices, re-querying the engine while the cache is
   * still empty in case the list was populated without a notification.
   */
  private currentVoices(): SpeechSynthesisVoice[] {
    if (!this.voices.length) {
      this.voices = this.synthesis.getVoices();
    }
    return this.voices;
  }

  /**
   * Queues `text` for speech.
   *
   * @param text - The text to be spoken.
   * @param options - Optional settings. `voice` is matched against voice
   *   names; if no match is found no voice is assigned. `rate`, `pitch`, and
   *   `volume` default to 1. `lang` is applied only when provided.
   * @returns A promise that resolves when the utterance ends and rejects with
   *   the error event if the utterance reports an error.
   */
  speak(
    text: string,
    options?: {
      voice?: string;
      rate?: number;
      pitch?: number;
      volume?: number;
      lang?: string;
    }
  ): Promise<void> {
    return new Promise((resolve, reject) => {
      const utterance = new SpeechSynthesisUtterance(text);
      if (options?.voice) {
        const voice = this.currentVoices().find(v => v.name === options.voice);
        if (voice) utterance.voice = voice;
      }
      utterance.rate = options?.rate ?? 1;
      utterance.pitch = options?.pitch ?? 1;
      utterance.volume = options?.volume ?? 1;
      if (options?.lang) utterance.lang = options.lang;
      utterance.onend = () => resolve();
      utterance.onerror = (e) => reject(e);
      this.synthesis.speak(utterance);
    });
  }

  /** Pauses speech via `speechSynthesis.pause()`. */
  pause(): void {
    this.synthesis.pause();
  }

  /** Resumes paused speech via `speechSynthesis.resume()`. */
  resume(): void {
    this.synthesis.resume();
  }

  /**
   * Cancels speech via `speechSynthesis.cancel()`.
   * NOTE(review): pending `speak()` promises only settle if the browser fires
   * an end or error event for the cancelled utterances — confirm per browser.
   */
  cancel(): void {
    this.synthesis.cancel();
  }

  /** Returns the voices currently known to this controller. */
  getVoices(): SpeechSynthesisVoice[] {
    return this.currentVoices();
  }
}
// Usage (top-level `await` requires this file to be an ES module)
const speechController = new SpeechController();
try {
  await speechController.speak("Hello from TypeScript!", {
    rate: 1.2,
    pitch: 1.1
  });
} catch (error: unknown) {
  // speak() rejects when the utterance reports an error; without this handler
  // the rejection would propagate out of the top-level await.
  console.error('Speech failed:', error);
}
The Speech Synthesis API is widely supported, but you should still feature-detect it before use:
// Feature detection: check for the `speechSynthesis` property on `window`
// before relying on the API, and warn when it is missing.
if (!('speechSynthesis' in window)) {
  console.warn('Speech Synthesis not supported');
} else {
  // API is supported
}