From 4f77e77b1e9087d9d7b6b94400f5da70f0fb043a Mon Sep 17 00:00:00 2001 From: legop3 Date: Sun, 14 Jun 2026 14:37:34 -0400 Subject: [PATCH] slopfeatures --- Dockerfile | 13 +- README.md | 12 +- sipcord-bridge/src/call/mod.rs | 79 +++++++-- sipcord-bridge/src/transport/discord/mod.rs | 156 ++++++++++++++---- sipcord-bridge/src/transport/sip/callbacks.rs | 9 +- sipcord-bridge/src/transport/sip/ffi/types.rs | 2 +- sipcord-bridge/src/transport/sip/mod.rs | 5 +- 7 files changed, 217 insertions(+), 59 deletions(-) diff --git a/Dockerfile b/Dockerfile index f724df3..1f2f94f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -115,10 +115,21 @@ RUN apt-get update && apt-get install -y \ libopus0 \ libtiff6 \ libjpeg62-turbo \ - espeak-ng \ + curl \ ffmpeg \ && rm -rf /var/lib/apt/lists/* +RUN mkdir -p /opt/piper /opt/piper-voices && \ + curl -fL https://github.com/rhasspy/piper/releases/download/2023.11.14-2/piper_linux_x86_64.tar.gz \ + | tar -xzf - -C /opt && \ + printf '#!/bin/sh\nLD_LIBRARY_PATH=/opt/piper exec /opt/piper/piper "$@"\n' \ + > /usr/local/bin/piper && \ + chmod +x /usr/local/bin/piper && \ + curl -fL -o /opt/piper-voices/en_US-amy-medium.onnx \ + https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/amy/medium/en_US-amy-medium.onnx && \ + curl -fL -o /opt/piper-voices/en_US-amy-medium.onnx.json \ + https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/amy/medium/en_US-amy-medium.onnx.json + WORKDIR /app COPY --from=builder /build/target/release/sipcord-bridge /app/sipcord-bridge diff --git a/README.md b/README.md index a8a7d6f..b82dc05 100644 --- a/README.md +++ b/README.md @@ -78,10 +78,12 @@ timeout_seconds = 10 max_attempts = 3 ``` -The menu uses `espeak-ng` for local text-to-speech with a female English voice. -Emoji and common Discord channel separators are skipped in spoken names. Press -`#` to repeat the current menu page, `9` for the next page when available, and -`*` for the previous page when available. +The menu uses Piper for local text-to-speech with a bundled English female +voice. Emoji and common Discord channel separators are skipped in spoken names. +Voice channels with people in them are read first and include the number of +people present, excluding the bot itself. Empty voice channels are read by name. +Press `#` to repeat the current menu page, `9` for the next page when +available, and `*` for the previous page when available. You can also add a phone directory for Discord-originated calls. These entries show up in `/directory` as buttons. Clicking one dials that extension from your @@ -278,7 +280,7 @@ Current scope: ### 4d. Build from source -Requires Rust nightly (for `portable_simd`) and system dependencies for pjproject (OpenSSL, Opus, libtiff, etc). See the `Dockerfile` for the full list. +Requires Rust nightly (for `portable_simd`) and system dependencies for pjproject (OpenSSL, Opus, libtiff, etc). Dynamic menu TTS also requires the `piper` binary and a Piper voice model at `/opt/piper-voices/en_US-amy-medium.onnx`. See the `Dockerfile` for the full list. ```bash cargo run --release -p sipcord-bridge diff --git a/sipcord-bridge/src/call/mod.rs b/sipcord-bridge/src/call/mod.rs index ce63c69..a5d93ce 100644 --- a/sipcord-bridge/src/call/mod.rs +++ b/sipcord-bridge/src/call/mod.rs @@ -23,7 +23,7 @@ use crate::services::snowflake::Snowflake; use crate::services::sound::{SoundManager, create_sound_manager}; use crate::transport::discord::{ DiscordEvent, DiscordVoiceConnection, SharedDiscordClient, register_discord_to_sip_producer, - unregister_discord_to_sip_producer, + set_bot_nickname, unregister_discord_to_sip_producer, }; use crate::transport::sip::{ CONF_SAMPLE_RATE, CallId, SipCommand, SipEvent, cleanup_channel_port, @@ -36,8 +36,10 @@ use crossbeam_channel::{Receiver, Sender, bounded}; use dashmap::{DashMap, DashSet}; use std::collections::HashSet; use std::path::PathBuf; +use std::process::Stdio; use std::sync::Arc; use std::time::{Duration, Instant}; +use tokio::io::AsyncWriteExt; use tokio::process::Command; use tokio::sync::mpsc; use tokio::sync::Notify; @@ -222,6 +224,7 @@ impl BridgeCoordinator { SipEvent::IncomingCall { call_id, digest_auth, + caller_id, extension, source_ip, } => { @@ -270,8 +273,15 @@ impl BridgeCoordinator { let ctx = ctx.clone(); tokio::spawn(async move { - handle_incoming_call(ctx, call_id, *digest_auth, extension, source_ip) - .await; + handle_incoming_call( + ctx, + call_id, + *digest_auth, + caller_id, + extension, + source_ip, + ) + .await; }); } @@ -1036,6 +1046,7 @@ async fn handle_incoming_call( ctx: BridgeContext, call_id: CallId, digest_auth: crate::transport::sip::DigestAuthParams, + caller_id: String, extension: String, source_ip: Option, ) { @@ -1193,6 +1204,7 @@ async fn handle_incoming_call( health_check_notify, }, call_id, + caller_id, extension, menu, ) @@ -1342,6 +1354,8 @@ async fn handle_incoming_call( backend.on_call_started(&info).await; }); + set_bot_nickname(&bot_token, guild_id, &caller_id).await; + // Answer call first, then play join sound let _ = sip_cmd_tx.send(SipCommand::Answer { call_id }); play_discord_join(call_id, &sound_manager, &sip_cmd_tx).await; @@ -1424,6 +1438,8 @@ async fn handle_incoming_call( backend.on_call_started(&info).await; }); + set_bot_nickname(&bot_token, guild_id, &caller_id).await; + // Answer call first, then play join sound let _ = sip_cmd_tx.send(SipCommand::Answer { call_id }); play_discord_join(call_id, &sound_manager, &sip_cmd_tx).await; @@ -1486,11 +1502,13 @@ struct DynamicGuildOption { struct DynamicChannelOption { channel_id: Snowflake, name: String, + user_count: usize, } async fn handle_menu_call( ctx: MenuCallContext, call_id: CallId, + caller_id: String, extension: String, menu: MenuRoute, ) { @@ -1536,7 +1554,12 @@ async fn handle_menu_call( None => return, }; - let channels = match fetch_discord_voice_channels(ctx.backend.bot_token(), guild.guild_id).await + let channels = match fetch_discord_voice_channels( + ctx.backend.bot_token(), + guild.guild_id, + &ctx.shared_discord, + ) + .await { Ok(channels) if !channels.is_empty() => channels, Ok(_) => { @@ -1576,7 +1599,7 @@ async fn handle_menu_call( }; ctx.dtmf_waiters.remove(&call_id); - connect_menu_selection(ctx, call_id, extension, guild, selected).await; + connect_menu_selection(ctx, call_id, caller_id, extension, guild, selected).await; } async fn select_guild_from_menu( @@ -1649,7 +1672,7 @@ async fn select_channel_from_menu( let prompt = build_option_prompt( &intro, page_items, - |channel| clean_tts_label(&channel.name), + channel_tts_label, page, channels.len(), ); @@ -1848,6 +1871,7 @@ async fn fetch_discord_guilds( async fn fetch_discord_voice_channels( bot_token: &str, guild_id: Snowflake, + shared_discord: &SharedDiscordClient, ) -> Result, Box> { let client = reqwest::Client::new(); let url = format!("https://discord.com/api/v10/guilds/{}/channels", guild_id); @@ -1865,16 +1889,31 @@ async fn fetch_discord_voice_channels( .filter(|channel| channel.kind == 2) .filter_map(|channel| { let channel_id = channel.id.parse::().ok()?; + let user_count = shared_discord.voice_channel_user_count(guild_id, channel_id); Some(DynamicChannelOption { channel_id, name: channel.name, + user_count, }) }) .collect(); - channels.sort_by(|a, b| a.name.to_ascii_lowercase().cmp(&b.name.to_ascii_lowercase())); + channels.sort_by(|a, b| { + b.user_count + .cmp(&a.user_count) + .then_with(|| a.name.to_ascii_lowercase().cmp(&b.name.to_ascii_lowercase())) + }); Ok(channels) } +fn channel_tts_label(channel: &DynamicChannelOption) -> String { + let name = clean_tts_label(&channel.name); + match channel.user_count { + 0 => name, + 1 => format!("{name}, with 1 person"), + count => format!("{name}, with {count} people"), + } +} + async fn play_tts_prompt( call_id: CallId, text: &str, @@ -1897,17 +1936,22 @@ async fn synthesize_tts_samples( let raw_path = std::env::temp_dir().join(format!("sipcord-tts-{}-{}-raw.wav", call_id, stamp)); let out_path = std::env::temp_dir().join(format!("sipcord-tts-{}-{}.wav", call_id, stamp)); - let espeak_status = Command::new("espeak-ng") - .arg("-v") - .arg("en+f3") - .arg("-w") + let mut piper = Command::new("piper") + .arg("--model") + .arg("/opt/piper-voices/en_US-amy-medium.onnx") + .arg("--output_file") .arg(&raw_path) - .arg(text) - .status() - .await?; - if !espeak_status.success() { + .stdin(Stdio::piped()) + .spawn()?; + + if let Some(mut stdin) = piper.stdin.take() { + stdin.write_all(text.as_bytes()).await?; + } + + let piper_status = piper.wait().await?; + if !piper_status.success() { let _ = tokio::fs::remove_file(&raw_path).await; - return Err(format!("espeak-ng exited with status {}", espeak_status).into()); + return Err(format!("piper exited with status {}", piper_status).into()); } let ffmpeg_status = Command::new("ffmpeg") @@ -1953,6 +1997,7 @@ async fn synthesize_tts_samples( async fn connect_menu_selection( ctx: MenuCallContext, call_id: CallId, + caller_id: String, extension: String, guild: DynamicGuildOption, selected: DynamicChannelOption, @@ -2067,6 +2112,7 @@ async fn connect_menu_selection( tokio::spawn(async move { backend.on_call_started(&info).await; }); + set_bot_nickname(ctx.backend.bot_token(), guild_id, &caller_id).await; play_discord_join(call_id, &ctx.sound_manager, &ctx.sip_cmd_tx).await; return; } @@ -2134,6 +2180,7 @@ async fn connect_menu_selection( tokio::spawn(async move { backend.on_call_started(&info).await; }); + set_bot_nickname(ctx.backend.bot_token(), guild_id, &caller_id).await; play_discord_join(call_id, &ctx.sound_manager, &ctx.sip_cmd_tx).await; } Err(e) => { diff --git a/sipcord-bridge/src/transport/discord/mod.rs b/sipcord-bridge/src/transport/discord/mod.rs index a4f41e2..82e3867 100644 --- a/sipcord-bridge/src/transport/discord/mod.rs +++ b/sipcord-bridge/src/transport/discord/mod.rs @@ -28,7 +28,7 @@ use songbird::tracks::PlayMode; use songbird::{ Config, CoreEvent, Event, EventContext, EventHandler as VoiceEventHandler, Songbird, TrackEvent, }; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::sync::Arc; use std::sync::OnceLock; use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, Ordering}; @@ -501,9 +501,63 @@ pub enum DiscordEvent { pub struct SharedDiscordClient { songbird: Arc, bot_user_id: AtomicU64, + voice_state_tracker: Arc, _client_handle: tokio::task::JoinHandle<()>, } +#[derive(Default)] +struct VoiceStateTracker { + users: Mutex>, + channels: Mutex>>, +} + +impl VoiceStateTracker { + fn update( + &self, + user_id: Snowflake, + guild_id: Option, + channel_id: Option, + ) { + let mut users = self.users.lock(); + let mut channels = self.channels.lock(); + + if let Some((old_guild_id, old_channel_id)) = users.remove(&user_id) + && let Some(users_in_channel) = channels.get_mut(&(old_guild_id, old_channel_id)) + { + users_in_channel.remove(&user_id); + if users_in_channel.is_empty() { + channels.remove(&(old_guild_id, old_channel_id)); + } + } + + if let (Some(guild_id), Some(channel_id)) = (guild_id, channel_id) { + users.insert(user_id, (guild_id, channel_id)); + channels + .entry((guild_id, channel_id)) + .or_default() + .insert(user_id); + } + } + + fn count_excluding( + &self, + guild_id: Snowflake, + channel_id: Snowflake, + excluded_user_id: Snowflake, + ) -> usize { + self.channels + .lock() + .get(&(guild_id, channel_id)) + .map(|users| { + users + .iter() + .filter(|user_id| **user_id != excluded_user_id) + .count() + }) + .unwrap_or(0) + } +} + #[derive(Clone)] pub struct DiscordOutboundCallConfig { pub sip: DiscordOutboundSipConfig, @@ -529,6 +583,7 @@ impl SharedDiscordClient { let songbird_config = Config::default().decode_mode(DecodeMode::Decode(Default::default())); let songbird = Songbird::serenity_from_config(songbird_config); + let voice_state_tracker = Arc::new(VoiceStateTracker::default()); let (ready_tx, ready_rx) = oneshot::channel::(); let ready_tx = Arc::new(tokio::sync::Mutex::new(Some(ready_tx))); @@ -541,6 +596,7 @@ impl SharedDiscordClient { .event_handler(Arc::new(SharedClientEventHandler { ready_tx, outbound_call_config, + voice_state_tracker: voice_state_tracker.clone(), })) .voice_manager(songbird.clone()) .await?; @@ -573,6 +629,7 @@ impl SharedDiscordClient { Ok(Arc::new(Self { songbird, bot_user_id: AtomicU64::new(bot_user_id), + voice_state_tracker, _client_handle: client_handle, })) } @@ -586,12 +643,19 @@ impl SharedDiscordClient { pub fn bot_user_id(&self) -> Snowflake { Snowflake::new(self.bot_user_id.load(Ordering::Relaxed)) } + + /// Count users in a voice channel, excluding this bot if it is present. + pub fn voice_channel_user_count(&self, guild_id: Snowflake, channel_id: Snowflake) -> usize { + self.voice_state_tracker + .count_excluding(guild_id, channel_id, self.bot_user_id()) + } } /// Serenity event handler for the shared client struct SharedClientEventHandler { ready_tx: Arc>>>, outbound_call_config: Option, + voice_state_tracker: Arc, } #[async_trait] @@ -636,11 +700,63 @@ impl EventHandler for SharedClientEventHandler { } } } + FullEvent::GuildCreate { guild, .. } => { + let guild_id = Snowflake::new(guild.id.get()); + for voice_state in guild.voice_states.values() { + self.voice_state_tracker.update( + Snowflake::new(voice_state.user_id.get()), + Some(guild_id), + voice_state.channel_id.map(|id| Snowflake::new(id.get())), + ); + } + } + FullEvent::VoiceStateUpdate { new, .. } => { + self.voice_state_tracker.update( + Snowflake::new(new.user_id.get()), + new.guild_id.map(|id| Snowflake::new(id.get())), + new.channel_id.map(|id| Snowflake::new(id.get())), + ); + } _ => {} } } } +/// Best-effort bot nickname update for a guild. +pub async fn set_bot_nickname(bot_token: &str, guild_id: Snowflake, display_name: &str) { + let nickname = call_nickname(display_name); + let url = format!( + "https://discord.com/api/v10/guilds/{}/members/@me", + guild_id + ); + + let result = reqwest::Client::new() + .patch(url) + .header("Authorization", format!("Bot {}", bot_token)) + .json(&serde_json::json!({ "nick": nickname })) + .send() + .await; + + match result { + Ok(response) if response.status().is_success() => { + debug!( + "Set bot nickname in guild {} while calling {}", + guild_id, display_name + ); + } + Ok(response) => { + warn!( + "Failed to set bot nickname in guild {}: HTTP {}", + guild_id, + response.status() + ); + } + Err(e) => { + warn!("Failed to set bot nickname in guild {}: {}", guild_id, e); + } + } +} + async fn register_call_commands(ctx: &Context, guild_id: GuildId) -> Result<(), serenity::Error> { let call_command = CreateCommand::new("call") .description("Call a SIP/PBX extension from your current voice channel") @@ -834,38 +950,12 @@ async fn set_call_nickname( let Some(guild_id) = guild_id else { return; }; - - let nickname = call_nickname(display_name); - let url = format!( - "https://discord.com/api/v10/guilds/{}/members/@me", - guild_id.get() - ); - - let result = reqwest::Client::new() - .patch(url) - .header("Authorization", format!("Bot {}", cfg.bot_token)) - .json(&serde_json::json!({ "nick": nickname })) - .send() - .await; - - match result { - Ok(response) if response.status().is_success() => { - debug!( - "Set bot nickname in guild {} while calling {}", - guild_id, display_name - ); - } - Ok(response) => { - warn!( - "Failed to set bot nickname in guild {}: HTTP {}", - guild_id, - response.status() - ); - } - Err(e) => { - warn!("Failed to set bot nickname in guild {}: {}", guild_id, e); - } - } + set_bot_nickname( + &cfg.bot_token, + Snowflake::new(guild_id.get()), + display_name, + ) + .await; } fn call_nickname(display_name: &str) -> String { diff --git a/sipcord-bridge/src/transport/sip/callbacks.rs b/sipcord-bridge/src/transport/sip/callbacks.rs index 76b2edf..1432476 100644 --- a/sipcord-bridge/src/transport/sip/callbacks.rs +++ b/sipcord-bridge/src/transport/sip/callbacks.rs @@ -476,8 +476,13 @@ pub unsafe extern "C" fn on_incoming_call_cb( if let Some(callbacks) = CALLBACKS.get() && let Some(ref handlers) = *callbacks.lock() { - (handlers.on_incoming_call)(call_id, sip_username, extension.clone(), source_ip); - (handlers.on_call_authenticated)(call_id, params, extension, source_ip); + (handlers.on_incoming_call)( + call_id, + sip_username.clone(), + extension.clone(), + source_ip, + ); + (handlers.on_call_authenticated)(call_id, params, sip_username, extension, source_ip); } } else { // No Authorization header - send 401 challenge diff --git a/sipcord-bridge/src/transport/sip/ffi/types.rs b/sipcord-bridge/src/transport/sip/ffi/types.rs index 08e058c..faed28e 100644 --- a/sipcord-bridge/src/transport/sip/ffi/types.rs +++ b/sipcord-bridge/src/transport/sip/ffi/types.rs @@ -158,7 +158,7 @@ pub struct DigestAuthParams { pub struct CallbackHandlers { pub on_incoming_call: Box) + Send + Sync>, pub on_call_authenticated: - Box) + Send + Sync>, + Box) + Send + Sync>, pub on_dtmf: Box, pub on_call_ended: Box, /// Audio frame callback: (channel_id, samples, sample_rate) diff --git a/sipcord-bridge/src/transport/sip/mod.rs b/sipcord-bridge/src/transport/sip/mod.rs index 8058355..0e81d86 100644 --- a/sipcord-bridge/src/transport/sip/mod.rs +++ b/sipcord-bridge/src/transport/sip/mod.rs @@ -42,6 +42,8 @@ pub enum SipEvent { call_id: CallId, /// SIP Digest auth parameters (boxed to reduce enum size) digest_auth: Box, + /// Caller ID / SIP username from the From header. + caller_id: String, /// Extension being called (from To header) extension: String, /// Source IP address of the caller @@ -265,7 +267,7 @@ fn run_pjsua_loop( }), on_call_authenticated: Box::new({ let event_tx = event_tx.clone(); - move |call_id, digest_auth, extension, source_ip| { + move |call_id, digest_auth, caller_id, extension, source_ip| { info!( "Call {} authenticated: user={}", call_id, digest_auth.username @@ -274,6 +276,7 @@ fn run_pjsua_loop( let _ = event_tx.send(SipEvent::IncomingCall { call_id, digest_auth: Box::new(digest_auth), + caller_id, extension, source_ip, });