feat: support command by rts, add rtc support checker, optimize interrupt logic, update openapi param format, update readme.

master
quemingyi.wudong 2025-03-05 11:55:36 +08:00
parent 540ecf261e
commit 966c592544
14 changed files with 200 additions and 69 deletions

View File

@ -6,7 +6,7 @@
- 同时火山引擎 RTC拥有成熟的音频 3A 处理、视频处理等技术以及大规模音视频聊天能力,可支持 AIGC 产品更便捷的支持多模态交互、多人互动等场景能力,保持交互的自然性和高效性。
## 【必看】环境准备
- Node 版本: 16.0+
- **Node 版本: 16.0+**
1. 需要准备两个 Terminal,分别启动服务端、前端页面。
2. **根据你自定义的
RoomId、UserId 以及申请的 AppID、BusinessID(如有)、Token、ASR AppID、TTS AppID,修改 `src/config/config.ts` 文件中 `ConfigFactory` 中 `BaseConfig` 的配置信息**。
@ -15,6 +15,7 @@ RoomId、UserId 以及申请的 AppID、BusinessID(如有)、Token、ASR AppID
5. 如果您已经自行完成了服务端的逻辑,可以不依赖 Demo 中的 Server,直接修改前端代码文件 `src/config/index.ts` 中的 `AIGC_PROXY_HOST` 请求域名和接口,并在 `src/app/api.ts` 中修改接口的参数配置 `APIS_CONFIG`。
## 快速开始
请注意,服务端和 Web 端都需要启动, 启动步骤如下:
### 服务端
进到项目根目录
#### 安装依赖
@ -41,14 +42,16 @@ yarn dev
### 常见问题
| 问题 | 解决方案 |
| :-- | :-- |
| **启动智能体之后, 对话无反馈,或者一直停留在 "AI 准备中, 请稍侯"** | <li>可能因为控制台中相关权限没有正常授予,请参考[流程](https://www.volcengine.com/docs/6348/1315561)再次确认下是否完成相关操作。此问题的可能性较大,建议仔细对照是否已经将相应的权限开通。</li><li>参数传递可能有问题, 例如参数大小写、类型等问题,请再次确认下这类型问题是否存在。</li><li>相关资源可能未开通或者用量不足,请再次确认。</li><li>**请检查当前使用的模型 ID 等内容都是正确且可用的。**</li> |
| `Server/app.js` 中的 `sessionToken` 是什么,该怎么填,为什么要填 | `sessionToken` 是火山引擎子账号发起 OpenAPI 请求时所必须携带的临时 Token,获取方式可参考 [此文章末尾](https://www.volcengine.com/docs/6348/1315561)。 |
| 不清楚什么是主账号,什么是子账号 | 可以参考[官方概念](https://www.volcengine.com/docs/6257/64963?hyperlink_open_type=lark.open_in_browser) 。|
| **启动智能体之后, 对话无反馈** | <li>参数传递可能有问题, 例如参数大小写、类型等问题,请再次确认下这类型问题是否存在。</li><li>另一方面,可能是因为控制台中相关权限没有正常授予,请参考[流程](https://www.volcengine.com/docs/6348/1315561)再次确认下是否完成相关操作。</li><li>相关资源可能未开通或者用量不足,请再次确认。</li><li>请检查本地的网络/带宽情况</li> |
| **浏览器报了 `Uncaught (in promise) r: token_error` 错误** | 请检查您填在项目中的 RTC Token 是否合法,检测用于生成 Token 的 UserId、RoomId 是否与项目中填写的一致。 |
| 什么是 RTC | **R**eal **T**ime **C**ommunication, RTC 的概念可参考[官网文档](https://www.volcengine.com/docs/6348/66812)。 |
| [StartVoiceChat]Failed(Reason: The task has been started. Please do not call the startup task interface repeatedly.) 报错 | 由于目前设置的 RoomId、UserId 为固定值,重复调用 startAudioBot 会导致出错,只需先调用 stopAudioBot 后再重新 startAudioBot 即可。 |
| 为什么我的麦克风正常、摄像头也正常,但是设备没有正常工作? | 可能是设备权限未授予,详情可参考 [Web 排查设备权限获取失败问题](https://www.volcengine.com/docs/6348/1356355)。 |
| 接口调用时, 返回 "Invalid 'Authorization' header, Pls check your authorization header" 错误 | `Server/app.js` 中的 AK/SK/SessionToken 不正确 |
| 什么是 RTC | **R**eal **T**ime **C**ommunication, RTC 的概念可参考[官网文档](https://www.volcengine.com/docs/6348/66812)。 |
| 不清楚什么是主账号,什么是子账号 | 可以参考[官方概念](https://www.volcengine.com/docs/6257/64963?hyperlink_open_type=lark.open_in_browser) 。|
如果有上述以外的问题,也可以参考[问题反馈收集](https://bytedance.larkoffice.com/docx/FM51drJNFoSFcAxciXYcZkpmnBl),或者联系我们帮忙排查处理。
如果有上述以外的问题,欢迎联系我们反馈
### 相关文档
- [场景介绍](https://www.volcengine.com/docs/6348/1310537)

View File

@ -20,11 +20,11 @@ app.use(cors({
*/
const ACCOUNT_INFO = {
/**
* @notes 必填
* @notes 必填, https://console.volcengine.com/iam/keymanage/ 获取
*/
accessKeyId: 'Your AK',
/**
* @notes 必填
* @notes 必填, https://console.volcengine.com/iam/keymanage/ 获取
*/
secretKey: 'Your SK',
/**

View File

@ -22,6 +22,7 @@
"scripts": {
"dev": "npm run echo && npm run start",
"start": "cross-env REACT_APP_LOCAL=cn craco start",
"server:start": "node Server/app.js",
"build": "craco build",
"test": "craco test",
"eject": "react-scripts eject",

View File

@ -3,7 +3,7 @@
* SPDX-license-identifier: BSD-3-Clause
*/
import { Message } from '@arco-design/web-react';
import { Modal } from '@arco-design/web-react';
import { AIGC_PROXY_HOST } from '@/config';
type Headers = Record<string, string>;
@ -60,8 +60,9 @@ export const resultHandler = (res: any) => {
if (Result === 'ok') {
return Result;
}
Message.error(`[${ResponseMetadata?.Action}]Failed(Reason: ${ResponseMetadata?.Error?.Code})`);
throw new Error(
`[${ResponseMetadata?.Action}]Failed(${JSON.stringify(ResponseMetadata, null, 2)})`
);
const error = ResponseMetadata?.Error?.Message || Result;
Modal.error({
title: '接口调用错误',
content: `[${ResponseMetadata?.Action}]Failed(Reason: ${error})`,
});
};

View File

@ -104,7 +104,7 @@ export const AI_MODE_MAP: Partial<Record<AI_MODEL, AI_MODEL_MODE>> = {
/**
* @brief ID
* @note ID https://console.volcengine.com/ark/region:ark+cn-beijing/endpoint 参看/创建
* @note ID https://console.volcengine.com/ark/region:ark+cn-beijing/endpoint?config=%7B%7D 参看/创建
* ID ID, "接入点名称" , "ep-2024xxxxxx-xxx" ID
*/
export const ARK_V3_MODEL_ID: Partial<Record<AI_MODEL, string>> = {

View File

@ -12,7 +12,7 @@ import {
Welcome,
Model,
Voice,
LLM_BOT_ID,
// LLM_BOT_ID,
AI_MODEL,
AI_MODE_MAP,
AI_MODEL_MODE,
@ -36,20 +36,22 @@ export class ConfigFactory {
*/
BusinessId: undefined,
/**
* @brief , ID,
* @brief , ID, "Room123"
*/
RoomId: 'Your Room Id',
RoomId: 'Room123',
/**
* @brief , AI ID,
* @brief , AI ID, "User123"
*/
UserId: 'Your User Id',
UserId: 'User123',
/**
* @brief , RTC Token, AppIdRoomIdUserId, https://console.volcengine.com/rtc/listRTC 列表中
* @brief , RTC Token, AppIdRoomIdUserId
* https://console.volcengine.com/rtc/listRTC 列表中,
* AppId "操作" "临时Token" , RTC
* 使 https://www.volcengine.com/docs/6348/70121 通过代码生成 Token。
* 使 Token
* @note Token , RoomId / UserId RoomId / UserId
* 使 https://www.volcengine.com/docs/6348/70121 通过代码生成 Token。
*/
Token: 'Your Token',
Token: 'Your RTC Token',
/**
* @brief , TTS() AppId, https://console.volcengine.com/speech/app 中获取, 若无可先创建应用。
* @note , "语音合成" , App
@ -106,15 +108,18 @@ export class ConfigFactory {
get LLMConfig() {
const params: Record<string, unknown> = {
Mode: AI_MODE_MAP[this.Model || ''] || AI_MODEL_MODE.CUSTOM,
EndPointId: ARK_V3_MODEL_ID[this.Model],
// BotId: LLM_BOT_ID[this.Model],
MaxTokens: 1024,
Temperature: 0.1,
TopP: 0.3,
SystemMessages: [this.Prompt as string],
Prefill: true,
ModelName: this.Model,
Mode: AI_MODE_MAP[this.Model || ''] || AI_MODEL_MODE.CUSTOM,
ModelVersion: '1.0',
WelcomeSpeech: this.WelcomeSpeech,
SystemMessages: [this.Prompt as string],
EndPointId: ARK_V3_MODEL_ID[this.Model],
ModeSourceType: this.ModeSourceType,
BotId: LLM_BOT_ID[this.Model],
APIKey: this.APIKey,
Url: this.Url,
Feature: JSON.stringify({ Http: true }),
@ -129,20 +134,42 @@ export class ConfigFactory {
get ASRConfig() {
return {
AppId: this.BaseConfig.ASRAppId,
VolumeGain: 0.3,
Provider: 'volcano',
ProviderParams: {
/**
* @note 使, ASR 使
* 使 https://www.volcengine.com/docs/6348/1404673#volcanolmasrconfig
*/
Mode: 'smallmodel',
AppId: this.BaseConfig.ASRAppId,
/**
* @note Cluster ID
* : https://console.volcengine.com/speech/service/16
*/
Cluster: 'volcengine_streaming_common',
},
VADConfig: {
SilenceTime: 600,
SilenceThreshold: 200,
},
VolumeGain: 0.3,
};
}
get TTSConfig() {
return {
AppId: this.BaseConfig.TTSAppId,
VoiceType: this.VoiceType,
Cluster: TTS_CLUSTER.TTS,
Provider: 'volcano',
ProviderParams: {
app: {
AppId: this.BaseConfig.TTSAppId,
Cluster: TTS_CLUSTER.TTS,
},
audio: {
voice_type: this.VoiceType,
speed_ratio: 1.0,
},
},
IgnoreBracketText: [1, 2, 3, 4, 5],
};
}

View File

@ -28,6 +28,7 @@ import RTCAIAnsExtension from '@volcengine/rtc/extension-ainr';
import openAPIs from '@/app/api';
import aigcConfig from '@/config';
import Utils from '@/utils/utils';
import { COMMAND, INTERRUPT_PRIORITY } from '@/utils/handler';
export interface IEventListener {
handleError: (e: { errorCode: any }) => void;
@ -152,6 +153,7 @@ export class RTCClient {
{
userId: this.config.uid!,
extraInfo: JSON.stringify({
call_scene: 'RTC-AIGC',
user_name: username,
user_id: this.config.uid,
}),
@ -342,18 +344,26 @@ export class RTCClient {
/**
* @brief AIGC
*/
commandAudioBot = async (command: string) => {
commandAudioBot = (
command: COMMAND,
interruptMode = INTERRUPT_PRIORITY.NONE,
message = ''
) => {
if (this.audioBotEnabled) {
const res = await openAPIs.UpdateVoiceChat({
AppId: aigcConfig.BaseConfig.AppId,
BusinessId: aigcConfig.BaseConfig.BusinessId,
RoomId: this.basicInfo.room_id,
TaskId: this.basicInfo.user_id,
Command: command,
});
return res;
this.engine.sendUserBinaryMessage(
aigcConfig.BotName,
Utils.string2tlv(
JSON.stringify({
Command: command,
InterruptMode: interruptMode,
Message: message,
}),
'ctrl'
)
);
return;
}
return Promise.reject(new Error('AI 命令调用失败'));
console.warn('Interrupt failed, bot not enabled.');
};
/**

View File

@ -5,7 +5,8 @@
import { useEffect, useState } from 'react';
import { useSelector, useDispatch } from 'react-redux';
import { MediaType } from '@volcengine/rtc';
import VERTC, { MediaType } from '@volcengine/rtc';
import { Modal } from '@arco-design/web-react';
import Utils from '@/utils/utils';
import RtcClient from '@/lib/RtcClient';
import {
@ -84,6 +85,15 @@ export const useJoin = (): [
return;
}
const isSupported = await VERTC.isSupported();
if (!isSupported) {
Modal.error({
title: '不支持 RTC',
content: '您的浏览器可能不支持 RTC 功能,请尝试更换浏览器或升级浏览器后再重试。',
});
return;
}
setJoining(true);
const { username, roomId } = formValues;
const isVisionMode = aigcConfig.Model === AI_MODEL.VISION;

View File

@ -7,34 +7,42 @@ import { useDispatch, useSelector } from 'react-redux';
import AudioLoading from '@/components/Loading/AudioLoading';
import { RootState } from '@/store';
import RtcClient from '@/lib/RtcClient';
import { setInterruptMsg } from '@/store/slices/room';
import { useDeviceState } from '@/lib/useCommon';
import { COMMAND } from '@/utils/handler';
import style from './index.module.less';
import StopRobotBtn from '@/assets/img/StopRobotBtn.svg';
import { setInterruptMsg } from '@/store/slices/room';
const THRESHOLD_VOLUME = 18;
function AudioController(props: React.HTMLAttributes<HTMLDivElement>) {
const { className, ...rest } = props;
const dispatch = useDispatch();
const room = useSelector((state: RootState) => state.room);
const volume = room.localUser.audioPropertiesInfo?.linearVolume || 0;
const { isAudioPublished } = useDeviceState();
const isAITalking = room.isAITalking;
const isUserTalking = room.isUserTalking || volume >= 35;
const isLoading = volume >= THRESHOLD_VOLUME && isAudioPublished;
const handleInterrupt = () => {
RtcClient.commandAudioBot('interrupt');
RtcClient.commandAudioBot(COMMAND.INTERRUPT);
dispatch(setInterruptMsg());
};
return (
<div className={`${className}`} {...rest}>
{isAITalking ? (
<div onClick={handleInterrupt} className={style.interrupt}>
<img src={StopRobotBtn} alt="StopRobotBtn" />
<span className={style['interrupt-text']}></span>
</div>
{isAudioPublished ? (
isAITalking ? (
<div onClick={handleInterrupt} className={style.interrupt}>
<img src={StopRobotBtn} alt="StopRobotBtn" />
<span className={style['interrupt-text']}></span>
</div>
) : (
<div className={style.text}>...</div>
)
) : (
<div className={style.text}>...</div>
<div className={style.closed}></div>
)}
<AudioLoading loading={isUserTalking} />
<AudioLoading loading={isLoading} color={isAudioPublished ? undefined : '#EAEDF1'} />
</div>
);
}

View File

@ -112,6 +112,7 @@
font-weight: 400;
line-height: 20px;
color: rgba(66, 70, 78, 1);
cursor: pointer;
}
}

View File

@ -5,21 +5,45 @@
import VERTC from '@volcengine/rtc';
import { Tooltip, Typography } from '@arco-design/web-react';
import { useSelector } from 'react-redux';
import { useDispatch, useSelector } from 'react-redux';
import { useVisionMode } from '@/lib/useCommon';
import { RootState } from '@/store';
import RtcClient from '@/lib/RtcClient';
import Operation from './components/Operation';
import { Questions } from '@/config';
import { COMMAND, INTERRUPT_PRIORITY } from '@/utils/handler';
import CameraArea from '../MainArea/Room/CameraArea';
import { setCurrentMsg, setHistoryMsg } from '@/store/slices/room';
import utils from '@/utils/utils';
import styles from './index.module.less';
function Menu() {
const dispatch = useDispatch();
const room = useSelector((state: RootState) => state.room);
const scene = room.scene;
const isJoined = room?.isJoined;
const isVisionMode = useVisionMode();
/**
 * @brief Click handler for a suggested question.
 * @note Sends the question text to the bot as an ExternalTextToLLM command with
 *       high interrupt priority, then records the same text as a finished user
 *       message in both the history list and the current-message slot.
 */
const handleQuestion = (question: string) => {
  // Build a fresh payload for every dispatch so the two actions never share an object.
  const buildUserMsg = () => ({
    text: question,
    user: RtcClient.basicInfo.user_id,
    paragraph: true,
    definite: true,
  });

  RtcClient.commandAudioBot(COMMAND.EXTERNAL_TEXT_TO_LLM, INTERRUPT_PRIORITY.HIGH, question);

  const historyAction = setHistoryMsg(buildUserMsg());
  dispatch(historyAction);

  const currentAction = setCurrentMsg(buildUserMsg());
  dispatch(currentAction);
};
return (
<div className={styles.wrapper}>
{isJoined && utils.isMobile() && isVisionMode ? (
@ -51,9 +75,9 @@ function Menu() {
</div>
{isJoined ? (
<div className={`${styles.box} ${styles.questions}`}>
<div className={styles.title}></div>
<div className={styles.title}>:</div>
{Questions[scene].map((question) => (
<div className={styles.line} key={question}>
<div onClick={() => handleQuestion(question)} className={styles.line} key={question}>
{question}
</div>
))}

View File

@ -231,15 +231,6 @@ export const roomSlice = createSlice({
state.isUserTalking = userTalking;
}
}
/** 如果当前说话人是用户, 并且上一条记录是 AI 的话, 并且不成语句, 则是打断 */
if (userTalking) {
const lastMsg = state.msgHistory[state.msgHistory.length - 1];
const isAI = lastMsg.user === config.BotName;
if (!lastMsg.paragraph && isAI) {
lastMsg.isInterrupted = true;
state.msgHistory[state.msgHistory.length - 1] = lastMsg;
}
}
utils.addMsgWithoutDuplicate(state.msgHistory, {
user: payload.user,
value: payload.text,

View File

@ -5,7 +5,12 @@
import { useDispatch } from 'react-redux';
import logger from './logger';
import { setCurrentMsg, setHistoryMsg } from '@/store/slices/room';
import {
setCurrentMsg,
setHistoryMsg,
setInterruptMsg,
updateAITalkState,
} from '@/store/slices/room';
import RtcClient from '@/lib/RtcClient';
import Utils from '@/utils/utils';
@ -26,6 +31,45 @@ export enum AGENT_BRIEF {
FINISHED,
}
/**
 * @brief Control commands that the client can send to the AI bot.
 * @note RTCClient.commandAudioBot serializes the chosen member into the
 *       `Command` field of the JSON control message, so each string value
 *       below is what actually goes over the wire.
 */
export enum COMMAND {
/**
 * @brief Interrupt the bot. Used by the "stop" button shown while the AI is talking.
 */
INTERRUPT = 'interrupt',
/**
 * @brief Pass external text to TTS.
 * @note Presumably the bot speaks the supplied Message directly without
 *       involving the LLM — TODO confirm against the service documentation.
 */
EXTERNAL_TEXT_TO_SPEECH = 'ExternalTextToSpeech',
/**
 * @brief Pass external text to the LLM.
 * @note Used when the user clicks a suggested question; presumably the
 *       Message is treated like a user utterance — TODO confirm.
 */
EXTERNAL_TEXT_TO_LLM = 'ExternalTextToLLM',
}
/**
 * @brief Interrupt priority attached to a bot command.
 * @note This is a numeric enum without initializers, so the members evaluate to
 *       NONE = 0, HIGH = 1, MEDIUM = 2, LOW = 3. RTCClient.commandAudioBot sends
 *       the number as the `InterruptMode` field, so reordering members changes
 *       the value on the wire.
 */
export enum INTERRUPT_PRIORITY {
/**
 * @brief No priority specified (0). Default used by RTCClient.commandAudioBot.
 */
NONE,
/**
 * @brief High priority (1).
 * @note Presumably the bot stops its current output and handles the new
 *       Message immediately — TODO confirm against the service documentation.
 */
HIGH,
/**
 * @brief Medium priority (2).
 * @note Presumably the Message is handled after the current interaction
 *       finishes — TODO confirm.
 */
MEDIUM,
/**
 * @brief Low priority (3).
 * @note Presumably the Message may be dropped while an interaction is in
 *       progress — TODO confirm.
 */
LOW,
}
export const MessageTypeCode = {
[MESSAGE_TYPE.SUBTITLE]: 1,
[MESSAGE_TYPE.FUNCTION_CALL]: 2,
@ -44,6 +88,17 @@ export const useMessageHandler = () => {
const { Stage } = parsed || {};
const { Code, Description } = Stage || {};
logger.debug(Code, Description);
switch (Code) {
case AGENT_BRIEF.FINISHED:
dispatch(updateAITalkState({ isAITalking: false }));
break;
case AGENT_BRIEF.INTERRUPTED:
dispatch(updateAITalkState({ isAITalking: false }));
dispatch(setInterruptMsg());
break;
default:
break;
}
},
/**
* @brief
@ -85,8 +140,9 @@ export const useMessageHandler = () => {
JSON.stringify({
ToolCallID: parsed?.tool_calls?.[0]?.id,
Content: map[name.toLocaleLowerCase().replaceAll('_', '')],
})
)
}),
'func',
),
);
},
};

View File

@ -152,8 +152,7 @@ class Utils {
/**
* @brief TLV
*/
string2tlv(str: string) {
const type = 'func';
string2tlv(str: string, type: string) {
const typeBuffer = new Uint8Array(4);
for (let i = 0; i < type.length; i++) {