从video中抽取audio

from moviepy.editor import VideoFileClip
from pydub import AudioSegment

# Extract the audio track from a video and convert it to mono 16 kHz WAV
# (a common input format for ASR pipelines).

video_path = '/opt/audio/audios/video1.mp4'
audio_path = '/opt/audio/audios/video1.wav'  # destination for the extracted audio

# Load the video and write its audio track out as WAV.
video = VideoFileClip(video_path)
audio = video.audio
audio.write_audiofile(audio_path)
# Release the underlying file handles / ffmpeg subprocesses.
audio.close()
video.close()

# Re-open the WAV and normalize it: single channel, 16 kHz sample rate.
sound = AudioSegment.from_wav(audio_path)
sound = sound.set_channels(1)
sound = sound.set_frame_rate(16000)
# Overwrite the extracted file with the mono/16 kHz version.
sound.export(audio_path, format="wav")

截取video

from pydub import AudioSegment

# Cut a sub-segment out of a WAV file.

# Load the source audio file.
audio = AudioSegment.from_file("xxx/1.wav")

# Start and end offsets in milliseconds (pydub slicing is millisecond-based).
start_time = 3000
end_time = 28550

# Slice out the segment and export it as a new WAV file.
extracted = audio[start_time:end_time]
extracted.export("xxx/3.wav", format="wav")

拼接音频

import os
from pydub import AudioSegment

# Concatenate several short WAV clips into one long WAV file.

base_path = f'{os.getcwd()}/audios/reduce_noise/video2/'

# Paths of the short clips to concatenate (100.wav .. 104.wav).
# NOTE: the original also loaded every file in this loop just to read
# duration_seconds, which was never used — each file was decoded twice.
# Build only the path list here and load each file once below.
short_audio_files = [base_path + str(i) + ".wav" for i in range(100, 105)]

# Start from an empty segment and append each clip with no crossfade.
merged_audio = AudioSegment.empty()
for audio_file in short_audio_files:
    short_audio = AudioSegment.from_file(audio_file)
    merged_audio = merged_audio.append(short_audio, crossfade=0)

# Save the concatenated result as a single long audio file.
merged_audio.export(f"{base_path}merged_audio.wav", format="wav")

说话人分离:

主要使用whisper-diarization:GitHub – MahmoudAshraf97/whisper-diarization: Automatic Speech Recognition with Speaker Diarization based on OpenAI Whisper

使用场景:输入一整个 wav,分离出不同说话人各自对应的 wav,具体效果如下: