55 lines
2.3 KiB
Python
55 lines
2.3 KiB
Python
import scipy.io.wavfile as wav
|
||
from scipy import signal
|
||
import numpy as np
|
||
import matplotlib.pyplot as plt
|
||
from mpl_toolkits.mplot3d import Axes3D
|
||
import ipdb;
|
||
|
||
# Pitch (fundamental-frequency, f0) contour estimation.
#
# The signal is cut into short non-overlapping frames, each frame is
# autocorrelated, and a dynamic-programming (Viterbi-style) search picks one
# lag per frame so that the autocorrelation is high while the implied
# frequency changes little from frame to frame.

FRAME_DURATION = 0.01  # seconds of audio per analysis frame
F0_MIN_HZ = 80  # lowest fundamental frequency searched for
F0_MAX_HZ = 400  # highest fundamental frequency searched for


def load_mono_normalized(filename):
    """Read a WAV file and return ``(sample_rate, samples)``.

    Only the first channel of multi-channel audio is kept. The samples are
    converted to float64 and scaled so that the peak magnitude is 1.0; an
    all-zero or empty signal is returned unscaled instead of producing NaNs.

    ``filename`` is passed straight to ``scipy.io.wavfile.read``.
    """
    sample_rate, samples = wav.read(filename)
    if samples.ndim != 1:
        samples = samples[:, 0]
    # Convert before taking abs(): for signed integer PCM, abs() of the most
    # negative value wraps around and would corrupt the peak estimate.
    samples = samples.astype(np.float64)
    peak = np.max(np.abs(samples)) if samples.size else 0.0
    if peak > 0:
        samples = samples / peak  # normalise to [-1, 1]
    return sample_rate, samples


def _candidate_lag_range(sample_rate, frame_length, f0_min, f0_max):
    """Return the half-open range ``(lo, hi)`` of candidate lags in samples.

    A fundamental of ``f`` Hz has a pitch period (lag) of ``sample_rate / f``
    samples, so the band ``f0_min..f0_max`` maps to lags
    ``sample_rate / f0_max .. sample_rate / f0_min``. Both ends are clamped
    to the frame length, and the lower end is kept at 1 or more because
    lag 0 has no finite frequency.
    """
    lo = max(1, min(int(sample_rate // f0_max), frame_length))
    hi = min(int(sample_rate // f0_min), frame_length)
    return lo, hi


def _frame_autocorrelation(samples, frame_length):
    """Return a ``(num_frames, frame_length)`` array of frame autocorrelations.

    Row ``n`` holds the autocorrelation of the n-th non-overlapping frame for
    lags ``0 .. frame_length - 1``. Trailing samples that do not fill a whole
    frame are ignored.
    """
    num_frames = len(samples) // frame_length
    frames = np.reshape(
        samples[: num_frames * frame_length], (num_frames, frame_length)
    )
    autocorrelation = np.zeros((num_frames, frame_length))
    for n, frame in enumerate(frames):
        # mode="full" yields lags -(L-1)..(L-1); keep the non-negative half.
        autocorrelation[n] = signal.correlate(frame, frame, mode="full")[
            frame_length - 1:
        ]
    return autocorrelation


def _track_pitch_lags(autocorrelation, sample_rate, lo, hi):
    """Pick one lag per frame by dynamic programming over lags ``lo..hi-1``.

    The local cost of a lag is the negated autocorrelation at that lag; the
    cost of moving between lags in consecutive frames is the absolute
    difference of the frequencies they imply, in Hz. Requires at least one
    frame and ``1 <= lo < hi``.

    Ideally only the lags of autocorrelation peaks would be candidates, but
    local maxima are hard to identify reliably and a frame holds few points,
    so every lag inside the range is a candidate. Lags outside the range are
    not states at all, so the search can never select them.
    """
    num_frames = autocorrelation.shape[0]
    lags = np.arange(lo, hi)
    freqs = sample_rate / lags
    # transition[i, j]: cost of going from candidate i to candidate j.
    # It does not depend on the frame, so it is built once.
    transition = np.abs(freqs[:, None] - freqs[None, :])
    dist = -autocorrelation[:, lo:hi]

    cost = np.empty(dist.shape)
    back = np.zeros(dist.shape, dtype=np.intp)
    cost[0] = dist[0]  # the first frame contributes its own local cost
    for n in range(1, num_frames):
        total = cost[n - 1][:, None] + transition
        back[n] = np.argmin(total, axis=0)
        cost[n] = dist[n] + np.min(total, axis=0)

    # Backtrack from the cheapest final state.
    best = np.empty(num_frames, dtype=np.intp)
    best[-1] = np.argmin(cost[-1])
    for n in range(num_frames - 2, -1, -1):
        best[n] = back[n + 1, best[n + 1]]
    return lags[best]


def estimate_f0(sound_array, sample_rate, frame_duration=FRAME_DURATION,
                f0_min=F0_MIN_HZ, f0_max=F0_MAX_HZ):
    """Estimate one fundamental frequency (Hz) per frame of ``sound_array``.

    Args:
        sound_array: 1-D sequence of audio samples.
        sample_rate: sampling rate in Hz.
        frame_duration: frame length in seconds.
        f0_min: lowest fundamental frequency to search for, in Hz.
        f0_max: highest fundamental frequency to search for, in Hz.

    Returns:
        A float array with one value per complete frame; empty when the
        signal is shorter than one frame.

    Raises:
        ValueError: if the frame is shorter than one sample or no lag falls
            inside the requested frequency band.

    NOTE: candidate lags are capped at the frame length, so with the default
    10 ms frame the longest detectable period is just under 10 ms (a little
    above 100 Hz) even though ``f0_min`` is 80 Hz; pass a larger
    ``frame_duration`` to reach lower pitches.
    """
    frame_length = int(sample_rate * frame_duration)
    if frame_length < 1:
        raise ValueError("frame_duration is shorter than one sample")
    lo, hi = _candidate_lag_range(sample_rate, frame_length, f0_min, f0_max)
    if lo >= hi:
        raise ValueError("no candidate lag lies inside the f0 search band")

    samples = np.asarray(sound_array, dtype=np.float64)
    autocorrelation = _frame_autocorrelation(samples, frame_length)
    if autocorrelation.shape[0] == 0:
        return np.zeros(0)

    l_hat = _track_pitch_lags(autocorrelation, sample_rate, lo, hi)
    return sample_rate / l_hat


if __name__ == "__main__":
    # Read the audio file and estimate its pitch contour.
    filename = "./tang1.wav"
    sample_rate, sound_array = load_mono_normalized(filename)
    f0 = estimate_f0(sound_array, sample_rate)
|
||
|