proper trimming
This commit is contained in:
parent
ea5ba44e34
commit
67a46a757d
260
new-process.py
260
new-process.py
|
@ -8,6 +8,7 @@ import wave as wv
|
||||||
import struct
|
import struct
|
||||||
import librosa
|
import librosa
|
||||||
import heapq
|
import heapq
|
||||||
|
import scipy
|
||||||
|
|
||||||
print("Starting...\n")
|
print("Starting...\n")
|
||||||
|
|
||||||
|
@ -52,6 +53,7 @@ def find_bpm_2(sample_rate, data, threshold, maxbpm, show):
|
||||||
return (np.round(beat, 3), np.round(error, 3))
|
return (np.round(beat, 3), np.round(error, 3))
|
||||||
|
|
||||||
def to_ms(song_data, sample_rate, offset):
|
def to_ms(song_data, sample_rate, offset):
|
||||||
|
# converts audio data to have exactly 1 sample per millisecond (aka set sample_rate to 1000)
|
||||||
new_data = []
|
new_data = []
|
||||||
spacing = int(sample_rate * 0.001)
|
spacing = int(sample_rate * 0.001)
|
||||||
mx = max(song_data)
|
mx = max(song_data)
|
||||||
|
@ -77,6 +79,7 @@ def to_ms(song_data, sample_rate, offset):
|
||||||
|
|
||||||
def filter_n_percent(song_name, offset, length, threshold, reduce, show):
|
def filter_n_percent(song_name, offset, length, threshold, reduce, show):
|
||||||
# threshold is in ]0, 100]
|
# threshold is in ]0, 100]
|
||||||
|
# filter data associated with song_name to keep only the highest threshold% values
|
||||||
|
|
||||||
subprocess.run(["ffmpeg", "-ss", str(offset), "-t", str(length), "-i", song_name, "crop.wav"])
|
subprocess.run(["ffmpeg", "-ss", str(offset), "-t", str(length), "-i", song_name, "crop.wav"])
|
||||||
|
|
||||||
|
@ -119,6 +122,7 @@ def filter_n_percent(song_name, offset, length, threshold, reduce, show):
|
||||||
|
|
||||||
|
|
||||||
def write_to_file_thr(sample_rate, song_data, offset, threshold, filename):
|
def write_to_file_thr(sample_rate, song_data, offset, threshold, filename):
|
||||||
|
# write data to output file
|
||||||
file = open(filename, 'w')
|
file = open(filename, 'w')
|
||||||
file.writelines('time,amplitude\n')
|
file.writelines('time,amplitude\n')
|
||||||
mx = max(song_data)
|
mx = max(song_data)
|
||||||
|
@ -132,22 +136,6 @@ def write_to_file_thr(sample_rate, song_data, offset, threshold, filename):
|
||||||
file.writelines(str(np.round(song_data[i], 0)))
|
file.writelines(str(np.round(song_data[i], 0)))
|
||||||
file.writelines('\n')
|
file.writelines('\n')
|
||||||
|
|
||||||
def smooth(data, thr, mergeThr, show):
|
|
||||||
mx = max(data)
|
|
||||||
for i in range(len(data)-mergeThr):
|
|
||||||
if(data[i]/mx > thr):
|
|
||||||
for k in range(1, mergeThr):
|
|
||||||
data[i+k] = 0
|
|
||||||
if(show):
|
|
||||||
t = [j/1000 for j in range(len(data))]
|
|
||||||
plt.plot(t, data)
|
|
||||||
plt.xlabel("Time (not scaled to origin)")
|
|
||||||
plt.ylabel("Amplitude")
|
|
||||||
plt.grid()
|
|
||||||
plt.show()
|
|
||||||
|
|
||||||
return data
|
|
||||||
|
|
||||||
def round_t(id, sample_rate, bpm, div, offset, k0):
|
def round_t(id, sample_rate, bpm, div, offset, k0):
|
||||||
k = k0
|
k = k0
|
||||||
t = offset + k/(bpm*div)
|
t = offset + k/(bpm*div)
|
||||||
|
@ -159,7 +147,8 @@ def round_t(id, sample_rate, bpm, div, offset, k0):
|
||||||
return t
|
return t
|
||||||
return (t - 1/(bpm*div), 0)
|
return (t - 1/(bpm*div), 0)
|
||||||
|
|
||||||
def snap(data, sample_rate, bpm, offset, divisor, show):
|
def snap(data, sample_rate, bpm, divisor, show):
|
||||||
|
# adjust time amplitudes to match the given BPM
|
||||||
new = [0 for x in range(int(1000*len(data)/sample_rate))] # 1pt per millisecond
|
new = [0 for x in range(int(1000*len(data)/sample_rate))] # 1pt per millisecond
|
||||||
print("old =", len(data))
|
print("old =", len(data))
|
||||||
print("len =", 1000*len(data)/sample_rate)
|
print("len =", 1000*len(data)/sample_rate)
|
||||||
|
@ -172,6 +161,11 @@ def snap(data, sample_rate, bpm, offset, divisor, show):
|
||||||
t = k/(bpm*divisor)
|
t = k/(bpm*divisor)
|
||||||
k += 60
|
k += 60
|
||||||
|
|
||||||
|
'''
|
||||||
|
if(np.abs(i/sample_rate - k/(bpm*divisor)) > np.abs(i/sample_rate - (k-60)/(bpm*divisor))):
|
||||||
|
k -= 60
|
||||||
|
t = k/(bpm*divisor)'''
|
||||||
|
|
||||||
if(i%(len(data)//100) == 0):
|
if(i%(len(data)//100) == 0):
|
||||||
print(percent, "%")
|
print(percent, "%")
|
||||||
percent += 1
|
percent += 1
|
||||||
|
@ -190,29 +184,120 @@ def snap(data, sample_rate, bpm, offset, divisor, show):
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
||||||
return new
|
return new
|
||||||
|
|
||||||
|
def compress(Zxx):
    """Placeholder for a compression step that is not implemented yet.

    The body only creates an empty accumulator: `Zxx` is never read, nothing
    is computed, and the function implicitly returns None.
    NOTE(review): `Zxx` is presumably a spectrogram matrix (the name matches
    the scipy.signal convention) -- confirm once the body is written.
    """
    res = []  # TODO: fill and return once the compression is implemented
|
||||||
|
|
||||||
|
|
||||||
|
def _filter_chunk(chunk, sample_rate, minfreq, maxfreq, upperthr, ampthr):
    """Return `chunk` (1-D samples) with the rejected frequency bins zeroed."""
    spectrum = scipy.fft.rfft(chunk)
    # Bin frequencies come from the chunk's real length, so a shorter final
    # chunk is still filtered at the right frequencies.
    freqs = scipy.fft.rfftfreq(len(chunk), 1 / sample_rate)
    magnitude = np.abs(spectrum)
    floor = magnitude.max() / ampthr
    rejected = ((freqs >= minfreq) & (freqs < maxfreq)) | (freqs > upperthr)
    spectrum[rejected | (magnitude < floor)] = 0
    # Invert to the chunk's own length so the audio is never stretched.
    return scipy.fft.irfft(spectrum, n=len(chunk))


def _to_int16(signal):
    """Scale `signal` so its largest absolute sample is 32767, as int16."""
    signal = np.asarray(signal, dtype=float)
    peak = np.max(np.abs(signal)) if signal.size else 0.0
    if peak == 0:
        # Silence (or no samples): nothing to scale, avoid dividing by zero.
        return np.zeros(signal.shape, dtype=np.int16)
    return (32767 * signal / peak).astype(np.int16)


def void_freq(song_name, offset, songlen, increment, minfreq, maxfreq, upperthr, ampthr):
    """Filter a section of a song in the frequency domain and save it.

    The section [offset, songlen] is extracted with ffmpeg into a temporary
    "crop.wav", split into chunks of `increment` seconds, and each chunk is
    filtered independently before the result is written to "trimmed.wav"
    with the sample rate of the extracted audio.

    Parameters:
        song_name: path of the input audio file handed to ffmpeg.
        offset: start of the section, in seconds.
        songlen: END timestamp of the section in seconds (not a duration);
            ffmpeg is asked for `songlen - offset` seconds.
        increment: chunk length in seconds.
        minfreq, maxfreq: bins with minfreq <= f < maxfreq (Hz) are removed.
        upperthr: bins with f > upperthr (Hz) are removed.
        ampthr: within a chunk, bins weaker than (chunk peak magnitude /
            ampthr) are removed.

    Returns:
        None. The filtered audio is written to "trimmed.wav" (16-bit PCM).

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits with an error.
        ValueError: if a chunk would hold no sample or no audio was extracted.
    """
    import os  # local import: only needed for the temp-file cleanup below

    # "-y" stops ffmpeg from blocking on an overwrite prompt when a stale
    # crop.wav was left behind by an earlier failed run.
    subprocess.run(
        ["ffmpeg", "-y", "-ss", str(offset), "-t", str(songlen - offset),
         "-i", song_name, "crop.wav"],
        check=True,
    )
    try:
        sample_rate, global_data = wavfile.read("crop.wav")
    finally:
        if os.path.exists("crop.wav"):
            os.remove("crop.wav")
    subprocess.run(["clear"])

    blit = int(sample_rate * increment)  # samples per chunk
    if blit < 1:
        raise ValueError("increment is too small: a chunk must hold at least one sample")
    if global_data.ndim > 1:
        # Multi-channel audio: average the channels so the FFT runs along
        # time instead of across the channel axis.
        global_data = global_data.mean(axis=1)
    if len(global_data) == 0:
        raise ValueError("no audio samples were extracted from " + str(song_name))

    # Chunk by the samples actually present rather than by timestamps, so no
    # empty trailing chunk can ever reach the FFT.
    chunks = [global_data[start:start + blit]
              for start in range(0, len(global_data), blit)]

    print("FFT :", len(chunks), "\nFFT[0] :", len(chunks[0]) // 2 + 1, "\npfreq :", blit // 2 + 1)

    print("Trimming...")
    filtered = [
        _filter_chunk(chunk, sample_rate, minfreq, maxfreq, upperthr, ampthr)
        for chunk in chunks
    ]

    print("Converting...")
    res = _to_int16(np.concatenate(filtered))
    # Write with the rate that was read, not a hard-coded 44100 Hz.
    wavfile.write("trimmed.wav", sample_rate, res)

    print("Done")
|
||||||
|
|
||||||
if(True):
|
if(True):
|
||||||
#data = filter_n_percent("worlds_end_3.wav", 74.582, 30, 0.3, reduce=False, show=False)
|
void_freq("worlds_end_3.wav", 74.582, 84.582, 10.001, minfreq=0, maxfreq=440, upperthr=4500, ampthr=60)
|
||||||
#data = filter_n_percent("no.wav", 1, 15, 0.3)
|
#void_freq("440.wav", 0, 3.9, 3.901, minfreq=0, maxfreq=0, upperthr=20000)
|
||||||
#da = find_bpm(44100, data, 100, 200, 1, 0)
|
|
||||||
|
|
||||||
# def find_bpm_2(sample_rate, data, threshold, maxbpm):
|
if(True):
|
||||||
#da = find_bpm_2(44100, data, 0.92, 240, show=False)
|
#data2 = filter_n_percent("worlds_end_3.wav", 74.582, 15, 0.2, reduce=False, show=True)
|
||||||
#print("BPM is", da[0], "with std of", da[1])
|
data2 = filter_n_percent("trimmed.wav", 0, 10, 0.1, reduce=False, show=False)
|
||||||
|
data2 = snap(data2, 44100, 180, 4, show=True)
|
||||||
|
#write_to_file_thr(1000, data2, 74.582, 0.02, "timing_points.csv")
|
||||||
data2 = filter_n_percent("worlds_end_3.wav", 74.582, 15, 0.2, reduce=False, show=True)
|
|
||||||
data2 = snap(data2, 44100, 178, 74.582, 4, show=True)
|
|
||||||
write_to_file_thr(1000, data2, 74.582, 0.02, "timing_points.csv")
|
|
||||||
'''
|
|
||||||
data2 = filter_n_percent("no.wav", 1, 30, 0.8, reduce=True, show=True)
|
|
||||||
write_to_file_thr(1000, smooth(data2, 0.5, 50, show=True), 1, 0.02, "timing_points.csv")
|
|
||||||
'''
|
|
||||||
|
|
||||||
#data = to_ms(data, 44100, 1)
|
|
||||||
|
|
||||||
print("Program finished with return 0")
|
print("Program finished with return 0")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
''' -------------------------------------------------------------------- '''
|
''' -------------------------------------------------------------------- '''
|
||||||
''' -----------------------| Feuilles mortes |-------------------------- '''
|
''' -----------------------| Feuilles mortes |-------------------------- '''
|
||||||
|
@ -220,6 +305,21 @@ print("Program finished with return 0")
|
||||||
|
|
||||||
|
|
||||||
'''
|
'''
|
||||||
|
def smooth(data, thr, mergeThr, show):
|
||||||
|
mx = max(data)
|
||||||
|
for i in range(len(data)-mergeThr):
|
||||||
|
if(data[i]/mx > thr):
|
||||||
|
for k in range(1, mergeThr):
|
||||||
|
data[i+k] = 0
|
||||||
|
if(show):
|
||||||
|
t = [j/1000 for j in range(len(data))]
|
||||||
|
plt.plot(t, data)
|
||||||
|
plt.xlabel("Time (not scaled to origin)")
|
||||||
|
plt.ylabel("Amplitude")
|
||||||
|
plt.grid()
|
||||||
|
plt.show()
|
||||||
|
|
||||||
|
return data
|
||||||
if(False):
|
if(False):
|
||||||
#t, f, Zxx = fct("no.wav", 0, 0.032, 10, 5000, False)
|
#t, f, Zxx = fct("no.wav", 0, 0.032, 10, 5000, False)
|
||||||
#t, f, Zxx = fct("worlds_end_3.wav", 150.889, 0.032, 170.889, 3000, False)
|
#t, f, Zxx = fct("worlds_end_3.wav", 150.889, 0.032, 170.889, 3000, False)
|
||||||
|
@ -489,3 +589,95 @@ def find_bpm(sample_rate, data, minbpm, maxbpm, step, width):
|
||||||
|
|
||||||
return (optimal, optimal_acc)
|
return (optimal, optimal_acc)
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
'''
|
||||||
|
def void_freq(song_name, offset, songlen, increment, lthr, gthr):
|
||||||
|
to_cut = 20000//2500
|
||||||
|
global_Zxx = np.array([])
|
||||||
|
global_f = np.array([])
|
||||||
|
global_t = np.array([])
|
||||||
|
current_time = offset
|
||||||
|
k = 0
|
||||||
|
sample_rate, global_data = wavfile.read(song_name)
|
||||||
|
blit = int(sample_rate*increment)
|
||||||
|
print("Blit :", blit)
|
||||||
|
while(current_time <= songlen):
|
||||||
|
#subprocess.run(["ffmpeg", "-ss", str(current_time), "-t", str(increment), "-i", song_name, "crop.wav"])
|
||||||
|
|
||||||
|
#sample_rate, audio_data = wavfile.read('crop.wav')
|
||||||
|
audio_data = global_data[int(k*blit):int((k+1)*blit)]
|
||||||
|
size = audio_data.size
|
||||||
|
|
||||||
|
#subprocess.run(["clear"])
|
||||||
|
#subprocess.run(["rm", "crop.wav"])
|
||||||
|
|
||||||
|
# do stuff here
|
||||||
|
#f, t, Zxx = signal.stft(audio_data, sample_rate, nperseg=1000)
|
||||||
|
f, t, Zxx = signal.spectrogram(audio_data, fs=sample_rate, nfft=size)
|
||||||
|
leng = len(f)
|
||||||
|
|
||||||
|
f, Zxx = f[:leng//to_cut], Zxx[:leng//to_cut]
|
||||||
|
|
||||||
|
for i in range(len(Zxx)):
|
||||||
|
for j in range(len(Zxx[i])):
|
||||||
|
#Zxx[i][j] *= 1127*np.log(1+f[i]/700)
|
||||||
|
Zxx[i][j] *= 1000
|
||||||
|
|
||||||
|
t = np.array([current_time + x for x in t])
|
||||||
|
|
||||||
|
if(k == 0):
|
||||||
|
global_f = f
|
||||||
|
global_t = t
|
||||||
|
global_Zxx = Zxx
|
||||||
|
else:
|
||||||
|
global_Zxx = np.concatenate((global_Zxx, Zxx), axis=1)
|
||||||
|
global_t = np.concatenate((global_t, t))
|
||||||
|
|
||||||
|
#print(len(global_t))
|
||||||
|
|
||||||
|
k += 1
|
||||||
|
current_time = offset + k*increment
|
||||||
|
|
||||||
|
print("Completion rate : ", np.round(100*(current_time-offset)/(songlen-offset), 4), "%")
|
||||||
|
|
||||||
|
print("Finding global max...")
|
||||||
|
gmax = 0
|
||||||
|
for i in range(len(global_Zxx)):
|
||||||
|
for j in range(len(global_Zxx[i])):
|
||||||
|
if(global_Zxx[i][j] > gmax):
|
||||||
|
gmax = global_Zxx[i][j]
|
||||||
|
|
||||||
|
print("Trimming...")
|
||||||
|
for j in range(len(global_Zxx[0])):
|
||||||
|
lmax = 0
|
||||||
|
for i in range(len(global_Zxx)):
|
||||||
|
if(global_Zxx[i][j] > lmax):
|
||||||
|
lmax = global_Zxx[i][j]
|
||||||
|
|
||||||
|
for i in range(len(global_Zxx)):
|
||||||
|
val = global_Zxx[i][j]
|
||||||
|
if(val/lmax <= lthr/100):
|
||||||
|
global_Zxx[i][j] = 0
|
||||||
|
elif(val/gmax <= gthr/100):
|
||||||
|
global_Zxx[i][j] = 0
|
||||||
|
|
||||||
|
if(False):
|
||||||
|
print("Plotting...")
|
||||||
|
plt.pcolormesh(global_t, global_f, np.abs(global_Zxx), shading='gouraud')
|
||||||
|
# print(len(global_Zxx), len(global_Zxx[0]))
|
||||||
|
print("XLEN :", len(global_Zxx), "\nYLEN :", len(global_Zxx[0]))
|
||||||
|
plt.title('STFT Magnitude')
|
||||||
|
plt.ylabel('Frequency [Hz]')
|
||||||
|
plt.xlabel('Time [sec]')
|
||||||
|
plt.show()
|
||||||
|
|
||||||
|
if(True):
|
||||||
|
print("Converting...")
|
||||||
|
audio_signal = librosa.griffinlim(global_Zxx)
|
||||||
|
#scipy.io.wavfile.write('trimmed.wav', sample_rate, np.array(audio_signal, dtype=np.int16))
|
||||||
|
wavfile.write('test.wav', sample_rate, np.array(audio_signal, dtype=np.int16))
|
||||||
|
|
||||||
|
print("Done")
|
||||||
|
'''
|
Loading…
Reference in New Issue