def _keep_top_percent(window, threshold):
    """Zero, in place, every sample of `window` outside its top `threshold` percent.

    `window` must be a 1-D NumPy array (typically a slice view, so the parent
    array is modified too). The number of survivors is
    ``int((len(window) * threshold) // 100)``; with equal values the earliest
    samples win, which is the same tie-breaking as ``heapq.nlargest``.
    """
    keep_count = max(0, int((len(window) * threshold) // 100))
    # A stable sort of the negated values ranks samples from largest to
    # smallest while keeping equal samples in their original order. float64
    # avoids overflow when negating the most negative integer sample.
    ranking = np.argsort(-np.asarray(window, dtype=np.float64), kind="stable")
    discard = np.ones(len(window), dtype=bool)
    discard[ranking[:keep_count]] = False
    window[discard] = 0


def filter_n_percent_serial(song_name, offset, n_iter, step, threshold):
    """Keep only the highest `threshold` percent of samples in each window.

    The song is cropped with ffmpeg into a temporary ``crop.wav``, read back,
    and processed as `n_iter` consecutive windows of `step` seconds. Inside
    each window every sample that is not among the highest values is set to 0.

    Parameters:
        song_name: path of the audio file handed to ffmpeg.
        offset: start position (seconds) passed to ffmpeg's ``-ss``.
        n_iter: number of consecutive windows to filter.
        step: length of one window, in seconds.
        threshold: percentage of samples kept per window, in ]0, 100].

    Returns:
        The whole cropped sample array, filtered in place. Samples located
        after the last window are returned untouched.

    Raises:
        ValueError: if the cropped audio is not mono (1-D).
    """
    # NOTE(review): ffmpeg's -t looks like a duration rather than an end
    # position, so this probably reads `offset` seconds more than the windows
    # need. Left unchanged because callers may rely on the returned length.
    subprocess.run(["ffmpeg", "-ss", str(offset), "-t", str(offset+step*n_iter), "-i", song_name, "crop.wav"])

    try:
        sample_rate, global_data = wavfile.read('crop.wav')
    finally:
        # Always remove the temporary crop, even when it could not be read.
        subprocess.run(["clear"])
        subprocess.run(["rm", "crop.wav"])

    if global_data.ndim != 1:
        raise ValueError("filter_n_percent_serial expects mono audio (1-D sample array)")

    for i in range(n_iter):
        print(i, "/", n_iter)
        start = int(i*step*sample_rate)
        stop = int((i+1)*step*sample_rate)
        # Basic slicing yields a view, so the helper edits global_data itself.
        # A window that lies past the end of the data is empty and is skipped.
        _keep_top_percent(global_data[start:stop], threshold)

    return global_data
def compress(Zxx):
    """Placeholder for an unfinished compression step.

    The original body only created an empty list and fell off the end, so this
    function has always returned ``None``; that result is kept explicit here.
    """
    return None


def get_freq(song_name, offset, step, songlen, data, display=False):
    """Find the dominant frequency of the audio at every non-zero entry of `data`.

    The song is cropped with ffmpeg into a temporary ``crop.wav``. Each index
    of `data` is mapped proportionally onto the cropped samples; for every
    index whose value is non-zero, a window of ``int(sample_rate*step)``
    samples starting there is transformed and the frequency of its strongest
    bin is recorded.

    Parameters:
        song_name: path of the audio file handed to ffmpeg.
        offset: start position (seconds) passed to ffmpeg's ``-ss``.
        step: analysis window length, in seconds.
        songlen: length used to build ffmpeg's ``-t`` value (``offset+songlen``).
        data: sequence whose non-zero entries mark the positions to analyse.
        display: when true, plot the result with matplotlib.

    Returns:
        A list as long as `data`, holding the dominant frequency at analysed
        positions and 0 everywhere else.
    """
    subprocess.run(["ffmpeg", "-ss", str(offset), "-t", str(offset+songlen), "-i", song_name, "crop.wav"])

    try:
        sample_rate, global_data = wavfile.read("crop.wav")
    finally:
        # Always remove the temporary crop, even when it could not be read.
        subprocess.run(["clear"])
        subprocess.run(["rm", "crop.wav"])

    blit = int(sample_rate*step)
    pfreq = scipy.fft.rfftfreq(blit, 1/sample_rate)

    print("len : ", len(global_data))
    print("len : ", len(data))
    print(len(pfreq))

    frequencies = [0 for _ in range(len(data))]

    for s, amplitude in enumerate(data):
        if amplitude == 0:
            continue
        start = int(s*len(global_data)/len(data))
        # The window length follows the file's real sample rate, and n=blit
        # zero-pads windows cut short by the end of the data so that every
        # spectrum has exactly the bins described by pfreq.
        window = global_data[start:start + blit]
        magnitudes = np.abs(scipy.fft.rfft(window, n=blit))
        frequencies[s] = pfreq[int(np.argmax(magnitudes))]

    if display:
        plt.plot([t/1000 for t in range(len(data))], frequencies)
        plt.grid()
        plt.xlabel("Time (s)")
        plt.ylabel("Dominant frequency (Hz)")
        plt.title("Dominant frequencies at peaks")
        plt.show()

    return frequencies


def _filter_spectrum(spectrum, pfreq, minfreq, maxfreq, upperthr, ampthr, ampfreq, ampval):
    """Boost bins near `ampfreq`, then zero the rejected band and the weak bins.

    Returns a new array; `spectrum` and `pfreq` must have the same length.
    """
    gain = 1 + ampval/np.maximum(1, np.abs(pfreq - ampfreq))
    boosted = spectrum * gain
    # The block maximum is taken after boosting but before anything is zeroed.
    magnitudes = np.abs(boosted)
    lmax = magnitudes.max()
    rejected = ((pfreq >= minfreq) & (pfreq < maxfreq)) | (pfreq > upperthr)
    boosted[rejected | (magnitudes < lmax/ampthr)] = 0
    return boosted


def _to_int16_pcm(samples):
    """Scale `samples` so the largest absolute value maps to 32767, as int16."""
    samples = np.asarray(samples, dtype=np.float64)
    peak = np.max(np.abs(samples)) if samples.size else 0
    if peak == 0:
        # Silence (or no data at all): nothing to scale, avoid dividing by zero.
        return np.zeros(samples.shape, dtype=np.int16)
    return (32767*samples/peak).astype(np.int16)


def void_freq(song_name, offset, songlen, increment, minfreq, maxfreq, upperthr, ampthr, ampfreq, ampval, leniency, write, output_file="trimmed.wav"):
    """Filter a song block by block in the frequency domain.

    The song is cropped with ffmpeg into a temporary ``crop.wav`` and split
    into consecutive blocks of `increment` seconds. In each block the bins are
    boosted by ``1 + ampval/max(1, |f - ampfreq|)``; bins with
    ``minfreq <= f < maxfreq`` or ``f > upperthr`` are zeroed, as are bins
    weaker than the block's maximum divided by `ampthr`. With `write` set, the
    blocks are transformed back, peak-normalised to int16 and written to
    `output_file` at the input's sample rate.

    Parameters:
        song_name: path of the audio file handed to ffmpeg.
        offset: start position (seconds) passed to ffmpeg's ``-ss``.
        songlen: blocks are produced while ``offset + k*increment <= songlen``;
            it is also used to build ffmpeg's ``-t`` value (``songlen+offset``).
        increment: block length, in seconds.
        minfreq, maxfreq: bounds (Hz) of the band that is removed.
        upperthr: frequencies (Hz) above this value are removed.
        ampthr: divisor applied to the block maximum to get the magnitude floor.
        ampfreq, ampval: centre frequency (Hz) and strength of the boost.
        leniency: accepted for compatibility; not used by this function.
        write: whether to write the filtered audio to `output_file`.
        output_file: destination WAV path.

    Returns:
        None.
    """
    # NOTE(review): the per-block transforms assume mono (1-D) audio — confirm
    # against the files this is run on.
    subprocess.run(["ffmpeg", "-ss", str(offset), "-t", str(songlen+offset), "-i", song_name, "crop.wav"])

    try:
        sample_rate, global_data = wavfile.read("crop.wav")
    finally:
        # Always remove the temporary crop, even when it could not be read.
        subprocess.run(["clear"])
        subprocess.run(["rm", "crop.wav"])

    blit = int(sample_rate*increment)
    pfreq = scipy.fft.rfftfreq(blit, 1/sample_rate)

    fft_list = []
    k = 0
    current_time = offset
    # Stop once the data is exhausted: a block past the end would be empty.
    # n=blit zero-pads a short final block so its bins still match pfreq.
    while current_time <= songlen and k*blit < len(global_data):
        fft_list.append(scipy.fft.rfft(global_data[k*blit:(k+1)*blit], n=blit))
        k += 1
        current_time = offset + k*increment

    print("FFT :", len(fft_list), "\nFFT[0] :", len(fft_list[0]) if fft_list else 0, "\npfreq :", len(pfreq))

    print("Finding global max...")
    print("Trimming...")
    filtered = [
        _filter_spectrum(spectrum, pfreq, minfreq, maxfreq, upperthr, ampthr, ampfreq, ampval)
        for spectrum in fft_list
    ]

    if write:
        print("Converting...")
        if filtered:
            samples = np.concatenate([scipy.fft.irfft(chunk, n=blit) for chunk in filtered])
        else:
            samples = np.zeros(0)
        # Write at the rate the samples were read with, not a fixed 44100 Hz.
        wavfile.write(output_file, sample_rate, _to_int16_pcm(samples))

    print("Done")


def get_tpts(data, sample_rate, thr):
    """Return, and print one per line, the times (index/sample_rate) where data exceeds `thr`."""
    res = [i/sample_rate for i, value in enumerate(data) if value > thr]

    for t in res:
        print(t)
    return res


def test_sample(timelist):
    """Play back a list of timestamps as console output.

    For each consecutive pair of timestamps, print a random run of 1 to 10
    "E" characters followed by "F", then sleep for the gap between the two.
    """
    for i in range(1, len(timelist)):
        #os.system('play -n synth %s sin %s' % (0.05, 440))
        print("E"*random.randint(1, 10), end="")
        print("F")
        sleep(timelist[i]-timelist[i-1])


# Despite its name this is a manual playback helper, not a unit test: keep
# pytest from collecting it if the module is ever star-imported by a test file.
test_sample.__test__ = False

# Parameter sets used during experiments:
#Offset = 74.582
#BPM = 178
#Length = 48*60/BPM-0.01

#Offset = 0
#BPM = 180
#Length = 48*60/BPM-0.01

#Offset = 7
#BPM = 140
#Length = 32*60/BPM-0.01


def convert_tuple(datares, freq):
    """
    Convert datares to a list of tuples (index, amplitude, frequency).

    Only the indices where the amplitude is strictly positive are kept;
    freq[i] is the frequency associated with datares[i].
    """
    return [(i, amplitude, freq[i]) for i, amplitude in enumerate(datares) if amplitude > 0]


def process_song(filename, offset, bpm, div_len_factor=60, n_iter=48, threshold=0.5, divisor=4):
    """Run the full pipeline on one song and return its (index, amplitude, frequency) tuples.

    Steps: frequency-filter the song into a temporary ``<filename>_trimmed.wav``,
    keep the highest `threshold` percent of samples per window, snap the result
    to the beat grid defined by `bpm` and `divisor`, look up the dominant
    frequency at each remaining point, and delete the temporary file.

    Parameters:
        filename: path of the input audio file.
        offset: start position in seconds.
        bpm: tempo used for window lengths and snapping.
        div_len_factor: window length is ``div_len_factor/bpm - 0.01`` seconds.
        n_iter: number of windows to process.
        threshold: percentage of samples kept per window.
        divisor: beat subdivision forwarded to snap.

    Returns:
        The list built by convert_tuple.
    """
    div_len = div_len_factor/bpm-0.01
    filtered_name = f"{filename}_trimmed.wav"
    try:
        void_freq(filename, offset, offset+div_len*(n_iter+1)+0.01, 4*60/bpm, minfreq=0, maxfreq=330, upperthr=5000, ampthr=60, ampfreq = 1200, ampval = 7.27, leniency = 0.005, write=True, output_file=filtered_name)
        # NOTE(review): the trimmed file already starts at `offset`, yet the
        # same offset is applied again below — confirm this is intended.
        datares = filter_n_percent_serial(filtered_name, offset, n_iter, div_len, threshold)
        # NOTE(review): 44100 assumes the input sample rate — confirm.
        datares = snap(datares, 44100, bpm, divisor, True)
        frequencies = get_freq(filtered_name, offset, div_len, div_len*n_iter, datares, True)
    finally:
        # Remove the temporary file even when a step fails part-way through.
        trimmed = Path(filtered_name)
        if trimmed.exists():
            trimmed.unlink()
    return convert_tuple(datares, frequencies)


def main():
    """Process the bundled sample song and print the result."""
    data = process_song("tetris_4.wav", 0, 160)
    print(data)
    print("Program finished with return 0")


if __name__ == "__main__":
    main()
plt.ylabel("Amplitude") + plt.grid() + plt.show() + + return data if(False): #t, f, Zxx = fct("no.wav", 0, 0.032, 10, 5000, False) #t, f, Zxx = fct("worlds_end_3.wav", 150.889, 0.032, 170.889, 3000, False) @@ -488,4 +717,96 @@ def find_bpm(sample_rate, data, minbpm, maxbpm, step, width): plt.show() return (optimal, optimal_acc) -''' \ No newline at end of file +''' + + + +''' +def void_freq(song_name, offset, songlen, increment, lthr, gthr): + to_cut = 20000//2500 + global_Zxx = np.array([]) + global_f = np.array([]) + global_t = np.array([]) + current_time = offset + k = 0 + sample_rate, global_data = wavfile.read(song_name) + blit = int(sample_rate*increment) + print("Blit :", blit) + while(current_time <= songlen): + #subprocess.run(["ffmpeg", "-ss", str(current_time), "-t", str(increment), "-i", song_name, "crop.wav"]) + + #sample_rate, audio_data = wavfile.read('crop.wav') + audio_data = global_data[int(k*blit):int((k+1)*blit)] + size = audio_data.size + + #subprocess.run(["clear"]) + #subprocess.run(["rm", "crop.wav"]) + + # do stuff here + #f, t, Zxx = signal.stft(audio_data, sample_rate, nperseg=1000) + f, t, Zxx = signal.spectrogram(audio_data, fs=sample_rate, nfft=size) + leng = len(f) + + f, Zxx = f[:leng//to_cut], Zxx[:leng//to_cut] + + for i in range(len(Zxx)): + for j in range(len(Zxx[i])): + #Zxx[i][j] *= 1127*np.log(1+f[i]/700) + Zxx[i][j] *= 1000 + + t = np.array([current_time + x for x in t]) + + if(k == 0): + global_f = f + global_t = t + global_Zxx = Zxx + else: + global_Zxx = np.concatenate((global_Zxx, Zxx), axis=1) + global_t = np.concatenate((global_t, t)) + + #print(len(global_t)) + + k += 1 + current_time = offset + k*increment + + print("Completion rate : ", np.round(100*(current_time-offset)/(songlen-offset), 4), "%") + + print("Finding global max...") + gmax = 0 + for i in range(len(global_Zxx)): + for j in range(len(global_Zxx[i])): + if(global_Zxx[i][j] > gmax): + gmax = global_Zxx[i][j] + + print("Trimming...") + for j in 
range(len(global_Zxx[0])): + lmax = 0 + for i in range(len(global_Zxx)): + if(global_Zxx[i][j] > lmax): + lmax = global_Zxx[i][j] + + for i in range(len(global_Zxx)): + val = global_Zxx[i][j] + if(val/lmax <= lthr/100): + global_Zxx[i][j] = 0 + elif(val/gmax <= gthr/100): + global_Zxx[i][j] = 0 + + if(False): + print("Plotting...") + plt.pcolormesh(global_t, global_f, np.abs(global_Zxx), shading='gouraud') + # print(len(global_Zxx), len(global_Zxx[0])) + print("XLEN :", len(global_Zxx), "\nYLEN :", len(global_Zxx[0])) + plt.title('STFT Magnitude') + plt.ylabel('Frequency [Hz]') + plt.xlabel('Time [sec]') + plt.show() + + if(True): + print("Converting...") + audio_signal = librosa.griffinlim(global_Zxx) + #scipy.io.wavfile.write('trimmed.wav', sample_rate, np.array(audio_signal, dtype=np.int16)) + wavfile.write('test.wav', sample_rate, np.array(audio_signal, dtype=np.int16)) + + print("Done") +''' diff --git a/tetris_4.wav b/tetris_4.wav new file mode 100755 index 0000000..2cf6bae Binary files /dev/null and b/tetris_4.wav differ