diff --git a/Zblit.wav b/Zblit.wav index 568fc0c..a48b7bb 100644 Binary files a/Zblit.wav and b/Zblit.wav differ diff --git a/audio.wav b/audio.wav index 3598027..87f7c58 100644 Binary files a/audio.wav and b/audio.wav differ diff --git a/cleaned_sp.py b/cleaned_sp.py index 855f910..f675e68 100644 --- a/cleaned_sp.py +++ b/cleaned_sp.py @@ -20,6 +20,317 @@ def is_data_stereo(raw_global_data:list) -> bool: return True return True +def is_note_within(fr1, fr2): + if(fr1 > fr2): + return (fr1/fr2 <= NOTE_DIST) + else: + return (fr2/fr1 <= NOTE_DIST) + +def keep_highest(song_name, offset, songlen, segsize, count, output_name, minfreq=110, maxfreq=5000, ampthr=250): + # extracting data from cropped song + sample_rate, raw_song_data = wavfile.read(song_name) + blit = int(sample_rate*segsize) # Te + + song_data = [0 for i in range(len(raw_song_data))] + + id_start = int(offset*sample_rate) + id_end = min(len(raw_song_data), int((offset+songlen)*sample_rate)) + + a = 0 + if(is_data_stereo(raw_song_data)): + print("Converting to mono...") + for x in range(id_start, id_end): + song_data[x] = raw_song_data[x][0]/2 + raw_song_data[x][1]/2 + + if(x % (int(len(raw_song_data)/100)) == 0): + print(a, "/ 100") + a += 1 + else: + song_data = raw_song_data + + print("\nSampleRate : ", sample_rate) + print("SegSize : ", blit) + + # calculate the frequencies associated to the FFTs + pfreq = scp.fft.rfftfreq(blit, 1/sample_rate) + + # left boundary of segment to crop + current_time = offset + + # list of FFTs + fft_list = [] + fft_list_untouched = [] + + # number of samples + k = 0 + + print("Retrieving freqs from", offset, "to", songlen+offset, "...") + while(current_time < songlen+offset-segsize): + # index corresponding to left boundary + left_id = int(current_time*sample_rate) + + # index corresponding to right boundary + right_id = int((current_time+segsize)*sample_rate) + + # calculate the fft, append it to fft_list + pff = scp.fft.rfft(song_data[int(current_time*sample_rate):int(sample_rate*(current_time+segsize))]) + fft_list.append(pff) + fft_list_untouched.append([ee for ee in pff]) + + # just to avoid what causes 0.1 + 0.1 == 0.2 to be False + k += 1 + current_time = offset + k*segsize + #print(current_time) + + print("\n\nSegSize :", segsize, "\nFFT :", len(fft_list), "\nFFT[0] :", len(fft_list[0]), "\npfreq :", len(pfreq), "\n\n") + + # -------------------------------------------- Clean song -------------------------------------------- # + pfreq_minid = 0 + pfreq_maxid = len(pfreq) -1 + while(pfreq[pfreq_minid] < minfreq): + for t in range(len(fft_list)): + fft_list[t][pfreq_minid] = 0+0j + pfreq_minid += 1 + + while(pfreq[pfreq_maxid] > maxfreq): + for t in range(len(fft_list)): + fft_list[t][pfreq_maxid] = 0+0j + pfreq_maxid -= 1 + + new_times = [] + new_freqs = [] + new_ampls = [] + new_kept = [] + + # i = time, j = freq + for i in range(len(fft_list)): + #returns a list of couples [id, value] + elements = heapq.nlargest(count, enumerate(fft_list[i]), key=lambda x: x[1]) + + for idx in range(len(elements)): + if(elements[idx][0] < len(pfreq)): + new_times.append(offset + i*segsize) + new_freqs.append(pfreq[elements[idx][0]]) + new_ampls.append(fft_list[i][elements[idx][0]]) + + '''for i in range(len(new_freqs)): + while(new_freqs[i]>1000): + new_freqs[i] = new_freqs[i]/2''' + + # -------------------------------------------- Localize -------------------------------------------- # + + timing_points = [] + last_freq = new_freqs[0] + for i in range(len(new_times)): + if(np.abs(new_ampls[i]) > ampthr and (i == 0 or not is_note_within(new_freqs[i], last_freq))): + last_freq = new_freqs[i] + timing_points.append(new_times[i]) + new_kept.append(new_freqs[i]) + else: + new_kept.append(0) + + mx = max(new_ampls) + + plt.plot(new_times, new_freqs) + plt.plot(new_times, [elt*1000/mx for elt in new_ampls]) + plt.plot(new_times, new_kept, "ro") + plt.grid() + plt.show() + # -------------------------------------------- Write -------------------------------------------- # + i0 = 0 + timing_points.append(999999) + + write_freq = 880 + write_cur = 0 + write_id = -1 + while(write_cur <= write_freq): # shouldnt seg fault + write_id += 1 + write_cur = pfreq[write_id] + + + # remove + # i = time, j = freq + for i in range(len(fft_list)): + # retrieve dominant freq + if(segsize*i >= timing_points[i0]-offset): + i0 += 1 + + maxfreq = 0 + maxfreqid = 0 + maxamp = 0 + for j in range(len(fft_list[0])): + if(np.abs(fft_list[i][j]) > maxamp): + maxamp = np.abs(fft_list[i][j]) + maxfreq = pfreq[j] + maxfreqid = j + + fft_list_untouched[i][write_id] = max(maxamp*2, 32767) + fft_list_untouched[i][write_id-1] = max(maxamp*2, 32767) + fft_list_untouched[i][write_id+1] = max(maxamp*2, 32767) + + res = [] + print("Converting...") + for i in range(len(fft_list_untouched)): + ift = scp.fft.irfft(fft_list_untouched[i], n=blit) + for k in ift: + res.append(k) + #print(type(res[0])) + mx = 0 + for j in range(len(res)): + if(res[j] > mx): + mx = res[j] + + for i in range(len(res)): + res[i] = np.int16(32767*res[i]/mx) + + res = np.array(res) + wavfile.write(output_name, sample_rate, res) + +def convert_to_wav(song_name:str, output_file="audio.wav") -> str: + """ + Converts the song to .wav, only if it's not already in wave format. + Currently relies on file extension. + Returns: the song_name that should be used afterwards. + """ + extension = Path(song_name).suffix + match extension: + case ".mp3" | ".ogg": + print("Converting to .wav...") + subprocess.run(["ffmpeg", "-y", "-i", song_name, output_file], shell=False) + return output_file + return song_name + +def retrieve_all_from_song(filename, t0, t1, bpm, dta=0.001, dtf=0.01, threshold=0.06, show=True): + # dt = sample interval + # threshold is in percent + + if(t1 <= t0): + print("ERROR : t1 <= t0\n") + exit(1) + + # converts format to .wav + new_fn = convert_to_wav(filename) + + print("Filtering song...") + #void_freq_clean(new_fn, t0, t1, dtf, 20, 20000, 0.05, "crop1.wav") + #def void_freq_clean(song_name, offset, songlen, segsize, minfreq, maxfreq, ampthr, output_name): + + print("Now retrieving the frequencies") + (maxlist, maxamps) = retrieve_dominant_freqs(new_fn, t0, t1, dtf) + #def retrieve_dominant_freqs(song_name, offset, songlen, segsize): + + print("Now retrieving the amplitudes") + amps = retrieve_dominant_amps(new_fn, t0, t1, dta, threshold, (4/(bpm/60))/4) + + print("Len of freqs : ", len(maxlist), "|", len(maxamps)) + print("Len of amps : ", len(maxlist), "|", len(amps)) + + maxa = amps[0] + for jj in amps: + if(jj > maxa): + maxa = jj + + for i in range(len(amps)): + amps[i] = (amps[i] * 2000) / maxa + + if(show): + timesF = [t0 + dtf*k for k in range(len(maxlist))] + timesA = [t0 + dta*k for k in range(len(amps))] + + plt.plot(timesA, amps) + plt.plot(timesF, maxlist) + plt.show() + + # free() +''' +# c-type +SONG_LEN = 8 +OFFSET = 0.042 +BPM = 149.3 +SEGSIZE = 1/(BPM/60) +wavved_song = convert_to_wav("songs/ctype.mp3") +''' +''' +# tetris_2 +SONG_LEN = 8 +OFFSET = 0 +BPM = 157 +SEGSIZE = 1/(BPM/60) +''' +''' +# test +SONG_LEN = 1 +OFFSET = 0 +BPM = 240 +SEGSIZE = 1/(BPM/60) +''' + +# gmtn +SONG_LEN = 3 +OFFSET = 1.652 +BPM = 155 +SEGSIZE = 1/(BPM/60) +wavved_song = convert_to_wav("songs/furioso melodia.mp3") + +''' +# E +SONG_LEN = 10 +OFFSET = 2.641 +BPM = 155 +SEGSIZE = 1/(BPM/60) +wavved_song = convert_to_wav("songs/rushe.mp3") +''' +''' +# Tsubaki +SONG_LEN = 10 +OFFSET = 35.659 +BPM = 199 +SEGSIZE = 1/(BPM/60) +wavved_song = convert_to_wav("songs/TSUBAKI.mp3") +''' +''' +# death +SONG_LEN = 8 +OFFSET = 21.750 +BPM = 180 +SEGSIZE = 1/(BPM/60) +wavved_song = convert_to_wav("songs/Night of Knights.mp3") +''' + +#wavved_song = convert_to_wav("tetris_2.wav") + +NOTE_DIST = (2**(1/4)) +keep_highest(wavved_song, OFFSET, SONG_LEN, SEGSIZE/4, 1, "Zblit.wav", minfreq=300, maxfreq=3000, ampthr=500) + +print("yipee") + + + + + + + + + + + + + + + + + + + + + + + + + + +''' + def retrieve_dominant_freqs(song_name, offset, songlen, segsize): # returns a list with peak frequencies alongside the sample rate # /!\ song_name is specified to be a list, NOT a list of couples (aka song is mono) @@ -324,166 +635,6 @@ def localize_frequencies(song_name, offset, songlen, segsize, output_name): res = np.array(res) wavfile.write(output_name, sample_rate, res) -NOTE_DIST = (2**(1/12)) -def is_note_within(fr1, fr2): - if(fr1 > fr2): - return (fr1/fr2 <= NOTE_DIST) - else: - return (fr2/fr1 <= NOTE_DIST) - -def keep_highest(song_name, offset, songlen, segsize, count, output_name, minfreq=110, maxfreq=5000): - # extracting data from cropped song - sample_rate, raw_song_data = wavfile.read(song_name) - blit = int(sample_rate*segsize) # Te - - song_data = [0 for i in range(len(raw_song_data))] - - id_start = int(offset*sample_rate) - id_end = min(len(raw_song_data), int((offset+songlen)*sample_rate)) - - a = 0 - if(is_data_stereo(raw_song_data)): - print("Converting to mono...") - for x in range(id_start, id_end): - song_data[x] = raw_song_data[x][0]/2 + raw_song_data[x][1]/2 - - if(x % (int(len(raw_song_data)/100)) == 0): - print(a, "/ 100") - a += 1 - else: - song_data = raw_song_data - - print("\nSampleRate : ", sample_rate) - print("SegSize : ", blit) - - # calculate the frequencies associated to the FFTs - pfreq = scp.fft.rfftfreq(blit, 1/sample_rate) - - # left boundary of segment to crop - current_time = offset - - # list of FFTs - fft_list = [] - - # number of samples - k = 0 - - print("Retrieving freqs from", offset, "to", songlen+offset, "...") - while(current_time < songlen+offset-segsize): - # index corresponding to left boundary - left_id = int(current_time*sample_rate) - - # index corresponding to right boundary - right_id = int((current_time+segsize)*sample_rate) - - # calculate the fft, append it to fft_list - pff = scp.fft.rfft(song_data[int(current_time*sample_rate):int(sample_rate*(current_time+segsize))]) - fft_list.append(pff) - - # just to avoid what causes 0.1 + 0.1 == 0.2 to be False - k += 1 - current_time = offset + k*segsize - #print(current_time) - - print("\n\nSegSize :", segsize, "\nFFT :", len(fft_list), "\nFFT[0] :", len(fft_list[0]), "\npfreq :", len(pfreq), "\n\n") - - # -------------------------------------------- Clean song -------------------------------------------- # - pfreq_minid = 0 - pfreq_maxid = len(pfreq) -1 - while(pfreq[pfreq_minid] < minfreq): - for t in range(len(fft_list)): - fft_list[t][pfreq_minid] = 0+0j - pfreq_minid += 1 - - while(pfreq[pfreq_maxid] > maxfreq): - for t in range(len(fft_list)): - fft_list[t][pfreq_maxid] = 0+0j - pfreq_maxid -= 1 - - new_times = [] - new_freqs = [] - new_ampls = [] - new_kept = [] - - # i = time, j = freq - for i in range(len(fft_list)): - #returns a list of couples [id, value] - elements = heapq.nlargest(count, enumerate(fft_list[i]), key=lambda x: x[1]) - - for idx in range(len(elements)): - if(elements[idx][0] < len(pfreq)): - new_times.append(offset + i*segsize) - new_freqs.append(pfreq[elements[idx][0]]) - new_ampls.append(fft_list[i][elements[idx][0]]) - - '''for i in range(len(new_freqs)): - while(new_freqs[i]>1000): - new_freqs[i] = new_freqs[i]/2''' - - # -------------------------------------------- Localize -------------------------------------------- # - - timing_points = [] - for i in range(len(new_times)): - if(i == 0 or not is_note_within(new_freqs[i], new_freqs[i-1])): - timing_points.append(new_times[i]) - new_kept.append(new_freqs[i]) - else: - new_kept.append(0) - - plt.plot(new_times, new_freqs) - plt.plot(new_times, new_kept, "ro") - plt.grid() - plt.show() - # -------------------------------------------- Write -------------------------------------------- # - i0 = 0 - timing_points.append(999999) - - write_freq = 880 - write_cur = 0 - write_id = -1 - while(write_cur <= write_freq): # shouldnt seg fault - write_id += 1 - write_cur = pfreq[write_id] - - - # remove - # i = time, j = freq - for i in range(len(fft_list)): - # retrieve dominant freq - if(segsize*i >= timing_points[i0]-offset): - i0 += 1 - - maxfreq = 0 - maxfreqid = 0 - maxamp = 0 - for j in range(len(fft_list[0])): - if(np.abs(fft_list[i][j]) > maxamp): - maxamp = np.abs(fft_list[i][j]) - maxfreq = pfreq[j] - maxfreqid = j - - fft_list[i][write_id] = max(maxamp*2, 32767) - fft_list[i][write_id-1] = max(maxamp*2, 32767) - fft_list[i][write_id+1] = max(maxamp*2, 32767) - - res = [] - print("Converting...") - for i in range(len(fft_list)): - ift = scp.fft.irfft(fft_list[i], n=blit) - for k in ift: - res.append(k) - #print(type(res[0])) - mx = 0 - for j in range(len(res)): - if(res[j] > mx): - mx = res[j] - - for i in range(len(res)): - res[i] = np.int16(32767*res[i]/mx) - - res = np.array(res) - wavfile.write(output_name, sample_rate, res) - def write_result(song_name, offset, songlen, segsize, timing_pts, output_name): # removes unnecessary frequencies/amps from a song # ampthr is in [0, 1] @@ -751,103 +902,6 @@ def parse_after_filter(song_name, offset, songlen, segsize, dt0): print(timing_points) return timing_points -def convert_to_wav(song_name:str, output_file="audio.wav") -> str: - """ - Converts the song to .wav, only if it's not already in wave format. - Currently relies on file extension. - Returns: the song_name that should be used afterwards. - """ - extension = Path(song_name).suffix - match extension: - case ".mp3" | ".ogg": - print("Converting to .wav...") - subprocess.run(["ffmpeg", "-y", "-i", song_name, output_file], shell=False) - return output_file - return song_name - -def retrieve_all_from_song(filename, t0, t1, bpm, dta=0.001, dtf=0.01, threshold=0.06, show=True): - # dt = sample interval - # threshold is in percent - - if(t1 <= t0): - print("ERROR : t1 <= t0\n") - exit(1) - - # converts format to .wav - new_fn = convert_to_wav(filename) - - print("Filtering song...") - #void_freq_clean(new_fn, t0, t1, dtf, 20, 20000, 0.05, "crop1.wav") - #def void_freq_clean(song_name, offset, songlen, segsize, minfreq, maxfreq, ampthr, output_name): - - print("Now retrieving the frequencies") - (maxlist, maxamps) = retrieve_dominant_freqs(new_fn, t0, t1, dtf) - #def retrieve_dominant_freqs(song_name, offset, songlen, segsize): - - print("Now retrieving the amplitudes") - amps = retrieve_dominant_amps(new_fn, t0, t1, dta, threshold, (4/(bpm/60))/4) - - print("Len of freqs : ", len(maxlist), "|", len(maxamps)) - print("Len of amps : ", len(maxlist), "|", len(amps)) - - maxa = amps[0] - for jj in amps: - if(jj > maxa): - maxa = jj - - for i in range(len(amps)): - amps[i] = (amps[i] * 2000) / maxa - - if(show): - timesF = [t0 + dtf*k for k in range(len(maxlist))] - timesA = [t0 + dta*k for k in range(len(amps))] - - plt.plot(timesA, amps) - plt.plot(timesF, maxlist) - plt.show() - - # free() -''' -# c-type -SONG_LEN = 5 -OFFSET = 0.042 -BPM = 149.3 -SEGSIZE = 1/(BPM/60) -''' -''' -# tetris_2 -SONG_LEN = 8 -OFFSET = 0 -BPM = 157 -SEGSIZE = 1/(BPM/60) -''' -''' -# test -SONG_LEN = 1 -OFFSET = 0 -BPM = 240 -SEGSIZE = 1/(BPM/60) -''' -''' -# gmtn -SONG_LEN = 3 -OFFSET = 1.652 -BPM = 155 -SEGSIZE = 1/(BPM/60) -''' -# E -SONG_LEN = 10 -OFFSET = 2.641 -BPM = 155 -SEGSIZE = 1/(BPM/60) - -#wavved_song = convert_to_wav("tetris_2.wav") -wavved_song = convert_to_wav("songs/rushe.mp3") - -keep_highest(wavved_song, OFFSET, SONG_LEN, SEGSIZE/4, 1, "Zblit.wav", minfreq=300, maxfreq=3000) - - -''' # remove high/low frequencies (often noise) #void_freq_clean(wavved_song, OFFSET, SONG_LEN, SEGSIZE/8, 100, 3000, 0.05, "Zvoided_song.wav") @@ -863,9 +917,6 @@ write_result(wavved_song, OFFSET, SONG_LEN-0.1, SEGSIZE/8, tp, "Zoutput_song.wav #retrieve_all_from_song("Zcleaned_song.wav", 0, 5, 149.3, dtf=1/(149.3/60)/8) ''' -print("yipee") - - diff --git a/songs/TSUBAKI.mp3 b/songs/TSUBAKI.mp3 new file mode 100644 index 0000000..8b18af1 Binary files /dev/null and b/songs/TSUBAKI.mp3 differ