def _load_mono(song_name):
    """Read the WAV file ``song_name`` and return ``(sample_rate, samples)``.

    When ``is_data_stereo`` reports stereo data, the first two channels are
    averaged (``left/2 + right/2``); otherwise the samples are returned as
    read.
    """
    sample_rate, raw_song_data = wavfile.read(song_name)

    if is_data_stereo(raw_song_data):
        print("Converting to mono...")
        # Vectorised down-mix: same values as a per-sample loop, without the
        # progress counter that divided by zero on inputs < 100 samples.
        stereo = np.asarray(raw_song_data)
        song_data = stereo[:, 0]/2 + stereo[:, 1]/2
    else:
        song_data = raw_song_data

    return sample_rate, song_data


def _segment_ffts(song_name, offset, songlen, segsize):
    """Load a song and compute the real FFT of consecutive time windows.

    Windows are ``segsize`` seconds long, start at ``offset`` seconds and
    stop once the next window would start at or after
    ``offset + songlen - segsize``.

    Returns ``(sample_rate, blit, pfreq, fft_list)`` where ``blit`` is the
    number of samples per window, ``pfreq`` holds the frequency of each FFT
    bin and ``fft_list[i]`` is the spectrum of the window starting at
    ``offset + i*segsize``.

    Raises:
        ValueError: if ``segsize`` is shorter than one sample.
    """
    sample_rate, song_data = _load_mono(song_name)
    blit = int(sample_rate*segsize)
    if blit < 1:
        raise ValueError("segsize is shorter than one sample at this sample rate")

    print("\nSampleRate : ", sample_rate)
    print("SegSize : ", blit)

    # frequencies associated with the FFT bins
    pfreq = scp.fft.rfftfreq(blit, 1/sample_rate)

    fft_list = []
    total_samples = len(song_data)
    k = 0
    current_time = offset

    print("Retrieving freqs from", offset, "to", songlen+offset, "...")
    while current_time < songlen+offset-segsize:
        left_id = int(current_time*sample_rate)
        if left_id >= total_samples:
            # The requested range runs past the end of the file.
            break

        # Always transform exactly ``blit`` samples (zero-padded at the end
        # of the file) so every spectrum lines up with ``pfreq`` and with the
        # ``irfft(..., n=blit)`` used for resynthesis.
        fft_list.append(scp.fft.rfft(song_data[left_id:left_id+blit], n=blit))

        # Recompute from k instead of accumulating, to avoid float drift
        # (the 0.1 + 0.1 + 0.1 != 0.3 problem).
        k += 1
        current_time = offset + k*segsize

    first_len = len(fft_list[0]) if fft_list else 0
    print("\n\nSegSize :", segsize, "\nFFT :", len(fft_list), "\nFFT[0] :", first_len, "\npfreq :", len(pfreq), "\n\n")

    return sample_rate, blit, pfreq, fft_list


def write_result(song_name, offset, songlen, segsize, timing_pts, output_name, write_freq=880):
    """Resynthesise part of a song with a marker tone mixed in.

    The song is cut into ``segsize``-second windows starting at ``offset``
    seconds and covering ``songlen`` seconds. In every window the first FFT
    bin whose frequency is above ``write_freq`` is overwritten with
    ``max(2 * dominant amplitude, 10000)``. The windows are then transformed
    back, concatenated, normalised to the int16 range and written to
    ``output_name``.

    Args:
        song_name: path of the input WAV file.
        offset: start of the processed range, in seconds.
        songlen: length of the processed range, in seconds.
        segsize: window length, in seconds.
        timing_pts: list of timing points. It is neither read nor modified.
        output_name: path of the WAV file to write.
        write_freq: frequency (Hz) of the injected tone.

    Raises:
        ValueError: if no window fits in the requested range, or if
            ``write_freq`` is not below the highest FFT bin frequency.
    """
    # NOTE(review): the previous code advanced an index through ``timing_pts``
    # (after appending a 999999 sentinel to the caller's list) but never used
    # it, so the tone has always been written into every window. That dead
    # bookkeeping is removed; gating the tone on the timing points is
    # presumably the intended next step - confirm before changing.
    sample_rate, blit, pfreq, fft_list = _segment_ffts(song_name, offset, songlen, segsize)
    if not fft_list:
        raise ValueError("no segment fits in the requested range")

    # first bin strictly above write_freq (pfreq is sorted ascending)
    write_id = int(np.searchsorted(pfreq, write_freq, side="right"))
    if write_id >= len(pfreq):
        raise ValueError("write_freq is above the highest frequency of the FFT")

    for spectrum in fft_list:
        # dominant amplitude of the untouched window
        maxamp = np.abs(spectrum).max()
        spectrum[write_id] = max(maxamp*2, 10000)

    # writing new .wav
    print("Converting...")
    res = np.concatenate([scp.fft.irfft(spectrum, n=blit) for spectrum in fft_list])

    # Normalise by the absolute peak so negative samples cannot overflow
    # int16, and leave pure silence untouched instead of dividing by zero.
    peak = np.abs(res).max()
    if peak > 0:
        res = 32767*res/peak

    wavfile.write(output_name, sample_rate, res.astype(np.int16))


def parse_after_filter(song_name, offset, songlen, segsize, dt0, amp_threshold=100):
    """Detect timing points in a filtered song from its dominant amplitudes.

    The song is cut into ``segsize``-second windows starting at ``offset``
    seconds and covering ``songlen`` seconds. A window is "loud" when the
    largest FFT magnitude in it exceeds ``amp_threshold``. At most two
    timing points are recorded per run of consecutive loud windows; a quiet
    window re-arms the detector.

    Args:
        song_name: path of the input WAV file.
        offset: start of the analysed range, in seconds.
        songlen: length of the analysed range, in seconds.
        segsize: window length, in seconds.
        dt0: value added to every detected time ``i*segsize``.
        amp_threshold: magnitude above which a window counts as loud.

    Returns:
        List of ``i*segsize + dt0`` for every window ``i`` that was recorded
        (empty when no window fits in the requested range).
    """
    _, _, _, fft_list = _segment_ffts(song_name, offset, songlen, segsize)

    timing_points = []
    prev_append = 2  # how many more points may be recorded in the current loud run

    for i, spectrum in enumerate(fft_list):
        maxamp = np.abs(spectrum).max()
        print("dominant at", i, ":", maxamp)

        # NOTE(review): the nesting below was reconstructed from a
        # whitespace-collapsed diff; the `else` is taken to pair with the
        # amplitude test - confirm against the real file.
        if maxamp > amp_threshold:
            if prev_append > 0:
                timing_points.append(i*segsize + dt0)
                prev_append -= 1
        else:
            prev_append = 2

    print(timing_points)
    return timing_points
# ---- parameters of the tetris_2 run ----
SONG_LEN = 5             # seconds of audio handed to the first stage
OFFSET = 0.042           # seconds; start offset in the original song
SEGSIZE = 1/(149.3/60)   # base segment length in seconds (presumably one beat at 149.3 BPM)

# The later stages run on a slightly shorter range and on finer segments.
analysed_len = SONG_LEN-0.01
fine_seg = SEGSIZE/16

# Stage 1: drop the very low / very high frequencies (often noise).
source_wav = convert_to_wav("tetris_2.wav")
void_freq_clean(source_wav, OFFSET, SONG_LEN, SEGSIZE/8, 100, 3000, 0.05, "tetris_2_void.wav")

# Stage 2: crop the parts where notes are left ringing.
void_wav = convert_to_wav("tetris_2_void.wav")
localize_frequencies(void_wav, 0, analysed_len, fine_seg, "tetris_2_f.wav")

# Stage 3: find the timing points in the filtered audio.
tp = parse_after_filter("tetris_2_f.wav", 0, analysed_len, fine_seg, OFFSET)

# Stage 4: write the result on top of the original song.
write_result("tetris_2.wav", OFFSET, analysed_len, fine_seg, tp, "tetris_2_result.wav")

#retrieve_all_from_song("tetris_2_f.wav", 0, 5, 149.3, dtf=1/(149.3/60)/8)

print("yipee")