Added parsing function and write function

2024-11-25 17:52:25 +01:00 · 2024-11-25 17:52:25 +01:00 · 02c5579186
parent c969501c52
commit 02c5579186
5 changed files with 199 additions and 3 deletions
--- a/cleaned_sp.py
+++ b/cleaned_sp.py
@ -324,6 +324,111 @@ def localize_frequencies(song_name, offset, songlen, segsize, output_name):
    res = np.array(res)
    wavfile.write(output_name, sample_rate, res)

+def write_result(song_name, offset, songlen, segsize, timing_pts, output_name):
+    # removes unnecessary frequencies/amps from a song
+    # ampthr is in [0, 1]
+
+    # extracting data from cropped song
+    sample_rate, raw_song_data = wavfile.read(song_name)
+    blit = int(sample_rate*segsize) # Te
+
+    song_data = [0 for i in range(len(raw_song_data))]
+
+    a = 0
+    if(is_data_stereo(raw_song_data)):
+        print("Converting to mono...")
+        for x in range(len(raw_song_data)):
+            song_data[x] = raw_song_data[x][0]/2 + raw_song_data[x][1]/2
+
+            if(x % (int(len(raw_song_data)/100)) == 0):
+                print(a, "/ 100")
+                a += 1
+    else:
+        song_data = raw_song_data
+
+    print("\nSampleRate : ", sample_rate)
+    print("SegSize : ", blit)
+    
+    # calculate the frequencies associated to the FFTs
+    pfreq = scp.fft.rfftfreq(blit, 1/sample_rate)
+
+    # left boundary of segment to crop
+    current_time = offset
+
+    # list of FFTs
+    fft_list = []
+    
+    # number of samples
+    k = 0
+
+    print("Retrieving freqs from", offset, "to", songlen+offset, "...")
+    while(current_time < songlen+offset-segsize):
+        # index corresponding to left boundary
+        left_id = int(current_time*sample_rate)
+        
+        # index corresponding to right boundary
+        right_id = int((current_time+segsize)*sample_rate)
+        
+        # calculate the fft, append it to fft_list
+        pff = scp.fft.rfft(song_data[int(current_time*sample_rate):int(sample_rate*(current_time+segsize))])
+        fft_list.append(pff)
+
+        # just to avoid what causes 0.1 + 0.1 == 0.2 to be False
+        k += 1
+        current_time = offset + k*segsize
+        #print(current_time)
+
+    print("\n\nSegSize :", segsize, "\nFFT :", len(fft_list), "\nFFT[0] :", len(fft_list[0]), "\npfreq :", len(pfreq), "\n\n")
+
+    i0 = 0
+    timing_pts.append(999999)
+
+    write_freq = 880
+    write_cur = 0
+    write_id = -1
+    while(write_cur <= write_freq): # shouldnt seg fault
+        write_id += 1
+        write_cur = pfreq[write_id]
+    
+    
+    # remove
+    # i = time, j = freq
+    for i in range(len(fft_list)):
+        # retrieve dominant freq
+        if(segsize*i >= timing_pts[i0]):
+            i0 += 1
+
+            maxfreq = 0
+            maxfreqid = 0
+            maxamp = 0
+            for j in range(len(fft_list[i])):
+                if(np.abs(fft_list[i][j]) > maxamp):
+                    maxamp = np.abs(fft_list[i][j])
+                    maxfreq = pfreq[j]
+                    maxfreqid = j
+
+            fft_list[i][write_id] = max(maxamp*2, 10000)
+
+        
+    # writing new .wav
+    res = []
+    print("Converting...")
+    for i in range(len(fft_list)):
+        ift = scp.fft.irfft(fft_list[i], n=blit)
+        for k in ift:
+            res.append(k)
+    #print(type(res[0]))
+    mx = 0
+    for j in range(len(res)):
+        if(res[j] > mx):
+            mx = res[j]
+
+    for i in range(len(res)):
+        res[i] = np.int16(32767*res[i]/mx)
+
+    res = np.array(res)
+    wavfile.write(output_name, sample_rate, res)
+
 def retrieve_dominant_amps(song_name, offset, songlen, segsize, percent, divlen):
    # returns a list with the percent% peak amplitudes alongside the sample rate
    # /!\ song_name is specified to be a list, NOT a list of couples (aka song is mono)
@ -407,6 +512,83 @@ def retrieve_dominant_amps(song_name, offset, songlen, segsize, percent, divlen)
    # res[i] corresponds to time (offset + i*segsize)
    return res

+def parse_after_filter(song_name, offset, songlen, segsize, dt0):
+    sample_rate, raw_song_data = wavfile.read(song_name)
+    blit = int(sample_rate*segsize) # Te
+
+    song_data = [0 for i in range(len(raw_song_data))]
+
+    a = 0
+    if(is_data_stereo(raw_song_data)):
+        print("Converting to mono...")
+        for x in range(len(raw_song_data)):
+            song_data[x] = raw_song_data[x][0]/2 + raw_song_data[x][1]/2
+
+            if(x % (int(len(raw_song_data)/100)) == 0):
+                print(a, "/ 100")
+                a += 1
+    else:
+        song_data = raw_song_data
+
+    print("\nSampleRate : ", sample_rate)
+    print("SegSize : ", blit)
+    
+    # calculate the frequencies associated to the FFTs
+    pfreq = scp.fft.rfftfreq(blit, 1/sample_rate)
+
+    # left boundary of segment to crop
+    current_time = offset
+
+    # list of FFTs
+    fft_list = []
+    
+    # number of samples
+    k = 0
+
+    print("Retrieving freqs from", offset, "to", songlen+offset, "...")
+    while(current_time < songlen+offset-segsize):
+        # index corresponding to left boundary
+        left_id = int(current_time*sample_rate)
+        
+        # index corresponding to right boundary
+        right_id = int((current_time+segsize)*sample_rate)
+        
+        # calculate the fft, append it to fft_list
+        pff = scp.fft.rfft(song_data[int(current_time*sample_rate):int(sample_rate*(current_time+segsize))])
+        fft_list.append(pff)
+
+        # just to avoid what causes 0.1 + 0.1 == 0.2 to be False
+        k += 1
+        current_time = offset + k*segsize
+        #print(current_time)
+
+    print("\n\nSegSize :", segsize, "\nFFT :", len(fft_list), "\nFFT[0] :", len(fft_list[0]), "\npfreq :", len(pfreq), "\n\n")
+
+    timing_points = []
+    prev_append = 2
+
+    for i in range(len(fft_list)):
+        # retrieve dominant freq
+        maxfreq = 0
+        maxfreqid = 0
+        maxamp = 0
+        for j in range(len(fft_list[i])):
+            if(np.abs(fft_list[i][j]) > maxamp):
+                maxamp = np.abs(fft_list[i][j])
+                maxfreq = pfreq[j]
+                maxfreqid = j
+
+        print("dominant at", i, ":" , maxamp)
+        if(maxamp > 100):
+            if(prev_append > 0):
+                timing_points.append(i*segsize + dt0)
+                prev_append -= 1
+        else:
+            prev_append = 2
+
+    print(timing_points)
+    return timing_points
+
 def convert_to_wav(song_name:str, output_file="audio.wav") -> str:
    """
    Converts the song to .wav, only if it's not already in wave format.
@ -469,9 +651,23 @@ void_freq_clean(convert_to_wav("ctype.mp3"), 0.042, 5, 1/(149.3/60)/8, 100, 3000
 localize_frequencies(convert_to_wav("ctype_void.mp3"), 0, 5, 1/(149.3/60)/12, "ctype_filtered.mp3")
 retrieve_all_from_song("ctype_filtered.mp3", 0, 5, 149.3, dta=1/(149.3/60)/128, dtf=1/(149.3/60)/8)
 '''
-void_freq_clean(convert_to_wav("tetris_2.wav"), 0.042, 5, 1/(149.3/60)/8, 100, 3000, 0.05, "tetris_2_void.wav")
-localize_frequencies(convert_to_wav("tetris_2_void.wav"), 0, 5, 1/(149.3/60)/12, "tetris_2_f.wav")
-retrieve_all_from_song("tetris_2_f.wav", 0, 5, 149.3, dtf=1/(149.3/60)/8)
+SONG_LEN = 5  # length of the processed excerpt, in seconds (passed as songlen)
+OFFSET = 0.042  # start of the excerpt in seconds; also passed as dt0 to parse_after_filter
+SEGSIZE = 1/(149.3/60)  # presumably the duration of one beat at 149.3 BPM, in seconds -- TODO confirm
+
+# remove high/low frequencies (often noise); void_freq_clean is defined outside this diff
+void_freq_clean(convert_to_wav("tetris_2.wav"), OFFSET, SONG_LEN, SEGSIZE/8, 100, 3000, 0.05, "tetris_2_void.wav")
+
+# presumably crops the sustained ("let ring") parts; localize_frequencies is only partly visible here
+localize_frequencies(convert_to_wav("tetris_2_void.wav"), 0, SONG_LEN-0.01, SEGSIZE/16, "tetris_2_f.wav")
+
+# find timings: times of the segments with a loud dominant frequency, shifted by OFFSET
+tp = parse_after_filter("tetris_2_f.wav", 0, SONG_LEN-0.01, SEGSIZE/16, OFFSET)
+
+# write a copy of the original song with a marker tone injected at the detected timings
+write_result("tetris_2.wav", OFFSET, SONG_LEN-0.01, SEGSIZE/16, tp, "tetris_2_result.wav")
+
+#retrieve_all_from_song("tetris_2_f.wav", 0, 5, 149.3, dtf=1/(149.3/60)/8)
 print("yipee")


--- a/crop.wav
+++ b/crop.wav
--- a/tetris_2_f.wav
+++ b/tetris_2_f.wav
--- a/tetris_2_result.wav
+++ b/tetris_2_result.wav
--- a/tetris_2_void.wav
+++ b/tetris_2_void.wav