small refactoring of main function

2024-12-16 21:52:15 +01:00 · 2024-12-16 21:52:15 +01:00 · 05b642bdcc
parent 64a2a96628
commit 05b642bdcc
3 changed files with 93 additions and 107 deletions
--- a/Zblit.wav
+++ b/Zblit.wav
--- a/audio.wav
+++ b/audio.wav
--- a/cleaned_sp.py
+++ b/cleaned_sp.py
@ -1,3 +1,13 @@
+from math import *
+import numpy as np
+import scipy as scp
+from scipy.io import wavfile
+import matplotlib.pyplot as plt
+import subprocess
+import heapq
+from pathlib import Path
+from time import sleep
+
 def is_data_stereo(raw_global_data:list) -> bool:
    """
    self-explainatory
@ -11,14 +21,17 @@ def is_data_stereo(raw_global_data:list) -> bool:
    return True

 def dist_to_integer(x):
-    ent = np.floor(x+0.5)
-    return np.abs(x - ent)
+    """Return the distance from x to its nearest integer (between 0 and 0.5)."""
+    # x - floor(x) is the fractional part in [0, 1); the nearest integer is
+    # floor(x) or floor(x)+1, so the answer is the smaller of the two gaps.
+    frac = x - np.floor(x)
+    return np.minimum(frac, 1 - frac)

 def is_note_within(fr1, fr2):
    if(fr1 > fr2):
-        return (fr1/fr2 <= NOTE_DIST and dist_to_integer(fr1/fr2) >= OCTAVE_DIST)
+        return (fr1/fr2 <= NOTE_DIST or dist_to_integer(fr1/fr2) >= OCTAVE_DIST) # same tone or octave -- NOTE(review): assuming dist_to_integer is the distance to the nearest integer, the second test holds when the ratio is at least OCTAVE_DIST away from an integer; confirm this is the intended "octave" check
    else:
-        return (fr2/fr1 <= NOTE_DIST and dist_to_integer(fr2/fr1) >= OCTAVE_DIST)
+        return (fr2/fr1 <= NOTE_DIST or dist_to_integer(fr2/fr1) >= OCTAVE_DIST) # same test, with fr2 (here >= fr1) as the numerator so the ratio is >= 1

 def keep_highest(song_name, offset, songlen, segsize, count, output_name, minfreq=110, maxfreq=5000, ampthr=250):
    # extracting data from cropped song
@ -114,7 +127,7 @@ def keep_highest(song_name, offset, songlen, segsize, count, output_name, minfre
    amp_ct = 0
    incr_a = segsize*4
    len_seg_a = int(sample_rate*incr_a)
-    count_a = len_seg_a//1250
+    count_a = len_seg_a//1000
    left_0 = int(sample_rate*(amp_ct+offset))
    while(amp_ct < songlen-segsize):
        left = int(sample_rate*(amp_ct+offset))
@ -139,8 +152,10 @@ def keep_highest(song_name, offset, songlen, segsize, count, output_name, minfre
    time_d = 0.035
    cur_t = 0

-    locs = []
-    loct = []
+    last_t = -10.0 
+
+    locs = []   # amplitudes
+    loct = []   # times
    for i in range(len(new_new_amps)):
        if(new_new_amps[i] > 100):
            if(not in_seg):
@ -153,109 +168,80 @@ def keep_highest(song_name, offset, songlen, segsize, count, output_name, minfre
            cur_t += 1/sample_rate
            if(in_seg and cur_t >= time_d):
                in_seg = False
-                locs.append(a_ampl)
-                loct.append((left_id + right_id)/(2*sample_rate) + offset)
+                delta_t = (right_id - left_id)/sample_rate
+                if(np.abs(left_id/sample_rate - last_t) >= 0.01):            # keep a note only when it starts at least 10ms away from the end of the last kept one
+                    last_t = right_id/sample_rate
+                    if(delta_t < segsize*1.1):
+                        locs.append(a_ampl)
+                        loct.append((left_id + right_id)/(2*sample_rate) + offset)
+                    else:
+                        locs.append(a_ampl)
+                        loct.append([left_id/sample_rate, right_id/sample_rate])

                a_ampl = 0

-    # detect sliders
-    sl_a = []
-    sl_t = []
-    in_slider = False
-    slider_dtct = segsize
-    for i in range(1, len(loct)-1):
-        delta = loct[i] - loct[i-1]
-        delta2 = loct[i+1] - loct[i]
-        if(delta < slider_dtct and delta2 < slider_dtct):
-            if(in_slider):
-                sl_t.append(loct[i])
-                sl_a.append(locs[i])
-            else:
-                in_slider = True
-                sl_t.append(loct[i-1])
-                sl_a.append(locs[i-1])
-                sl_t.append(loct[i])
-                sl_a.append(locs[i])
+    # -------------------------------------------- Clean with freq -------------------------------------------- #
+
+    ssize_0 = segsize/3
+    locf = [] # frequencies
+    for k in range(len(locs)):
+        ktime = 0
+        ssize = ssize_0
+        if(type(loct[k]) == float):             # circle
+            ktime = loct[k]
+        else:                                   # slider
+            ktime = (loct[k][1]+loct[k][0])/2
+            ssize = max((loct[k][1]-loct[k][0])/2, ssize_0)
+
+        left_id = max(0, int((ktime-ssize/2)*sample_rate))
+        
+        right_id = min(int((ktime+ssize/2)*sample_rate), len(song_data))
+        
+        # calculate the fft
+        pff = scp.fft.rfft(song_data[left_id:right_id])
+
+        fmax = pfreq[0]
+        fampmax = 0
+        for i in range(1, len(pff)):
+            if(pfreq[i] > minfreq and pfreq[i] < maxfreq and fampmax < np.abs(pff[i])):
+                fmax = pfreq[i]
+                fampmax = np.abs(pff[i])
+
+        locf.append(fmax)
+    
+    # -------------------------------------------- Plot -------------------------------------------- #
+
+    plt_loct_all = []
+    plt_loct = []
+    plt_locs = []
+    plt_slidt = []
+    plt_slids = []
+    for i in range(len(loct)):
+        if(type(loct[i]) == float):
+            plt_loct_all.append(loct[i])
+            plt_loct.append(loct[i])
+            plt_locs.append(locs[i])
        else:
-            in_slider = False
+            plt_loct_all.append(loct[i][0])
+            plt_slidt.append(loct[i][0])
+            plt_slidt.append(loct[i][1])
+            plt_slids.append(locs[i])
+            plt_slids.append(locs[i])

-    plt.plot(new_new_t, new_new_amps, "b-")
-    plt.plot(loct, locs, "ro")
-    plt.plot(sl_t, sl_a, "go")
-    plt.grid()
-    plt.show()
-
-    # -------------------------------------------- Localize -------------------------------------------- #
-
-    timing_points = []
-    last_freq = new_freqs[0]
-    for i in range(len(new_times)):
-        if(np.abs(new_ampls[i]) > ampthr and (i == 0 or not is_note_within(new_freqs[i], last_freq))):
-            last_freq = new_freqs[i]
-            timing_points.append(new_times[i])
-            new_kept.append(new_freqs[i])
-        else:
-            new_kept.append(0)
-
-    mx = max(new_ampls)
+    plt.plot(new_new_t, new_new_amps, "y-")
+    plt.plot(plt_loct, plt_locs, "ro")
+    plt.plot(plt_slidt, plt_slids, "go")
+    plt.plot(plt_loct_all, locf, "mo")
            
-    plt.plot(new_times, new_freqs)
+    '''plt.plot(new_times, new_freqs)
    plt.plot(new_times, [elt*1000/mx for elt in new_ampls])
-    plt.plot(new_times, new_kept, "ro")
+    plt.plot(new_times, new_kept, "bo")'''
    plt.grid()
    plt.show()
    
    # -------------------------------------------- Write -------------------------------------------- #
-    i0 = 0
-    timing_points.append(999999)

-    write_freq = 880
-    write_cur = 0
-    write_id = -1
-    while(write_cur <= write_freq): # shouldnt seg fault
-        write_id += 1
-        write_cur = pfreq[write_id]
-    
-    
-    # remove
-    # i = time, j = freq
-    for i in range(len(fft_list)):
-        # retrieve dominant freq
-        if(segsize*i >= timing_points[i0]-offset):
-            i0 += 1
-
-            maxfreq = 0
-            maxfreqid = 0
-            maxamp = 0
-            for j in range(len(fft_list[0])):
-                if(np.abs(fft_list[i][j]) > maxamp):
-                    maxamp = np.abs(fft_list[i][j])
-                    maxfreq = pfreq[j]
-                    maxfreqid = j
-
-            fft_list_untouched[i][write_id] = max(maxamp*2, 32767)
-            fft_list_untouched[i][write_id-1] = max(maxamp*2, 32767)
-            fft_list_untouched[i][write_id+1] = max(maxamp*2, 32767)
-
-    res = []
-    print("Converting...")
-    for i in range(len(fft_list_untouched)):
-        ift = scp.fft.irfft(fft_list_untouched[i], n=blit)
-        for k in ift:
-            res.append(k)
-    #print(type(res[0]))
-    mx = 0
-    for j in range(len(res)):
-        if(res[j] > mx):
-            mx = res[j]
-
-    for i in range(len(res)):
-        res[i] = np.int16(32767*res[i]/mx)
-
-    res = np.array(res)
-    wavfile.write(output_name, sample_rate, res)
-
-    return (loct, sl_t, timing_points) # amplitude result, sliders and frequency result
+    return (loct, locs) # note times (a float, or [start, end] for a slider) and their amplitudes

 def convert_to_wav(song_name:str, output_file="audio.wav") -> str:
    """
@ -312,22 +298,22 @@ def retrieve_all_from_song(filename, t0, t1, bpm, dta=0.001, dtf=0.01, threshold
        plt.show()

    # free()
-
+'''
 # c-type
 SONG_LEN = 7
 OFFSET = 0.042
 BPM = 149.3
 SEGSIZE = 1/(BPM/60)
-wavved_song = convert_to_wav("ctype.mp3")
-
+wavved_song = convert_to_wav("songs/ctype.mp3")
 '''
+
 # tetris_2
 SONG_LEN = 10
 OFFSET = 0
 BPM = 157
 SEGSIZE = 1/(BPM/60)
-wavved_song = convert_to_wav("tetris_2.wav")
-'''
+wavved_song = convert_to_wav("songs/tetris_2.wav")
+
 '''
 # test
 SONG_LEN = 1
@ -341,7 +327,7 @@ SONG_LEN = 5
 OFFSET = 1.652
 BPM = 155
 SEGSIZE = 1/(BPM/60)
-wavved_song = convert_to_wav("furioso melodia.mp3")
+wavved_song = convert_to_wav("songs/furioso melodia.mp3")
 '''
 '''
 # E
@ -349,7 +335,7 @@ SONG_LEN = 30
 OFFSET = 2.641
 BPM = 155
 SEGSIZE = 1/(BPM/60)
-wavved_song = convert_to_wav("rushe.mp3")
+wavved_song = convert_to_wav("songs/rushe.mp3")
 '''
 '''
 # Tsubaki
@ -357,7 +343,7 @@ SONG_LEN = 10
 OFFSET = 35.659
 BPM = 199
 SEGSIZE = 1/(BPM/60)
-wavved_song = convert_to_wav("TSUBAKI.mp3")
+wavved_song = convert_to_wav("songs/TSUBAKI.mp3")
 '''
 '''
 # death
@ -368,7 +354,7 @@ SEGSIZE = 1/(BPM/60)
 wavved_song = convert_to_wav("songs/Night of Knights.mp3")
 '''

-#wavved_song = convert_to_wav("tetris_2.wav")
+#wavved_song = convert_to_wav("songs/tetris_2.wav")

 NOTE_DIST = (2**(1/4))
 OCTAVE_DIST = 0.05