general refactoring of folder + rework of most functions

2024-12-10 20:19:28 +01:00 · 2024-12-10 20:19:28 +01:00 · 421cddf267
parent a59c0c4e08
commit 421cddf267
22 changed files with 202 additions and 356 deletions
--- a/Zblit.wav
+++ b/Zblit.wav
--- a/Zcleaned_song.wav
+++ b/Zcleaned_song.wav
--- a/Zoutput_song.wav
+++ b/Zoutput_song.wav
--- a/Zvoided_song.wav
+++ b/Zvoided_song.wav
--- a/audio.wav
+++ b/audio.wav
--- a/cleaned_sp.py
+++ b/cleaned_sp.py
@ -324,6 +324,166 @@ def localize_frequencies(song_name, offset, songlen, segsize, output_name):
    res = np.array(res)
    wavfile.write(output_name, sample_rate, res)
 # frequency ratio between two adjacent semitones (twelfth root of 2)
 NOTE_DIST = (2**(1/12))
 def is_note_within(fr1, fr2):
    """
    Tells whether two frequencies are at most one semitone apart.

    fr1, fr2 : float (frequencies, given in any order)
    Returns True when (higher frequency)/(lower frequency) <= NOTE_DIST.
    A frequency of 0 has no defined ratio : it only matches another 0,
    instead of dividing by zero.
    """
    # order the pair once so a single ratio has to be evaluated
    low, high = (fr2, fr1) if fr1 > fr2 else (fr1, fr2)
    if(low == 0):
        # guard against the division by zero (0 Hz bin / silent segment)
        return bool(high == 0)
    return (high/low <= NOTE_DIST)
 def keep_highest(song_name, offset, songlen, segsize, count, output_name, minfreq=110, maxfreq=5000):
    """
    Keeps the strongest frequency bins of a song excerpt, marks the note changes and writes the result.

    song_name : string (.wav file read with wavfile.read)
    offset : number (start of the excerpt, in seconds)
    songlen : number (length of the excerpt, in seconds)
    segsize : number (length of each FFT segment, in seconds)
    count : int (number of bins retrieved per segment with heapq.nlargest)
    output_name : string (.wav file written at the end)
    minfreq, maxfreq : every bin whose frequency is below minfreq or above maxfreq is zeroed

    Side effects : prints progress, opens a matplotlib window (plt.show() is called)
    and writes output_name. Returns nothing.
    """
    # extracting data from cropped song
    sample_rate, raw_song_data = wavfile.read(song_name)
    blit = int(sample_rate*segsize) # number of samples in one segment
    song_data = [0 for i in range(len(raw_song_data))]
    # sample indexes bounding the excerpt
    id_start = int(offset*sample_rate)
    id_end = min(len(raw_song_data), int((offset+songlen)*sample_rate))
    a = 0
    if(is_data_stereo(raw_song_data)):
        print("Converting to mono...")
        # only the excerpt is mixed down, samples outside of it stay at 0
        for x in range(id_start, id_end):
            song_data[x] = raw_song_data[x][0]/2 + raw_song_data[x][1]/2
            # NOTE(review): int(len/100) is 0 for files shorter than 100 frames,
            # which would make this modulo raise - confirm inputs are always longer
            if(x % (int(len(raw_song_data)/100)) == 0):
                print(a, "/ 100")
                a += 1
    else:
        song_data = raw_song_data
    print("\nSampleRate : ", sample_rate)
    print("SegSize : ", blit)
    # calculate the frequencies associated to the FFTs
    pfreq = scp.fft.rfftfreq(blit, 1/sample_rate)
    # left boundary of segment to crop
    current_time = offset
    # list of FFTs
    fft_list = []
    # number of samples
    k = 0
    print("Retrieving freqs from", offset, "to", songlen+offset, "...")
    while(current_time < songlen+offset-segsize):
        # index corresponding to left boundary (not used below, the slice recomputes it)
        left_id = int(current_time*sample_rate)
        # index corresponding to right boundary (not used below either)
        right_id = int((current_time+segsize)*sample_rate)
        # calculate the fft, append it to fft_list
        pff = scp.fft.rfft(song_data[int(current_time*sample_rate):int(sample_rate*(current_time+segsize))])
        fft_list.append(pff)
        # the time is recomputed from k rather than accumulated, to avoid float drift
        k += 1
        current_time = offset + k*segsize
        #print(current_time)
    print("\n\nSegSize :", segsize, "\nFFT :", len(fft_list), "\nFFT[0] :", len(fft_list[0]), "\npfreq :", len(pfreq), "\n\n")
    # -------------------------------------------- Clean song -------------------------------------------- #
    pfreq_minid = 0
    pfreq_maxid = len(pfreq) -1
    # zero every bin below minfreq, in all segments
    # NOTE(review): no bound check - if minfreq exceeds every entry of pfreq the index runs past the end
    while(pfreq[pfreq_minid] < minfreq):
        for t in range(len(fft_list)):
            fft_list[t][pfreq_minid] = 0+0j
        pfreq_minid += 1
    # zero every bin above maxfreq, in all segments
    while(pfreq[pfreq_maxid] > maxfreq):
        for t in range(len(fft_list)):
            fft_list[t][pfreq_maxid] = 0+0j
        pfreq_maxid -= 1
    new_times = []
    new_freqs = []
    new_ampls = []
    new_kept = []
    # i = time, j = freq
    for i in range(len(fft_list)):
        #returns a list of couples [id, value]
        # NOTE(review): the key is the complex coefficient itself, not its magnitude -
        # confirm whether np.abs(x[1]) was intended
        elements = heapq.nlargest(count, enumerate(fft_list[i]), key=lambda x: x[1])
        for idx in range(len(elements)):
            if(elements[idx][0] < len(pfreq)):
                new_times.append(offset + i*segsize)
                new_freqs.append(pfreq[elements[idx][0]])
                new_ampls.append(fft_list[i][elements[idx][0]])
    '''for i in range(len(new_freqs)):
        while(new_freqs[i]>1000):
            new_freqs[i] = new_freqs[i]/2'''
    # -------------------------------------------- Localize -------------------------------------------- #
    # a timing point is recorded whenever the kept frequency leaves the semitone
    # range of the previous one; new_kept holds 0 everywhere else (used for the plot)
    timing_points = []
    for i in range(len(new_times)):
        if(i == 0 or not is_note_within(new_freqs[i], new_freqs[i-1])):
            timing_points.append(new_times[i])
            new_kept.append(new_freqs[i])
        else:
            new_kept.append(0)
    plt.plot(new_times, new_freqs)
    plt.plot(new_times, new_kept, "ro")
    plt.grid()
    plt.show()
    # -------------------------------------------- Write -------------------------------------------- #
    i0 = 0
    # sentinel so that timing_points[i0] stays valid after the last real timing point
    timing_points.append(999999)
    write_freq = 880
    write_cur = 0
    write_id = -1
    # find the first bin whose frequency is above write_freq
    # NOTE(review): raises IndexError if no bin of pfreq exceeds write_freq
    while(write_cur <= write_freq): # shouldnt seg fault
        write_id += 1
        write_cur = pfreq[write_id]
    # remove
    # i = time, j = freq
    for i in range(len(fft_list)):
        # retrieve dominant freq
        if(segsize*i >= timing_points[i0]-offset):
            i0 += 1
            # maxfreq reuses the name of the parameter (already consumed above);
            # maxfreq and maxfreqid are assigned but not read afterwards
            maxfreq = 0
            maxfreqid = 0
            maxamp = 0
            for j in range(len(fft_list[0])):
                if(np.abs(fft_list[i][j]) > maxamp):
                    maxamp = np.abs(fft_list[i][j])
                    maxfreq = pfreq[j]
                    maxfreqid = j
            # mark the timing point by overwriting the bins around write_freq
            fft_list[i][write_id] = max(maxamp*2, 32767)
            fft_list[i][write_id-1] = max(maxamp*2, 32767)
            fft_list[i][write_id+1] = max(maxamp*2, 32767)
    res = []
    print("Converting...")
    # back to the time domain, segments are concatenated in order
    for i in range(len(fft_list)):
        ift = scp.fft.irfft(fft_list[i], n=blit)
        for k in ift:
            res.append(k)
    #print(type(res[0]))
    # NOTE(review): only the largest positive sample is tracked; a negative sample of
    # larger magnitude would exceed the int16 range, and mx stays 0 if no sample is positive
    mx = 0
    for j in range(len(res)):
        if(res[j] > mx):
            mx = res[j]
    for i in range(len(res)):
        res[i] = np.int16(32767*res[i]/mx)
    res = np.array(res)
    wavfile.write(output_name, sample_rate, res)
 def write_result(song_name, offset, songlen, segsize, timing_pts, output_name):
    # removes unnecessary frequencies/amps from a song
    # ampthr is in [0, 1]
@ -407,7 +567,9 @@ def write_result(song_name, offset, songlen, segsize, timing_pts, output_name):
                    maxfreq = pfreq[j]
                    maxfreqid = j
-            fft_list[i][write_id] = max(maxamp*2, 10000)
+            fft_list[i][write_id] = max(maxamp*2, 32767)
            fft_list[i][write_id-1] = max(maxamp*2, 32767)
            fft_list[i][write_id+1] = max(maxamp*2, 32767)
    # writing new .wav
@ -645,27 +807,53 @@ def retrieve_all_from_song(filename, t0, t1, bpm, dta=0.001, dtf=0.01, threshold
        plt.show()
    # free()
 '''
-void_freq_clean(convert_to_wav("ctype.mp3"), 0.042, 5, 1/(149.3/60)/8, 100, 3000, 0.05, "ctype_void.mp3")
+# c-type
 localize_frequencies(convert_to_wav("ctype_void.mp3"), 0, 5, 1/(149.3/60)/12, "ctype_filtered.mp3")
 retrieve_all_from_song("ctype_filtered.mp3", 0, 5, 149.3, dta=1/(149.3/60)/128, dtf=1/(149.3/60)/8)
 '''
 #OFFSET = 0.042
 #BPM = 149.3
 SONG_LEN = 5
-OFFSET = 117.790
+OFFSET = 0.042
-BPM = 150
+BPM = 149.3
 SEGSIZE = 1/(BPM/60)
 '''
 '''
 # tetris_2
 SONG_LEN = 8
 OFFSET = 0
 BPM = 157
 SEGSIZE = 1/(BPM/60)
 '''
 '''
 # test
 SONG_LEN = 1
 OFFSET = 0
 BPM = 240
 SEGSIZE = 1/(BPM/60)
 '''
 '''
 # gmtn
 SONG_LEN = 3
 OFFSET = 1.652
 BPM = 155
 SEGSIZE = 1/(BPM/60)
 '''
 # E
 SONG_LEN = 10
 OFFSET = 2.641
 BPM = 155
 SEGSIZE = 1/(BPM/60)
-wavved_song = convert_to_wav("Galaxy Collapse.mp3")
+#wavved_song = convert_to_wav("tetris_2.wav")
 wavved_song = convert_to_wav("songs/rushe.mp3")
 keep_highest(wavved_song, OFFSET, SONG_LEN, SEGSIZE/4, 1, "Zblit.wav", minfreq=300, maxfreq=3000)
 '''
 # remove high/low frequencies (often noise)
 #void_freq_clean(wavved_song, OFFSET, SONG_LEN, SEGSIZE/8, 100, 3000, 0.05, "Zvoided_song.wav")
 # crops any part with let ring
-localize_frequencies(convert_to_wav("Zvoided_song.wav"), 0, SONG_LEN-0.1, SEGSIZE/8, "Zcleaned_song.wav")
+localize_frequencies("Zblit.wav", 0, SONG_LEN-0.1, SEGSIZE/8, "Zcleaned_song.wav")
 #localize_frequencies(wavved_song, OFFSET, SONG_LEN, SEGSIZE/8, "Zcleaned_song.wav")
 # find timings
 tp = parse_after_filter("Zcleaned_song.wav", 0, SONG_LEN-0.1, SEGSIZE/8, OFFSET)
@ -674,6 +862,7 @@ tp = parse_after_filter("Zcleaned_song.wav", 0, SONG_LEN-0.1, SEGSIZE/8, OFFSET)
 write_result(wavved_song, OFFSET, SONG_LEN-0.1, SEGSIZE/8, tp, "Zoutput_song.wav")
 #retrieve_all_from_song("Zcleaned_song.wav", 0, 5, 149.3, dtf=1/(149.3/60)/8)
 '''
 print("yipee")
--- a/old/compare_plot.py
+++ b/old/compare_plot.py
--- a/old/debug.py
+++ b/old/debug.py
--- a/old/main.py
+++ b/old/main.py
--- a/old/place.py
+++ b/old/place.py
--- a/old/posu.py
+++ b/old/posu.py
--- a/old/requirements.txt
+++ b/old/requirements.txt
--- a/Collapse.mp3
+++ b/Collapse.mp3
--- a/Knights.mp3
+++ b/Knights.mp3
--- a/songs/audio.wav
+++ b/songs/audio.wav
--- a/songs/crop.wav
+++ b/songs/crop.wav
--- a/songs/ctype.mp3
+++ b/songs/ctype.mp3
--- a/songs/furioso
+++ b/songs/furioso
--- a/songs/no.mp3
+++ b/songs/no.mp3
--- a/songs/rushe.mp3
+++ b/songs/rushe.mp3
--- a/songs/tetris_2.wav
+++ b/songs/tetris_2.wav
--- a/sound_process.py
+++ b/sound_process.py
@ -1,343 +0,0 @@
 from math import *
 import numpy as np
 from scipy.io import wavfile
 from scipy import signal
 import matplotlib.pyplot as plt
 import subprocess
 import wave as wv
 import struct
 import librosa
 import heapq
 import scipy
 import os
 import random
 from pathlib import Path
 from time import sleep
 from datetime import timedelta
 import debug
 print("Starting...\n")
 def filter_n_percent_serial(song_name, offset, n_iter, step, threshold):
    """
    Keeps, segment by segment, only the highest threshold% samples of a song excerpt.

    song_name : string (audio file handed to ffmpeg)
    offset : number (start of the excerpt in seconds, passed to ffmpeg as -ss)
    n_iter : int (number of segments processed)
    step : number (length of each segment in seconds)
    threshold : number in ]0, 100] (percentage of samples kept in each segment)

    The excerpt is extracted to a temporary crop.wav, which is removed once read.
    Inside each segment, every sample that is not among the
    int(len(segment)*threshold//100) largest values is set to 0.
    Returns the sample array read from crop.wav, modified in place.
    """
    # NOTE(review): ffmpeg documents -t as a duration, so offset+step*n_iter asks for
    # more than step*n_iter seconds - confirm this is intended
    subprocess.run(["ffmpeg", "-ss", str(offset), "-t", str(offset+step*n_iter), "-i", song_name, "crop.wav"], shell=False) 
    sample_rate, global_data = wavfile.read('crop.wav')
    # "clear" and "rm" are external commands : presumably POSIX-only, verify on other platforms
    subprocess.run(["clear"], shell=False)
    subprocess.run(["rm", "crop.wav"], shell=False)
    for i in range(n_iter):
        print(i, "/", n_iter)
        #print(i * step)
        song_data = global_data[int(i*step*sample_rate):int((i+1)*step*sample_rate)]
        if(len(song_data) != 0):
            # mx is computed but not used afterwards
            mx = max(song_data)
            is_locked = [False for i in range(len(song_data))]
            # number of samples to keep in this segment
            x = int((len(song_data)*threshold)//100)
            #print("X = ", x)
            #print("Retreiving the", int(x), "/", len(song_data), "highest values")
            # couples (index in segment, value) of the x largest samples
            elements = heapq.nlargest(int(x), enumerate(song_data), key=lambda x: x[1])
            #print("Done")
            for idx in range(len(elements)):
                is_locked[elements[idx][0]] = True
            # zero everything that was not selected, at its position in the whole excerpt
            for r in range(len(song_data)):
                if(is_locked[r] == False):
                    global_data[r+int(i*step*sample_rate)] = 0
    return global_data
 def write_to_file_thr(sample_rate, song_data, offset, threshold, filename):
    """
    Writes the samples that stand out to a CSV-like text file.

    sample_rate : int (samples per second, used to convert an index into a time)
    song_data : sequence of numbers (the samples)
    offset : number (time in seconds added to every written time)
    threshold : number (a sample is written when sample/max(song_data) > threshold)
    filename : string (output file, overwritten)

    The file starts with the header "time,amplitude"; each following line holds
    the time rounded to 3 decimals and the amplitude rounded to 0 decimals.
    Empty input, or input whose maximum is 0, produces a header-only file.
    """
    sample_count = len(song_data)
    # the context manager guarantees the file is flushed and closed, even on error
    with open(filename, 'w') as file:
        file.write('time,amplitude\n')
        if(sample_count == 0):
            # nothing to scan, and max() of an empty sequence would raise
            return
        mx = max(song_data)
        print("writing to output...")
        if(mx == 0):
            # the ratio to the maximum is undefined : no sample can be selected
            return
        # at least 1, otherwise inputs shorter than 50 samples cause a modulo by zero
        progress_step = max(1, sample_count//50)
        for i in range(sample_count):
            if(i % progress_step == 0):
                print(i, "/", sample_count)
            if(song_data[i]/mx > threshold):
                time_txt = str(np.round(offset + i/sample_rate, 3))
                amp_txt = str(np.round(song_data[i], 0))
                file.write(time_txt + ',' + amp_txt + '\n')
 def round_t(id, sample_rate, bpm, div, offset, k0):
    """
    Snaps the time of sample number id onto the grid offset + k/(bpm*div).

    The grid is walked upwards from k = k0 until a grid time reaches id/sample_rate.
    That grid time is returned (as a number) when it is strictly closer to the
    sample than the grid time just before it; otherwise the previous grid time
    is returned inside a tuple (previous_time, 0).
    NOTE(review): the two branches return different shapes (number vs tuple) -
    confirm which one the callers expect.
    """
    grid = bpm*div
    target = id/sample_rate
    step_index = k0
    snapped = offset + step_index/grid
    while(snapped < target):
        snapped = offset + step_index/grid
        step_index += 1
    previous = snapped - 1/grid
    if(abs(snapped - target) < abs(previous - target)):
        return snapped
    return (previous, 0)
 def compress(Zxx):
    """
    Placeholder : Zxx is not read and nothing is computed, the function returns None.
    """
    return None
 def get_freq(song_name, times, width=1000, display=False):
    """
    For a given list of times (in seconds), returns the corresponding peak frequencies.

    song_name : string (.wav file read with wavfile.read)
    times : list of numbers (seconds from the start of the file)
    width : int (half-size, in samples, of the window analysed around each time)
    display : bool (plots the frequencies against the times when True)

    Returns a list with one dominant frequency per entry of times. 0 is reported
    for a time whose window contains no sample (e.g. past the end of the file).
    Multi-channel data is averaged over its channels before the FFT.
    """
    # The file is read directly. No ffmpeg crop is made, because the cropped
    # copy was never read and only left a prompt-prone temporary file behind.
    sample_rate, global_data = wavfile.read(song_name)
    pfreq = scipy.fft.rfftfreq(2*width, 1/sample_rate)
    frequencies = [0 for s in range(len(times))]
    print(len(pfreq))
    for s in range(len(times)):
        # use the sample rate of the file itself, not a fixed 44100
        center = int(times[s]*sample_rate)
        left = max(0, center-width)
        right = max(left, min(len(global_data), center+width))
        window = global_data[left:right]
        if(len(window) == 0):
            continue
        if(np.ndim(window) > 1):
            # mix the channels down so the FFT runs along the time axis
            window = np.mean(window, axis=1)
        # argmax returns the first bin holding the maximum magnitude
        peak = int(np.argmax(np.abs(scipy.fft.rfft(window))))
        if(len(window) == 2*width):
            frequencies[s] = pfreq[peak]
        else:
            # window clipped at a file boundary : its bins map to other frequencies
            frequencies[s] = scipy.fft.rfftfreq(len(window), 1/sample_rate)[peak]
    if(display):
        plt.plot(times, frequencies)
        plt.grid()
        plt.xlabel("Time (s)")
        plt.ylabel("Dominant frequency (Hz)")
        plt.title("Dominant frequencies at peaks")
        plt.show()
    return frequencies
 def is_data_stereo(raw_global_data:list) -> bool:
    """
    Tells whether the samples hold several channels.

    raw_global_data : list or array of samples, as returned by wavfile.read
    Returns True when the first sample is itself a non-empty sequence
    (one value per channel), False when it is a single number or when
    there is no sample at all.
    """
    try:
        first_sample = raw_global_data[0]
    except IndexError:
        # no sample at all
        return False
    try:
        # plain numbers and numpy scalars have no len() : single channel.
        # This replaces the former assert-based probe, which is removed by python -O.
        return len(first_sample) > 0
    except TypeError:
        return False
 def void_freq(song_name, offset, songlen, increment, minfreq, maxfreq, upperthr, ampthr, ampfreq, ampval, leniency, write, linear, output_file="trimmed.wav"):
    """
    Amplifies, then voids, parts of the spectrum of a song excerpt, segment by segment.

    song_name : string (audio file handed to ffmpeg)
    offset : number (start of the excerpt in seconds, passed to ffmpeg as -ss)
    songlen : number (length of the part that will be filtered, starting from offset)
    increment : number (length of each FFT segment, in seconds)
    minfreq and maxfreq : every frequency f with minfreq <= f < maxfreq is voided
    upperthr : every frequency above upperthr is voided
    ampthr : in each segment, every bin whose amplitude is under (segment max)/ampthr is voided
    ampfreq, ampval, leniency, linear : amplification applied before the voiding
        - if linear is False, each bin is multiplied by 1 + ampval/max(1, |frequency - ampfreq|)
        - if linear is True, each bin is multiplied by (ampval * frequency + leniency)
    write : bool (when True the filtered signal is written to output_file)
    output_file : string (name of the .wav file written)

    Side effects : runs ffmpeg and rm on a temporary crop.wav, prints progress.
    Returns nothing.
    """
    fft_list = []
    times = []
    current_time = offset
    k = 0
    # NOTE(review): ffmpeg documents -t as a duration, so songlen+offset asks for
    # more than songlen seconds - confirm this is intended
    subprocess.run(["ffmpeg", "-ss", str(offset), "-t", str(songlen+offset), "-i", song_name, "crop.wav"], shell=False) 
    sample_rate, raw_global_data = wavfile.read("crop.wav")
    # number of samples in one segment
    blit = int(sample_rate*increment)
    global_data = [0 for i in range(len(raw_global_data))]
    #subprocess.run(["clear"])
    subprocess.run(["rm", "crop.wav"], shell=False)
    a = 0
    if(is_data_stereo(raw_global_data)):
        print("Converting to mono...")
        # average of the two first channels
        for x in range(len(raw_global_data)):
            global_data[x] = raw_global_data[x][0]/2 + raw_global_data[x][1]/2
            # NOTE(review): int(len/100) is 0 for files shorter than 100 frames,
            # which would make this modulo raise
            if(x % (int(len(raw_global_data)/100)) == 0):
                print(a, "/ 100")
                a += 1
    else:
        global_data = raw_global_data
    #print("Blit :", blit)
    # frequencies associated with the bins of a full segment
    pfreq = scipy.fft.rfftfreq(blit, 1/sample_rate)
    #print(len(pfreq))
    # NOTE(review): current_time starts at offset but is compared to songlen (a length) -
    # confirm whether songlen+offset was meant
    while(current_time <= songlen):
        pff = scipy.fft.rfft(global_data[k*blit:(k+1)*blit])
        fft_list.append(pff)
        # times is filled here but not read afterwards in this function
        times.append(k*increment)
        k += 1
        current_time = offset + k*increment
    print("FFT :", len(fft_list), "\nFFT[0] :", len(fft_list[0]), "\npfreq :", len(pfreq))
    print("Finding global max...")
    # amplification step (see ampfreq / ampval / leniency / linear above)
    if(linear == False):
        for i in range(len(fft_list)):
            for j in range(len(fft_list[i])):
                fft_list[i][j] *= (1 + ampval/max(1, np.abs(pfreq[j] - ampfreq)))  
    else:
        for i in range(len(fft_list)):
            for j in range(len(fft_list[i])):
                fft_list[i][j] *= (ampval*pfreq[j] + leniency)
    print("Trimming...")
    for i in range(len(fft_list)):
        # lmax : largest amplitude of the segment, measured before any bin is voided
        lmax = 0
        for j in range(len(fft_list[i])):
            if(np.abs(fft_list[i][j]) > lmax):
                lmax = np.abs(fft_list[i][j])
        for j in range(len(fft_list[i])): 
            if((pfreq[j] >= minfreq and pfreq[j] < maxfreq) or pfreq[j] > upperthr):
                fft_list[i][j] = 0+0j
            if(np.abs(fft_list[i][j]) < lmax/ampthr):
                fft_list[i][j] = 0+0j
    if(write):
        res = []
        print("Converting...")
        # back to the time domain, segments are concatenated in order
        for i in range(len(fft_list)):
            ift = scipy.fft.irfft(fft_list[i], n=blit)
            for k in ift:
                res.append(k)
        #print(type(res[0]))
        # NOTE(review): only the largest positive sample is tracked; a negative sample of
        # larger magnitude would exceed the int16 range, and mx stays 0 if no sample is positive
        mx = 0
        for j in range(len(res)):
            if(res[j] > mx):
                mx = res[j]
        for i in range(len(res)):
            res[i] = np.int16(32767*res[i]/mx)
        res = np.array(res)
        # NOTE(review): the rate is hard-coded to 44100 instead of sample_rate - confirm
        wavfile.write(output_file, 44100, res)
    #plt.plot(np.abs(pfreq[:len(fft_list[0])]), np.abs(fft_list[0]))
    #plt.grid()
    #plt.show()
    print("Done")
 def convert_tuple(data, times):
    """
    Pairs every entry of data with the entry of times found at the same index.
    Returns a list of tuples (time, value), one per entry of data.
    """
    pairs = []
    for index in range(len(data)):
        pairs.append((times[index], data[index]))
    return pairs
 def get_songlen(filename):
    """
    Reads the .wav file filename and returns its length in seconds
    (number of samples divided by the sample rate). The length is also printed.
    """
    sample_rate, samples = wavfile.read(filename)
    duration = len(samples)/sample_rate
    print("LEN :", duration)
    return duration
 def convert_to_wav(song_name:str, output_file="audio.wav") -> str:
    """
    Converts the song to .wav with ffmpeg when its extension is .mp3 or .ogg.
    Any other extension is left untouched (the decision relies on the extension only).
    Returns: the file name that should be used afterwards (output_file after a
    conversion, song_name otherwise).
    """
    if(Path(song_name).suffix not in (".mp3", ".ogg")):
        return song_name
    print("Converting to .wav...")
    # -y lets ffmpeg overwrite an existing output_file without asking
    subprocess.run(["ffmpeg", "-y", "-i", song_name, output_file], shell=False)
    return output_file
 def process_song(filename, bpm, offset0=0, div_len_factor=1, n_iter_2=-1, threshold=0.5, divisor=4):
    """
    Runs the whole pipeline on a song : conversion to .wav, frequency voiding,
    per-segment filtering, snapping and peak-frequency retrieval.

    filename : string (name of the song)
    bpm : int [+]
    offset0 : number [+] (start of the mapping, divided by 1000 before use, default is 0)
        NOTE(review): the division by 1000 suggests milliseconds - confirm the unit
    div_len_factor : float [+] (the length multiplier of each segment, default is 1)
    n_iter_2 : int (the number of iterations, default is -1 : computed from the song length)
    threshold : number (percentage handed to filter_n_percent_serial, default is 0.5)
    divisor : int [+] (not read by the active code, only by the disabled block at the end)

    Returns (snapped_data, times, frequencies) : the two values given by debug.snap3
    and the list returned by get_freq for those times.
    """
    filename = convert_to_wav(filename)
    offset = offset0/1000
    # length of one segment : one beat (scaled by div_len_factor) minus 0.01
    div_len = div_len_factor*60/bpm-0.01
    n_iter = n_iter_2
    song_len = get_songlen(filename)
    if(n_iter == -1):
        # NOTE(review): offset was already divided by 1000 above and is divided again here - confirm
        n_iter = int((song_len-offset/1000)/div_len)-1
    filtered_name = f"{filename}_trimmed.wav"
    # writes the frequency-filtered excerpt to filtered_name
    void_freq(filename, offset, min(song_len, offset+div_len*(n_iter+1)+0.01), 4*60/bpm, minfreq=0, maxfreq=220, upperthr=5000, ampthr=60, ampfreq = 1200, ampval = 5.0, leniency = 0.005, write=True, linear=False, output_file=filtered_name)
    datares = filter_n_percent_serial(filtered_name, offset, n_iter, div_len, threshold) 
    #snapped_data = amplitude
    #times in ms
    # debug.snap3 comes from the project's debug module; its behaviour is not visible here
    (snapped_data, times) = debug.snap3(datares, mintime=50, initial_plot=True, after_plot=True)
    #frequencies=get_freq(filtered_name, offset, div_len, div_len*n_iter, snapped_data, True)
    frequencies = get_freq(filtered_name, times, display=True)
    # removes the intermediate file (same path as filtered_name)
    Path(f"{filename}_trimmed.wav").unlink()
    return snapped_data, times, frequencies
    '''
    datares = debug.snap2(datares, 44100, bpm, first_offset=offset, div=divisor, show=True, adjust=True)
    frequencies = get_freq(filtered_name, offset, div_len, div_len*n_iter, datares, True)
    Path(f"{filename}_trimmed.wav").unlink()
    return convert_tuple(datares, frequencies)
    '''
 def main():
    """
    Script entry point : processes "tetris_4.wav" at 160 bpm over 48 iterations,
    then prints a completion message. The values returned by process_song are not used.
    """
    process_song("tetris_4.wav", 160, n_iter_2=48)
    #print(data)
    print("Program finished with return 0")
 if __name__ == "__main__":
    main()