# osutipe/cleaned_sp.py

from math import *
import numpy as np
import scipy as scp
from scipy.io import wavfile
import matplotlib.pyplot as plt
import subprocess
import heapq
from pathlib import Path
from time import sleep
import datetime

def is_data_stereo(raw_global_data:list) -> bool:
    """
    Tell whether the audio samples carry more than one channel.

    The data is considered multi-channel when its first sample can itself be
    indexed (a row of per-channel values), which holds for nested lists and
    for 2-D arrays alike.

    raw_global_data: sequence of samples (list or array).
    Returns: True if the first sample is a row of channel values, False for
    one-dimensional or empty data.
    """
    try:
        # Probe only: the value is irrelevant, what matters is whether the
        # double indexing is possible. No `assert` here, since assertions are
        # stripped when Python runs with -O.
        raw_global_data[0][0]
    except (IndexError, TypeError):
        # IndexError: empty data, or a scalar sample that rejects indexing.
        # TypeError: plain Python numbers are not subscriptable.
        return False
    return True

def dist_to_integer(x):
    """
    Return the distance between x and the nearest integer (a value in [0, 0.5]).
    """
    frac = x - np.floor(x)
    if frac < 0.5:
        return frac
    return 1 - frac

def is_note_within(fr1, fr2, note_dist=None, octave_dist=None):
    """
    Tell whether two frequencies can be treated as the same note.

    They match when their ratio (larger over smaller) is at most `note_dist`
    (same tone), or lies within `octave_dist` of a whole number (octave or
    other integer multiple).

    fr1, fr2: frequencies to compare; the order does not matter.
    note_dist: largest ratio still counted as the same tone; defaults to the
        module-level NOTE_DIST.
    octave_dist: tolerance around integer ratios; defaults to the module-level
        OCTAVE_DIST.
    Returns: True if the two frequencies match, False otherwise. A frequency
    that is not strictly positive never matches.
    """
    if note_dist is None:
        note_dist = NOTE_DIST
    if octave_dist is None:
        octave_dist = OCTAVE_DIST

    low, high = (fr2, fr1) if fr1 > fr2 else (fr1, fr2)
    # Written as "not >" so that NaN is rejected together with 0 and negatives,
    # which also keeps the division below safe.
    if not low > 0:
        return False

    ratio = high/low
    return bool(ratio <= note_dist or dist_to_integer(ratio) <= octave_dist)

def _read_mono(song_name):
    """Read a WAV file and return (sample_rate, samples) with float64 mono samples."""
    sample_rate, raw = wavfile.read(song_name)
    raw = np.asarray(raw)
    if raw.ndim < 2:
        # Floats avoid integer overflow when the samples are rescaled later.
        return sample_rate, raw.astype(float)

    print("Converting to mono...")
    if raw.shape[1] < 2:
        return sample_rate, raw[:, 0].astype(float)
    # Only the first two channels are mixed.
    return sample_rate, (raw[:, 0]/2 + raw[:, 1]/2).astype(float)

def _amplitude_envelope(song_data, sample_rate, offset, songlen, segsize):
    """Keep the loudest samples of each 4*segsize window, rescaled so the maximum is 1000."""
    n_out = max(0, int(sample_rate*songlen))
    envelope = np.zeros(n_out)

    window_len = segsize*4
    kept_per_window = int(sample_rate*window_len)//1000
    first = int(sample_rate*offset)
    # Never read past the analysed excerpt nor past the end of the file.
    last = min(len(song_data), int(sample_rate*(offset + songlen)))

    elapsed = 0
    while elapsed < songlen - segsize:
        left = int(sample_rate*(elapsed + offset))
        right = min(int(sample_rate*(elapsed + offset + window_len)), last)
        elapsed += window_len
        if right <= left:
            continue

        window = song_data[left:right].tolist()
        # nlargest yields (position in window, value) pairs; only positive
        # peaks are considered, negative half-waves are ignored.
        for rel, value in heapq.nlargest(kept_per_window, enumerate(window), key=lambda item: item[1]):
            idx = rel + left - first
            if 0 <= idx < n_out:
                envelope[idx] = value

    peak = float(envelope.max()) if n_out else 0.0
    if peak > 0:
        envelope = envelope*1000/peak
    else:
        # Silent (or purely negative) excerpt: nothing to detect.
        envelope[:] = 0.0
    return envelope

def _locate_notes(envelope, sample_rate, offset, segsize):
    """Group loud envelope samples into notes and return (timings, amplitudes)."""
    release = 0.035          # seconds of quiet that end a note
    step = 1/sample_rate

    timings = []
    amplitudes = []
    in_note = False
    first = 0
    last = 0
    loudest = 0
    quiet = 0
    previous_end = -10.0

    for i, level in enumerate(envelope.tolist()):
        if level > 100:
            if not in_note:
                in_note = True
                first = i
            last = i
            loudest = max(loudest, level)
            quiet = 0
            continue

        quiet += step
        if not (in_note and quiet >= release):
            continue

        in_note = False
        duration = (last - first)/sample_rate
        if abs(first/sample_rate - previous_end) >= 0.01:   # skip notes less than 10ms apart
            previous_end = last/sample_rate
            amplitudes.append(loudest)
            if duration < segsize*1.1:
                # circle: a single float
                timings.append((first + last)/(2*sample_rate) + offset)
            else:
                # slider: [start, end]
                timings.append([first/sample_rate + offset, last/sample_rate + offset])
        loudest = 0

    # NOTE(review): a note still open when the excerpt ends is dropped, as before.
    return timings, amplitudes

def _dominant_frequency(song_data, sample_rate, center, width, minfreq, maxfreq):
    """Return the strongest frequency strictly inside (minfreq, maxfreq) around `center`, or 0.0."""
    left = max(0, int((center - width/2)*sample_rate))
    right = min(int((center + width/2)*sample_rate), len(song_data))
    if right <= left:
        return 0.0

    spectrum = np.abs(scp.fft.rfft(song_data[left:right]))
    # The bin frequencies depend on the length of THIS window.
    freqs = scp.fft.rfftfreq(right - left, 1/sample_rate)

    in_band = (freqs > minfreq) & (freqs < maxfreq)
    in_band[0] = False      # never report the DC bin
    if not in_band.any():
        return 0.0

    candidates = np.where(in_band, spectrum, 0.0)
    best = int(np.argmax(candidates))
    return float(freqs[best]) if candidates[best] > 0 else 0.0

def _note_bounds(timing):
    """Return (start, end) of a note; both are equal for a circle."""
    if isinstance(timing, (list, tuple)):
        return timing[0], timing[-1]
    return timing, timing

def _merge_close_notes(loct, locs, locf, segsize):
    """Merge, in place, neighbouring notes that are close in time, amplitude and pitch."""
    k = 1
    while k < len(locs):
        prev_start, prev_end = _note_bounds(loct[k-1])
        start, end = _note_bounds(loct[k])
        gap = (start + end)/2 - (prev_start + prev_end)/2

        if (abs(gap) < segsize
                and abs(locs[k] - locs[k-1]) < 50
                # a frequency of 0.0 means "none found": never merge on it
                and locf[k] > 0 and locf[k-1] > 0
                and is_note_within(locf[k], locf[k-1])):
            # Always a flat [start, end] pair, even when one side was a slider.
            loct[k-1] = [prev_start, end]
            locs[k-1] = (locs[k-1] + locs[k])/2
            del loct[k], locs[k], locf[k]
        # NOTE(review): after a merge the element that slides into position k
        # is not compared with the merged note (historical behaviour, kept).
        k += 1

def keep_highest(song_name, offset, songlen, segsize, count, output_name, minfreq=110, maxfreq=5000, ampthr=250, canPlot=True, writeO = True):
    """
    Detect hit objects (circles and sliders) in an excerpt of a WAV file.

    INPUT :
        * song_name : path of the WAV file to analyse
        * offset : start of the excerpt, in seconds
        * songlen : length of the excerpt, in seconds
        * segsize : duration of one segment, in seconds; notes lasting at
          least 1.1*segsize become sliders, shorter ones become circles
        * count, output_name, ampthr : accepted for backward compatibility
          only, they have no effect on the result (the detection threshold is
          fixed at 100 on the 0..1000 normalised amplitude scale)
        * minfreq, maxfreq : only frequencies strictly inside this band are
          considered when estimating the pitch used to merge notes
        * canPlot : show a matplotlib figure of the detection
        * writeO : write the timings to "result_bad_apple[90].txt"
    OUTPUT : a couple (timings, amplitudes)
        * timings : floats (circles) and [start, end] lists of floats
          (sliders), in seconds from the beginning of the file
        * amplitudes : one normalised amplitude (0..1000) per timing
    RAISES : ValueError if segsize is not positive or offset is negative
    """
    if segsize <= 0:
        raise ValueError("segsize must be a positive duration in seconds")
    if offset < 0:
        raise ValueError("offset must be non-negative")

    sample_rate, song_data = _read_mono(song_name)

    print("\nSampleRate : ", sample_rate)
    print("SegSize : ", int(sample_rate*segsize))

    # -------------------------------------------- Get amp distribution -------------------------------------------- #
    envelope = _amplitude_envelope(song_data, sample_rate, offset, songlen, segsize)

    # -------------------------------------------- Localize peaks -------------------------------------------- #
    loct, locs = _locate_notes(envelope, sample_rate, offset, segsize)

    # -------------------------------------------- Compute freqs -------------------------------------------- #
    min_width = segsize/3
    locf = []
    for timing in loct:
        start, end = _note_bounds(timing)
        width = max((end - start)/2, min_width)
        locf.append(_dominant_frequency(song_data, sample_rate, (start + end)/2, width, minfreq, maxfreq))

    # -------------------------------------------- Merge -------------------------------------------- #
    _merge_close_notes(loct, locs, locf, segsize)

    # -------------------------------------------- Plot -------------------------------------------- #
    if canPlot:
        onset_t = []
        circle_t = []
        circle_a = []
        slider_t = []
        slider_a = []
        for timing, amplitude in zip(loct, locs):
            if isinstance(timing, list):
                onset_t.append(timing[0])
                slider_t.extend((timing[0], timing[1]))
                slider_a.extend((amplitude, amplitude))
            else:
                onset_t.append(timing)
                circle_t.append(timing)
                circle_a.append(amplitude)

        envelope_t = offset + np.arange(len(envelope))/sample_rate
        plt.plot(envelope_t, envelope, "y-", label="amplitude (ua)")
        plt.plot(circle_t, circle_a, "ro", label="circles")
        plt.plot(slider_t, slider_a, "go", label="sliders")
        plt.plot(onset_t, locf, "mo", label="frequencies (Hz)")
        plt.legend(loc="upper left")
        plt.grid()
        plt.show()

    # -------------------------------------------- Write -------------------------------------------- #
    if writeO:
        # The context manager closes the file even if a write fails.
        with open("result_bad_apple[90].txt", "w") as f:
            f.write(f"Song name : {song_name}\n")
            f.write("Start : " + str(offset) + "\n")
            f.write("End   : " + str(offset+songlen) + "\n\n")

            f.write("Hit Objects : \n")
            for ct in loct:
                f.write(str(ct))
                f.write("\n")

    return (loct, locs)

def convert_to_wav(song_name:str, output_file="audio.wav") -> str:
    """
    Converts the song to .wav, only if it's not already in wave format.
    Currently relies on the file extension (compared case-insensitively):
    only .mp3 and .ogg files are converted, anything else is returned as is.

    song_name: path of the input audio file.
    output_file: path of the .wav file written by ffmpeg (overwritten if it
        already exists).
    Returns: the song_name that should be used afterwards.
    Raises: subprocess.CalledProcessError if ffmpeg exits with an error,
        FileNotFoundError if the ffmpeg executable cannot be found.
    """
    extension = Path(song_name).suffix.lower()
    if extension in (".mp3", ".ogg"):
        print("Converting to .wav...")
        # check=True: without it a failed conversion would go unnoticed and a
        # stale output_file left over from a previous run could be analysed.
        subprocess.run(["ffmpeg", "-y", "-i", song_name, output_file], shell=False, check=True)
        return output_file
    return song_name

# ----------------------------------------------------------------------------
# Song presets.
# Every triple-quoted block below is a preset kept as a bare string, so it is
# never executed. The only preset left outside of a string is the active one;
# it assigns SONG_LEN, OFFSET, BPM, SEGSIZE and wavved_song.
# NOTE: all of this runs at import time, including the ffmpeg conversion done
# by convert_to_wav() and the analysis started further down.
# ----------------------------------------------------------------------------
'''
# c-type
SONG_LEN = 7
OFFSET = 0.042
BPM = 149.3
SEGSIZE = 1/(BPM/60)
wavved_song = convert_to_wav("songs/ctype.mp3")
'''
'''
# tetris_2
SONG_LEN = 14
OFFSET = 0
BPM = 157
SEGSIZE = 1/(BPM/60)
wavved_song = convert_to_wav("songs/tetris_2.wav")
'''
'''
# test
SONG_LEN = 1
OFFSET = 0
BPM = 240
SEGSIZE = 1/(BPM/60)
'''
'''
# gmtn
SONG_LEN = 5
OFFSET = 1.652
BPM = 155
SEGSIZE = 1/(BPM/60)
wavved_song = convert_to_wav("songs/furioso melodia.mp3")
'''
'''
# E
SONG_LEN = 15
OFFSET = 2.641
BPM = 155
SEGSIZE = 1/(BPM/60)
wavved_song = convert_to_wav("songs/rushe.mp3")
'''
'''
# Tsubaki
SONG_LEN = 20
OFFSET = 35.659
BPM = 199
SEGSIZE = 1/(BPM/60)
wavved_song = convert_to_wav("songs/TSUBAKI.mp3")
'''
'''
# Owen 1/2
SONG_LEN = 20
OFFSET = 1.008
BPM = 157
SEGSIZE = 1/(BPM/60)
wavved_song = convert_to_wav("songs/owen(157.00024-1008).mp3")
'''
'''
# Owen 2/2
SONG_LEN = 7
OFFSET = 25.466
BPM = 157
SEGSIZE = 1/(BPM/60)
wavved_song = convert_to_wav("songs/owen(157.00024-1008).mp3")
'''

# death (active preset)
# SONG_LEN and OFFSET are handed to keep_highest() below as songlen and offset.
# SEGSIZE = 1/(BPM/60) = 60/BPM.
SONG_LEN = 10
OFFSET = 21.750
BPM = 180
SEGSIZE = 1/(BPM/60)
wavved_song = convert_to_wav("songs/Night of Knights.mp3")

'''
# Bad apple
SONG_LEN = 15
OFFSET = 0.152
BPM = 138
SEGSIZE = 1/(BPM/60)
#wavved_song = convert_to_wav("songs/Bad apple (138-152).mp3")
wavved_song = convert_to_wav("songs/Bad apple (138-152)[filtered].wav")
'''
'''
# Freedom dive
SONG_LEN = 7
OFFSET = 1.058
BPM = 222.22
SEGSIZE = 1/(BPM/60)
#wavved_song = convert_to_wav("songs/Freedom Dive (222.22-1058).mp3")
wavved_song = convert_to_wav("songs/Freedom Dive (222.22-1058)[filtered].wav")
'''
'''
# Mevalogania
SONG_LEN = 7
OFFSET = 7.984
BPM = 240
SEGSIZE = 1/(BPM/60)
#wavved_song = convresult_bad_appleert_to_wav("songs/Megalovania(240-7984).mp3")
wavved_song = convert_to_wav("songs/Megalovania(240-7984)[filtered].wav")
'''
# Template for a new preset (disabled, like the others). It does not set
# SEGSIZE itself; the assignment in the "Do not touch" section covers that.
'''
SONG_LEN = 0 # length of the song, in seconds
OFFSET = 0 # offset of the 1st note (aka time offset of the first red bar), in seconds
BPM = 0 # BPM
wavved_song = convert_to_wav("insert_song_name_here.wav")
'''
# Do not touch
# Constants shared by every preset. NOTE_DIST and OCTAVE_DIST are read as
# globals by is_note_within().
DIVIDER = 4 # note divider
SEGSIZE = 1/(BPM/60)      # recomputed from BPM, whatever the preset assigned
NOTE_DIST = (2**(1/4))    # frequency ratio of a quarter of an octave
OCTAVE_DIST = 0.05

# keep_highest(song_name, offset, songlen, segsize, count, output_name, minfreq=110, maxfreq=5000, ampthr=250):
# Runs the analysis with segments of SEGSIZE/DIVIDER seconds. canPlot and
# writeO keep their defaults (True), so this call opens a matplotlib window
# and writes the result file. NOTE(review): keep_highest accepts output_name
# ("Zblit.wav") and ampthr but never reads them.
(loct, locs) = keep_highest(wavved_song, OFFSET, SONG_LEN, SEGSIZE/DIVIDER, 1, "Zblit.wav", minfreq=220, maxfreq=3000, ampthr=800)

'''
minfreq and maxfreq are thresholds for frequency analysis (anything outside of [minfreq, maxfreq] will not be accounted for)
ampthr is a threshold for amplitude (arbitrary unit)
'''

''' optional: replays the timing points in the terminal; the snippet below is currently disabled, remove its surrounding triple quotes to enable it '''
'''
import time
import random
loct2 = []

for k in loct:
    if(type(k) == float):
        loct2.append(k)
    else:
        loct2.append(k[0])
        loct2.append(k[1])

for i in range(len(loct2)-1):
    print("*"*(random.randint(10, 100)))
    time.sleep(loct2[i+1]-loct2[i])

print("yipee")
'''
# complexity test
# Only (re)creates an empty "complexity.txt": the benchmark that would use the
# handle is disabled below. The context manager closes the handle right away
# instead of leaving it open until the interpreter exits.
with open("complexity.txt", "w") as fl:
    pass

# f.write("Song name : " + song_name + "\n")
# Disabled benchmark: times keep_highest() on excerpts of growing length and
# plots the elapsed time. Remove the surrounding triple quotes to run it.
'''
deltat = []
compl = []
for end in range(2,120):
    st = datetime.datetime.now()
    (e, ee) = keep_highest(wavved_song, OFFSET, OFFSET+end/2, SEGSIZE/DIVIDER, 1, "Zblit.wav", minfreq=220, maxfreq=3000, ampthr=800, canPlot=False,writeO=False)
    et = datetime.datetime.now()
    dt = (et - st).total_seconds()*1000000
    if(dt>0):
        deltat.append(end/2)
        compl.append(dt)

plt.plot(deltat, compl, "y-")
plt.plot(deltat, compl, "ro")
plt.xlabel("size of the song")
plt.ylabel("time complexity (us)")

plt.grid()
plt.show()

fl.close()
'''