stats changes

This commit is contained in:
Alexandre 2024-12-12 21:43:22 +01:00
parent 421cddf267
commit 7615d41c01
4 changed files with 311 additions and 260 deletions

BIN
Zblit.wav

Binary file not shown.

BIN
audio.wav

Binary file not shown.

View File

@ -20,6 +20,317 @@ def is_data_stereo(raw_global_data:list) -> bool:
return True
return True
def is_note_within(fr1, fr2):
    """
    Tells whether two frequencies are close enough to count as the same note.
    The larger frequency is divided by the smaller one and the ratio is
    compared with the module-level NOTE_DIST.
    A frequency of 0 has no defined ratio: it only matches another 0.
    Returns: True if the ratio is at most NOTE_DIST, False otherwise.
    """
    if(fr1 > fr2):
        high, low = fr1, fr2
    else:
        high, low = fr2, fr1
    if(low == 0):
        # avoids ZeroDivisionError (plain numbers) and nan/inf results (numpy scalars)
        return bool(high == 0)
    return bool(high/low <= NOTE_DIST)
def keep_highest(song_name, offset, songlen, segsize, count, output_name, minfreq=110, maxfreq=5000, ampthr=250):
    """
    Locates note changes in an excerpt of a .wav file and writes the excerpt
    back out with a marker tone added on every segment where a note starts.

    The excerpt starting at `offset` is cut into consecutive segments of
    `segsize` seconds. Each segment goes through a real FFT, the bins outside
    [minfreq, maxfreq] are zeroed, and the `count` strongest remaining bins
    (ranked by magnitude) are kept as the peaks of that segment. A peak starts
    a new note when its magnitude exceeds `ampthr` and its frequency is not
    within NOTE_DIST (see is_note_within) of the last started note; before any
    note has started, the reference is the very first peak.

    Side effects: prints progress, shows a blocking matplotlib figure (peak
    frequencies, peak magnitudes scaled to 1000, note starts in red) and
    writes `output_name` as 16-bit samples at the input sample rate. The
    output is rebuilt from the unfiltered spectra plus the marker tone.

    song_name: path of the .wav file to read.
    offset: start of the excerpt, in seconds.
    songlen: length of the excerpt, in seconds.
    segsize: length of one segment, in seconds.
    count: number of peaks kept per segment.
    output_name: path of the .wav file to write.
    minfreq, maxfreq: frequency band (Hz) in which peaks are searched.
    ampthr: minimal FFT magnitude for a peak to start a note.
    Raises: ValueError if a segment is shorter than one sample, if the marker
    frequency does not fit in the spectrum, or if the excerpt does not hold
    a single complete segment.
    """
    # extracting data from cropped song
    sample_rate, raw_song_data = wavfile.read(song_name)
    blit = int(sample_rate*segsize)  # number of samples in one segment
    if(blit < 1):
        raise ValueError("segsize is shorter than one sample")

    if(is_data_stereo(raw_song_data)):
        print("Converting to mono...")
        id_start = max(0, int(offset*sample_rate))
        id_end = min(len(raw_song_data), int((offset+songlen)*sample_rate))
        # only the excerpt is converted, the rest stays silent
        song_data = np.zeros(len(raw_song_data))
        if(id_end > id_start):
            excerpt = np.asarray(raw_song_data[id_start:id_end], dtype=float)
            song_data[id_start:id_end] = excerpt[:, 0]/2 + excerpt[:, 1]/2
    else:
        song_data = raw_song_data

    print("\nSampleRate : ", sample_rate)
    print("SegSize : ", blit)

    # calculate the frequencies associated to the FFTs
    pfreq = scp.fft.rfftfreq(blit, 1/sample_rate)

    # the marker tone uses the first bin strictly above write_freq and its two neighbours
    write_freq = 880
    write_id = int(np.searchsorted(pfreq, write_freq, side="right"))
    if(write_id + 1 >= len(pfreq)):
        raise ValueError("the spectrum is too narrow to hold the 880 Hz marker")

    # list of FFTs (filtered in place later) and pristine copies used for the output
    fft_list = []
    fft_list_untouched = []

    print("Retrieving freqs from", offset, "to", songlen+offset, "...")
    # number of segments so far; the time is recomputed from it
    # just to avoid what causes 0.1 + 0.1 == 0.2 to be False
    k = 0
    current_time = offset
    while(current_time < songlen+offset-segsize):
        left_id = int(current_time*sample_rate)
        segment = song_data[left_id:left_id+blit]
        if(len(segment) < blit):
            # the excerpt runs past the end of the recording
            break
        pff = scp.fft.rfft(segment)
        fft_list.append(pff)
        fft_list_untouched.append(pff.copy())
        k += 1
        current_time = offset + k*segsize

    if(not fft_list):
        raise ValueError("the excerpt does not contain a complete segment")

    print("\n\nSegSize :", segsize, "\nFFT :", len(fft_list), "\nFFT[0] :", len(fft_list[0]), "\npfreq :", len(pfreq), "\n\n")

    # -------------------------------------------- Clean song -------------------------------------------- #
    # pfreq is sorted, so the band limits are two cut indices
    low_cut = int(np.searchsorted(pfreq, minfreq, side="left"))    # bins [0, low_cut) are < minfreq
    high_cut = int(np.searchsorted(pfreq, maxfreq, side="right"))  # bins [high_cut, end) are > maxfreq
    for spectrum in fft_list:
        spectrum[:low_cut] = 0
        spectrum[high_cut:] = 0

    new_times = []
    new_freqs = []
    new_ampls = []   # magnitudes of the peaks
    new_frames = []  # segment index of each peak
    for i, spectrum in enumerate(fft_list):
        magnitudes = np.abs(spectrum)
        # rank by magnitude: complex values have no meaningful ordering
        for j in heapq.nlargest(count, range(len(magnitudes)), key=magnitudes.__getitem__):
            new_times.append(offset + i*segsize)
            new_freqs.append(pfreq[j])
            new_ampls.append(magnitudes[j])
            new_frames.append(i)

    # -------------------------------------------- Localize -------------------------------------------- #
    # segments are remembered by index rather than by time, so that no
    # floating-point comparison is needed to find them again when writing
    marked_frames = set()
    new_kept = []
    last_freq = new_freqs[0] if new_freqs else None
    for i, freq in enumerate(new_freqs):
        if(new_ampls[i] > ampthr and (i == 0 or not is_note_within(freq, last_freq))):
            last_freq = freq
            marked_frames.add(new_frames[i])
            new_kept.append(freq)
        else:
            new_kept.append(0)

    mx = max(new_ampls, default=0)
    scale = 1000/mx if mx > 0 else 0
    plt.plot(new_times, new_freqs)
    plt.plot(new_times, [amp*scale for amp in new_ampls])
    plt.plot(new_times, new_kept, "ro")
    plt.grid()
    plt.show()

    # -------------------------------------------- Write -------------------------------------------- #
    for i in sorted(marked_frames):
        # the marker is at least twice as strong as the dominant filtered bin
        maxamp = np.max(np.abs(fft_list[i]))
        fft_list_untouched[i][write_id-1:write_id+2] = max(maxamp*2, 32767)

    print("Converting...")
    res = np.concatenate([scp.fft.irfft(spectrum, n=blit) for spectrum in fft_list_untouched])
    # normalise on the largest absolute sample so negative peaks cannot overflow int16
    peak = np.max(np.abs(res))
    if(peak > 0):
        res = 32767*res/peak
    wavfile.write(output_name, sample_rate, res.astype(np.int16))
def convert_to_wav(song_name:str, output_file="audio.wav") -> str:
    """
    Converts the song to .wav, only if it's not already in wave format.
    Currently relies on file extension (compared case-insensitively);
    only .mp3 and .ogg files are handed to ffmpeg, anything else is
    returned untouched.
    Returns: the song_name that should be used afterwards.
    Raises: subprocess.CalledProcessError if ffmpeg reports a failure, so that
    a leftover output_file from a previous run is never mistaken for the result.
    """
    extension = Path(song_name).suffix.lower()
    if extension not in (".mp3", ".ogg"):
        return song_name
    print("Converting to .wav...")
    # -y lets ffmpeg overwrite output_file without asking
    subprocess.run(["ffmpeg", "-y", "-i", song_name, output_file], shell=False, check=True)
    return output_file
def retrieve_all_from_song(filename, t0, t1, bpm, dta=0.001, dtf=0.01, threshold=0.06, show=True):
    """
    Extracts the dominant frequencies and the amplitudes of a song between
    t0 and t1, rescales the amplitudes so that their maximum is 2000 and
    optionally plots both curves. Nothing is returned.

    filename: song to analyse, converted with convert_to_wav first.
    t0, t1: bounds forwarded to the two retrieval helpers; t1 must be > t0.
    bpm: tempo; 60/bpm is forwarded to retrieve_dominant_amps.
    dta: sample interval used for the amplitudes.
    dtf: sample interval used for the frequencies.
    threshold: forwarded to retrieve_dominant_amps (in percent, per the original note).
    show: when True, plots amplitudes and frequencies against time.
    Raises: SystemExit(1) after printing an error if t1 <= t0.
    """
    if(t1 <= t0):
        print("ERROR : t1 <= t0\n")
        # same outcome as exit(1), without depending on the site module
        raise SystemExit(1)

    # converts format to .wav
    new_fn = convert_to_wav(filename)

    print("Filtering song...")
    print("Now retrieving the frequencies")
    (maxlist, maxamps) = retrieve_dominant_freqs(new_fn, t0, t1, dtf)

    print("Now retrieving the amplitudes")
    # (4/(bpm/60))/4 simplifies to 60/bpm
    # NOTE(review): presumably the duration of one beat in seconds - confirm against retrieve_dominant_amps
    amps = retrieve_dominant_amps(new_fn, t0, t1, dta, threshold, (4/(bpm/60))/4)

    print("Len of freqs : ", len(maxlist), "|", len(maxamps))
    print("Len of amps : ", len(maxlist), "|", len(amps))

    # rescale in place; skipped when there is nothing to scale or the peak is 0
    maxa = max(amps) if len(amps) > 0 else 0
    if(maxa != 0):
        for i in range(len(amps)):
            amps[i] = (amps[i] * 2000) / maxa

    if(show):
        timesF = [t0 + dtf*k for k in range(len(maxlist))]
        timesA = [t0 + dta*k for k in range(len(amps))]
        plt.plot(timesA, amps)
        plt.plot(timesF, maxlist)
        plt.show()
# ---------------------------------------------------------------------------
# Song presets and driver.
# Each preset sets SONG_LEN and OFFSET (passed below as songlen and offset of
# keep_highest), BPM, and SEGSIZE = 1/(BPM/60), i.e. 60/BPM.
# A preset wrapped in triple quotes is only a string literal that is evaluated
# and thrown away: the single unquoted preset is the active one.
# ---------------------------------------------------------------------------
'''
# c-type
SONG_LEN = 8
OFFSET = 0.042
BPM = 149.3
SEGSIZE = 1/(BPM/60)
wavved_song = convert_to_wav("songs/ctype.mp3")
'''
'''
# tetris_2
SONG_LEN = 8
OFFSET = 0
BPM = 157
SEGSIZE = 1/(BPM/60)
'''
'''
# test
SONG_LEN = 1
OFFSET = 0
BPM = 240
SEGSIZE = 1/(BPM/60)
'''
# gmtn (active preset)
SONG_LEN = 3
OFFSET = 1.652
BPM = 155
SEGSIZE = 1/(BPM/60)
# runs at import time: converts the .mp3 through ffmpeg and yields "audio.wav"
wavved_song = convert_to_wav("songs/furioso melodia.mp3")
'''
# E
SONG_LEN = 10
OFFSET = 2.641
BPM = 155
SEGSIZE = 1/(BPM/60)
wavved_song = convert_to_wav("songs/rushe.mp3")
'''
'''
# Tsubaki
SONG_LEN = 10
OFFSET = 35.659
BPM = 199
SEGSIZE = 1/(BPM/60)
wavved_song = convert_to_wav("songs/TSUBAKI.mp3")
'''
'''
# death
SONG_LEN = 8
OFFSET = 21.750
BPM = 180
SEGSIZE = 1/(BPM/60)
wavved_song = convert_to_wav("songs/Night of Knights.mp3")
'''
#wavved_song = convert_to_wav("tetris_2.wav")
# largest frequency ratio for which is_note_within still reports "same note";
# 2**(1/4) == 2**(3/12), i.e. three equal-tempered semitones
NOTE_DIST = (2**(1/4))
# segments of a quarter of SEGSIZE, 1 peak per segment, result written to "Zblit.wav"
keep_highest(wavved_song, OFFSET, SONG_LEN, SEGSIZE/4, 1, "Zblit.wav", minfreq=300, maxfreq=3000, ampthr=500)
print("yipee")
'''
def retrieve_dominant_freqs(song_name, offset, songlen, segsize):
# returns a list with peak frequencies alongside the sample rate
# /!\ song_name is specified to be a list, NOT a list of couples (aka song is mono)
@ -324,166 +635,6 @@ def localize_frequencies(song_name, offset, songlen, segsize, output_name):
res = np.array(res)
wavfile.write(output_name, sample_rate, res)
# largest frequency ratio for which two frequencies count as the same note;
# 2**(1/12) is one equal-tempered semitone
NOTE_DIST = (2**(1/12))
def is_note_within(fr1, fr2):
    """
    Tells whether two frequencies are close enough to count as the same note.
    The larger frequency is divided by the smaller one and the ratio is
    compared with NOTE_DIST.
    A frequency of 0 has no defined ratio: it only matches another 0.
    Returns: True if the ratio is at most NOTE_DIST, False otherwise.
    """
    if(fr1 > fr2):
        high, low = fr1, fr2
    else:
        high, low = fr2, fr1
    if(low == 0):
        # avoids ZeroDivisionError (plain numbers) and nan/inf results (numpy scalars)
        return bool(high == 0)
    return bool(high/low <= NOTE_DIST)
def keep_highest(song_name, offset, songlen, segsize, count, output_name, minfreq=110, maxfreq=5000):
    """
    Locates note changes in an excerpt of a .wav file and writes the
    band-limited excerpt back out with a marker tone added on every segment
    where a note starts.

    The excerpt starting at `offset` is cut into consecutive segments of
    `segsize` seconds. Each segment goes through a real FFT, the bins outside
    [minfreq, maxfreq] are zeroed, and the `count` strongest remaining bins
    (ranked by magnitude) are kept as the peaks of that segment. A peak starts
    a new note when its frequency is not within NOTE_DIST (see
    is_note_within) of the peak just before it; the first peak always does.

    Side effects: prints progress, shows a blocking matplotlib figure (peak
    frequencies, note starts in red) and writes `output_name` as 16-bit
    samples at the input sample rate. The output is rebuilt from the
    filtered spectra plus the marker tone.

    song_name: path of the .wav file to read.
    offset: start of the excerpt, in seconds.
    songlen: length of the excerpt, in seconds.
    segsize: length of one segment, in seconds.
    count: number of peaks kept per segment.
    output_name: path of the .wav file to write.
    minfreq, maxfreq: frequency band (Hz) that is kept.
    Raises: ValueError if a segment is shorter than one sample, if the marker
    frequency does not fit in the spectrum, or if the excerpt does not hold
    a single complete segment.
    """
    # extracting data from cropped song
    sample_rate, raw_song_data = wavfile.read(song_name)
    blit = int(sample_rate*segsize)  # number of samples in one segment
    if(blit < 1):
        raise ValueError("segsize is shorter than one sample")

    if(is_data_stereo(raw_song_data)):
        print("Converting to mono...")
        id_start = max(0, int(offset*sample_rate))
        id_end = min(len(raw_song_data), int((offset+songlen)*sample_rate))
        # only the excerpt is converted, the rest stays silent
        song_data = np.zeros(len(raw_song_data))
        if(id_end > id_start):
            excerpt = np.asarray(raw_song_data[id_start:id_end], dtype=float)
            song_data[id_start:id_end] = excerpt[:, 0]/2 + excerpt[:, 1]/2
    else:
        song_data = raw_song_data

    print("\nSampleRate : ", sample_rate)
    print("SegSize : ", blit)

    # calculate the frequencies associated to the FFTs
    pfreq = scp.fft.rfftfreq(blit, 1/sample_rate)

    # the marker tone uses the first bin strictly above write_freq and its two neighbours
    write_freq = 880
    write_id = int(np.searchsorted(pfreq, write_freq, side="right"))
    if(write_id + 1 >= len(pfreq)):
        raise ValueError("the spectrum is too narrow to hold the 880 Hz marker")

    # list of FFTs
    fft_list = []

    print("Retrieving freqs from", offset, "to", songlen+offset, "...")
    # number of segments so far; the time is recomputed from it
    # just to avoid what causes 0.1 + 0.1 == 0.2 to be False
    k = 0
    current_time = offset
    while(current_time < songlen+offset-segsize):
        left_id = int(current_time*sample_rate)
        segment = song_data[left_id:left_id+blit]
        if(len(segment) < blit):
            # the excerpt runs past the end of the recording
            break
        fft_list.append(scp.fft.rfft(segment))
        k += 1
        current_time = offset + k*segsize

    if(not fft_list):
        raise ValueError("the excerpt does not contain a complete segment")

    print("\n\nSegSize :", segsize, "\nFFT :", len(fft_list), "\nFFT[0] :", len(fft_list[0]), "\npfreq :", len(pfreq), "\n\n")

    # -------------------------------------------- Clean song -------------------------------------------- #
    # pfreq is sorted, so the band limits are two cut indices
    low_cut = int(np.searchsorted(pfreq, minfreq, side="left"))    # bins [0, low_cut) are < minfreq
    high_cut = int(np.searchsorted(pfreq, maxfreq, side="right"))  # bins [high_cut, end) are > maxfreq
    for spectrum in fft_list:
        spectrum[:low_cut] = 0
        spectrum[high_cut:] = 0

    new_times = []
    new_freqs = []
    new_frames = []  # segment index of each peak
    for i, spectrum in enumerate(fft_list):
        magnitudes = np.abs(spectrum)
        # rank by magnitude: complex values have no meaningful ordering
        for j in heapq.nlargest(count, range(len(magnitudes)), key=magnitudes.__getitem__):
            new_times.append(offset + i*segsize)
            new_freqs.append(pfreq[j])
            new_frames.append(i)

    # -------------------------------------------- Localize -------------------------------------------- #
    # segments are remembered by index rather than by time, so that no
    # floating-point comparison is needed to find them again when writing
    marked_frames = set()
    new_kept = []
    for i, freq in enumerate(new_freqs):
        if(i == 0 or not is_note_within(freq, new_freqs[i-1])):
            marked_frames.add(new_frames[i])
            new_kept.append(freq)
        else:
            new_kept.append(0)

    plt.plot(new_times, new_freqs)
    plt.plot(new_times, new_kept, "ro")
    plt.grid()
    plt.show()

    # -------------------------------------------- Write -------------------------------------------- #
    for i in sorted(marked_frames):
        # the marker is at least twice as strong as the dominant filtered bin
        maxamp = np.max(np.abs(fft_list[i]))
        fft_list[i][write_id-1:write_id+2] = max(maxamp*2, 32767)

    print("Converting...")
    res = np.concatenate([scp.fft.irfft(spectrum, n=blit) for spectrum in fft_list])
    # normalise on the largest absolute sample so negative peaks cannot overflow int16
    peak = np.max(np.abs(res))
    if(peak > 0):
        res = 32767*res/peak
    wavfile.write(output_name, sample_rate, res.astype(np.int16))
def write_result(song_name, offset, songlen, segsize, timing_pts, output_name):
# removes unnecessary frequencies/amps from a song
# ampthr is in [0, 1]
@ -751,103 +902,6 @@ def parse_after_filter(song_name, offset, songlen, segsize, dt0):
print(timing_points)
return timing_points
def convert_to_wav(song_name:str, output_file="audio.wav") -> str:
    """
    Converts the song to .wav, only if it's not already in wave format.
    Currently relies on file extension (compared case-insensitively);
    only .mp3 and .ogg files are handed to ffmpeg, anything else is
    returned untouched.
    Returns: the song_name that should be used afterwards.
    Raises: subprocess.CalledProcessError if ffmpeg reports a failure, so that
    a leftover output_file from a previous run is never mistaken for the result.
    """
    extension = Path(song_name).suffix.lower()
    if extension not in (".mp3", ".ogg"):
        return song_name
    print("Converting to .wav...")
    # -y lets ffmpeg overwrite output_file without asking
    subprocess.run(["ffmpeg", "-y", "-i", song_name, output_file], shell=False, check=True)
    return output_file
def retrieve_all_from_song(filename, t0, t1, bpm, dta=0.001, dtf=0.01, threshold=0.06, show=True):
    """
    Extracts the dominant frequencies and the amplitudes of a song between
    t0 and t1, rescales the amplitudes so that their maximum is 2000 and
    optionally plots both curves. Nothing is returned.

    filename: song to analyse, converted with convert_to_wav first.
    t0, t1: bounds forwarded to the two retrieval helpers; t1 must be > t0.
    bpm: tempo; 60/bpm is forwarded to retrieve_dominant_amps.
    dta: sample interval used for the amplitudes.
    dtf: sample interval used for the frequencies.
    threshold: forwarded to retrieve_dominant_amps (in percent, per the original note).
    show: when True, plots amplitudes and frequencies against time.
    Raises: SystemExit(1) after printing an error if t1 <= t0.
    """
    if(t1 <= t0):
        print("ERROR : t1 <= t0\n")
        # same outcome as exit(1), without depending on the site module
        raise SystemExit(1)

    # converts format to .wav
    new_fn = convert_to_wav(filename)

    print("Filtering song...")
    print("Now retrieving the frequencies")
    (maxlist, maxamps) = retrieve_dominant_freqs(new_fn, t0, t1, dtf)

    print("Now retrieving the amplitudes")
    # (4/(bpm/60))/4 simplifies to 60/bpm
    # NOTE(review): presumably the duration of one beat in seconds - confirm against retrieve_dominant_amps
    amps = retrieve_dominant_amps(new_fn, t0, t1, dta, threshold, (4/(bpm/60))/4)

    print("Len of freqs : ", len(maxlist), "|", len(maxamps))
    print("Len of amps : ", len(maxlist), "|", len(amps))

    # rescale in place; skipped when there is nothing to scale or the peak is 0
    maxa = max(amps) if len(amps) > 0 else 0
    if(maxa != 0):
        for i in range(len(amps)):
            amps[i] = (amps[i] * 2000) / maxa

    if(show):
        timesF = [t0 + dtf*k for k in range(len(maxlist))]
        timesA = [t0 + dta*k for k in range(len(amps))]
        plt.plot(timesA, amps)
        plt.plot(timesF, maxlist)
        plt.show()
# ---------------------------------------------------------------------------
# Song presets and driver.
# Each preset sets SONG_LEN and OFFSET (passed below as songlen and offset of
# keep_highest), BPM, and SEGSIZE = 1/(BPM/60), i.e. 60/BPM.
# A preset wrapped in triple quotes is only a string literal that is evaluated
# and thrown away: the single unquoted preset is the active one.
# ---------------------------------------------------------------------------
'''
# c-type
SONG_LEN = 5
OFFSET = 0.042
BPM = 149.3
SEGSIZE = 1/(BPM/60)
'''
'''
# tetris_2
SONG_LEN = 8
OFFSET = 0
BPM = 157
SEGSIZE = 1/(BPM/60)
'''
'''
# test
SONG_LEN = 1
OFFSET = 0
BPM = 240
SEGSIZE = 1/(BPM/60)
'''
'''
# gmtn
SONG_LEN = 3
OFFSET = 1.652
BPM = 155
SEGSIZE = 1/(BPM/60)
'''
# E (active preset)
SONG_LEN = 10
OFFSET = 2.641
BPM = 155
SEGSIZE = 1/(BPM/60)
#wavved_song = convert_to_wav("tetris_2.wav")
# runs at import time: converts the .mp3 through ffmpeg and yields "audio.wav"
wavved_song = convert_to_wav("songs/rushe.mp3")
# segments of a quarter of SEGSIZE, 1 peak per segment, result written to "Zblit.wav"
keep_highest(wavved_song, OFFSET, SONG_LEN, SEGSIZE/4, 1, "Zblit.wav", minfreq=300, maxfreq=3000)
'''
# remove high/low frequencies (often noise)
#void_freq_clean(wavved_song, OFFSET, SONG_LEN, SEGSIZE/8, 100, 3000, 0.05, "Zvoided_song.wav")
@ -863,9 +917,6 @@ write_result(wavved_song, OFFSET, SONG_LEN-0.1, SEGSIZE/8, tp, "Zoutput_song.wav
#retrieve_all_from_song("Zcleaned_song.wav", 0, 5, 149.3, dtf=1/(149.3/60)/8)
'''
print("yipee")

BIN
songs/TSUBAKI.mp3 Normal file

Binary file not shown.