146 lines
4.4 KiB
Python
146 lines
4.4 KiB
Python
import heapq
import os
import subprocess
import tempfile

import numpy as np
import scipy as scp
import scipy.fft
from scipy.io import wavfile
|
|
|
|
def retrieve_dominant_freqs(song_name, offset, songlen, segsize):
    """Return the strongest in-band frequency of each window of a song excerpt.

    The excerpt ``[offset, offset + songlen]`` (seconds) of ``song_name`` is
    extracted to a temporary WAV file with ffmpeg, read back, and cut into
    consecutive windows of ``segsize`` seconds. For every window the FFT bin
    with the largest magnitude inside the band ``[110 Hz, 3520 Hz)`` is kept.

    Args:
        song_name: Path of the audio file handed to ffmpeg. The track must be
            mono (the decoded samples must form a 1-D array).
        offset: Start of the excerpt, in seconds.
        songlen: Length of the excerpt, in seconds.
        segsize: Length of one analysis window, in seconds (must be > 0).

    Returns:
        A tuple ``(maxlist, maxamps, segsize)``. ``maxlist[i]`` is the
        dominant frequency (Hz) and ``maxamps[i]`` its FFT magnitude for the
        window starting at ``offset + i * segsize``. A window that holds no
        samples, or for which no FFT bin falls inside the band, is reported
        as frequency ``0`` with amplitude ``-1``.

    Raises:
        ValueError: If ``segsize`` is not positive, is shorter than one
            sample, or the decoded track is not mono.
        subprocess.CalledProcessError: If ffmpeg exits with an error.
    """
    if segsize <= 0:
        # A non-positive step would never advance the window loop below.
        raise ValueError("segsize must be a positive number of seconds")

    # Bins outside this band (too low / too high pitched) are ignored.
    minfreq = 110
    maxfreq = 440 * 8

    # Crop into a private temporary directory so that an existing "crop.wav"
    # is never clobbered and the copy is removed even when reading fails.
    with tempfile.TemporaryDirectory() as workdir:
        crop_path = os.path.join(workdir, "crop.wav")
        # "-t" is a duration, not an end position: pass songlen on its own.
        subprocess.run(
            ["ffmpeg", "-ss", str(offset), "-t", str(songlen),
             "-i", song_name, crop_path],
            shell=False,
            check=True,
        )
        sample_rate, song_data = wavfile.read(crop_path)

    if song_data.ndim != 1:
        raise ValueError("expected a mono track (1-D sample array)")

    blit = int(sample_rate * segsize)  # samples per window
    if blit < 1:
        raise ValueError("segsize is shorter than one sample")

    # Frequencies associated with the FFT bins of a blit-sample window.
    pfreq = scp.fft.rfftfreq(blit, 1 / sample_rate)
    in_band = (pfreq >= minfreq) & (pfreq < maxfreq)
    band_freqs = pfreq[in_band]

    maxlist = []
    maxamps = []

    # The cropped data already starts at `offset`, so window k begins at
    # sample k * segsize * sample_rate. Positions are recomputed from k
    # instead of accumulated to avoid floating-point drift.
    k = 0
    while k * segsize <= songlen:
        left_id = int(k * segsize * sample_rate)
        segment = song_data[left_id:left_id + blit]

        if segment.size and band_freqs.size:
            # n=blit zero-pads a short trailing window so that its bins
            # still line up with pfreq.
            amplitudes = np.abs(scp.fft.rfft(segment, n=blit))[in_band]
            peak = int(np.argmax(amplitudes))  # first maximum wins on ties
            maxlist.append(float(band_freqs[peak]))
            maxamps.append(float(amplitudes[peak]))
        else:
            maxlist.append(0)
            maxamps.append(-1)

        k += 1

    # maxlist[i] corresponds to time (offset + i*segsize)
    return (maxlist, maxamps, segsize)
|
|
|
|
def retrieve_dominant_amps(song_name, offset, songlen, segsize, percent):
    """Return, per window, the peak of the loudest ``percent`` % of samples.

    The excerpt ``[offset, offset + songlen]`` (seconds) of ``song_name`` is
    extracted to a temporary WAV file with ffmpeg and read back. Only the
    ``percent`` % largest sample values (signed values, not magnitudes) are
    kept; every other sample is treated as 0. The result is then reduced to
    one value per ``segsize``-second window: the maximum kept value in that
    window, never below 0.

    Args:
        song_name: Path of the audio file handed to ffmpeg. The track must be
            mono (the decoded samples must form a 1-D array).
        offset: Start of the excerpt, in seconds.
        songlen: Length of the excerpt, in seconds.
        segsize: Length of one window, in seconds (must be > 0).
        percent: Share of samples to keep, in percent of the excerpt length.

    Returns:
        A list ``res`` where ``res[i]`` is the peak for the window starting
        at ``offset + i * segsize``. Windows are laid out exactly as in
        ``retrieve_dominant_freqs`` so both results have the same length.

    Raises:
        ValueError: If ``segsize`` is not positive, is shorter than one
            sample, or the decoded track is not mono.
        subprocess.CalledProcessError: If ffmpeg exits with an error.
    """
    if segsize <= 0:
        # A non-positive step would never advance the window loop below.
        raise ValueError("segsize must be a positive number of seconds")

    # Crop into a private temporary directory so that an existing "crop.wav"
    # is never clobbered and the copy is removed even when reading fails.
    with tempfile.TemporaryDirectory() as workdir:
        crop_path = os.path.join(workdir, "crop.wav")
        # "-t" is a duration, not an end position: pass songlen on its own.
        subprocess.run(
            ["ffmpeg", "-ss", str(offset), "-t", str(songlen),
             "-i", song_name, crop_path],
            shell=False,
            check=True,
        )
        sample_rate, song_data = wavfile.read(crop_path)

    if song_data.ndim != 1:
        raise ValueError("expected a mono track (1-D sample array)")

    blit = int(sample_rate * segsize)  # samples per window
    if blit < 1:
        raise ValueError("segsize is shorter than one sample")

    # Number of samples to keep, clamped to what is actually available.
    total = len(song_data)
    x = int((total * percent) // 100)
    x = max(0, min(total, x))

    print("Retreiving the", int(x), "/", len(song_data), "highest values")

    # Stable descending order: among equal values the earliest sample is
    # selected first. The float64 cast avoids overflow when negating the
    # most negative integer sample.
    order = np.argsort(-song_data.astype(np.float64), kind="stable")
    top = order[:x]

    # Work on a copy: everything outside the selection is voided to 0.
    kept = np.zeros_like(song_data)
    kept[top] = song_data[top]

    # Reduce to one value per window. The cropped data already starts at
    # `offset`, so window k begins at sample k * segsize * sample_rate;
    # positions are recomputed from k to avoid floating-point drift.
    res = []
    k = 0
    while k * segsize <= songlen:
        left_id = int(k * segsize * sample_rate)
        segment = kept[left_id:left_id + blit]
        peak = segment.max().item() if segment.size else 0
        res.append(peak if peak > 0 else 0)
        k += 1

    # res[i] corresponds to time (offset + i*segsize)
    return res
|
|
|
|
# Module-level statement: runs once the definitions above have been
# evaluated, i.e. every time this file is executed or imported.
print("done")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|