added amplitude detection and tweaked some parameters
This commit is contained in:
parent
7615d41c01
commit
64a2a96628
761
cleaned_sp.py
761
cleaned_sp.py
|
@ -1,13 +1,3 @@
|
|||
from math import *
|
||||
import numpy as np
|
||||
import scipy as scp
|
||||
from scipy.io import wavfile
|
||||
import matplotlib.pyplot as plt
|
||||
import subprocess
|
||||
import heapq
|
||||
from pathlib import Path
|
||||
from time import sleep
|
||||
|
||||
def is_data_stereo(raw_global_data:list) -> bool:
|
||||
"""
|
||||
self-explanatory
|
||||
|
@ -20,11 +10,15 @@ def is_data_stereo(raw_global_data:list) -> bool:
|
|||
return True
|
||||
return True
|
||||
|
||||
def dist_to_integer(x):
    """Return the absolute distance between ``x`` and its nearest integer.

    The nearest integer is computed as ``floor(x + 0.5)``, so exact halves
    are rounded upward. Accepts scalars as well as NumPy arrays.
    """
    return np.abs(x - np.floor(x + 0.5))
|
||||
|
||||
def is_note_within(fr1, fr2):
    """Tell whether two frequencies are close, but not too close, to each other.

    The ratio of the larger frequency over the smaller one must be at most
    ``NOTE_DIST`` and must sit at least ``OCTAVE_DIST`` away from the nearest
    integer (see ``dist_to_integer``). Both thresholds are module-level
    constants.

    Args:
        fr1: first frequency (non-zero scalar).
        fr2: second frequency (non-zero scalar).

    Returns:
        A truthy value when both conditions hold, a falsy one otherwise.
    """
    # The check is symmetric in its arguments: always divide the larger
    # frequency by the smaller one so the ratio is >= 1.
    high, low = (fr1, fr2) if fr1 > fr2 else (fr2, fr1)
    ratio = high / low
    return ratio <= NOTE_DIST and dist_to_integer(ratio) >= OCTAVE_DIST
|
||||
|
||||
def keep_highest(song_name, offset, songlen, segsize, count, output_name, minfreq=110, maxfreq=5000, ampthr=250):
|
||||
# extracting data from cropped song
|
||||
|
@ -113,9 +107,83 @@ def keep_highest(song_name, offset, songlen, segsize, count, output_name, minfre
|
|||
new_freqs.append(pfreq[elements[idx][0]])
|
||||
new_ampls.append(fft_list[i][elements[idx][0]])
|
||||
|
||||
'''for i in range(len(new_freqs)):
|
||||
while(new_freqs[i]>1000):
|
||||
new_freqs[i] = new_freqs[i]/2'''
|
||||
# -------------------------------------------- Get amp distribution -------------------------------------------- #
|
||||
new_new_amps = [0 for i in range(int(sample_rate*songlen))]
|
||||
new_new_t = [offset + i/sample_rate for i in range(int(sample_rate*songlen))]
|
||||
|
||||
amp_ct = 0
|
||||
incr_a = segsize*4
|
||||
len_seg_a = int(sample_rate*incr_a)
|
||||
count_a = len_seg_a//1250
|
||||
left_0 = int(sample_rate*(amp_ct+offset))
|
||||
while(amp_ct < songlen-segsize):
|
||||
left = int(sample_rate*(amp_ct+offset))
|
||||
right = int(sample_rate*(amp_ct+offset + incr_a))
|
||||
|
||||
#returns a list of couples [id, value]
|
||||
elements = heapq.nlargest(count_a, enumerate([song_data[i] for i in range(left, right)]), key=lambda x: x[1])
|
||||
|
||||
amp_ct += incr_a
|
||||
|
||||
for idx in range(len(elements)):
|
||||
new_new_amps[elements[idx][0]+left-left_0] = song_data[left+elements[idx][0]]
|
||||
|
||||
mmxx = max(new_new_amps)
|
||||
new_new_amps = [nnw*1000/mmxx for nnw in new_new_amps]
|
||||
|
||||
# localize peaks
|
||||
left_id = 0
|
||||
right_id = 0
|
||||
a_ampl = 0
|
||||
in_seg = False
|
||||
time_d = 0.035
|
||||
cur_t = 0
|
||||
|
||||
locs = []
|
||||
loct = []
|
||||
for i in range(len(new_new_amps)):
|
||||
if(new_new_amps[i] > 100):
|
||||
if(not in_seg):
|
||||
in_seg = True
|
||||
left_id = i
|
||||
right_id = i
|
||||
a_ampl = max(a_ampl, new_new_amps[i])
|
||||
cur_t = 0
|
||||
else:
|
||||
cur_t += 1/sample_rate
|
||||
if(in_seg and cur_t >= time_d):
|
||||
in_seg = False
|
||||
locs.append(a_ampl)
|
||||
loct.append((left_id + right_id)/(2*sample_rate) + offset)
|
||||
|
||||
a_ampl = 0
|
||||
|
||||
# detect sliders
|
||||
sl_a = []
|
||||
sl_t = []
|
||||
in_slider = False
|
||||
slider_dtct = segsize
|
||||
for i in range(1, len(loct)-1):
|
||||
delta = loct[i] - loct[i-1]
|
||||
delta2 = loct[i+1] - loct[i]
|
||||
if(delta < slider_dtct and delta2 < slider_dtct):
|
||||
if(in_slider):
|
||||
sl_t.append(loct[i])
|
||||
sl_a.append(locs[i])
|
||||
else:
|
||||
in_slider = True
|
||||
sl_t.append(loct[i-1])
|
||||
sl_a.append(locs[i-1])
|
||||
sl_t.append(loct[i])
|
||||
sl_a.append(locs[i])
|
||||
else:
|
||||
in_slider = False
|
||||
|
||||
plt.plot(new_new_t, new_new_amps, "b-")
|
||||
plt.plot(loct, locs, "ro")
|
||||
plt.plot(sl_t, sl_a, "go")
|
||||
plt.grid()
|
||||
plt.show()
|
||||
|
||||
# -------------------------------------------- Localize -------------------------------------------- #
|
||||
|
||||
|
@ -136,6 +204,7 @@ def keep_highest(song_name, offset, songlen, segsize, count, output_name, minfre
|
|||
plt.plot(new_times, new_kept, "ro")
|
||||
plt.grid()
|
||||
plt.show()
|
||||
|
||||
# -------------------------------------------- Write -------------------------------------------- #
|
||||
i0 = 0
|
||||
timing_points.append(999999)
|
||||
|
@ -186,6 +255,8 @@ def keep_highest(song_name, offset, songlen, segsize, count, output_name, minfre
|
|||
res = np.array(res)
|
||||
wavfile.write(output_name, sample_rate, res)
|
||||
|
||||
return (loct, sl_t, timing_points) # amplitude result, sliders and frequency result
|
||||
|
||||
def convert_to_wav(song_name:str, output_file="audio.wav") -> str:
    """
    Converts the song to .wav, only if it's not already in wave format.

    Files whose extension is ``.mp3`` or ``.ogg`` (compared
    case-insensitively) are transcoded with ffmpeg into ``output_file``,
    overwriting it if it already exists. Any other file name is passed
    through untouched and no process is spawned.

    Args:
        song_name: path of the input audio file.
        output_file: path of the .wav file written when a conversion happens.

    Returns: the song_name that should be used afterwards.
    """
    # Lower-case the suffix so that "SONG.MP3" is converted as well.
    extension = Path(song_name).suffix.lower()
    if extension in (".mp3", ".ogg"):
        print("Converting to .wav...")
        subprocess.run(["ffmpeg", "-y", "-i", song_name, output_file], shell=False)
        return output_file
    return song_name
|
||||
|
||||
def retrieve_all_from_song(filename, t0, t1, bpm, dta=0.001, dtf=0.01, threshold=0.06, show=True):
|
||||
|
@ -242,20 +312,21 @@ def retrieve_all_from_song(filename, t0, t1, bpm, dta=0.001, dtf=0.01, threshold
|
|||
plt.show()
|
||||
|
||||
# free()
|
||||
'''
|
||||
|
||||
# c-type
|
||||
SONG_LEN = 8
|
||||
SONG_LEN = 7
|
||||
OFFSET = 0.042
|
||||
BPM = 149.3
|
||||
SEGSIZE = 1/(BPM/60)
|
||||
wavved_song = convert_to_wav("songs/ctype.mp3")
|
||||
'''
|
||||
wavved_song = convert_to_wav("ctype.mp3")
|
||||
|
||||
'''
|
||||
# tetris_2
|
||||
SONG_LEN = 8
|
||||
SONG_LEN = 10
|
||||
OFFSET = 0
|
||||
BPM = 157
|
||||
SEGSIZE = 1/(BPM/60)
|
||||
wavved_song = convert_to_wav("tetris_2.wav")
|
||||
'''
|
||||
'''
|
||||
# test
|
||||
|
@ -264,21 +335,21 @@ OFFSET = 0
|
|||
BPM = 240
|
||||
SEGSIZE = 1/(BPM/60)
|
||||
'''
|
||||
|
||||
'''
|
||||
# gmtn
|
||||
SONG_LEN = 3
|
||||
SONG_LEN = 5
|
||||
OFFSET = 1.652
|
||||
BPM = 155
|
||||
SEGSIZE = 1/(BPM/60)
|
||||
wavved_song = convert_to_wav("songs/furioso melodia.mp3")
|
||||
|
||||
wavved_song = convert_to_wav("furioso melodia.mp3")
|
||||
'''
|
||||
'''
|
||||
# E
|
||||
SONG_LEN = 10
|
||||
SONG_LEN = 30
|
||||
OFFSET = 2.641
|
||||
BPM = 155
|
||||
SEGSIZE = 1/(BPM/60)
|
||||
wavved_song = convert_to_wav("songs/rushe.mp3")
|
||||
wavved_song = convert_to_wav("rushe.mp3")
|
||||
'''
|
||||
'''
|
||||
# Tsubaki
|
||||
|
@ -286,7 +357,7 @@ SONG_LEN = 10
|
|||
OFFSET = 35.659
|
||||
BPM = 199
|
||||
SEGSIZE = 1/(BPM/60)
|
||||
wavved_song = convert_to_wav("songs/TSUBAKI.mp3")
|
||||
wavved_song = convert_to_wav("TSUBAKI.mp3")
|
||||
'''
|
||||
'''
|
||||
# death
|
||||
|
@ -300,631 +371,7 @@ wavved_song = convert_to_wav("songs/Night of Knights.mp3")
|
|||
#wavved_song = convert_to_wav("tetris_2.wav")
|
||||
|
||||
NOTE_DIST = (2**(1/4))
|
||||
OCTAVE_DIST = 0.05
|
||||
keep_highest(wavved_song, OFFSET, SONG_LEN, SEGSIZE/4, 1, "Zblit.wav", minfreq=300, maxfreq=3000, ampthr=500)
|
||||
|
||||
print("yipee")
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
'''
|
||||
|
||||
def retrieve_dominant_freqs(song_name, offset, songlen, segsize):
    """Return the dominant frequency and its amplitude for each time segment.

    The window starting at ``offset`` and lasting ``songlen`` seconds is cut
    out of ``song_name`` with ffmpeg into a temporary ``crop.wav``, downmixed
    to mono when needed, and split into consecutive segments of ``segsize``
    seconds. Each segment is transformed with a real FFT and the strongest bin
    whose frequency lies in ``[110, 2640)`` Hz is kept.

    Args:
        song_name: path of the audio file to analyse.
        offset: start of the analysed window, in seconds.
        songlen: length of the analysed window, in seconds.
        segsize: length of one segment, in seconds.

    Returns:
        ``(maxlist, maxamps)``. ``maxlist[i]`` is the dominant frequency of
        segment ``i`` (``0`` when no bin falls in the band) and ``maxamps[i]``
        its magnitude (``-1`` in that case). Entry ``i`` corresponds to time
        ``offset + i*segsize``.

    Raises:
        ValueError: if ``segsize`` is shorter than one sample.
    """
    # remove high-pitched/low-pitched frequencies
    minfreq = 110
    maxfreq = 440*6

    # cutting the song to only keep the part we're interested in;
    # "-y" keeps ffmpeg from blocking on a prompt if crop.wav is left over
    subprocess.run(["ffmpeg", "-y", "-ss", str(offset), "-t", str(songlen), "-i", song_name, "crop.wav"], shell=False)
    subprocess.run(["clear"])

    # extracting data from cropped song
    sample_rate, raw_song_data = wavfile.read("crop.wav")
    blit = int(sample_rate*segsize)  # samples per segment

    if is_data_stereo(raw_song_data):
        print("Converting to mono...")
        # average both channels (true division yields floats, so no overflow)
        song_data = raw_song_data[:, 0]/2 + raw_song_data[:, 1]/2
    else:
        song_data = raw_song_data

    print("\nSampleRate : ", sample_rate)
    print("SegSize : ", blit)

    # remove the copy of the song
    subprocess.run(["rm", "crop.wav"], shell=False)

    if blit <= 0:
        raise ValueError("segsize must span at least one sample")

    # calculate the frequencies associated to the FFTs
    pfreq = scp.fft.rfftfreq(blit, 1/sample_rate)

    fft_list = []
    k = 0
    current_time = offset

    print("Retrieving freqs from", offset, "to", songlen+offset, "...")
    print("amplitudes are from", minfreq, "to", maxfreq)
    while current_time < songlen+offset-segsize:
        # crop.wav already starts at `offset`, so sample indices are relative
        # to the crop; adding the offset again would skip audio or run past
        # the end of the data
        left_id = int(k*segsize*sample_rate)
        right_id = int((k+1)*segsize*sample_rate)
        if left_id >= len(song_data):
            break  # nothing left to analyse (rfft rejects empty input)

        fft_list.append(scp.fft.rfft(song_data[left_id:right_id]))

        # recompute from k to avoid accumulating floating point error
        k += 1
        current_time = offset + k*segsize

    first_len = len(fft_list[0]) if fft_list else 0
    print("\n\nSegSize :", segsize, "\nFFT :", len(fft_list), "\nFFT[0] :", first_len, "\npfreq :", len(pfreq), "\n\n")

    # maximum frequencies/amplitudes for all FFTs
    maxlist = []
    maxamps = []
    for spectrum in fft_list:
        # a segment may be one sample off, so only use bins that have a
        # matching entry in pfreq
        usable = min(len(spectrum), len(pfreq))
        mags = np.abs(spectrum[:usable])
        freqs = pfreq[:usable]
        in_band = np.flatnonzero((freqs >= minfreq) & (freqs < maxfreq))

        if len(in_band) > 0:
            # argmax returns the first maximum, i.e. the lowest frequency on ties
            best = in_band[np.argmax(mags[in_band])]
            maxlist.append(freqs[best])
            maxamps.append(mags[best])
        else:
            maxlist.append(0)
            maxamps.append(-1)

    # maxlist[i] corresponds to time (offset + i*segsize)
    return (maxlist, maxamps)
|
||||
|
||||
def void_freq_clean(song_name, offset, songlen, segsize, minfreq, maxfreq, ampthr, output_name):
    """Write a copy of a song window with unwanted frequencies removed.

    The window ``[offset, offset + songlen]`` of the .wav file ``song_name``
    is split into segments of ``segsize`` seconds. In the FFT of each segment,
    bins at or below ``minfreq``, at or above ``maxfreq``, or whose magnitude
    is at most ``ampthr`` times the segment's strongest bin are zeroed. The
    segments are then transformed back, concatenated, normalised to the int16
    range and written to ``output_name``.

    Args:
        song_name: path of the input .wav file.
        offset: start of the processed window, in seconds.
        songlen: length of the processed window, in seconds.
        segsize: length of one segment, in seconds.
        minfreq: lower cut-off frequency, in Hz.
        maxfreq: upper cut-off frequency, in Hz.
        ampthr: relative amplitude threshold, in [0, 1].
        output_name: path of the .wav file to write.

    Raises:
        ValueError: if ``segsize`` is shorter than one sample.
    """
    sample_rate, raw_song_data = wavfile.read(song_name)
    blit = int(sample_rate*segsize)  # samples per segment

    if is_data_stereo(raw_song_data):
        print("Converting to mono...")
        # average both channels (true division yields floats, so no overflow)
        song_data = raw_song_data[:, 0]/2 + raw_song_data[:, 1]/2
    else:
        song_data = raw_song_data

    print("\nSampleRate : ", sample_rate)
    print("SegSize : ", blit)

    if blit <= 0:
        raise ValueError("segsize must span at least one sample")

    # calculate the frequencies associated to the FFTs
    pfreq = scp.fft.rfftfreq(blit, 1/sample_rate)

    fft_list = []
    k = 0
    current_time = offset

    print("Retrieving freqs from", offset, "to", songlen+offset, "...")
    print("Freqs are from", minfreq, "to", maxfreq)
    while current_time < songlen+offset-segsize:
        left_id = int(current_time*sample_rate)
        right_id = int((current_time+segsize)*sample_rate)
        if left_id >= len(song_data):
            break  # window runs past the end of the file

        fft_list.append(scp.fft.rfft(song_data[left_id:right_id]))

        # recompute from k to avoid accumulating floating point error
        k += 1
        current_time = offset + k*segsize

    first_len = len(fft_list[0]) if fft_list else 0
    print("\n\nSegSize :", segsize, "\nFFT :", len(fft_list), "\nFFT[0] :", first_len, "\npfreq :", len(pfreq), "\n\n")

    for spectrum in fft_list:
        # magnitudes are taken before anything is zeroed, so the threshold is
        # relative to the unfiltered segment
        mags = np.abs(spectrum)
        lmax = mags.max()

        # a segment may be one sample off; only bins with a known frequency
        # can be band-filtered
        usable = min(len(spectrum), len(pfreq))
        out_of_band = (pfreq[:usable] <= minfreq) | (pfreq[:usable] >= maxfreq)
        spectrum[:usable][out_of_band] = 0
        spectrum[mags <= lmax*ampthr] = 0

    # writing new .wav
    print("Converting...")
    if fft_list:
        samples = np.concatenate([scp.fft.irfft(spectrum, n=blit) for spectrum in fft_list])
    else:
        samples = np.zeros(0)

    # scale by the largest absolute sample: using only the positive peak would
    # overflow int16 on a larger negative peak and divide by zero on silence
    peak = np.max(np.abs(samples)) if len(samples) else 0
    if peak > 0:
        samples = 32767*samples/peak

    wavfile.write(output_name, sample_rate, samples.astype(np.int16))
|
||||
|
||||
def is_within_reach(xref, x, error):
    """Tell whether ``x`` lies no further than ``error`` away from ``xref``."""
    gap = np.abs(xref - x)
    return gap <= error
|
||||
|
||||
def localize_frequencies(song_name, offset, songlen, segsize, output_name):
    """Silence segments whose dominant frequency merely keeps ringing.

    The window ``[offset, offset + songlen]`` of the .wav file ``song_name``
    is split into segments of ``segsize`` seconds. The dominant frequency of
    each segment is compared with the previous one; once it has stayed within
    50 Hz for more than one consecutive segment, the segment's whole spectrum
    is zeroed. The result is transformed back, normalised to the int16 range
    and written to ``output_name``.

    Args:
        song_name: path of the input .wav file.
        offset: start of the processed window, in seconds.
        songlen: length of the processed window, in seconds.
        segsize: length of one segment, in seconds.
        output_name: path of the .wav file to write.

    Raises:
        ValueError: if ``segsize`` is shorter than one sample.
    """
    sample_rate, raw_song_data = wavfile.read(song_name)
    blit = int(sample_rate*segsize)  # samples per segment

    if is_data_stereo(raw_song_data):
        print("Converting to mono...")
        # average both channels (true division yields floats, so no overflow)
        song_data = raw_song_data[:, 0]/2 + raw_song_data[:, 1]/2
    else:
        song_data = raw_song_data

    print("\nSampleRate : ", sample_rate)
    print("SegSize : ", blit)

    if blit <= 0:
        raise ValueError("segsize must span at least one sample")

    # calculate the frequencies associated to the FFTs
    pfreq = scp.fft.rfftfreq(blit, 1/sample_rate)

    fft_list = []
    k = 0
    current_time = offset

    print("Retrieving freqs from", offset, "to", songlen+offset, "...")
    while current_time < songlen+offset-segsize:
        left_id = int(current_time*sample_rate)
        right_id = int((current_time+segsize)*sample_rate)
        if left_id >= len(song_data):
            break  # window runs past the end of the file

        fft_list.append(scp.fft.rfft(song_data[left_id:right_id]))

        # recompute from k to avoid accumulating floating point error
        k += 1
        current_time = offset + k*segsize

    first_len = len(fft_list[0]) if fft_list else 0
    print("\n\nSegSize :", segsize, "\nFFT :", len(fft_list), "\nFFT[0] :", first_len, "\npfreq :", len(pfreq), "\n\n")

    let_ring_currentLength = 0
    let_ring_freq = -200  # far from any real frequency, so segment 0 never matches

    let_ring_freqLeniency = 50
    let_ring_lengthLeniency = 1

    # i = time
    for i, spectrum in enumerate(fft_list):
        # dominant frequency, restricted to bins that have an entry in pfreq
        # (a segment may carry one extra bin)
        mags = np.abs(spectrum[:len(pfreq)])
        maxfreq = pfreq[np.argmax(mags)]

        if is_within_reach(maxfreq, let_ring_freq, let_ring_freqLeniency):
            let_ring_currentLength += 1
            if let_ring_currentLength > let_ring_lengthLeniency:
                # same note still ringing: mute the whole segment
                # NOTE(review): the original indentation is lost; "here" is
                # assumed to be printed once per muted segment - confirm
                spectrum[:] = 0
                print("here")
        else:
            let_ring_currentLength = 0
        let_ring_freq = maxfreq

        print("(at time", i, ") maxfreq and length are", let_ring_freq, let_ring_currentLength)

    # writing new .wav
    print("Converting...")
    if fft_list:
        samples = np.concatenate([scp.fft.irfft(spectrum, n=blit) for spectrum in fft_list])
    else:
        samples = np.zeros(0)

    # scale by the largest absolute sample: using only the positive peak would
    # overflow int16 on a larger negative peak and divide by zero on silence
    peak = np.max(np.abs(samples)) if len(samples) else 0
    if peak > 0:
        samples = 32767*samples/peak

    wavfile.write(output_name, sample_rate, samples.astype(np.int16))
|
||||
|
||||
def write_result(song_name, offset, songlen, segsize, timing_pts, output_name):
    """Write a copy of a song window with a marker tone at the timing points.

    The window ``[offset, offset + songlen]`` of the .wav file ``song_name``
    is split into segments of ``segsize`` seconds. Whenever a segment reaches
    the next entry of ``timing_pts``, the three FFT bins around the first bin
    above 880 Hz are overwritten with a large amplitude. The segments are then
    transformed back, normalised to the int16 range and written to
    ``output_name``.

    Args:
        song_name: path of the input .wav file.
        offset: start of the processed window, in seconds.
        songlen: length of the processed window, in seconds.
        segsize: length of one segment, in seconds.
        timing_pts: increasing times compared against ``segsize*i`` for
            segment ``i``. The list is not modified.
        output_name: path of the .wav file to write.

    Raises:
        ValueError: if ``segsize`` is shorter than one sample, or if the
            marker frequency cannot be represented at this sample rate.
    """
    sample_rate, raw_song_data = wavfile.read(song_name)
    blit = int(sample_rate*segsize)  # samples per segment

    if is_data_stereo(raw_song_data):
        print("Converting to mono...")
        # average both channels (true division yields floats, so no overflow)
        song_data = raw_song_data[:, 0]/2 + raw_song_data[:, 1]/2
    else:
        song_data = raw_song_data

    print("\nSampleRate : ", sample_rate)
    print("SegSize : ", blit)

    if blit <= 0:
        raise ValueError("segsize must span at least one sample")

    # calculate the frequencies associated to the FFTs
    pfreq = scp.fft.rfftfreq(blit, 1/sample_rate)

    fft_list = []
    k = 0
    current_time = offset

    print("Retrieving freqs from", offset, "to", songlen+offset, "...")
    while current_time < songlen+offset-segsize:
        left_id = int(current_time*sample_rate)
        right_id = int((current_time+segsize)*sample_rate)
        if left_id >= len(song_data):
            break  # window runs past the end of the file

        fft_list.append(scp.fft.rfft(song_data[left_id:right_id]))

        # recompute from k to avoid accumulating floating point error
        k += 1
        current_time = offset + k*segsize

    first_len = len(fft_list[0]) if fft_list else 0
    print("\n\nSegSize :", segsize, "\nFFT :", len(fft_list), "\nFFT[0] :", first_len, "\npfreq :", len(pfreq), "\n\n")

    # work on a copy ending with a sentinel so that timing_pts[i0] is always
    # valid and the caller's list is left untouched
    timing_pts = list(timing_pts) + [999999]
    i0 = 0

    # index of the first bin strictly above the marker frequency
    write_freq = 880
    above = np.flatnonzero(pfreq > write_freq)
    if len(above) == 0 or above[0] + 1 >= len(pfreq):
        raise ValueError("sample rate too low to write the 880 Hz marker")
    write_id = int(above[0])

    # i = time
    # NOTE(review): the original indentation is lost; the marker is assumed to
    # be written only for segments that reach a timing point - confirm
    for i, spectrum in enumerate(fft_list):
        if segsize*i >= timing_pts[i0]:
            i0 += 1

            if write_id + 1 >= len(spectrum):
                continue  # truncated segment: marker bins do not exist

            maxamp = np.abs(spectrum).max()
            spectrum[write_id-1:write_id+2] = max(maxamp*2, 32767)

    # writing new .wav
    print("Converting...")
    if fft_list:
        samples = np.concatenate([scp.fft.irfft(spectrum, n=blit) for spectrum in fft_list])
    else:
        samples = np.zeros(0)

    # scale by the largest absolute sample: using only the positive peak would
    # overflow int16 on a larger negative peak and divide by zero on silence
    peak = np.max(np.abs(samples)) if len(samples) else 0
    if peak > 0:
        samples = 32767*samples/peak

    wavfile.write(output_name, sample_rate, samples.astype(np.int16))
|
||||
|
||||
def retrieve_dominant_amps(song_name, offset, songlen, segsize, percent, divlen):
    """Return the peak amplitude of each time segment, keeping only loud samples.

    The window starting at ``offset`` and lasting ``songlen`` seconds is cut
    out of ``song_name`` with ffmpeg into ``crop.wav`` and downmixed to mono
    when needed. The samples are scanned in chunks of ``divlen`` seconds; in
    each chunk only the largest samples are kept and every other sample is
    set to 0. Finally the data is reduced to one value per segment of
    ``segsize`` seconds: the largest kept sample, or 0 if none is positive.

    Args:
        song_name: path of the audio file to analyse.
        offset: start of the analysed window, in seconds.
        songlen: length of the analysed window, in seconds.
        segsize: length of one output segment, in seconds.
        percent: share of samples to keep, in percent.
        divlen: length of one selection chunk, in seconds.

    Returns:
        A list ``res`` where ``res[i]`` corresponds to time
        ``offset + i*segsize``.

    Raises:
        ValueError: if ``divlen`` is shorter than one sample or ``segsize``
            is not positive (either would loop forever).
    """
    # cutting the song to only keep the part we're interested in;
    # "-y" keeps ffmpeg from blocking on a prompt if crop.wav already exists
    # NOTE(review): crop.wav is left on disk, as in the original
    subprocess.run(["ffmpeg", "-y", "-ss", str(offset), "-t", str(songlen), "-i", song_name, "crop.wav"], shell=False)
    subprocess.run(["clear"])

    # extracting data from cropped song
    sample_rate, raw_song_data = wavfile.read("crop.wav")

    # in case song has stereo format, conversion to mono
    if is_data_stereo(raw_song_data):
        print("Converting to mono...")
        # average both channels (true division yields floats, so no overflow)
        song_data = raw_song_data[:, 0]/2 + raw_song_data[:, 1]/2
    else:
        song_data = raw_song_data

    # length of selection chunks, in samples
    seglen = int(divlen*sample_rate)
    if seglen <= 0:
        raise ValueError("divlen must span at least one sample")
    if segsize <= 0:
        raise ValueError("segsize must be positive")

    # number of samples kept per chunk
    # NOTE(review): derived from the whole song length rather than the chunk
    # length, as in the original - confirm this is intended
    x = int((len(song_data)*percent)//100)

    # which samples survive
    is_locked = np.zeros(len(song_data), dtype=bool)

    print("Retreiving the", int(x), "/", len(song_data), "highest values")
    for left in range(0, len(song_data), seglen):
        chunk = song_data[left:left+seglen]

        # list of couples (index within chunk, value)
        elements = heapq.nlargest(x, enumerate(chunk), key=lambda item: item[1])
        for idx, _ in elements:
            is_locked[left+idx] = True

    song_data[~is_locked] = 0

    # reduce song_data to one value per segment
    res = []
    k = 0
    current_time = offset

    while current_time < songlen+offset-segsize:
        # crop.wav already starts at `offset`, so sample indices are relative
        # to the crop; adding the offset again would read the wrong samples
        left_id = int(k*segsize*sample_rate)
        right_id = int((k+1)*segsize*sample_rate)

        # merge the segment into one value (0 when empty or all non-positive)
        window = song_data[left_id:right_id]
        cmax = 0
        if len(window) > 0:
            cmax = max(0, window.max())
        res.append(cmax)

        # recompute from k to avoid accumulating floating point error
        k += 1
        current_time = offset + k*segsize

    # res[i] corresponds to time (offset + i*segsize)
    return res
|
||||
|
||||
def parse_after_filter(song_name, offset, songlen, segsize, dt0):
    """Extract timing points from an already filtered song.

    The window ``[offset, offset + songlen]`` of the .wav file ``song_name``
    is split into segments of ``segsize`` seconds. A segment whose strongest
    FFT bin exceeds 100 is considered loud. The first loud segment of the
    song, and up to two loud segments after every quiet one, produce a
    timing point.

    Args:
        song_name: path of the input .wav file.
        offset: start of the analysed window, in seconds.
        songlen: length of the analysed window, in seconds.
        segsize: length of one segment, in seconds.
        dt0: constant added to every timing point, in seconds.

    Returns:
        The list of timing points, each equal to ``i*segsize + dt0`` for a
        retained segment ``i``.

    Raises:
        ValueError: if ``segsize`` is shorter than one sample.
    """
    sample_rate, raw_song_data = wavfile.read(song_name)
    blit = int(sample_rate*segsize)  # samples per segment

    if is_data_stereo(raw_song_data):
        print("Converting to mono...")
        # average both channels (true division yields floats, so no overflow)
        song_data = raw_song_data[:, 0]/2 + raw_song_data[:, 1]/2
    else:
        song_data = raw_song_data

    print("\nSampleRate : ", sample_rate)
    print("SegSize : ", blit)

    if blit <= 0:
        raise ValueError("segsize must span at least one sample")

    # calculate the frequencies associated to the FFTs
    pfreq = scp.fft.rfftfreq(blit, 1/sample_rate)

    fft_list = []
    k = 0
    current_time = offset

    print("Retrieving freqs from", offset, "to", songlen+offset, "...")
    while current_time < songlen+offset-segsize:
        left_id = int(current_time*sample_rate)
        right_id = int((current_time+segsize)*sample_rate)
        if left_id >= len(song_data):
            break  # window runs past the end of the file

        fft_list.append(scp.fft.rfft(song_data[left_id:right_id]))

        # recompute from k to avoid accumulating floating point error
        k += 1
        current_time = offset + k*segsize

    first_len = len(fft_list[0]) if fft_list else 0
    print("\n\nSegSize :", segsize, "\nFFT :", len(fft_list), "\nFFT[0] :", first_len, "\npfreq :", len(pfreq), "\n\n")

    timing_points = []
    prev_append = 1  # how many loud segments may still be recorded

    for i, spectrum in enumerate(fft_list):
        # amplitude of the dominant bin (plain 0 for an all-zero spectrum)
        peak = np.abs(spectrum).max()
        maxamp = peak if peak > 0 else 0

        print("dominant at", i, ":" , maxamp)
        if maxamp > 100:
            # NOTE(review): the original indentation is lost; the `else`
            # below is assumed to belong to the loudness test - confirm
            if prev_append > 0:
                timing_points.append(i*segsize + dt0)
                prev_append -= 1
        else:
            # a quiet segment re-arms the detector
            prev_append = 2

    print(timing_points)
    return timing_points
|
||||
|
||||
# remove high/low frequencies (often noise)
|
||||
#void_freq_clean(wavved_song, OFFSET, SONG_LEN, SEGSIZE/8, 100, 3000, 0.05, "Zvoided_song.wav")
|
||||
|
||||
# crops any part with let ring
|
||||
localize_frequencies("Zblit.wav", 0, SONG_LEN-0.1, SEGSIZE/8, "Zcleaned_song.wav")
|
||||
#localize_frequencies(wavved_song, OFFSET, SONG_LEN, SEGSIZE/8, "Zcleaned_song.wav")
|
||||
|
||||
# find timings
|
||||
tp = parse_after_filter("Zcleaned_song.wav", 0, SONG_LEN-0.1, SEGSIZE/8, OFFSET)
|
||||
|
||||
# write
|
||||
write_result(wavved_song, OFFSET, SONG_LEN-0.1, SEGSIZE/8, tp, "Zoutput_song.wav")
|
||||
|
||||
#retrieve_all_from_song("Zcleaned_song.wav", 0, 5, 149.3, dtf=1/(149.3/60)/8)
|
||||
'''
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue