344 lines
11 KiB
Python
Executable File
344 lines
11 KiB
Python
Executable File
from math import *
|
|
import numpy as np
|
|
from scipy.io import wavfile
|
|
from scipy import signal
|
|
import matplotlib.pyplot as plt
|
|
import subprocess
|
|
import wave as wv
|
|
import struct
|
|
import librosa
|
|
import heapq
|
|
import scipy
|
|
import os
|
|
import random
|
|
from pathlib import Path
|
|
from time import sleep
|
|
from datetime import timedelta
|
|
|
|
import debug
|
|
|
|
# Start-up banner. This is a module-level statement, so it is printed when the
# module is imported as well as when the file is run as a script.
print("Starting...\n")
|
|
|
|
def filter_n_percent_serial(song_name, offset, n_iter, step, threshold):
    """
    Keep only the largest samples of a song, segment by segment.

    The song is cropped with ffmpeg starting at `offset`, split into `n_iter`
    consecutive segments of `step` seconds, and inside every segment all
    samples except the `threshold` percent with the largest (signed) values
    are set to 0.

    song_name : string (audio file handed to ffmpeg)
    offset : int or float (start of the processed part, in seconds)
    n_iter : int (number of segments)
    step : float (length of each segment, in seconds)
    threshold : number in ]0, 100] (percentage of samples kept per segment)

    Returns the cropped sample array with the rejected samples zeroed.
    Raises ValueError if the decoded audio is not one-dimensional (mono).
    """
    crop_path = "crop.wav"

    # "-t" is a duration, not an end position: request exactly the n_iter
    # segments processed below. "-y" overwrites a stale crop.wav instead of
    # letting ffmpeg stop on an interactive prompt.
    subprocess.run(
        ["ffmpeg", "-y", "-ss", str(offset), "-t", str(step * n_iter),
         "-i", song_name, crop_path],
        shell=False,
    )
    try:
        sample_rate, global_data = wavfile.read(crop_path)
    finally:
        # crop.wav is only a temporary decode target; remove it even when
        # reading fails.
        Path(crop_path).unlink(missing_ok=True)

    subprocess.run(["clear"], shell=False)

    if global_data.ndim != 1:
        raise ValueError("filter_n_percent_serial expects mono audio data")

    for i in range(n_iter):
        print(i, "/", n_iter)
        start = int(i * step * sample_rate)
        stop = int((i + 1) * step * sample_rate)
        # Slicing a numpy array gives a view, so writes land in global_data.
        segment = global_data[start:stop]
        if len(segment) == 0:
            continue

        keep_count = max(0, int((len(segment) * threshold) // 100))

        # A stable sort of the negated values lists the samples in descending
        # order and resolves ties in favour of the earliest sample, which is
        # the selection heapq.nlargest would make.
        order = np.argsort(-segment.astype(np.float64), kind="stable")
        segment[order[keep_count:]] = 0

    return global_data
|
|
|
|
|
|
def write_to_file_thr(sample_rate, song_data, offset, threshold, filename):
    """
    Write the samples that exceed a relative threshold to a CSV file.

    The file starts with the header "time,amplitude"; one row is written for
    every sample whose value divided by the maximum of song_data is strictly
    greater than `threshold`.

    sample_rate : int (samples per second, used to turn indices into times)
    song_data : sequence of numbers (the samples)
    offset : float (time in seconds added to every written timestamp)
    threshold : float (ratio to the maximum above which a sample is written)
    filename : string (path of the output file, overwritten if it exists)

    Only the header is written when song_data is empty or its maximum is 0,
    since the ratio to the maximum is then undefined.
    """
    total = len(song_data)
    # Progress is reported roughly 50 times. The step is clamped to 1 because
    # total // 50 is 0 for fewer than 50 samples and would be a modulo by zero.
    progress_step = max(1, total // 50)
    mx = max(song_data) if total else 0

    # The context manager guarantees the file is flushed and closed.
    with open(filename, 'w') as file:
        file.write('time,amplitude\n')
        print("writing to output...")
        if not mx:
            return
        for i, value in enumerate(song_data):
            if i % progress_step == 0:
                print(i, "/", total)
            if value / mx > threshold:
                timestamp = str(np.round(offset + i / sample_rate, 3))
                amplitude = str(np.round(value, 0))
                file.write(timestamp + ',' + amplitude + '\n')
|
|
|
|
def round_t(id, sample_rate, bpm, div, offset, k0):
    """
    Snap the sample index `id` onto the time grid offset + k/(bpm*div).

    Starting from k = k0, the grid is walked forward until a grid time that is
    not below id/sample_rate is reached. That time is returned as a plain
    number when it is strictly closer to id/sample_rate than the grid time one
    step earlier; otherwise the tuple (earlier grid time, 0) is returned.
    """
    target = id / sample_rate
    index = k0
    snapped = offset + index / (bpm * div)

    # The first pass recomputes the same grid time before the index advances.
    while snapped < target:
        snapped = offset + index / (bpm * div)
        index += 1

    previous = snapped - 1 / (bpm * div)
    later_is_closer = np.abs(snapped - target) < np.abs(previous - target)
    return snapped if later_is_closer else (previous, 0)
|
|
|
|
def compress(Zxx):
    """
    Unfinished placeholder: `Zxx` is not used, an empty list is created and
    nothing is returned.
    """
    res = list()
    return None
|
|
|
|
def get_freq(song_name, times, width=1000, display=False):
    """
    For a given list of times (in seconds), return the corresponding peak
    frequencies (in Hz).

    song_name : string (wav file, read in full with scipy.io.wavfile)
    times : sequence of float (instants to analyse, in seconds)
    width : int (half-width, in samples, of the window analysed around each
        instant; the window is clipped at both ends of the file)
    display : bool (when true, plot the result with matplotlib)

    Returns a list with one entry per time: the frequency of the strongest
    FFT bin of the window, or 0 when the window lies entirely outside the
    file.
    """
    # The file is read directly. Cropping it with ffmpeg first is unnecessary
    # because the crop started at 0 and the samples were read from song_name
    # anyway.
    sample_rate, global_data = wavfile.read(song_name)
    global_data = np.asarray(global_data)
    if global_data.ndim > 1:
        # The transform must run along the time axis: fold channels into one.
        global_data = global_data.mean(axis=1)

    frequencies = [0 for _ in times]

    for s, moment in enumerate(times):
        # Use the sample rate of the file, not an assumed 44100 Hz.
        center = int(moment * sample_rate)
        left = max(0, center - width)
        right = min(len(global_data), center + width)
        if right <= left:
            # Nothing to analyse: the instant is outside the recording.
            continue

        window = global_data[left:right]
        magnitudes = np.abs(scipy.fft.rfft(window))
        peak_bin = int(np.argmax(magnitudes))
        # Bin k of an n-sample real FFT sits at k * sample_rate / n. Using the
        # real window length keeps this correct for clipped windows too.
        frequencies[s] = peak_bin * sample_rate / len(window)

    if display:
        plt.plot(times, frequencies)
        plt.grid()
        plt.xlabel("Time (s)")
        plt.ylabel("Dominant frequency (Hz)")
        plt.title("Dominant frequencies at peaks")
        plt.show()

    return frequencies
|
|
|
|
def is_data_stereo(raw_global_data:list) -> bool:
    """
    Tell whether every sample of the data is itself a sequence of channels.

    raw_global_data : list (or numpy array) of samples

    Returns True when raw_global_data[0][0] exists, i.e. the first sample can
    be indexed; returns False for empty data and for flat (mono) data.
    """
    # Probe by indexing instead of using `assert`: assert statements are
    # removed under `python -O`, which would skip the probe entirely and make
    # every input look stereo.
    try:
        _ = raw_global_data[0][0]
    except (IndexError, TypeError):
        # IndexError: empty data, or a numpy scalar that cannot be indexed.
        # TypeError: a plain Python number that cannot be indexed.
        return False
    return True
|
|
|
|
|
|
def void_freq(song_name, offset, songlen, increment, minfreq, maxfreq, upperthr, ampthr, ampfreq, ampval, leniency, write, linear, output_file="trimmed.wav"):
    """
    Filter a song in the frequency domain, chunk by chunk, and optionally
    write the result as a 16-bit mono wav file.

    song_name : string
    offset : int (start of the processed part, in seconds)
    songlen : int (chunks are processed while offset + k*increment <= songlen)
    increment : float (length, in seconds, of each FFT chunk)
    minfreq and maxfreq : every frequency in [minfreq, maxfreq[ will be voided
    upperthr : every frequency above upperthr will be voided
    ampthr : in each chunk, every frequency with amplitude under MAX/ampthr
        (MAX being the largest amplitude of that chunk) will be voided
    ampfreq : float (frequency around which the non-linear gain is centred)
    ampval : float
        - if linear is false, the gain is 1 + ampval/max(1, |frequency - ampfreq|)
        - if linear is true, this is the multiplier
          (Amp <- Amp * (ampval * frequency + leniency))
    leniency : float (constant term of the linear gain, only used if linear
        is true)
    write : bool (when true, the filtered signal is written to output_file)
    linear : bool (selects the gain formula, see ampval)
    output_file : string (path of the wav file to write)

    Returns None.
    Raises ValueError when increment is shorter than one sample.
    """
    crop_path = "crop.wav"

    # "-y" overwrites a stale crop.wav instead of stopping on a prompt.
    # NOTE(review): "-t" is a duration, so songlen+offset requests more audio
    # than the loop below consumes. It is left unchanged because the docstring
    # history and the loop disagree on whether songlen is a length or an end
    # time - confirm with the callers.
    subprocess.run(
        ["ffmpeg", "-y", "-ss", str(offset), "-t", str(songlen + offset),
         "-i", song_name, crop_path],
        shell=False,
    )
    try:
        sample_rate, raw_global_data = wavfile.read(crop_path)
    finally:
        # crop.wav is only a temporary decode target; remove it even when
        # reading fails.
        Path(crop_path).unlink(missing_ok=True)

    blit = int(sample_rate * increment)  # samples per chunk
    if blit <= 0:
        raise ValueError("increment must cover at least one sample")

    if is_data_stereo(raw_global_data):
        print("Converting to mono...")
        stereo = np.asarray(raw_global_data)
        # Average of the first two channels.
        global_data = stereo[:, 0] / 2 + stereo[:, 1] / 2
    else:
        global_data = np.asarray(raw_global_data)

    pfreq = scipy.fft.rfftfreq(blit, 1 / sample_rate)

    # One spectrum per chunk, for as long as the chunk start time is within
    # songlen and there is still audio to read.
    spectra = []
    k = 0
    current_time = offset
    while current_time <= songlen:
        chunk = global_data[k * blit:(k + 1) * blit]
        if len(chunk) == 0:
            break
        # n=blit zero-pads a short final chunk so that every spectrum shares
        # the frequency grid described by pfreq.
        spectra.append(scipy.fft.rfft(chunk, n=blit))
        k += 1
        current_time = offset + k * increment

    spectra = np.array(spectra, dtype=complex).reshape(-1, len(pfreq))

    print("FFT :", spectra.shape[0], "\nFFT[0] :", spectra.shape[1], "\npfreq :", len(pfreq))

    print("Finding global max...")

    # Frequency-dependent gain, applied to every chunk.
    if linear:
        gain = ampval * pfreq + leniency
    else:
        gain = 1 + ampval / np.maximum(1, np.abs(pfreq - ampfreq))
    spectra *= gain

    print("Trimming...")

    # Per-chunk maximum, measured after amplification and before any voiding.
    magnitude = np.abs(spectra)
    local_max = magnitude.max(axis=1, keepdims=True)

    voided_band = ((pfreq >= minfreq) & (pfreq < maxfreq)) | (pfreq > upperthr)
    spectra[:, voided_band] = 0
    spectra[magnitude < local_max / ampthr] = 0

    if write:
        print("Converting...")
        if len(spectra):
            samples = scipy.fft.irfft(spectra, n=blit, axis=1).ravel()
        else:
            samples = np.zeros(0)

        # Normalise on the largest magnitude (not the largest positive value)
        # so negative peaks cannot overflow int16; skip silent signals to
        # avoid dividing by zero.
        peak = np.max(np.abs(samples)) if samples.size else 0
        if peak > 0:
            samples = 32767 * samples / peak

        # Keep the sample rate of the source instead of assuming 44100 Hz.
        wavfile.write(output_file, sample_rate, samples.astype(np.int16))

    print("Done")
|
|
|
|
def convert_tuple(data, times):
    """
    Pair every element of data with the element of times at the same index.

    Returns a list of tuples (time, value), one per element of data.
    """
    pairs = []
    for position, value in enumerate(data):
        pairs.append((times[position], value))
    return pairs
|
|
|
|
def get_songlen(filename):
    """
    Return the length of the song in seconds.

    The wav file is read with scipy.io.wavfile and the number of samples is
    divided by the sample rate; the value is also printed.
    """
    rate, samples = wavfile.read(filename)
    duration = len(samples) / rate

    print("LEN :", duration)

    return duration
|
|
|
|
def convert_to_wav(song_name:str, output_file="audio.wav") -> str:
    """
    Converts the song to .wav, only if it's not already in wave format.
    Currently relies on file extension: ".mp3" and ".ogg" files (in any
    letter case) are converted with ffmpeg, everything else is left alone.

    song_name : string (path of the input file)
    output_file : string (path of the wav file written when converting)

    Returns: the song_name that should be used afterwards (output_file when a
    conversion was started, song_name otherwise).
    """
    # Compare in lower case so that "SONG.MP3" is converted as well.
    extension = Path(song_name).suffix.lower()
    if extension in (".mp3", ".ogg"):
        print("Converting to .wav...")
        subprocess.run(["ffmpeg", "-y", "-i", song_name, output_file], shell=False)
        return output_file
    return song_name
|
|
|
|
def process_song(filename, bpm, offset0=0, div_len_factor=1, n_iter_2=-1, threshold=0.5, divisor=4):
    """
    Run the whole pipeline on a song: convert to wav if needed, filter it in
    the frequency domain, keep the largest samples of each segment, snap the
    result and look up the dominant frequency at each snapped time.

    filename : string (name of the song)
    bpm : int [+]
    offset0 : int [+] (song mapping will start from this time; the value is
        divided by 1000 before being used as seconds, default is 0)
    div_len_factor : float [+] (the length multiplier of each segment, default is 1)
    n_iter_2 : int [+*] (the number of iterations, default is -1 (maps the whole music))
    threshold : number in [0, 100] (used by the filter function to only keep
        the largest threshold% of timing points, default is 0.5)
    divisor : int [+] (beat divisor; currently not used by this function)

    Returns the tuple (snapped_data, times, frequencies).
    """
    filename = convert_to_wav(filename)

    offset = offset0 / 1000  # seconds from here on

    div_len = div_len_factor * 60 / bpm - 0.01

    song_len = get_songlen(filename)

    n_iter = n_iter_2
    if n_iter == -1:
        # offset is already in seconds, so it must not be divided by 1000 a
        # second time.
        n_iter = int((song_len - offset) / div_len) - 1

    filtered_name = f"{filename}_trimmed.wav"

    try:
        void_freq(filename, offset, min(song_len, offset+div_len*(n_iter+1)+0.01), 4*60/bpm, minfreq=0, maxfreq=220, upperthr=5000, ampthr=60, ampfreq=1200, ampval=5.0, leniency=0.005, write=True, linear=False, output_file=filtered_name)

        # NOTE(review): void_freq already crops from `offset`, and
        # filter_n_percent_serial seeks by `offset` again inside the filtered
        # file - confirm this is intended for non-zero offsets.
        datares = filter_n_percent_serial(filtered_name, offset, n_iter, div_len, threshold)

        # Per the original author's notes: snapped_data is the amplitude and
        # times are in ms.
        # NOTE(review): get_freq documents its times as seconds - confirm the
        # unit returned by debug.snap3.
        (snapped_data, times) = debug.snap3(datares, mintime=50, initial_plot=True, after_plot=True)

        frequencies = get_freq(filtered_name, times, display=True)
    finally:
        # The filtered file is an intermediate artefact: remove it whether or
        # not the pipeline succeeded.
        Path(filtered_name).unlink(missing_ok=True)

    return snapped_data, times, frequencies
|
|
|
|
def main():
    """
    Script entry point: process "tetris_4.wav" at 160 bpm over 48 iterations
    and print a completion message. The results are not used further.
    """
    _snapped, _times, _frequencies = process_song("tetris_4.wav", 160, n_iter_2=48)
    print("Program finished with return 0")


# Only run the pipeline when the file is executed directly.
if __name__ == "__main__":
    main()
|