osutipe/sound_process.py


from math import *
import numpy as np
from scipy.io import wavfile
from scipy import signal
import matplotlib.pyplot as plt
import subprocess
import wave as wv
import struct
import librosa
import heapq
import scipy
import os
import random
from pathlib import Path
from time import sleep
from datetime import timedelta
import debug
print("Starting...\n")


def filter_n_percent_serial(song_name, offset, n_iter, step, threshold):
    """
    song_name : string
    offset : int (start time, in seconds)
    n_iter : int (number of turns)
    step : int (length of each small segment)
    threshold : int (in ]0, 100])
    Filters the data associated with song_name so that only the highest threshold% of values are kept in each segment.
    """
    # Note: ffmpeg's -t is a duration, so offset+step*n_iter crops slightly more than needed, which is harmless here.
    subprocess.run(["ffmpeg", "-ss", str(offset), "-t", str(offset+step*n_iter), "-i", song_name, "crop.wav"], shell=False)
    sample_rate, global_data = wavfile.read('crop.wav')
    subprocess.run(["clear"], shell=False)
    subprocess.run(["rm", "crop.wav"], shell=False)

    for i in range(n_iter):
        print(i, "/", n_iter)
        #print(i * step)
        song_data = global_data[int(i*step*sample_rate):int((i+1)*step*sample_rate)]

        if(len(song_data) != 0):
            mx = max(song_data)
            is_locked = [False for i in range(len(song_data))]
            x = int((len(song_data)*threshold)//100)
            #print("X = ", x)
            #print("Retrieving the", int(x), "/", len(song_data), "highest values")
            elements = heapq.nlargest(int(x), enumerate(song_data), key=lambda pair: pair[1])
            #print("Done")

            for idx in range(len(elements)):
                is_locked[elements[idx][0]] = True

            for r in range(len(song_data)):
                if(is_locked[r] == False):
                    global_data[r+int(i*step*sample_rate)] = 0

    return global_data
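

# Illustrative sketch (not called anywhere in this file): the per-segment
# "keep only the top threshold% of samples" step above can also be expressed
# with numpy instead of heapq. Assumes `segment` is a 1-D mono numpy array and
# `threshold` has the same meaning as in filter_n_percent_serial.
def _filter_top_percent_sketch(segment, threshold):
    keep = int(len(segment) * threshold // 100)
    if keep == 0:
        return np.zeros_like(segment)
    cutoff = np.partition(segment, -keep)[-keep]    # value of the keep-th largest sample
    return np.where(segment >= cutoff, segment, 0)  # zero everything below the cutoff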


def write_to_file_thr(sample_rate, song_data, offset, threshold, filename):
    """
    Writes every (time, amplitude) pair whose normalized amplitude exceeds threshold to a CSV output file.
    """
    file = open(filename, 'w')
    file.writelines('time,amplitude\n')
    mx = max(song_data)
    print("writing to output...")

    for i in range(len(song_data)):
        if(i % (len(song_data)//50) == 0):
            print(i, "/", len(song_data))
        if(song_data[i]/mx > threshold):
            file.writelines(str(np.round(offset + i/sample_rate, 3)))
            file.writelines(',')
            file.writelines(str(np.round(song_data[i], 0)))
            file.writelines('\n')

    file.close()


def round_t(id, sample_rate, bpm, div, offset, k0):
    """
    Rounds the time id/sample_rate to the nearest grid point of the form offset + k/(bpm*div), searching upward from k0.
    """
    k = k0
    t = offset + k/(bpm*div)
    while(t < id/sample_rate):
        t = offset + k/(bpm*div)
        k += 1
    if(np.abs(t - id/sample_rate) < np.abs((t - 1/(bpm*div)) - id/sample_rate)):
        return t
    return t - 1/(bpm*div)
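

# Illustrative sketch (assuming a time t, a grid offset and a grid step, all in
# the same unit): snapping to the nearest grid point is usually a single
# rounded division, which avoids the incremental search done in round_t().
def _snap_to_grid_sketch(t, offset, step):
    return offset + round((t - offset) / step) * step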


def compress(Zxx):
    # Unfinished stub (currently unused): presumably meant to compress a spectrogram.
    res = []
    return res


def get_freq(song_name, times, width=1000, display=False):
    """
    For a given list of times (in seconds), returns the corresponding peak frequencies.
    """
    # Note: the FFTs below read song_name directly; crop.wav is created and removed without being used.
    subprocess.run(["ffmpeg", "-ss", str(0), "-t", str(max(np.array(times))), "-i", song_name, "crop.wav"], shell=False)
    sample_rate, global_data = wavfile.read(song_name)
    #blit = int(sample_rate*step)
    subprocess.run(["clear"], shell=False)
    subprocess.run(["rm", "crop.wav"], shell=False)

    pfreq = scipy.fft.rfftfreq(2*width, 1/sample_rate)
    frequencies = [0 for s in range(len(times))]
    print(len(pfreq))

    for s in range(len(times)):
        left = max(0, int(times[s]*sample_rate)-width)
        right = min(len(global_data), int(times[s]*sample_rate)+width)
        pff = scipy.fft.rfft(global_data[left:right])
        #print(len(pff), len(pfreq))
        mx = max(np.abs(pff))
        for id in range(len(pff)):
            if frequencies[s] == 0 and np.abs(pff[id]) == mx:
                frequencies[s] = pfreq[id]

    if(display):
        plt.plot(times, frequencies)
        plt.grid()
        plt.xlabel("Time (s)")
        plt.ylabel("Dominant frequency (Hz)")
        plt.title("Dominant frequencies at peaks")
        plt.show()

    return frequencies
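

# Illustrative sketch (assuming `data` is a 1-D mono numpy array sampled at
# `sample_rate`): the dominant frequency of a single window can be obtained
# with argmax instead of the manual scan used in get_freq().
def _dominant_frequency_sketch(data, sample_rate):
    spectrum = np.abs(scipy.fft.rfft(data))
    freqs = scipy.fft.rfftfreq(len(data), 1/sample_rate)
    return freqs[np.argmax(spectrum)]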


def is_data_stereo(raw_global_data:list) -> bool:
    """
    Returns True if raw_global_data has two channels (stereo), False if it is mono.
    """
    try:
        assert(raw_global_data[0][0])
    except IndexError:
        return False
    except AssertionError:
        return True
    return True
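
# Note: for the numpy arrays returned by wavfile.read, an equivalent and more
# direct check would be `raw_global_data.ndim == 2`.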


def void_freq(song_name, offset, songlen, increment, minfreq, maxfreq, upperthr, ampthr, ampfreq, ampval, leniency, write, linear, output_file="trimmed.wav"):
    """
    song_name : string
    offset : int
    songlen : int (length of the part that will be filtered, starting from offset)
    increment : float (technical parameter)
    minfreq and maxfreq : every frequency in [minfreq, maxfreq] will be voided
    upperthr : every frequency above upperthr will be voided
    ampthr : every frequency whose amplitude is under MAX/ampthr (i.e. under (100/ampthr)% of the maximum) will be voided
    ampfreq, leniency (if linear is False), linear : technical parameters
    ampval : int
        - if linear is False, this will be the maximum amplification possible
        - if linear is True, this is the multiplier (Amp <- Amp * (ampval * frequency + leniency))
    write : bool (should be set to True)
    output_file : technical
    """
    fft_list = []
    times = []
    current_time = offset
    k = 0

    subprocess.run(["ffmpeg", "-ss", str(offset), "-t", str(songlen+offset), "-i", song_name, "crop.wav"], shell=False)
    sample_rate, raw_global_data = wavfile.read("crop.wav")
    blit = int(sample_rate*increment)
    global_data = [0 for i in range(len(raw_global_data))]
    #subprocess.run(["clear"])
    subprocess.run(["rm", "crop.wav"], shell=False)

    a = 0
    if(is_data_stereo(raw_global_data)):
        print("Converting to mono...")
        for x in range(len(raw_global_data)):
            global_data[x] = raw_global_data[x][0]/2 + raw_global_data[x][1]/2
            if(x % (int(len(raw_global_data)/100)) == 0):
                print(a, "/ 100")
                a += 1
    else:
        global_data = raw_global_data

    #print("Blit :", blit)
    pfreq = scipy.fft.rfftfreq(blit, 1/sample_rate)
    #print(len(pfreq))

    while(current_time <= songlen):
        pff = scipy.fft.rfft(global_data[k*blit:(k+1)*blit])
        fft_list.append(pff)
        times.append(k*increment)
        k += 1
        current_time = offset + k*increment

    print("FFT :", len(fft_list), "\nFFT[0] :", len(fft_list[0]), "\npfreq :", len(pfreq))
    print("Finding global max...")

    # Amplify bins, either around ampfreq (non-linear) or proportionally to the frequency (linear)
    if(linear == False):
        for i in range(len(fft_list)):
            for j in range(len(fft_list[i])):
                fft_list[i][j] *= (1 + ampval/max(1, np.abs(pfreq[j] - ampfreq)))
    else:
        for i in range(len(fft_list)):
            for j in range(len(fft_list[i])):
                fft_list[i][j] *= (ampval*pfreq[j] + leniency)

    print("Trimming...")
    for i in range(len(fft_list)):
        lmax = 0
        for j in range(len(fft_list[i])):
            if(np.abs(fft_list[i][j]) > lmax):
                lmax = np.abs(fft_list[i][j])

        for j in range(len(fft_list[i])):
            if((pfreq[j] >= minfreq and pfreq[j] < maxfreq) or pfreq[j] > upperthr):
                fft_list[i][j] = 0+0j
            if(np.abs(fft_list[i][j]) < lmax/ampthr):
                fft_list[i][j] = 0+0j

    if(write):
        res = []
        print("Converting...")
        for i in range(len(fft_list)):
            ift = scipy.fft.irfft(fft_list[i], n=blit)
            for k in ift:
                res.append(k)
        #print(type(res[0]))

        # Normalize to the int16 range using the peak absolute value
        mx = 0
        for j in range(len(res)):
            if(np.abs(res[j]) > mx):
                mx = np.abs(res[j])
        for i in range(len(res)):
            res[i] = np.int16(32767*res[i]/mx)

        res = np.array(res)
        wavfile.write(output_file, sample_rate, res)

    #plt.plot(np.abs(pfreq[:len(fft_list[0])]), np.abs(fft_list[0]))
    #plt.grid()
    #plt.show()
    print("Done")
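

# Illustrative sketch (assuming `frame` is one rfft frame and `pfreq` the
# matching rfftfreq bins): the per-frame voiding done in void_freq() can be
# written with boolean masks instead of explicit loops.
def _void_bins_sketch(frame, pfreq, minfreq, maxfreq, upperthr, ampthr):
    frame = frame.copy()
    lmax = np.abs(frame).max()
    banned = ((pfreq >= minfreq) & (pfreq < maxfreq)) | (pfreq > upperthr)
    frame[banned] = 0
    frame[np.abs(frame) < lmax/ampthr] = 0
    return frame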


def convert_tuple(data, times):
    """
    Takes data and times and converts them to a list of (time, amplitude) tuples.
    """
    return [(times[i], data[i]) for i in range(len(data))]


def get_songlen(filename):
    """
    Retrieves the length of the song in seconds.
    """
    sample_rate, global_data = wavfile.read(filename)
    print("LEN :", len(global_data)/sample_rate)
    return len(global_data)/sample_rate


def convert_to_wav(song_name:str, output_file="audio.wav") -> str:
    """
    Converts the song to .wav, only if it is not already in wave format.
    Currently relies on the file extension.
    Returns: the song_name that should be used afterwards.
    """
    extension = Path(song_name).suffix
    match extension:
        case ".mp3" | ".ogg":
            print("Converting to .wav...")
            subprocess.run(["ffmpeg", "-y", "-i", song_name, output_file], shell=False)
            return output_file
    return song_name


def process_song(filename, bpm, offset0=0, div_len_factor=1, n_iter_2=-1, threshold=0.5, divisor=4):
    """
    filename : string (name of the song)
    bpm : int [+]
    offset0 : int [+] (song mapping will start from this time, in milliseconds, default is 0)
    div_len_factor : float [+] (the length multiplier of each segment, default is 1)
    n_iter_2 : int [+*] (the number of iterations, default is -1 (maps the whole music))
    threshold : float in [0, 100] (used by the filter function to only keep the largest threshold% of timing points, default is 0.5)
    divisor : int [+] (beat divisor used to snap the notes, default is 4)
    """
    filename = convert_to_wav(filename)

    offset = offset0/1000
    div_len = div_len_factor*60/bpm-0.01
    n_iter = n_iter_2

    song_len = get_songlen(filename)

    if(n_iter == -1):
        n_iter = int((song_len-offset)/div_len)-1

    filtered_name = f"{filename}_trimmed.wav"
    void_freq(filename, offset, min(song_len, offset+div_len*(n_iter+1)+0.01), 4*60/bpm, minfreq=0, maxfreq=220, upperthr=5000, ampthr=60, ampfreq=1200, ampval=5.0, leniency=0.005, write=True, linear=False, output_file=filtered_name)

    datares = filter_n_percent_serial(filtered_name, offset, n_iter, div_len, threshold)

    # snapped_data : amplitudes, times : in ms
    (snapped_data, times) = debug.snap3(datares, mintime=50, initial_plot=True, after_plot=True)

    #frequencies = get_freq(filtered_name, offset, div_len, div_len*n_iter, snapped_data, True)
    frequencies = get_freq(filtered_name, times, display=True)

    Path(filtered_name).unlink()
    return snapped_data, times, frequencies

    '''
    datares = debug.snap2(datares, 44100, bpm, first_offset=offset, div=divisor, show=True, adjust=True)
    frequencies = get_freq(filtered_name, offset, div_len, div_len*n_iter, datares, True)
    Path(f"{filename}_trimmed.wav").unlink()
    return convert_tuple(datares, frequencies)
    '''


def main():
    aa, bb, cc = process_song("tetris_4.wav", 160, n_iter_2=48)
    #print(data)
    print("Program finished with return 0")


if __name__ == "__main__":
    main()