"""Track the dominant frequency of an audio excerpt over time.

The script crops an audio file into short consecutive chunks with ffmpeg,
computes a spectrogram for each chunk, stitches the chunks together and then
reports, for every time bin, the frequency bin holding the largest value.
"""

from math import *
import os
import struct
import subprocess
import wave as wv

import librosa
import matplotlib.pyplot as plt
import numpy as np
from scipy import signal
from scipy.io import wavfile

# Scratch file ffmpeg writes each cropped chunk to (in the working directory).
_CROP_PATH = "crop.wav"

# Reference bandwidth used to turn ``maxfreq`` into a "keep the first 1/N of
# the frequency bins" factor (see ``fct``).
_REFERENCE_BANDWIDTH_HZ = 20000

print("Starting...\n")


def _read_chunk(song_name, start, duration):
    """Crop ``duration`` seconds starting at ``start`` and load the samples.

    Returns ``(sample_rate, samples)`` as produced by ``wavfile.read``; audio
    with more than one channel is averaged down to a single channel so that
    the spectrogram is taken along the time axis.
    """
    try:
        # "-y" overwrites a stale scratch file instead of prompting for input;
        # check=True surfaces an ffmpeg failure right away.
        subprocess.run(
            ["ffmpeg", "-y", "-ss", str(start), "-t", str(duration),
             "-i", song_name, _CROP_PATH],
            check=True,
        )
        sample_rate, samples = wavfile.read(_CROP_PATH)
    finally:
        # Always remove the scratch file, even when cropping/reading failed.
        try:
            os.remove(_CROP_PATH)
        except FileNotFoundError:
            pass

    if samples.ndim > 1:
        samples = samples.mean(axis=1)
    return sample_rate, samples


def fct(song_name, offset, increment, songlen, maxfreq, display):
    """Build a spectrogram of ``song_name`` between ``offset`` and ``songlen``.

    The excerpt is processed in chunks of ``increment`` seconds. Each chunk is
    cropped with ffmpeg, transformed with ``scipy.signal.spectrogram`` (using
    the chunk length as ``nfft``) and truncated to its lowest frequency bins.

    Args:
        song_name: Path of the audio file handed to ffmpeg.
        offset: Start time of the excerpt, in seconds.
        increment: Chunk duration in seconds; must be positive.
        songlen: End time of the excerpt, in seconds. A chunk is processed for
            every start time ``offset + k * increment`` that is ``<= songlen``.
        maxfreq: Controls the frequency cut: only the first
            ``1 / (20000 // maxfreq)`` of the frequency bins is kept. Must be
            positive; values above 20000 keep every bin.
        display: When true, show the stitched spectrogram with matplotlib.

    Returns:
        ``(times, frequencies, values)`` where ``times`` are absolute seconds,
        ``frequencies`` is the (truncated) frequency axis of the first chunk
        and ``values`` is the absolute value of the stitched spectrogram with
        shape ``(len(frequencies), len(times))``. All three are empty arrays
        when no chunk was processed.

    Raises:
        ValueError: If ``increment`` or ``maxfreq`` is not positive.
        subprocess.CalledProcessError: If ffmpeg exits with an error.
    """
    if increment <= 0:
        raise ValueError("increment must be positive")
    if maxfreq <= 0:
        raise ValueError("maxfreq must be positive")

    # Floor at 1 so that maxfreq > 20000 keeps all bins instead of dividing
    # by zero.
    to_cut = max(1, _REFERENCE_BANDWIDTH_HZ // maxfreq)
    span = songlen - offset

    global_f = np.array([])
    time_parts = []
    spec_parts = []

    k = 0
    current_time = offset
    while current_time <= songlen:
        sample_rate, audio_data = _read_chunk(song_name, current_time, increment)
        subprocess.run(["clear"])  # wipe ffmpeg's console output

        f, t, Zxx = signal.spectrogram(
            audio_data, fs=sample_rate, nfft=audio_data.size
        )
        keep = len(f) // to_cut
        f, Zxx = f[:keep], Zxx[:keep]

        # A mel weighting was sketched here by the original author
        # (scale row i by 1127 * log(1 + f[i] / 700)); it is not applied.

        if k == 0:
            # NOTE: the frequency axis is taken from the first chunk only;
            # later chunks are assumed to yield the same number of bins.
            global_f = f
        time_parts.append(t + current_time)  # chunk-relative -> absolute time
        spec_parts.append(Zxx)

        k += 1
        # Recompute from k rather than accumulating to avoid float drift.
        current_time = offset + k * increment

        progress = 100.0
        if span > 0:
            progress = min(100.0, 100 * (current_time - offset) / span)
        print("Completion rate : ", np.round(progress, 4), "%")

    if spec_parts:
        global_t = np.concatenate(time_parts)
        global_Zxx = np.concatenate(spec_parts, axis=1)
    else:
        global_t = np.array([])
        global_Zxx = np.array([])

    if display:
        plt.pcolormesh(global_t, global_f, np.abs(global_Zxx), shading='gouraud')
        # Original author's note on (len(global_Zxx), len(global_Zxx[0])),
        # presumably keyed by maxfreq: 88 x 192 for 2500, 70 x 192 for 2000.
        plt.title('STFT Magnitude')
        plt.ylabel('Frequency [Hz]')
        plt.xlabel('Time [sec]')
        plt.show()

    return global_t, global_f, np.abs(global_Zxx)


def write_to_file(t, flist, maxlist, filename):
    """Write one CSV row per time bin to ``filename``.

    Columns are ``time`` (rounded to 3 decimals), ``frequency`` (1 decimal)
    and ``maxvalue`` (0 decimals). ``flist`` and ``maxlist`` are indexed in
    step with ``t``. An existing file is overwritten.
    """
    # The context manager guarantees the file is flushed and closed.
    with open(filename, 'w') as out:
        out.write('time,frequency,maxvalue\n')
        for i, stamp in enumerate(t):
            row = (
                str(np.round(stamp, 3)),
                str(np.round(flist[i], 1)),
                str(np.round(maxlist[i], 0)),
            )
            out.write(','.join(row) + '\n')


def _column_maxima(freq, Zxx, n_cols):
    """Return ``(peak_freqs, peak_values)`` for the first ``n_cols`` columns.

    For each column the first strictly positive maximum is selected; columns
    with no positive value report 0 for both frequency and value.
    """
    spec = np.asarray(Zxx)
    if n_cols == 0 or spec.shape[0] == 0:
        return np.zeros(n_cols), np.zeros(n_cols)

    spec = spec[:, :n_cols]
    rows = np.argmax(spec, axis=0)  # first occurrence of each column maximum
    peaks = spec[rows, np.arange(n_cols)]
    positive = peaks > 0
    peak_freqs = np.where(positive, np.asarray(freq)[rows], 0)
    peak_values = np.where(positive, peaks, 0)
    return peak_freqs, peak_values


def plot_max(time, freq, Zxx, save):
    """Plot the dominant frequency and its value for every time bin.

    Args:
        time: Time axis; one entry per column of ``Zxx``.
        freq: Frequency axis; one entry per row of ``Zxx``.
        Zxx: 2-D array of non-negative spectrogram values (rows are
            frequencies, columns are times).
        save: When true, also write the result to ``output.csv``.
    """
    fres, maxres = _column_maxima(freq, Zxx, len(time))

    if save:
        write_to_file(time, fres, maxres, 'output.csv')

    fig, (ax1, ax2) = plt.subplots(2)
    fig.suptitle('Top : time and frequencies\nBottom : time and max values')
    ax1.plot(time, fres)
    ax2.plot(time, maxres)
    plt.show()


t, f, Zxx = fct("worlds_end_3.wav", 160.889, 0.032, 170.889, 3000, False)
# Other invocations used by the original author (note that fct now takes a
# sixth ``display`` argument):
#   fct("changing.wav", 0, 0.05, 3.9, 5000)
#   fct("worlds_end_3.wav", 75, (60/178)/4, 75+2, 2500)

plot_max(t, f, Zxx, True)

print("Program finished with return 0")