When I try to train this model in Google Colab it crashes because it uses all the RAM, even though the model doesn't have many trainable parameters. I have a dataset of MP3 files, each 128 kbps and 3–4 seconds long, and I'm trying to build an audio classifier, but it just doesn't work: training takes too much RAM and is also slow. Can anyone help me, please? I use TensorFlow 2.10.0 and tensorflow-io 0.27.0, because newer versions have other problems.
import tensorflow as tf
import tensorflow_io as tfio
import os
import random
import joblib
# Number of examples per training batch (NOTE(review): defined but the
# pipeline below hard-codes batch(4) instead of using this constant).
BATCH_SIZE = 4
def load_sound(filename):
    """Load an audio file, mix it down to mono, and resample to 16 kHz.

    Args:
        filename: Path (string tensor or str) to an audio file readable by
            tensorflow-io (e.g. MP3).

    Returns:
        A 1-D float32 tensor of mono samples at 16 000 Hz.
    """
    res = tfio.audio.AudioIOTensor(filename, dtype=tf.float32)
    tensor = res.to_tensor()
    # Mix down to mono by averaging across channels. The original
    # reduce_sum(...) / 2 assumed exactly 2 channels and would halve the
    # amplitude of mono files; reduce_mean is correct for any channel count
    # and identical to sum/2 for stereo input.
    tensor = tf.math.reduce_mean(tensor, axis=1)
    # resample() requires an integer rate; AudioIOTensor.rate may not be int64.
    sample_rate = tf.cast(res.rate, dtype=tf.int64)
    wav = tfio.audio.resample(tensor, rate_in=sample_rate, rate_out=16000)
    return wav
base_dir = '/content/drive/MyDrive/house_sounds/sound_data'
folders = ['door', 'voice', 'glass', 'footsteps']

# One file-path dataset per class folder, in the same order as `folders`.
files = []
for folder in folders:
    pattern = os.path.join(base_dir, folder, '*.mp3')
    files.append(tf.data.Dataset.list_files(pattern))

def _with_label(paths_ds, label):
    # Pair every file path with a constant integer class label.
    labels = tf.data.Dataset.from_tensor_slices(tf.fill([len(paths_ds)], label))
    return tf.data.Dataset.zip((paths_ds, labels))

door = _with_label(files[0], 0)
voice = _with_label(files[1], 1)
glass = _with_label(files[2], 2)
footsteps = _with_label(files[3], 3)

# Single (path, label) dataset covering all four classes.
data = door.concatenate(voice).concatenate(glass).concatenate(footsteps)
def create_spectrogram(file_path, label):
    """Turn an audio file into a fixed-size magnitude spectrogram.

    Args:
        file_path: Path to the audio file.
        label: Integer class label, passed through unchanged.

    Returns:
        Tuple of (spectrogram tensor of shape (frames, 257, 1), label).
    """
    audio = load_sound(file_path)
    # Clip to at most 3 s at 16 kHz, then left-pad with zeros up to exactly
    # 48000 samples so every example has the same length.
    audio = audio[:48000]
    pad_amount = 48000 - tf.shape(audio)[0]
    audio = tf.pad(audio, [[pad_amount, 0]])
    stft = tf.signal.stft(audio, frame_length=320, frame_step=32)
    magnitude = tf.abs(stft)
    # Add a trailing channel axis so Conv2D can consume it.
    return tf.expand_dims(magnitude, axis=2), label
# Decode/spectrogram work is CPU-bound: parallelize the map.
data = data.map(create_spectrogram, num_parallel_calls=tf.data.AUTOTUNE)
# IMPORTANT: cache to a local disk file, NOT to RAM. A bare .cache() keeps
# every ~1491x257 float32 spectrogram (~1.5 MB each) in host memory, which
# is what exhausts Colab's RAM once the whole dataset has been seen.
# Caching to /content (local SSD, not Drive) keeps epoch 2+ fast without
# holding the dataset in memory.
data = data.cache('/content/spectrogram.cache')
data = data.shuffle(buffer_size=1000)
# Use the declared constant instead of a hard-coded 4.
data = data.batch(BATCH_SIZE)
# Let tf.data tune how many batches to prepare ahead.
data = data.prefetch(tf.data.AUTOTUNE)
print('Len: ', len(data))
# NOTE(review): the 1600/247 split is hard-coded to the dataset size at
# batch size 4 — recompute these if the data or BATCH_SIZE changes.
train = data.take(1600)
test = data.skip(1600).take(247)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Dense, Flatten, MaxPooling2D
# Small CNN over (frames, freq_bins, 1) spectrograms; aggressive 5x5 pooling
# shrinks the large time axis quickly to keep the Flatten/Dense layers small.
model = Sequential()
model.add(Conv2D(16, (2, 2), activation='relu', input_shape=(1491, 257, 1)))
model.add(MaxPooling2D(pool_size=(5, 5)))
model.add(Conv2D(32, (2, 2), activation='relu'))
model.add(MaxPooling2D(pool_size=(5, 5)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(5, 5)))
model.add(Flatten())
model.add(Dense(32, activation='relu'))
# 4 output classes: door / voice / glass / footsteps.
model.add(Dense(4, activation='softmax'))
# Integer labels (0-3) pair with SparseCategoricalCrossentropy.
model.compile(optimizer='Adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(),
              metrics=['accuracy'])
model.summary()
# Train for 5 epochs, evaluating on the held-out split after each epoch.
hist = model.fit(train, epochs=5, validation_data=test)