-1

When I try to train this model — which doesn't even have many trainable parameters — it crashes in Google Colab because it uses all the RAM. I have a dataset of MP3 files; each file is 128 kbps and 3–4 seconds long. I am trying to build an audio classifier, but training consumes too much RAM and is also slow. Can anyone help me, please? I use TensorFlow 2.10.0 and tensorflow-io 0.27.0 because newer versions have other problems.

import tensorflow as tf
import tensorflow_io as tfio
import os
import random
import joblib

BATCH_SIZE = 4
def load_sound(filename):
    """Load an audio file and return a mono waveform resampled to 16 kHz.

    Args:
        filename: Path to the audio file (str or string tensor), any format
            tfio.audio.AudioIOTensor can decode.

    Returns:
        A 1-D float32 tensor holding the mono waveform at 16 kHz.
    """
    res = tfio.audio.AudioIOTensor(filename, dtype=tf.float32)
    tensor = res.to_tensor()

    # Downmix to mono by averaging across channels. The previous code
    # summed the channels and divided by 2, which is only correct for
    # stereo input and halves the amplitude of mono files; reduce_mean
    # is correct for any channel count.
    tensor = tf.math.reduce_mean(tensor, axis=1)

    # tfio.audio.resample expects an int64 input rate.
    sample_rate = tf.cast(res.rate, dtype=tf.int64)

    wav = tfio.audio.resample(tensor, rate_in=sample_rate, rate_out=16000)

    return wav


base_dir = '/content/drive/MyDrive/house_sounds/sound_data'
# Folder order defines the integer class labels: door=0, voice=1, glass=2,
# footsteps=3 (consumed later by SparseCategoricalCrossentropy).
folders = ['door', 'voice', 'glass', 'footsteps']
files = []

for folder in folders:
    folder_path = os.path.join(base_dir, folder)
    # NOTE(review): list_files shuffles the glob result by default, so the
    # later take/skip train/test split sees a different order each run —
    # consider shuffle=False plus an explicit seeded shuffle if you need a
    # reproducible split.
    file_paths = tf.data.Dataset.list_files(os.path.join(folder_path, '*.mp3'))
    files.append(file_paths)

# Pair every file path with its class index and concatenate all classes
# into a single (path, label) dataset. This replaces four copy-pasted
# zip/concatenate statements with one loop.
data = None
for label, file_ds in enumerate(files):
    labels = tf.data.Dataset.from_tensor_slices(tf.fill([len(file_ds)], label))
    labeled = tf.data.Dataset.zip((file_ds, labels))
    data = labeled if data is None else data.concatenate(labeled)

def create_spectrogram(file_path, label):
    """Convert one audio file into a magnitude spectrogram plus its label.

    The waveform is truncated, then left-padded with zeros, to exactly
    48000 samples (3 s at 16 kHz) so every example has the same shape.

    Args:
        file_path: Path to the audio file, forwarded to load_sound().
        label: Integer class label, passed through unchanged.

    Returns:
        A (spectrogram, label) pair where the spectrogram has a trailing
        channel axis so Conv2D layers accept it.
    """
    wav = load_sound(file_path)[:48000]
    pad_amount = 48000 - tf.shape(wav)[0]
    wav = tf.concat([tf.zeros([pad_amount], dtype=tf.float32), wav], axis=0)

    stft = tf.signal.stft(wav, frame_length=320, frame_step=32)
    magnitude = tf.abs(stft)
    return tf.expand_dims(magnitude, axis=2), label

# Decode/transform files in parallel instead of one at a time — this is
# the main fix for slow training.
data = data.map(create_spectrogram, num_parallel_calls=tf.data.AUTOTUNE)
# Cache to a file on disk instead of cache() with no argument: an
# in-memory cache keeps every spectrogram (1491 x 257 float32, ~1.5 MB
# each) resident, which is what exhausts Colab's RAM.
data = data.cache('/content/spectrogram_cache')
data = data.shuffle(buffer_size=1000)
# Use the BATCH_SIZE constant declared at the top instead of a magic 4.
data = data.batch(BATCH_SIZE)
data = data.prefetch(tf.data.AUTOTUNE)
print('Len: ', len(data))
# NOTE(review): 1600 + 247 batches assumes a fixed dataset size — verify
# against len(data) above if the file count changes.
train = data.take(1600)
test = data.skip(1600).take(247)

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Dense, Flatten, MaxPooling2D

# Three conv/pool stages followed by a small dense classifier head.
# Input shape (1491, 257, 1) matches the spectrograms produced above:
# 1491 STFT frames x 257 frequency bins x 1 channel.
model = Sequential()
model.add(Conv2D(16, (2, 2), activation='relu', input_shape=(1491, 257, 1)))
model.add(MaxPooling2D(pool_size=(5, 5)))
model.add(Conv2D(32, (2, 2), activation='relu'))
model.add(MaxPooling2D(pool_size=(5, 5)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(5, 5)))
model.add(Flatten())
model.add(Dense(32, activation='relu'))
# Four output classes, one per sound folder.
model.add(Dense(4, activation='softmax'))

model.compile(
    optimizer='Adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)
model.summary()

# Train for five epochs, validating on the held-out split after each one.
hist = model.fit(train, epochs=5, validation_data=test)

0

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.