Merge main into model-improvement

eggwhat · Jan 25, 2024 · 9772858 · 9772858
2 parents 84704ed + 7a91bd5
commit 9772858
Show file tree

Hide file tree

Showing 7 changed files with 269 additions and 12 deletions.
diff --git a/.gitignore b/.gitignore
@@ -160,4 +160,5 @@ cython_debug/
 #.idea/
 
 # Custom
-train/data/imdb_crop
+train/data/imdb_crop
+train/face_detection/data
diff --git a/backend/api.py b/backend/api.py
@@ -1,7 +1,7 @@
 from fastapi import FastAPI, File, UploadFile, HTTPException, WebSocket
 from fastapi.responses import StreamingResponse, JSONResponse
 from fastapi.middleware.cors import CORSMiddleware
-from core.face_detection import detect_faces
+from core.face_detection import detect_faces, detect_faces_video, apply_bounding_box
 from core.video_processing import generate_video, process_video
 from PIL import Image
 from typing import List
@@ -53,13 +53,19 @@ async def get_models():
 @app.websocket("/detect-age/ws")
 async def detect_age_single(websocket: WebSocket):
     await websocket.accept()
+
+    frame_counter = 0
+    detected_faces = None
     while True:
         data = await websocket.receive_text()
         decoded_data = base64.b64decode(data)
         frame = cv2.imdecode(np.frombuffer(decoded_data, dtype=np.uint8), 1)
-        detect_faces(frame, Resnet_7CModel)
+        if frame_counter % 3 == 0:
+            detected_faces = detect_faces_video(frame, Resnet_7CModel)
+        frame = apply_bounding_box(frame, detected_faces)
         _, encoded_frame = cv2.imencode('.jpg', frame)
         image = base64.b64encode(encoded_frame.tobytes()).decode('utf-8')
+        frame_counter += 1
         await websocket.send_text(image)
 
 
@@ -88,7 +94,6 @@ def detect_age_multiple(files: List[UploadFile] = File(...)):
             for image in images:
                 zipf.writestr(image["name"], image["content"])
 
-
         zip_file.seek(0)
         return StreamingResponse(iter([zip_file.getvalue()]), media_type="application/x-zip-compressed",
                                  headers={"Content-Disposition": f"attachment; filename=images.zip"})
@@ -107,7 +112,7 @@ async def detect_age_video(file: UploadFile = File(...)):
         with open(file_path, "wb") as video_file:
             shutil.copyfileobj(file.file, video_file)
 
-        frames, frame_rate = process_video(file_path, detect_faces, Resnet_7CModel)
+        frames, frame_rate = process_video(file_path, detect_faces_video, apply_bounding_box, Resnet_7CModel)
 
         video_bytes = generate_video(frames, temp_dir, frame_rate)
 

diff --git a/backend/core/face_detection.py b/backend/core/face_detection.py
@@ -9,7 +9,7 @@ def detect_faces(img, model):
         cv2.data.haarcascades + "haarcascade_frontalface_default.xml"  # pretrained model
     )
     faces = face_classifier.detectMultiScale(
-        gray_image, scaleFactor=1.1, minNeighbors=5, minSize=(40, 40)
+        gray_image, scaleFactor=1.1, minNeighbors=6, minSize=(40, 40)
     )
 
     for (x, y, w, h) in faces:
@@ -21,3 +21,30 @@ def detect_faces(img, model):
     return {
         'faces': faces
     }
+
+
+def detect_faces_video(img, model):
+    gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+
+    face_classifier = cv2.CascadeClassifier(
+        cv2.data.haarcascades + "haarcascade_frontalface_default.xml"  # pretrained model
+    )
+    faces = face_classifier.detectMultiScale(
+        gray_image, scaleFactor=1.1, minNeighbors=6, minSize=(40, 40)
+    )
+
+    predictions = []
+    for (x, y, w, h) in faces:
+        face_img = img[y:y+h, x:x+w]
+        predictions.append(model.predict(Image.fromarray(face_img).convert('RGB')))
+    return {
+        'faces': faces,
+        'predictions': predictions
+    }
+
+
+def apply_bounding_box(frame, detected_faces):
+    for (x, y, w, h), prediction in zip(detected_faces['faces'], detected_faces['predictions']):
+        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 4)
+        cv2.putText(frame, prediction, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
+    return frame
diff --git a/backend/core/video_processing.py b/backend/core/video_processing.py
@@ -9,8 +9,6 @@ def generate_video(frames, temp_dir, frame_rate):
     output_path = os.path.join(temp_dir, 'output.mp4')
     video_writer = cv2.VideoWriter(output_path, fourcc, frame_rate, (width, height))
 
-    frames = [np.array(frame) for frame in frames]
-
     for frame in frames:
         video_writer.write(frame)
 
@@ -20,18 +18,22 @@ def generate_video(frames, temp_dir, frame_rate):
         return output_file.read()
 
 
-def process_video(file_path, detect_faces, model):
+def process_video(file_path, detect_faces_video, apply_bounding_box, model):
     frames = []
     video = cv2.VideoCapture(file_path)
     frame_rate = int(video.get(cv2.CAP_PROP_FPS))
 
+    frame_counter = 0
+    detected_faces = None
     while True:
         ret, frame = video.read()
         if not ret:
             break
-
-        detect_faces(frame, model)
+        if frame_counter % 5 == 0:
+            detected_faces = detect_faces_video(frame, model)
+        frame = apply_bounding_box(frame, detected_faces)
         frames.append(frame)
+        frame_counter += 1
 
     video.release()
     return frames, frame_rate
diff --git a/frontend/age-detection/src/App.js b/frontend/age-detection/src/App.js
@@ -35,7 +35,7 @@ function App() {
         if (!isPredictionReceived) {
             const interval = setInterval(() => {
                 captureAndSendFrame(webcamRef, ws);
-            }, 1000 / 5);
+            }, 1000 / 10);
 
             return () => clearInterval(interval);
         }

diff --git a/train/face_detection/evaluation.py b/train/face_detection/evaluation.py
@@ -0,0 +1,57 @@
+import pandas as pd
+import os
+import cv2 
+
+def detect_faces(path_to_image, minNeighbors = 5):
+    img = cv2.imread(path_to_image)
+    gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+
+    face_classifier = cv2.CascadeClassifier(
+        cv2.data.haarcascades + "haarcascade_frontalface_default.xml" # pretrained model
+    )
+    faces = face_classifier.detectMultiScale(
+        gray_image, scaleFactor=1.1, minNeighbors=minNeighbors, minSize=(40, 40)
+    )
+    crops = []
+    for (x, y, w, h) in faces:
+        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 4)
+        crops.append(img[y:y+h, x:x+w])
+    return {
+        'result': cv2.cvtColor(img, cv2.COLOR_BGR2RGB),
+        'faces': faces,
+        'crops': crops
+    }
+
+def evaluate(input_dir, df, minNeighbors = 5):
+    toReview = []
+    falsePositives = 0
+    notDetected = 0
+    notDetectedRows = []
+    for index, row in df.iterrows():
+        path_to_image = input_dir + '/images/' + row.image_name
+        detected = detect_faces(path_to_image, minNeighbors)
+        if len(detected['faces']) != row.faces_n:
+            toReview.append({ 'row': row, 'output': detected})
+        if len(detected['faces']) > row.faces_n:
+            falsePositives = falsePositives + (len(detected['faces']) - row.faces_n)
+        if len(detected['faces']) < row.faces_n:
+            notDetected = notDetected + (row.faces_n - len(detected['faces']))
+            notDetectedRows.append({'row': row, 'output': detected})
+
+    return {
+        'good': len(df) - len(toReview),
+        'toReview': toReview,
+        'falsePositives': falsePositives,
+        'notDetected': notDetected,
+        'notDetectedRows': notDetectedRows
+    }
+
+
+
+if __name__ == "__main__":
+    input_dir = os.path.join('', 'data')
+    m_path = input_dir + '/faces.csv'  # change this to your metadata file
+    df_faces = pd.read_csv(m_path)
+    df_faces = df_faces.groupby('image_name').size().reset_index(name='faces_n')
+    print(f"Amount of images: {len(df_faces)}")
+    # evaluate(input_dir, df_faces)
diff --git a/train/face_detection/face_detection_evaluation.ipynb b/train/face_detection/face_detection_evaluation.ipynb