# *****************************
# ***** Cleaned Imports *****
# *****************************

# standard libraries
import os  # interact with the operating system
import random  # random number utilities

# third-party libraries for numerical and data processing
import numpy as np  # numeric calculations and array handling
import pandas as pd  # data manipulation and analysis

# machine learning and deep learning libraries
import tensorflow as tf  # ML and DL framework
from tensorflow.keras import layers, models  # build neural network layers and models
from tensorflow.keras.preprocessing import image_dataset_from_directory  # load image datasets from directories
from tensorflow.keras.models import Model  # base model class in Keras
from tensorflow.keras.regularizers import l2  # L2 regularization to prevent overfitting
from tensorflow.keras.preprocessing.image import ImageDataGenerator  # augment image data

# image processing and visualization libraries
import cv2  # image and video processing
from PIL import Image  # handle image files
import matplotlib  # access the matplotlib version
import matplotlib.pyplot as plt  # create visual data representations

# evaluation and tracking libraries
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay  # model performance evaluation
from sklearn.utils import class_weight  # compute class weights for unbalanced data
from sklearn import __version__ as sklearn_version  # retrieve scikit-learn version

# experiment tracking and HTTP requests
import mlflow  # track ML experiments
import requests  # perform HTTP requests

# GUI utilities for file selection
import tkinter as tk  # GUI toolkit
from tkinter import filedialog  # file selection dialogs

# *****************************
# ***** Extracting Library Versions *****
# *****************************

# define versions dictionary
versions = {
    "os": "N/A",  # the os module has no version attribute
    "numpy": np.__version__,
    "tensorflow": tf.__version__,
    "matplotlib": matplotlib.__version__,
    "scikit-learn": sklearn_version,
    "PIL": Image.__version__,
    "cv2": cv2.__version__,
    "pandas": pd.__version__
}

# display library versions
for lib, version in versions.items():
    print(f"{lib}: {version}")

# *****************************
# ***** Requirements File *****
# *****************************

# write dependencies to requirements.txt file
requirements = """
numpy==1.26.4
tensorflow==2.10.0
matplotlib==3.9.2
scikit-learn==1.5.2
Pillow==10.4.0
opencv-python==4.10.0
pandas==2.2.3
"""

with open("requirements.txt", "w") as file:
    file.write(requirements.strip())

# *****************************
# ***** Load Data Directories *****
# *****************************

# define base and dataset directories
base_dir = 'E:/Datas Champi'
train_dir = os.path.join(base_dir, 'balanced_train')
validation_dir = os.path.join(base_dir, 'balanced_validation')
test_dir = os.path.join(base_dir, 'test')

# check that each directory exists
for directory in [train_dir, validation_dir, test_dir]:
    if not os.path.isdir(directory):
        raise FileNotFoundError(f"Directory not found: {directory}")

# define parameters
batch_size = 32  # batch size
img_height = 224  # image height after resizing
img_width = 224  # image width after resizing
AUTOTUNE = tf.data.AUTOTUNE  # prefetching optimization

# *****************************
# ***** Load Image Datasets *****
# *****************************

# load training dataset without rescaling (pixel rescaling is applied inside the model)
train_ds = image_dataset_from_directory(
    train_dir,
    labels='inferred',  # infer labels from sub-directory names
    label_mode='int',  # use with 'sparse_categorical_crossentropy'
    batch_size=batch_size,
    image_size=(img_height, img_width),  # resize images
    shuffle=True,  # shuffle training data
    seed=123,  # ensure reproducibility
    interpolation='bilinear'  # resample images when resizing
)

validation_ds = image_dataset_from_directory(
    validation_dir,
    labels='inferred',
    label_mode='int',
    batch_size=batch_size,
    image_size=(img_height, img_width),
    shuffle=False,  # do not shuffle validation data
    seed=123,
    interpolation='bilinear'
)

test_ds = image_dataset_from_directory(
    test_dir,
    labels='inferred',
    label_mode='int',
    batch_size=batch_size,
    image_size=(img_height, img_width),
    shuffle=False,  # do not shuffle test data
    seed=123,
    interpolation='bilinear'
)

# *****************************
# ***** Extract Class Names *****
# *****************************

# store class names before applying .prefetch()
class_names = train_ds.class_names  # list of class names
num_classes = len(class_names)  # total number of classes
print(f"Detected classes ({num_classes}): {class_names}")  # display detected classes

# *****************************
# ***** Compute Class Weights *****
# *****************************

# initialize class count array
class_counts = np.zeros(num_classes, dtype=int)

# count samples in each class
for images, labels in train_ds:
    labels = labels.numpy()  # convert labels to numpy array
    for label in labels:
        class_counts[label] += 1  # increment class count

print(f"Samples per class: {class_counts}")

# calculate class weights for handling imbalance
y_train = np.concatenate([y.numpy() for x, y in train_ds], axis=0)  # gather all labels
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_train),
    y=y_train
)
class_weights_dict = {i: weight for i, weight in enumerate(class_weights)}
print(f"Class weights: {class_weights_dict}")
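# Optional sanity check (a minimal sketch): scikit-learn's 'balanced' mode computes
# n_samples / (n_classes * samples_per_class), so recomputing the weights from
# class_counts should reproduce class_weights_dict.
manual_weights = len(y_train) / (num_classes * class_counts)
print(f"Manually recomputed weights: {dict(enumerate(np.round(manual_weights, 4)))}")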
# *****************************
# ***** Visualize Class Distribution *****
# *****************************

# create figure
plt.figure(figsize=(12, 10))

# plot bar chart of image count per class
plt.bar(class_names, class_counts, color='skyblue')

# add plot title and labels
plt.title('Image Count per Class', fontsize=16)
plt.xlabel('Classes', fontsize=14)
plt.ylabel('Image Count', fontsize=14)
plt.xticks(rotation=45, ha='right')

# annotate bars with image counts
for index, count in enumerate(class_counts):
    plt.text(index, count + max(class_counts) * 0.01, str(count),
             ha='center', va='bottom', fontsize=12)

plt.tight_layout()
plt.show()

# *****************************
# ***** Optimize Dataset Performance *****
# *****************************

# apply .prefetch() for dataset loading optimization
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)
validation_ds = validation_ds.prefetch(buffer_size=AUTOTUNE)
test_ds = test_ds.prefetch(buffer_size=AUTOTUNE)

# *****************************
# ***** Utility Functions *****
# *****************************

def make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=None):
    # build a model that maps the input image to the activations of the last
    # convolutional layer together with the final predictions
    grad_model = Model(
        [model.inputs],
        [model.get_layer(last_conv_layer_name).output, model.output]
    )

    # compute the gradient of the target class score with respect to the
    # activations of the last convolutional layer
    with tf.GradientTape() as tape:
        last_conv_layer_output, preds = grad_model(img_array)
        if pred_index is None:
            pred_index = tf.argmax(preds[0])
        class_channel = preds[:, pred_index]

    grads = tape.gradient(class_channel, last_conv_layer_output)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))  # mean gradient per feature map

    # weight the feature maps by the pooled gradients and normalize to [0, 1]
    last_conv_layer_output = last_conv_layer_output[0]
    heatmap = last_conv_layer_output @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)
    heatmap = tf.maximum(heatmap, 0) / tf.math.reduce_max(heatmap)
    return heatmap.numpy()


def superimpose_heatmap(img, heatmap, alpha=0.4):
    # resize the heatmap to the image size and map it to the 'jet' colormap
    heatmap = cv2.resize(heatmap, (img_width, img_height))
    heatmap = np.uint8(255 * heatmap)
    jet = plt.get_cmap("jet")
    jet_colors = jet(heatmap)[:, :, :3]
    jet_heatmap = (jet_colors * 255).astype(np.uint8)

    # blend the colored heatmap with the original image
    superimposed_img = jet_heatmap * alpha + np.array(img)
    superimposed_img = superimposed_img.astype(np.uint8)
    return Image.fromarray(superimposed_img)
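# Optional helper (a minimal sketch): make_gradcam_heatmap() needs the name of the
# last Conv2D layer; rather than hard-coding an auto-generated name such as
# 'conv2d_5', it can be looked up from the model once the model has been built.
def find_last_conv_layer_name(model):
    conv_names = [layer.name for layer in model.layers if isinstance(layer, layers.Conv2D)]
    if not conv_names:
        raise ValueError("No Conv2D layer found in the model.")
    return conv_names[-1]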
# *****************************
# ***** Build Optimized CNN Model *****
# *****************************

input_shape = (img_height, img_width, 3)

model = models.Sequential([
    layers.Rescaling(1./255, input_shape=input_shape),  # normalize pixel values to [0, 1]

    # convolutional blocks with increasing filter counts
    layers.Conv2D(32, (3, 3), activation='relu', kernel_regularizer=l2(0.001)),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),

    layers.Conv2D(64, (3, 3), activation='relu', kernel_regularizer=l2(0.001)),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),

    layers.Conv2D(128, (3, 3), activation='relu', kernel_regularizer=l2(0.001)),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),

    layers.Conv2D(256, (3, 3), activation='relu', kernel_regularizer=l2(0.001)),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.3),

    layers.Conv2D(512, (3, 3), activation='relu', kernel_regularizer=l2(0.001)),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.4),

    layers.Conv2D(1024, (3, 3), activation='relu', kernel_regularizer=l2(0.001)),
    layers.BatchNormalization(),

    # classification head
    layers.GlobalMaxPooling2D(),
    layers.Dense(512, activation='relu', kernel_regularizer=l2(0.001)),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation='softmax')
])

model.summary()

# *****************************
# ***** Compile Model *****
# *****************************

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# *****************************
# ***** Callbacks Definition *****
# *****************************

callbacks = [
    tf.keras.callbacks.EarlyStopping(
        patience=10,
        restore_best_weights=True,
        monitor='val_loss'
    ),
    tf.keras.callbacks.ModelCheckpoint(
        'best_model.h5',
        save_best_only=True,
        monitor='val_loss'
    )
]

# *****************************
# ***** Model Training *****
# *****************************

history = model.fit(
    train_ds,
    epochs=20,
    validation_data=validation_ds,
    callbacks=callbacks,
    class_weight=class_weights_dict
)
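# Optional Grad-CAM demo (a minimal sketch, assuming the model above has finished
# training): applies the utility functions and find_last_conv_layer_name() defined
# earlier to one image from the test set and displays the resulting overlay.
last_conv_layer_name = find_last_conv_layer_name(model)
for images, labels in test_ds.take(1):  # take a single batch from the test set
    sample = tf.expand_dims(images[0], axis=0)  # shape (1, img_height, img_width, 3)
    heatmap = make_gradcam_heatmap(sample, model, last_conv_layer_name)
    overlay = superimpose_heatmap(images[0].numpy(), heatmap, alpha=0.4)
    plt.figure(figsize=(6, 6))
    plt.imshow(overlay)
    plt.title(f"Grad-CAM: {class_names[int(labels[0])]}")  # title shows the ground-truth class
    plt.axis('off')
    plt.show()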