# *****************************
# ***** Cleaned Imports *****
# *****************************
# standard libraries
import os # interact with the operating system
import random # random sampling utilities
import tkinter as tk # GUI toolkit for simple dialogs
from tkinter import filedialog # file selection dialogs
# third-party libraries for numerical and data processing
import numpy as np # numeric calculations and array handling
import pandas as pd # data manipulation and analysis
# machine learning and deep learning libraries
import tensorflow as tf # ML and DL framework
from tensorflow.keras import layers, models # build neural network layers and models
from tensorflow.keras.preprocessing import image_dataset_from_directory # load image datasets from directories
from tensorflow.keras.models import Model # base model class in Keras
from tensorflow.keras.regularizers import l2 # L2 regularization to prevent overfitting
from tensorflow.keras.preprocessing.image import ImageDataGenerator # augment image data
# image processing and visualization libraries
import cv2 # image and video processing
from PIL import Image # handle image files
import matplotlib # needed to report the matplotlib version below
import matplotlib.pyplot as plt # create visual data representations
# evaluation and tracking libraries
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay # model performance evaluation
from sklearn.utils import class_weight # compute class weights for unbalanced data
from sklearn import __version__ as sklearn_version # retrieve scikit-learn version
# experiment tracking and HTTP requests
import mlflow # track ML experiments
import requests # perform HTTP requests
# *****************************
# ***** Extracting Library Versions *****
# *****************************
# define versions dictionary
versions = {
    "os": "N/A", # os does not have accessible version
    "numpy": np.__version__,
    "tensorflow": tf.__version__,
    "matplotlib": matplotlib.__version__,
    "scikit-learn": sklearn_version,
    "PIL": Image.__version__,
    "cv2": cv2.__version__,
    "pandas": pd.__version__
}
# display library versions
for lib, version in versions.items():
    print(f"{lib}: {version}")
# *****************************
# ***** Requirements File *****
# *****************************
# write dependencies to requirements.txt file
requirements = """
numpy==1.26.4
tensorflow==2.10.0
matplotlib==3.9.2
scikit-learn==1.5.2
Pillow==10.4.0
opencv-python==4.10.0
pandas==2.2.3
"""
with open("requirements.txt", "w") as file:
    file.write(requirements.strip())
# *****************************
# ***** Load Data Directories *****
# *****************************
# define base and dataset directories
base_dir = 'E:/Datas Champi'
train_dir = os.path.join(base_dir, 'balanced_train')
validation_dir = os.path.join(base_dir, 'balanced_validation')
test_dir = os.path.join(base_dir, 'test')
# check if each directory exists
for directory in [train_dir, validation_dir, test_dir]:
    if not os.path.isdir(directory):
        raise FileNotFoundError(f"Directory not found: {directory}")
# define parameters
batch_size = 32 # batch size
img_height = 224 # image height after resizing
img_width = 224 # image width after resizing
AUTOTUNE = tf.data.AUTOTUNE # prefetching optimization
# *****************************
# ***** Load Image Datasets *****
# *****************************
# load training dataset without rescaling
train_ds = image_dataset_from_directory(
    train_dir,
    labels='inferred', # infer labels from sub-directory names
    label_mode='int', # use with 'sparse_categorical_crossentropy'
    batch_size=batch_size,
    image_size=(img_height, img_width), # resize images
    shuffle=True, # shuffle training data
    seed=123, # ensure reproducibility
    interpolation='bilinear' # resample image when resizing
)
validation_ds = image_dataset_from_directory(
    validation_dir,
    labels='inferred',
    label_mode='int',
    batch_size=batch_size,
    image_size=(img_height, img_width),
    shuffle=False, # do not shuffle validation data
    seed=123,
    interpolation='bilinear'
)
test_ds = image_dataset_from_directory(
    test_dir,
    labels='inferred',
    label_mode='int',
    batch_size=batch_size,
    image_size=(img_height, img_width),
    shuffle=False, # do not shuffle test data
    seed=123,
    interpolation='bilinear'
)
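# note: image_dataset_from_directory returns float32 image tensors with raw pixel values
# in [0, 255]; normalization is intentionally left to the Rescaling(1./255) layer inside
# the model defined below, which is why no rescaling is applied here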
# *****************************
# ***** Extract Class Names *****
# *****************************
# store class names before applying .prefetch()
class_names = train_ds.class_names # list of class names
num_classes = len(class_names) # total number of classes
print(f"Detected classes ({num_classes}): {class_names}") # display detected classes
# *****************************
# ***** Compute Class Weights *****
# *****************************
# initialize class count array
class_counts = np.zeros(num_classes, dtype=int)
# count samples in each class
for images, labels in train_ds:
    labels = labels.numpy() # convert labels to numpy array
    for label in labels:
        class_counts[label] += 1 # increment class count
print(f"Samples per class: {class_counts}")
# calculate class weights for handling imbalance
y_train = np.concatenate([y.numpy() for x, y in train_ds], axis=0) # gather all labels
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_train),
    y=y_train
)
class_weights_dict = {i: weight for i, weight in enumerate(class_weights)}
print(f"Class weights: {class_weights_dict}")
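# for reference, scikit-learn's 'balanced' heuristic weights each class c as
# n_samples / (n_classes * n_samples_in_c), so under-represented classes receive
# proportionally larger weights (e.g. with 1000 images, 10 classes and only 50 images
# in one class, that class gets a weight of 1000 / (10 * 50) = 2.0)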
# *****************************
# ***** Visualize Class Distribution *****
# *****************************
# create figure
plt.figure(figsize=(12, 10))
# plot bar chart for image count per class
plt.bar(class_names, class_counts, color='skyblue')
# add plot title and labels
plt.title('Image Count per Class', fontsize=16)
plt.xlabel('Classes', fontsize=14)
plt.ylabel('Image Count', fontsize=14)
plt.xticks(rotation=45, ha='right')
# annotate with image counts
for index, count in enumerate(class_counts):
    plt.text(index, count + max(class_counts)*0.01, str(count), ha='center', va='bottom', fontsize=12)
plt.tight_layout()
plt.show()
# *****************************
# ***** Optimize Dataset Performance *****
# *****************************
# apply .prefetch() for dataset loading optimization
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)
validation_ds = validation_ds.prefetch(buffer_size=AUTOTUNE)
test_ds = test_ds.prefetch(buffer_size=AUTOTUNE)
# *****************************
# ***** Utility Functions *****
# *****************************
# Grad-CAM: compute a class-activation heatmap for one image
def make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=None):
    # model mapping the input image to the last conv layer's activations and the predictions
    grad_model = Model(
        [model.inputs],
        [model.get_layer(last_conv_layer_name).output, model.output]
    )
    # record the gradient of the target class score w.r.t. the conv activations
    with tf.GradientTape() as tape:
        last_conv_layer_output, preds = grad_model(img_array)
        if pred_index is None:
            pred_index = tf.argmax(preds[0]) # default to the top predicted class
        class_channel = preds[:, pred_index]
    grads = tape.gradient(class_channel, last_conv_layer_output)
    # average gradients over the spatial dimensions to get one weight per channel
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))
    # weight the feature maps by the pooled gradients and collapse to a 2D map
    last_conv_layer_output = last_conv_layer_output[0]
    heatmap = last_conv_layer_output @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)
    # keep positive contributions only and normalize to [0, 1]
    heatmap = tf.maximum(heatmap, 0) / tf.math.reduce_max(heatmap)
    heatmap = heatmap.numpy()
    return heatmap

# overlay a Grad-CAM heatmap on the original image
def superimpose_heatmap(img, heatmap, alpha=0.4):
    # resize the heatmap to the image size and convert it to 8-bit values
    heatmap = cv2.resize(heatmap, (img_width, img_height))
    heatmap = np.uint8(255 * heatmap)
    # colorize the heatmap with the 'jet' colormap
    jet = plt.get_cmap("jet")
    jet_colors = jet(heatmap)[:, :, :3]
    jet_heatmap = jet_colors * 255
    jet_heatmap = jet_heatmap.astype(np.uint8)
    # blend the colorized heatmap with the original image
    superimposed_img = jet_heatmap * alpha + np.array(img)
    superimposed_img = superimposed_img.astype(np.uint8)
    superimposed_img = Image.fromarray(superimposed_img)
    return superimposed_img
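# a minimal usage sketch (hypothetical names, kept as comments because the model is only
# built further below): pick the last Conv2D layer from the model, compute a heatmap for
# one test image with make_gradcam_heatmap(), then blend it with superimpose_heatmap():
#
#   last_conv_name = [l.name for l in model.layers if isinstance(l, layers.Conv2D)][-1]
#   for images, labels in test_ds.take(1):
#       img_array = tf.expand_dims(images[0], axis=0)
#       heatmap = make_gradcam_heatmap(img_array, model, last_conv_name)
#       overlay = superimpose_heatmap(images[0].numpy().astype("uint8"), heatmap)
#       plt.imshow(overlay)
#       plt.axis("off")
#       plt.show()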
# *****************************
# ***** Build Optimized CNN Model *****
# *****************************
input_shape = (img_height, img_width, 3)
model = models.Sequential([
    # normalize raw pixel values to [0, 1] inside the model
    layers.Rescaling(1./255, input_shape=input_shape),
    # convolutional blocks with increasing filter counts, each L2-regularized
    layers.Conv2D(32, (3, 3), activation='relu', kernel_regularizer=l2(0.001)),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu', kernel_regularizer=l2(0.001)),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu', kernel_regularizer=l2(0.001)),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(256, (3, 3), activation='relu', kernel_regularizer=l2(0.001)),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.3),
    layers.Conv2D(512, (3, 3), activation='relu', kernel_regularizer=l2(0.001)),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.4),
    layers.Conv2D(1024, (3, 3), activation='relu', kernel_regularizer=l2(0.001)),
    layers.BatchNormalization(),
    # classification head
    layers.GlobalMaxPooling2D(),
    layers.Dense(512, activation='relu', kernel_regularizer=l2(0.001)),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation='softmax')
])
model.summary()
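# with 224x224 inputs and default 'valid' convolutions, the last Conv2D block should come
# out around 3x3 spatially with 1024 channels (model.summary() above reports the exact
# shapes), which GlobalMaxPooling2D collapses into a single 1024-dimensional feature vector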
# *****************************
# ***** Compile Model *****
# *****************************
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)
# *****************************
# ***** Callbacks Definition *****
# *****************************
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        patience=10,
        restore_best_weights=True,
        monitor='val_loss'
    ),
    tf.keras.callbacks.ModelCheckpoint(
        'best_model.h5',
        save_best_only=True,
        monitor='val_loss'
    )
]
# *****************************
# ***** Model Training *****
# *****************************
history = model.fit(
    train_ds,
    epochs=20,
    validation_data=validation_ds,
    callbacks=callbacks,
    class_weight=class_weights_dict
)