You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

195 lines
6.2 KiB

  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 66,
  6. "metadata": {},
  7. "outputs": [],
  8. "source": [
  9. "# Import des librairies\n",
  10. "import pandas as pd\n",
  11. "import os\n",
  12. "\n",
  13. "# Répertoire des données\n",
  14. "data_path = '../../data/LAYER1/MO/MO/'"
  15. ]
  16. },
  17. {
  18. "cell_type": "code",
  19. "execution_count": 68,
  20. "metadata": {},
  21. "outputs": [
  22. {
  23. "name": "stdout",
  24. "output_type": "stream",
  25. "text": [
  26. "Data saved to ../../data/LAYER1/MO/dataset.csv\n"
  27. ]
  28. },
  29. {
  30. "data": {
  31. "text/html": [
  32. "<div>\n",
  33. "<style scoped>\n",
  34. " .dataframe tbody tr th:only-of-type {\n",
  35. " vertical-align: middle;\n",
  36. " }\n",
  37. "\n",
  38. " .dataframe tbody tr th {\n",
  39. " vertical-align: top;\n",
  40. " }\n",
  41. "\n",
  42. " .dataframe thead th {\n",
  43. " text-align: right;\n",
  44. " }\n",
  45. "</style>\n",
  46. "<table border=\"1\" class=\"dataframe\">\n",
  47. " <thead>\n",
  48. " <tr style=\"text-align: right;\">\n",
  49. " <th></th>\n",
  50. " <th>species_id</th>\n",
  51. " <th>imgs_files</th>\n",
  52. " </tr>\n",
  53. " </thead>\n",
  54. " <tbody>\n",
  55. " <tr>\n",
  56. " <th>0</th>\n",
  57. " <td>2749</td>\n",
  58. " <td>51775.jpg,48607.jpg,7283.jpg,56752.jpg,19683.j...</td>\n",
  59. " </tr>\n",
  60. " <tr>\n",
  61. " <th>1</th>\n",
  62. " <td>15162</td>\n",
  63. " <td>1340401.jpg,489065.jpg,635182.jpg,464456.jpg,5...</td>\n",
  64. " </tr>\n",
  65. " <tr>\n",
  66. " <th>2</th>\n",
  67. " <td>50164</td>\n",
  68. " <td>1483369.jpg,161806.jpg,541519.jpg,644907.jpg,8...</td>\n",
  69. " </tr>\n",
  70. " <tr>\n",
  71. " <th>3</th>\n",
  72. " <td>1540</td>\n",
  73. " <td>905357.jpg,1376931.jpg,1573947.jpg,897180.jpg,...</td>\n",
  74. " </tr>\n",
  75. " <tr>\n",
  76. " <th>4</th>\n",
  77. " <td>1174</td>\n",
  78. " <td>1565785.jpg,1196459.jpg,619643.jpg,888195.jpg,...</td>\n",
  79. " </tr>\n",
  80. " <tr>\n",
  81. " <th>5</th>\n",
  82. " <td>373</td>\n",
  83. " <td>735385.jpg,1029205.jpg,58108.jpg,400760.jpg,57...</td>\n",
  84. " </tr>\n",
  85. " <tr>\n",
  86. " <th>6</th>\n",
  87. " <td>362</td>\n",
  88. " <td>1022083.jpg,864049.jpg,1553692.jpg,727623.jpg,...</td>\n",
  89. " </tr>\n",
  90. " <tr>\n",
  91. " <th>7</th>\n",
  92. " <td>42</td>\n",
  93. " <td>377481.jpg,353396.jpg,17237.jpg,304456.jpg,280...</td>\n",
  94. " </tr>\n",
  95. " <tr>\n",
  96. " <th>8</th>\n",
  97. " <td>344</td>\n",
  98. " <td>57062.jpg,284982.jpg,497195.jpg,497192.jpg,517...</td>\n",
  99. " </tr>\n",
  100. " <tr>\n",
  101. " <th>9</th>\n",
  102. " <td>39842</td>\n",
  103. " <td>366700.jpg,56994.jpg,28073.jpg,370092.jpg,3037...</td>\n",
  104. " </tr>\n",
  105. " <tr>\n",
  106. " <th>10</th>\n",
  107. " <td>330</td>\n",
  108. " <td>262051.jpg,575138.jpg,97947.jpg,575143.jpg,554...</td>\n",
  109. " </tr>\n",
  110. " <tr>\n",
  111. " <th>11</th>\n",
  112. " <td>63454</td>\n",
  113. " <td>319811.jpg,43831.jpg,1467353.jpg,1467354.jpg,4...</td>\n",
  114. " </tr>\n",
  115. " <tr>\n",
  116. " <th>12</th>\n",
  117. " <td>382</td>\n",
  118. " <td>1368137.jpg,554254.jpg,1497568.jpg,248852.jpg,...</td>\n",
  119. " </tr>\n",
  120. " <tr>\n",
  121. " <th>13</th>\n",
  122. " <td>29997</td>\n",
  123. " <td>1322991.jpg,1453819.jpg,316370.jpg,609577.jpg,...</td>\n",
  124. " </tr>\n",
  125. " </tbody>\n",
  126. "</table>\n",
  127. "</div>"
  128. ],
  129. "text/plain": [
  130. " species_id imgs_files\n",
  131. "0 2749 51775.jpg,48607.jpg,7283.jpg,56752.jpg,19683.j...\n",
  132. "1 15162 1340401.jpg,489065.jpg,635182.jpg,464456.jpg,5...\n",
  133. "2 50164 1483369.jpg,161806.jpg,541519.jpg,644907.jpg,8...\n",
  134. "3 1540 905357.jpg,1376931.jpg,1573947.jpg,897180.jpg,...\n",
  135. "4 1174 1565785.jpg,1196459.jpg,619643.jpg,888195.jpg,...\n",
  136. "5 373 735385.jpg,1029205.jpg,58108.jpg,400760.jpg,57...\n",
  137. "6 362 1022083.jpg,864049.jpg,1553692.jpg,727623.jpg,...\n",
  138. "7 42 377481.jpg,353396.jpg,17237.jpg,304456.jpg,280...\n",
  139. "8 344 57062.jpg,284982.jpg,497195.jpg,497192.jpg,517...\n",
  140. "9 39842 366700.jpg,56994.jpg,28073.jpg,370092.jpg,3037...\n",
  141. "10 330 262051.jpg,575138.jpg,97947.jpg,575143.jpg,554...\n",
  142. "11 63454 319811.jpg,43831.jpg,1467353.jpg,1467354.jpg,4...\n",
  143. "12 382 1368137.jpg,554254.jpg,1497568.jpg,248852.jpg,...\n",
  144. "13 29997 1322991.jpg,1453819.jpg,316370.jpg,609577.jpg,..."
  145. ]
  146. },
  147. "metadata": {},
  148. "output_type": "display_data"
  149. }
  150. ],
  151. "source": [
  152. "# Génère le nouveau fichier CSV qui n'inclut que les images traitées avec succès\n",
  153. "data = []\n",
  154. "\n",
  155. "for species_folder in os.listdir(data_path):\n",
  156. " folder_path = os.path.join(data_path, species_folder)\n",
  157. " files = os.listdir(folder_path)\n",
  158. " data.append({'species_id': species_folder, 'imgs_files': files})\n",
  159. "\n",
  160. "df = pd.DataFrame(data)\n",
  161. "df['imgs_files'] = df['imgs_files'].apply(','.join) # Convertir les array en chaîne de caractères, avec les éléments séparés par des virgules.\n",
  162. "\n",
  163. "\n",
  164. "output_path = '../../data/LAYER1/MO/dataset.csv'\n",
  165. "df.to_csv(output_path, index=False)\n",
  166. "\n",
  167. "print(f'Data saved to {output_path}')\n",
  168. "\n",
  169. "display(df)"
  170. ]
  171. }
  172. ],
  173. "metadata": {
  174. "kernelspec": {
  175. "display_name": "Python 3",
  176. "language": "python",
  177. "name": "python3"
  178. },
  179. "language_info": {
  180. "codemirror_mode": {
  181. "name": "ipython",
  182. "version": 3
  183. },
  184. "file_extension": ".py",
  185. "mimetype": "text/x-python",
  186. "name": "python",
  187. "nbconvert_exporter": "python",
  188. "pygments_lexer": "ipython3",
  189. "version": "3.11.9"
  190. }
  191. },
  192. "nbformat": 4,
  193. "nbformat_minor": 2
  194. }