heuzef
/
jan24_cds_mushrooms

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import libraries\n",
    "import pandas as pd\n",
    "import os\n",
    "\n",
    "# Data directory\n",
    "data_path = '../../data/LAYER1/MO/MO/'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Data saved to ../../data/LAYER1/MO/dataset.csv\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>species_id</th>\n",
       "      <th>imgs_files</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2749</td>\n",
       "      <td>51775.jpg,48607.jpg,7283.jpg,56752.jpg,19683.j...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>15162</td>\n",
       "      <td>1340401.jpg,489065.jpg,635182.jpg,464456.jpg,5...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>50164</td>\n",
       "      <td>1483369.jpg,161806.jpg,541519.jpg,644907.jpg,8...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1540</td>\n",
       "      <td>905357.jpg,1376931.jpg,1573947.jpg,897180.jpg,...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1174</td>\n",
       "      <td>1565785.jpg,1196459.jpg,619643.jpg,888195.jpg,...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>373</td>\n",
       "      <td>735385.jpg,1029205.jpg,58108.jpg,400760.jpg,57...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>362</td>\n",
       "      <td>1022083.jpg,864049.jpg,1553692.jpg,727623.jpg,...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>42</td>\n",
       "      <td>377481.jpg,353396.jpg,17237.jpg,304456.jpg,280...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>344</td>\n",
       "      <td>57062.jpg,284982.jpg,497195.jpg,497192.jpg,517...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>39842</td>\n",
       "      <td>366700.jpg,56994.jpg,28073.jpg,370092.jpg,3037...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>330</td>\n",
       "      <td>262051.jpg,575138.jpg,97947.jpg,575143.jpg,554...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>63454</td>\n",
       "      <td>319811.jpg,43831.jpg,1467353.jpg,1467354.jpg,4...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>382</td>\n",
       "      <td>1368137.jpg,554254.jpg,1497568.jpg,248852.jpg,...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>29997</td>\n",
       "      <td>1322991.jpg,1453819.jpg,316370.jpg,609577.jpg,...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   species_id                                         imgs_files\n",
       "0        2749  51775.jpg,48607.jpg,7283.jpg,56752.jpg,19683.j...\n",
       "1       15162  1340401.jpg,489065.jpg,635182.jpg,464456.jpg,5...\n",
       "2       50164  1483369.jpg,161806.jpg,541519.jpg,644907.jpg,8...\n",
       "3        1540  905357.jpg,1376931.jpg,1573947.jpg,897180.jpg,...\n",
       "4        1174  1565785.jpg,1196459.jpg,619643.jpg,888195.jpg,...\n",
       "5         373  735385.jpg,1029205.jpg,58108.jpg,400760.jpg,57...\n",
       "6         362  1022083.jpg,864049.jpg,1553692.jpg,727623.jpg,...\n",
       "7          42  377481.jpg,353396.jpg,17237.jpg,304456.jpg,280...\n",
       "8         344  57062.jpg,284982.jpg,497195.jpg,497192.jpg,517...\n",
       "9       39842  366700.jpg,56994.jpg,28073.jpg,370092.jpg,3037...\n",
       "10        330  262051.jpg,575138.jpg,97947.jpg,575143.jpg,554...\n",
       "11      63454  319811.jpg,43831.jpg,1467353.jpg,1467354.jpg,4...\n",
       "12        382  1368137.jpg,554254.jpg,1497568.jpg,248852.jpg,...\n",
       "13      29997  1322991.jpg,1453819.jpg,316370.jpg,609577.jpg,..."
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Generate the new CSV file: one row per species folder found under data_path,\n",
    "# listing that folder's image files (per the original note, these are the\n",
    "# images that were processed successfully).\n",
    "data = []\n",
    "\n",
    "for species_folder in os.listdir(data_path):\n",
    "    folder_path = os.path.join(data_path, species_folder)\n",
    "    # Skip stray non-directory entries (e.g. .DS_Store or a leftover file):\n",
    "    # calling os.listdir() on them would raise NotADirectoryError.\n",
    "    if not os.path.isdir(folder_path):\n",
    "        continue\n",
    "    files = os.listdir(folder_path)\n",
    "    data.append({'species_id': species_folder, 'imgs_files': files})\n",
    "\n",
    "# Naming the columns explicitly keeps the frame well-formed (and the join\n",
    "# below valid) even when no species folder was found.\n",
    "df = pd.DataFrame(data, columns=['species_id', 'imgs_files'])\n",
    "# Convert each list of file names into a single comma-separated string.\n",
    "df['imgs_files'] = df['imgs_files'].apply(','.join)\n",
    "\n",
    "output_path = '../../data/LAYER1/MO/dataset.csv'\n",
    "df.to_csv(output_path, index=False)\n",
    "\n",
    "print(f'Data saved to {output_path}')\n",
    "\n",
    "display(df)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}