{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "vOgNA2SVBlDA"
   },
   "outputs": [],
   "source": [
    "import json\n",
    "import random\n",
    "import math\n",
    "\n",
    "# ------------------------------\n",
    "# 1. Wczytanie danych treningowych\n",
    "# ------------------------------\n",
    "def load_email_data(spam_path, ham_path):\n",
    "    with open(spam_path, encoding=\"utf-8\") as f1, open(ham_path, encoding=\"utf-8\") as f2:\n",
    "        spam_data = json.load(f1)\n",
    "        ham_data = json.load(f2)\n",
    "    return spam_data + ham_data\n",
    "\n",
    "\n",
    "def train_test_split(data, test_ratio=0.2):\n",
    "    random.shuffle(data)\n",
    "    cut = int(len(data) * (1 - test_ratio))\n",
    "    return data[:cut], data[cut:]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Qhn3rY-HDD7B"
   },
   "outputs": [],
   "source": [
    "\n",
    "# ------------------------------\n",
    "# 2. Trenowanie klasyfikatora Bayesa\n",
    "# ------------------------------\n",
    "def preprocess(text):\n",
    "    return text.lower().replace(\"–\", \" \").replace(\"-\", \" \").replace(\",\", \" \")\\\n",
    "        .replace(\".\", \" \").replace(\"!\", \" \").replace(\"?\", \" \").split()\n",
    "\n",
    "def train_naive_bayes(train_data, alpha=1.0):\n",
    "    \"\"\"Collect the count statistics of a multinomial naive-Bayes model.\n",
    "\n",
    "    Args:\n",
    "        train_data: List of {\"text\": str, \"label\": str} records.\n",
    "        alpha: Laplace-smoothing constant, stored for use at prediction time.\n",
    "\n",
    "    Returns:\n",
    "        Dict with per-class document counts, per-class word counts,\n",
    "        per-class token totals, the shared vocabulary, the smoothing\n",
    "        constant and the number of training documents.\n",
    "    \"\"\"\n",
    "    class_counts = {}\n",
    "    word_counts = {}\n",
    "    total_words = {}\n",
    "    vocab = set()\n",
    "\n",
    "    for record in train_data:\n",
    "        label = record[\"label\"]\n",
    "        if label not in class_counts:\n",
    "            # First document of this class: initialise its counters.\n",
    "            class_counts[label] = 0\n",
    "            word_counts[label] = {}\n",
    "            total_words[label] = 0\n",
    "        class_counts[label] += 1\n",
    "\n",
    "        tokens = preprocess(record[\"text\"])\n",
    "        per_class = word_counts[label]\n",
    "        for token in tokens:\n",
    "            per_class[token] = per_class.get(token, 0) + 1\n",
    "        total_words[label] += len(tokens)\n",
    "        vocab.update(tokens)\n",
    "\n",
    "    return {\n",
    "        \"class_counts\": class_counts,\n",
    "        \"word_counts\": word_counts,\n",
    "        \"total_words\": total_words,\n",
    "        \"vocab\": vocab,\n",
    "        \"alpha\": alpha,\n",
    "        \"total_docs\": len(train_data)\n",
    "    }\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "A-nSjvVJDI6V"
   },
   "outputs": [],
   "source": [
    "# ------------------------------\n",
    "# 3. Klasyfikacja wiadomości\n",
    "# ------------------------------\n",
    "def log_prob(model, words, class_name):\n",
    "    logp = math.log(model[\"class_counts\"][class_name] / model[\"total_docs\"])\n",
    "    V = len(model[\"vocab\"])\n",
    "    a = model[\"alpha\"]\n",
    "    for word in words:\n",
    "        wc = model[\"word_counts\"][class_name].get(word, 0)\n",
    "        logp += math.log((wc + a) / (model[\"total_words\"][class_name] + a * V))\n",
    "    return logp\n",
    "\n",
    "def predict(model, text):\n",
    "    \"\"\"Return the class label with the highest score for *text*.\n",
    "\n",
    "    On ties the first class encountered wins; returns None when the\n",
    "    model contains no classes at all.\n",
    "    \"\"\"\n",
    "    tokens = preprocess(text)\n",
    "    best_label, best_score = None, -math.inf\n",
    "    for candidate in model[\"class_counts\"]:\n",
    "        candidate_score = log_prob(model, tokens, candidate)\n",
    "        if candidate_score > best_score:\n",
    "            best_label, best_score = candidate, candidate_score\n",
    "    return best_label\n",
    "\n",
    "def evaluate_model(model, test_data):\n",
    "    \"\"\"Print and return classification accuracy on *test_data*.\n",
    "\n",
    "    Args:\n",
    "        model: Model dict produced by train_naive_bayes().\n",
    "        test_data: Non-empty list of {\"text\": str, \"label\": str} records.\n",
    "\n",
    "    Returns:\n",
    "        Accuracy as a float in [0, 1].\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If test_data is empty (previously crashed with an\n",
    "            uninformative ZeroDivisionError).\n",
    "    \"\"\"\n",
    "    if not test_data:\n",
    "        raise ValueError(\"test_data is empty; cannot compute accuracy\")\n",
    "    correct = sum(\n",
    "        1 for rec in test_data if predict(model, rec[\"text\"]) == rec[\"label\"]\n",
    "    )\n",
    "    accuracy = correct / len(test_data)\n",
    "    print(f\"Skuteczność na zbiorze testowym: {accuracy * 100:.2f}%\")\n",
    "    return accuracy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "id": "_lHbbpWnfkhy"
   },
   "outputs": [
    {
     "ename": "FileNotFoundError",
     "evalue": "[Errno 2] No such file or directory: 'credentials.json'",
     "output_type": "error",
     "traceback": [
      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
      "\u001b[31mFileNotFoundError\u001b[39m                         Traceback (most recent call last)",
      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 28\u001b[39m\n\u001b[32m     25\u001b[39m         \u001b[38;5;28mprint\u001b[39m(\u001b[33m\"\u001b[39m\u001b[33m•\u001b[39m\u001b[33m\"\u001b[39m, label[\u001b[33m'\u001b[39m\u001b[33mname\u001b[39m\u001b[33m'\u001b[39m])\n\u001b[32m     27\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[34m__name__\u001b[39m == \u001b[33m'\u001b[39m\u001b[33m__main__\u001b[39m\u001b[33m'\u001b[39m:\n\u001b[32m---> \u001b[39m\u001b[32m28\u001b[39m     \u001b[43mauthorize_and_save_token\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 14\u001b[39m, in \u001b[36mauthorize_and_save_token\u001b[39m\u001b[34m()\u001b[39m\n\u001b[32m     11\u001b[39m     \u001b[38;5;28mprint\u001b[39m(\u001b[33m\"\u001b[39m\u001b[33mPlik token.pkl już istnieje.\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m     12\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m14\u001b[39m flow = \u001b[43mInstalledAppFlow\u001b[49m\u001b[43m.\u001b[49m\u001b[43mfrom_client_secrets_file\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mcredentials.json\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mSCOPES\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m     15\u001b[39m creds = flow.run_local_server(port=\u001b[32m0\u001b[39m)\n\u001b[32m     17\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mopen\u001b[39m(\u001b[33m'\u001b[39m\u001b[33mtoken.pkl\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mwb\u001b[39m\u001b[33m'\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m token_file:\n",
      "\u001b[36mFile \u001b[39m\u001b[32m~\\AppData\\Roaming\\Python\\Python313\\site-packages\\google_auth_oauthlib\\flow.py:198\u001b[39m, in \u001b[36mFlow.from_client_secrets_file\u001b[39m\u001b[34m(cls, client_secrets_file, scopes, **kwargs)\u001b[39m\n\u001b[32m    183\u001b[39m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[32m    184\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mfrom_client_secrets_file\u001b[39m(\u001b[38;5;28mcls\u001b[39m, client_secrets_file, scopes, **kwargs):\n\u001b[32m    185\u001b[39m \u001b[38;5;250m    \u001b[39m\u001b[33;03m\"\"\"Creates a :class:`Flow` instance from a Google client secrets file.\u001b[39;00m\n\u001b[32m    186\u001b[39m \n\u001b[32m    187\u001b[39m \u001b[33;03m    Args:\u001b[39;00m\n\u001b[32m   (...)\u001b[39m\u001b[32m    196\u001b[39m \u001b[33;03m        Flow: The constructed Flow instance.\u001b[39;00m\n\u001b[32m    197\u001b[39m \u001b[33;03m    \"\"\"\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m198\u001b[39m     \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mclient_secrets_file\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mr\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m json_file:\n\u001b[32m    199\u001b[39m         client_config = json.load(json_file)\n\u001b[32m    201\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mcls\u001b[39m.from_client_config(client_config, scopes=scopes, **kwargs)\n",
      "\u001b[31mFileNotFoundError\u001b[39m: [Errno 2] No such file or directory: 'credentials.json'"
     ]
    }
   ],
   "source": [
    "# Run locally on your own machine (not in Colab): performs the Gmail OAuth flow.\n",
    "import os\n",
    "import pickle\n",
    "from google_auth_oauthlib.flow import InstalledAppFlow\n",
    "from googleapiclient.discovery import build\n",
    "\n",
    "# Scope allows reading and modifying Gmail messages and labels.\n",
    "SCOPES = ['https://www.googleapis.com/auth/gmail.modify']\n",
    "\n",
    "def authorize_and_save_token():\n",
    "    \"\"\"Run the Gmail OAuth flow once and cache credentials in token.pkl.\n",
    "\n",
    "    Skips the flow entirely if token.pkl already exists. On success,\n",
    "    lists the user's Gmail labels as a smoke test of the authorized\n",
    "    service. Requires credentials.json (OAuth client secrets) in the\n",
    "    current working directory.\n",
    "\n",
    "    NOTE(review): credentials are persisted with pickle; unpickling an\n",
    "    untrusted token.pkl executes arbitrary code — keep the file private\n",
    "    and out of version control.\n",
    "    \"\"\"\n",
    "    if os.path.exists('token.pkl'):\n",
    "        print(\"Plik token.pkl już istnieje.\")\n",
    "        return\n",
    "\n",
    "    # Opens a browser window; a local server on a random free port\n",
    "    # (port=0) receives the OAuth redirect with the auth code.\n",
    "    flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES)\n",
    "    creds = flow.run_local_server(port=0)\n",
    "\n",
    "    with open('token.pkl', 'wb') as token_file:\n",
    "        pickle.dump(creds, token_file)\n",
    "    print(\"Autoryzacja zakończona i token zapisany jako token.pkl.\")\n",
    "\n",
    "    # Smoke test: list the account's labels via the Gmail API.\n",
    "    service = build('gmail', 'v1', credentials=creds)\n",
    "    results = service.users().labels().list(userId='me').execute()\n",
    "    print(\"Twoje etykiety Gmail:\")\n",
    "    for label in results.get('labels', []):\n",
    "        print(\"•\", label['name'])\n",
    "\n",
    "if __name__ == '__main__':\n",
    "    authorize_and_save_token()\n",
    "\n",
    "\n",
    "\n"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "provenance": [
    {
     "file_id": "1cokMlQIQ9exQFDec8s44xi1ilHtD4CaV",
     "timestamp": 1747899172257
    },
    {
     "file_id": "1SGFz8WH-PhdgXvSxpq4vrp5cZvwUaD6j",
     "timestamp": 1747757956790
    }
   ]
  },
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
