IdealRMT-DecisionTrees/ExtractDataset.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "97e76d73",
   "metadata": {},
   "outputs": [],
   "source": [
    "from scapy.all import *\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import argparse\n",
    "import os\n",
    "from labels import mac_to_label\n",
    "\n",
    "inputfile = \"data.pcap\"\n",
    "outputfile = \"data.csv\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "119623a5",
   "metadata": {},
   "outputs": [],
   "source": [
    "#read the pcap file and extract the features for each packet\n",
    "all_packets = rdpcap(inputfile)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "f5584562",
   "metadata": {},
   "outputs": [],
   "source": [
    "results = []\n",
    "for packet in all_packets:\n",
    "    size = len(packet)\n",
    "    try:\n",
    "        proto = packet.proto\n",
    "    except AttributeError:\n",
    "        proto = 0\n",
    "    try:\n",
    "        sport = packet.sport\n",
    "        dport = packet.dport\n",
    "    except AttributeError:\n",
    "        sport = 0\n",
    "        dport = 0\n",
    "\n",
    "    proto = int(proto)\n",
    "    sport = int(sport)\n",
    "    dport = int(dport)\n",
    "\n",
    "    if \"Ether\" in packet:\n",
    "        eth_dst = packet[\"Ether\"].dst\n",
    "        if eth_dst in mac_to_label:\n",
    "            classification = mac_to_label[eth_dst]\n",
    "        else:\n",
    "            classification = \"other\"\n",
    "    else:\n",
    "        classification = \"other\"\n",
    "\n",
    "    metric = [proto,sport,dport,classification]\n",
    "    results.append(metric)\n",
    "results = (np.array(results)).T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "2e04c2d1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# store the features in the dataframe\n",
    "dataframe = pd.DataFrame({'protocl':results[0],'src':results[1],'dst':results[2],'classfication':results[3]})\n",
    "columns = ['protocl','src','dst','classfication']\n",
    "\n",
    "# save the dataframe to the csv file, if not exsit, create one.\n",
    "if os.path.exists(outputfile):\n",
    "    dataframe.to_csv(outputfile,index=False,sep=',',mode='a',columns = columns, header=False)\n",
    "else:\n",
    "    dataframe.to_csv(outputfile,index=False,sep=',',columns = columns)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}