In [1]:
from scapy.all import *
import numpy as np
import pandas as pd
import argparse
import os

# Capture file to read and CSV file that receives the extracted features.
inputfile, outputfile = "data.pcap", "data.csv"

# Known devices as (MAC address, device name) pairs, in lookup-table order.
_KNOWN_DEVICES = (
    ("44:65:0d:56:cc:d3", "Amazon Echo"),
    ("e0:76:d0:3f:00:ae", "August Doorbell Cam"),
    ("70:88:6b:10:0f:c6", "Awair air quality monitor"),
    ("b4:75:0e:ec:e5:a9", "Belkin Camera"),
    ("ec:1a:59:83:28:11", "Belkin Motion Sensor"),
    ("ec:1a:59:79:f4:89", "Belkin Switch"),
    ("74:6a:89:00:2e:25", "Blipcare BP Meter"),
    ("7c:70:bc:5d:5e:dc", "Canary Camera"),
    ("30:8c:fb:2f:e4:b2", "Dropcam"),
    ("6c:ad:f8:5e:e4:61", "Google Chromecast"),
    ("28:c2:dd:ff:a5:2d", "Hello Barbie"),
    ("70:5a:0f:e4:9b:c0", "HP Printer"),
    ("74:c6:3b:29:d7:1d", "iHome PowerPlug"),
    ("d0:73:d5:01:83:08", "LiFX Bulb"),
    ("18:b4:30:25:be:e4", "NEST Smoke Sensor"),
    ("70:ee:50:18:34:43", "Netatmo Camera"),
    ("70:ee:50:03:b8:ac", "Netatmo Weather station"),
    ("00:17:88:2b:9a:25", "Phillip Hue Lightbulb"),
    ("e0:76:d0:33:bb:85", "Pixstart photo frame"),
    ("88:4a:ea:31:66:9d", "Ring Door Bell"),
    ("00:16:6c:ab:6b:88", "Samsung Smart Cam"),
    ("d0:52:a8:00:67:5e", "Smart Things"),
    ("f4:f2:6d:93:51:f1", "TP-Link Camera"),
    ("50:c7:bf:00:56:39", "TP-Link Plug"),
    ("18:b7:9e:02:20:44", "Triby Speaker"),
    ("00:24:e4:10:ee:4c", "Withings Baby Monitor"),
    ("00:24:e4:1b:6f:96", "Withings Scale"),
    ("00:24:e4:20:28:c6", "Withings sleep sensor"),
    ("00:24:e4:11:18:a8", "Withings"),
)

# Lookup table from lower-case, colon-separated MAC address to device name.
mac_to_device = dict(_KNOWN_DEVICES)

In [2]:
# Read the capture file named by `inputfile` with scapy's rdpcap.
# This cell only loads the packets; the per-packet features are extracted
# in the following cell, which iterates over `all_packets`.
all_packets = rdpcap(inputfile)

In [3]:
# Build one [proto, sport, dport, classification] record per captured packet.
records = []
for packet in all_packets:
    # Packets that expose no `proto` attribute are recorded with protocol 0.
    try:
        proto = packet.proto
    except AttributeError:
        proto = 0

    # Ports are read as a pair: if either one is missing, both default to 0.
    try:
        sport = packet.sport
        dport = packet.dport
    except AttributeError:
        sport = 0
        dport = 0

    proto = int(proto)
    sport = int(sport)
    dport = int(dport)

    # Label the packet by its Ethernet *destination* MAC address; anything
    # without an Ethernet layer or with an unlisted address becomes "other".
    # NOTE(review): keying on the destination means traffic sent BY a device is
    # labelled by its recipient -- confirm this is the intended labelling.
    if "Ether" in packet:
        classification = mac_to_device.get(packet["Ether"].dst, "other")
    else:
        classification = "other"

    records.append([proto, sport, dport, classification])

# Transpose so that results[i] is the i-th feature across all packets
# (0: proto, 1: sport, 2: dport, 3: classification). Because each record mixes
# ints with a string label, NumPy stores every value in the array as a string.
results = np.array(records).T

In [4]:
# Column order of the CSV. The spellings are part of the existing output
# format (new rows are appended to earlier files), so they are kept unchanged.
columns = ['protocl', 'src', 'dst', 'classfication']

# `results` holds one row per feature. A capture with no packets leaves it as
# a zero-length array, where indexing results[0] would raise IndexError, so
# build an empty frame with the expected columns in that case.
if len(results) == 0:
    dataframe = pd.DataFrame(columns=columns)
else:
    dataframe = pd.DataFrame(dict(zip(columns, results)))

# Append to the CSV file. mode='a' creates the file when it does not exist yet;
# the header row is written only in that case so repeated runs do not
# interleave header lines with the data.
write_header = not os.path.exists(outputfile)
dataframe.to_csv(outputfile, index=False, sep=',', mode='a', columns=columns, header=write_header)