Comparative Analysis of Machine Learning Algorithms for Anomaly Detection in IoT Networks Using CICIoT2023 Dataset

Main Article Content

AUGUSTO VICENTE

Abstract

Internet of Things (IoT) networks face increasing security threats due to their heterogeneous
nature and resource constraints. This study presents a comprehensive comparison of ten machine learning
algorithms for anomaly detection in IoT environments using the CICIoT2023 dataset. We evaluated six
supervised learning algorithms (Logistic Regression, Random Forest, Gradient Boosting, Linear SVC,
SGD Classifier, and MLP) and four unsupervised anomaly detection methods (Isolation Forest, SGD
One-Class SVM, Local Outlier Factor, and Elliptic Envelope) using a reproducible pipeline with Data
Version Control (DVC). Our methodology employs stratified sampling on 4.5 million records (97.7%
attacks, 2.3% benign), standardized preprocessing with 39 features, and binary classification. The
experimental framework includes rigorous statistical validation through 705 experiments across multiple
hyperparameter configurations with 5 independent runs each. Given severe class imbalance, balanced
accuracy emerged as the critical metric, with ensemble methods (Gradient Boosting: 91.95%, Random
Forest: 91.89%) demonstrating an 8–17 percentage-point advantage over linear classifiers in minority-class
detection. Gradient Boosting achieved the highest F1-score (0.9964 ± 0.0004), while SGD-based methods
provided 200–600× faster training with competitive performance, making them suitable for resource-constrained
deployments. Bayesian statistical analysis confirmed significant performance differences across algorithm
families. This research establishes a rigorous baseline for algorithm selection in severely imbalanced IoT
intrusion detection systems.

Article Details

How to Cite
VICENTE, A. (2025). Comparative Analysis of Machine Learning Algorithms for Anomaly Detection in IoT Networks Using CICIoT2023 Dataset. INFOCOMP Journal of Computer Science, 24(2). Retrieved from https://infocomp.dcc.ufla.br/index.php/infocomp/article/view/5342
Section
Machine Learning and Computational Intelligence

References

% ============================================

% BIBLIOGRAPHY FOR ML-BASED IOT ANOMALY DETECTION PAPER

% Based on references from research schedule and literature review

% ============================================

% ============================================

% CICIoT2023 Dataset Reference - PRIMARY REFERENCE

% ============================================

% CICIoT2023 dataset paper. Acronyms in the title are brace-protected so that
% sentence-casing bibliography styles do not lowercase them.
@article{neto2023ciciot2023,
  author    = {Neto, Euclides Carlos Pinto and Dadkhah, Sajjad and Ferreira, Raphael and Zohourian, Alireza and Lu, Rongxing and Ghorbani, Ali A.},
  title     = {{CICIoT2023}: A Real-Time Dataset and Benchmark for Large-Scale Attacks in {IoT} Environment},
  journal   = {Sensors},
  volume    = {23},
  number    = {13},
  pages     = {5941},
  year      = {2023},
  publisher = {MDPI},
  doi       = {10.3390/s23135941},
}

% Systematic analysis of federated-learning intrusion detection (IEEE Access, 2025).
@article{hamad2025systematic,
  author    = {Hamad, Nuha A and Bakar, Ka Abu and Qamar, Faizan and Jubair, Ahmed Mahdi and Mohamed, Rajina R and Mohamed, Mohamad Afendee},
  title     = {Systematic Analysis of Federated Learning Approaches for Intrusion Detection in the Internet of Things Environment},
  journal   = {IEEE Access},
  year      = {2025},
  publisher = {IEEE},
}

% The ampersand in the publisher name is escaped; a bare "&" is an alignment
% character in LaTeX and breaks compilation in styles that print the publisher.
@article{fares2025machine,
  author    = {Fares, Hajar and Zeroual, Mustapha and Karim, Abderrazek and Maleh, Yassine and Baddi, Youssef and Aknin, Noura},
  title     = {Machine learning, Deep learning and Ensemble learning based approaches for intrusion detection enhancement},
  journal   = {EDPACS},
  volume    = {70},
  number    = {1},
  pages     = {31--51},
  year      = {2025},
  publisher = {Taylor \& Francis},
}

% Deep learning for threat detection and response (WJARR, 2024).
@article{okafor2024deep,
  author  = {Okafor, Maureen Oluchukwuamaka},
  title   = {Deep learning in cybersecurity: Enhancing threat detection and response},
  journal = {World Journal of Advanced Research and Reviews},
  volume  = {24},
  number  = {3},
  pages   = {1116--1132},
  year    = {2024},
}

% ML-based anomaly detection for network security situational awareness (IEEE Access, 2025).
@article{zhen2025anomaly,
  author    = {Zhen, Li and Kamarudin, Nazhatul Hafizah and Kok, Ven Jyn and Qamar, Faizan},
  title     = {Anomaly Detection Model in Network Security Situational Awareness based on Machine Learning: Limitation, Techniques, Future Trends},
  journal   = {IEEE Access},
  year      = {2025},
  publisher = {IEEE},
}

% NOTE(review): the exported pages value "10--55041" looks like the DOI prefix
% 10.55041 mis-parsed as a page range. It is parked in an ignored field so it
% is no longer printed; restore real page numbers or a DOI once confirmed.
@article{parmar2025scaling,
  author        = {Parmar, Tarun},
  title         = {Scaling Data Infrastructure for High-Volume Manufacturing: Challenges and Solutions in Big Data Engineering},
  journal       = {International Scientific Journal of Engineering and Management},
  volume        = {4},
  number        = {1},
  year          = {2025},
  internal-note = {original export had pages 10--55041; verify and add real pages or doi},
}

% Accent macros restored: the export had dropped the backslashes, so the
% author names would have been typeset with literal apostrophes.
@article{reyes2025cybersecurity,
  author    = {Reyes-Acosta, Ricardo Emmanuel and Mendoza-Gonz{\'a}lez, Ricardo and Oswaldo Diaz, Edgar and Vargas Martin, Miguel and Luna Rosas, Francisco Javier and Mart{\'\i}nez Romo, Julio C{\'e}sar and Mendoza-Gonz{\'a}lez, Alfredo},
  title     = {Cybersecurity Conceptual Framework Applied to Edge Computing and Internet of Things Environments},
  journal   = {Electronics},
  volume    = {14},
  number    = {11},
  pages     = {2109},
  year      = {2025},
  publisher = {MDPI},
}

% Journal name normalised to "IEEE Access" so it matches the spelling used by
% the other IEEE Access entries in this file.
@article{mehmood2024advances,
  author    = {Mehmood, Abid and Shafique, Arslan and Alawida, Moatsum and Khan, Abdul Nasir},
  title     = {Advances and vulnerabilities in modern cryptographic techniques: A comprehensive survey on cybersecurity in the domain of machine/deep learning and quantum techniques},
  journal   = {IEEE Access},
  volume    = {12},
  pages     = {27530--27555},
  year      = {2024},
  publisher = {IEEE},
}

% Publisher and city were fused into one string by the export; they are split
% into publisher/address. The acronym in the title is brace-protected.
@article{pandey2025lightweight,
  author    = {Pandey, Vivek Kumar and Sahu, Dinesh and Prakash, Shiv and Rathore, Rajkumar Singh and Dixit, Pratibha and Hunko, Iryna},
  title     = {A lightweight framework to secure {IoT} devices with limited resources in cloud environments},
  journal   = {Scientific Reports},
  volume    = {15},
  number    = {1},
  pages     = {26009},
  year      = {2025},
  publisher = {Nature Publishing Group UK},
  address   = {London},
}

% TODO(review): the publication year is missing (required for incollection
% entries); add it once confirmed against the publisher record.
% Glued initials are separated so styles abbreviate them correctly, and the
% acronyms in the title are brace-protected.
@incollection{infantenabling,
  author    = {Infant, D. M. Dhusnic and Priyanka, E. B.},
  title     = {Enabling Smart Cities: A Comprehensive Study of {IoT} and {IIoT} Integration in Diverse Industries},
  booktitle = {Deep Learning and Blockchain Technology for Smart and Sustainable Cities},
  pages     = {89--114},
  publisher = {Auerbach Publications},
}

% First author's compound surname is "Abu Al-Haija"; the export had split
% "Abu" off into the given names. Acronym and proper noun in the title are
% brace-protected.
@article{al2025comprehensive,
  author    = {Abu Al-Haija, Qasem and Droos, Ayat},
  title     = {A comprehensive survey on deep learning-based intrusion detection systems in {Internet of Things} ({IoT})},
  journal   = {Expert Systems},
  volume    = {42},
  number    = {2},
  pages     = {e13726},
  year      = {2025},
  publisher = {Wiley Online Library},
}

% Title converted from all-caps to title case so that case-preserving styles
% do not print it in capitals; the wording is unchanged.
@article{iftikhar2025securing,
  author    = {Iftikhar, Abeer and Hussain, Faisal Bashir and Qureshi, Kashif Naseer and Shiraz, Muhammad and Sookhak, Mehdi},
  title     = {Securing Edge Based Smart City Networks with Software Defined Networking and Zero Trust Architecture},
  journal   = {Journal of Network and Computer Applications},
  pages     = {104341},
  year      = {2025},
  publisher = {Elsevier},
}

% ============================================

% Machine Learning for IoT Security - General Surveys

% ============================================

% Systematic study of ML/DL approaches for network intrusion detection (ETT, 2021).
@article{ahmad2021network,
  author    = {Ahmad, Zeeshan and Shahid Khan, Adnan and Wai Shiang, Cheah and Abdullah, Johari and Ahmad, Farhan},
  title     = {Network intrusion detection system: A systematic study of machine learning and deep learning approaches},
  journal   = {Transactions on Emerging Telecommunications Technologies},
  volume    = {32},
  number    = {1},
  pages     = {e4150},
  year      = {2021},
  publisher = {Wiley Online Library},
}

% Acronym in the title is brace-protected against sentence-casing styles.
@article{cook2020anomaly,
  author    = {Cook, Andrew A and Misirli, Goksel and Fan, Zhong},
  title     = {Anomaly detection for {IoT} time-series data: A survey},
  journal   = {IEEE Internet of Things Journal},
  volume    = {7},
  number    = {7},
  pages     = {6481--6494},
  year      = {2020},
  publisher = {IEEE},
}

% The ampersand in the journal name is escaped: a bare "&" in a printed field
% causes a "Misplaced alignment tab character" error when the bibliography is
% typeset. The acronym in the title is brace-protected.
@article{benkhelifa2018critical,
  author    = {Benkhelifa, Elhadj and Welsh, Thomas and Hamouda, Walid},
  title     = {A critical review of practices and challenges in intrusion detection systems for {IoT}: Toward universal and resilient systems},
  journal   = {IEEE Communications Surveys \& Tutorials},
  volume    = {20},
  number    = {4},
  pages     = {3496--3509},
  year      = {2018},
  publisher = {IEEE},
}

% ============================================

% Supervised Learning for IDS

% ============================================

% Breiman's Random Forests paper (Machine Learning, 2001).
@article{breiman2001random,
  author    = {Breiman, Leo},
  title     = {Random Forests},
  journal   = {Machine Learning},
  volume    = {45},
  number    = {1},
  pages     = {5--32},
  year      = {2001},
  publisher = {Springer},
  doi       = {10.1023/A:1010933404324},
}

% Friedman's gradient boosting machine paper (Annals of Statistics, 2001).
@article{friedman2001greedy,
  author    = {Friedman, Jerome H},
  title     = {Greedy Function Approximation: A Gradient Boosting Machine},
  journal   = {Annals of Statistics},
  volume    = {29},
  number    = {5},
  pages     = {1189--1232},
  year      = {2001},
  publisher = {Institute of Mathematical Statistics},
  doi       = {10.1214/aos/1013203451},
}

% Cortes and Vapnik's support-vector networks paper (Machine Learning, 1995).
@article{cortes1995support,
  author    = {Cortes, Corinna and Vapnik, Vladimir},
  title     = {Support-Vector Networks},
  journal   = {Machine Learning},
  volume    = {20},
  number    = {3},
  pages     = {273--297},
  year      = {1995},
  publisher = {Springer},
  doi       = {10.1007/BF00994018},
}

% Accent macro restored (the export had dropped the backslash, leaving a
% literal apostrophe in the author's name). The conference acronym in the
% booktitle is brace-protected.
@inproceedings{bottou2010large,
  author       = {Bottou, L{\'e}on},
  title        = {Large-Scale Machine Learning with Stochastic Gradient Descent},
  booktitle    = {Proceedings of {COMPSTAT}'2010},
  pages        = {177--186},
  year         = {2010},
  organization = {Springer},
  doi          = {10.1007/978-3-7908-2604-3_16},
}

% ============================================

% Imbalanced Learning

% ============================================

% He and Garcia's survey on learning from imbalanced data (IEEE TKDE, 2009).
@article{he2009learning,
  author    = {He, Haibo and Garcia, Edwardo A},
  title     = {Learning from Imbalanced Data},
  journal   = {IEEE Transactions on Knowledge and Data Engineering},
  volume    = {21},
  number    = {9},
  pages     = {1263--1284},
  year      = {2009},
  publisher = {IEEE},
  doi       = {10.1109/TKDE.2008.239},
}

% ============================================

% Unsupervised/Anomaly Detection Methods - CORE ALGORITHMS

% ============================================

% Original Isolation Forest conference paper (ICDM 2008).
@inproceedings{liu2008isolation,
  author    = {Liu, Fei Tony and Ting, Kai Ming and Zhou, Zhi-Hua},
  title     = {Isolation Forest},
  booktitle = {2008 Eighth IEEE International Conference on Data Mining},
  pages     = {413--422},
  year      = {2008},
  publisher = {IEEE Computer Society},
  doi       = {10.1109/ICDM.2008.17},
}

% Journal article on isolation-based anomaly detection (ACM TKDD, 2012).
@article{liu2012isolation,
  author    = {Liu, Fei Tony and Ting, Kai Ming and Zhou, Zhi-Hua},
  title     = {Isolation-Based Anomaly Detection},
  journal   = {ACM Transactions on Knowledge Discovery from Data},
  volume    = {6},
  number    = {1},
  pages     = {1--39},
  year      = {2012},
  publisher = {ACM},
  doi       = {10.1145/2133360.2133363},
}

% Umlaut macro restored (the export had dropped the backslash, which would
% typeset a literal double quote in the author's name). The acronym in the
% title is brace-protected.
@inproceedings{breunig2000lof,
  author       = {Breunig, Markus M and Kriegel, Hans-Peter and Ng, Raymond T and Sander, J{\"o}rg},
  title        = {{LOF}: Identifying Density-Based Local Outliers},
  booktitle    = {ACM SIGMOD Record},
  volume       = {29},
  number       = {2},
  pages        = {93--104},
  year         = {2000},
  organization = {ACM},
  doi          = {10.1145/335191.335388},
}

% Umlaut macro restored in the first author's surname (the export had dropped
% the backslash, which would typeset a literal double quote).
@article{scholkopf2001estimating,
  author    = {Sch{\"o}lkopf, Bernhard and Platt, John C and Shawe-Taylor, John and Smola, Alex J and Williamson, Robert C},
  title     = {Estimating the support of a high-dimensional distribution},
  journal   = {Neural Computation},
  volume    = {13},
  number    = {7},
  pages     = {1443--1471},
  year      = {2001},
  publisher = {MIT Press},
}

% ============================================

% IoT Security Datasets

% ============================================

% TODO: Add other IoT security dataset references for comparison

% PLACEHOLDER entry: author and journal are dummy values and must be replaced
% with a real reference before this key is cited.
@article{iot_datasets_survey,
  author  = {Author Names},
  title   = {Survey of IoT Security Datasets},
  journal = {Journal Name},
  year    = {2023},
  note    = {TODO: Add proper reference},
}

% ============================================

% DVC and Reproducibility

% ============================================

% TODO: Add references for reproducibility in ML research

% The URL macro had lost its backslash ("url{...}" would be typeset as the
% literal text "url" followed by the address).
% TODO(review): verify the preferred citation format for DVC. This reminder
% previously lived in the note field, which is printed in the bibliography.
@misc{dvc_tool,
  author       = {{Iterative.ai}},
  title        = {{Data Version Control (DVC)}},
  howpublished = {\url{https://dvc.org}},
  year         = {2023},
}

% ============================================

% Comparative Studies

% ============================================

% TODO: Add references to other comparative ML studies in IoT

% PLACEHOLDER entry: author and journal are dummy values and must be replaced
% with a real reference before this key is cited.
@article{ml_comparison_ids,
  author  = {Author Names},
  title   = {Comparative Analysis of ML Algorithms for IDS},
  journal = {Journal Name},
  year    = {2023},
  note    = {TODO: Add proper reference},
}

% ============================================

% Statistical Methods and Evaluation Metrics

% ============================================

% Balanced accuracy and its posterior distribution (ICPR 2010).
@inproceedings{brodersen2010balanced,
  author       = {Brodersen, Kay Henning and Ong, Cheng Soon and Stephan, Klaas Enno and Buhmann, Joachim M.},
  title        = {The Balanced Accuracy and Its Posterior Distribution},
  booktitle    = {2010 20th International Conference on Pattern Recognition},
  pages        = {3121--3124},
  year         = {2010},
  organization = {IEEE},
  doi          = {10.1109/ICPR.2010.764},
}

% ============================================

% Experimental Methodology and Reproducibility

% ============================================

% Recast as a technical report, using the institution and report number that
% the original note field already stated. arXiv is not a journal, so the
% identifier moves out of the journal field into the note, where classic
% BibTeX styles still print it.
@techreport{smith2018disciplined,
  author      = {Smith, Leslie N.},
  title       = {A Disciplined Approach to Neural Network Hyper-Parameters: Part 1 -- Learning Rate, Batch Size, Momentum, and Weight Decay},
  institution = {US Naval Research Laboratory},
  number      = {5510-026},
  year        = {2018},
  note        = {arXiv:1803.09820},
}

% Hyperparameter optimization overview (WIREs DMKD, 2023; arXiv version 2021).
@article{bischl2021hyperparameter,
  author    = {Bischl, Bernd and Binder, Martin and Lang, Michel and Pielok, Tobias and Richter, Jakob and Coors, Stefan and Thomas, Janek and Ullmann, Theresa and Becker, Marc and Boulesteix, Anne-Laure and Deng, Difan and Lindauer, Marius},
  title     = {Hyperparameter Optimization: Foundations, Algorithms, Best Practices and Open Challenges},
  journal   = {Wiley Interdisciplinary Reviews: Data Mining and Knowledge Discovery},
  volume    = {13},
  number    = {2},
  pages     = {e1484},
  year      = {2023},
  publisher = {Wiley Online Library},
  doi       = {10.1002/widm.1484},
  note      = {Originally published as arXiv:2107.05847 (2021)},
}

% ============================================

% SOC Alert Management and False Positive Literature

% ============================================

% The percent sign in the title is escaped: BibTeX copies it verbatim into
% the .bbl file, where an unescaped "%" comments out the rest of the line.
% Author names are given in unambiguous "Last, First" form.
@inproceedings{alahmadi2022_99fp,
  author    = {Alahmadi, Bushra A. and Axon, Louise and Martinovic, Ivan},
  title     = {99\% False Positives: A Qualitative Study of {SOC} Analysts' Perspectives on Security Alarms},
  booktitle = {Proceedings of the 31st {USENIX} Security Symposium},
  year      = {2022},
}

% NOTE(review): the original entry carried a placeholder author string
% ("Author(s) as in paper"), which BibTeX would parse and print as a person's
% name. The author list, the title ending and the DOI below were filled in for
% Applied Sciences 13(11), article 6610 -- confirm all three against the
% publisher record before submission.
@article{breaking_alert_fatigue_siem,
  author  = {Ban, Tao and Takahashi, Takeshi and Ndichu, Samuel and Inoue, Daisuke},
  title   = {Breaking Alert Fatigue: {AI}-Assisted {SIEM} Framework for Effective Incident Response},
  journal = {Applied Sciences},
  volume  = {13},
  number  = {11},
  pages   = {6610},
  year    = {2023},
  doi     = {10.3390/app13116610},
}

% The URL macro had lost its backslash; without it the text "url" is typeset
% literally and the long address is not line-broken.
@misc{google2024_alert_fatigue,
  title        = {Security overload is leaving admins with too much alert data to comprehend},
  howpublished = {\url{https://www.techradar.com/pro/security/security-overload-is-leaving-admins-with-too-much-alert-data-to-comprehend-which-makes-things-even-more-dangerous}},
  year         = {2024},
  note         = {Accessed: 2025-11-15},
}

% The URL macro had lost its backslash; without it the text "url" is typeset
% literally and the long address is not line-broken.
@misc{itu2023_alert_management,
  title        = {Effective Alert Management: Minimizing False Positives And Negatives In Security Monitoring},
  howpublished = {\url{https://www.ituonline.com/comptia-securityx/comptia-securityx-4/effective-alert-management-minimizing-false-positives-and-negatives-in-security-monitoring/}},
  year         = {2023},
  note         = {Accessed: 2025-11-15},
}

% The URL macro had lost its backslash; without it the text "url" is typeset
% literally instead of formatting the address as a link.
@misc{panther2023_false_positives,
  title        = {Identifying and Mitigating False Positive Alerts},
  howpublished = {\url{https://panther.com/blog/identifying-and-mitigating-false-positive-alerts}},
  year         = {2023},
  note         = {Accessed: 2025-11-15},
}

% ============================================

% NOTES FOR REFERENCE COLLECTION

% ============================================

% Key topics to search:

% 1. CICIoT2023 dataset papers

% 2. Machine learning for IoT intrusion detection

% 3. Anomaly detection in IoT networks

% 4. Supervised vs unsupervised learning for network security

% 5. Comparative studies of ML algorithms

% 6. IoT security challenges and solutions

% 7. Network flow feature analysis

% 8. Resource-constrained ML for IoT

% 9. Binary classification for network security

% 10. Reproducibility in ML research