review-article

srp review article
https://github.com/skysomorphic/review-article
Science Score: 44.0%

This score indicates how likely this project is to be science-related based on various indicators:
✓
CITATION.cff file
Found CITATION.cff file
✓
codemeta.json file
Found codemeta.json file
✓
.zenodo.json file
Found .zenodo.json file
○
DOI references
○
Academic links in README
○
Academic email domains
○
Institutional organization owner
○
JOSS paper metadata
○
Scientific vocabulary similarity
Low similarity (0.4%) to scientific vocabulary
Last synced: 10 months ago · JSON representation ·
Repository

srp review article
Basic Info

Host: GitHub
Owner: skysomorphic
Language: Jupyter Notebook
Default Branch: main
Size: 56.7 MB
Statistics

Stars: 1
Watchers: 1
Forks: 0
Open Issues: 0
Releases: 0
Created about 1 year ago · Last pushed about 1 year ago
Metadata Files

Citation
Owner

Name: Sky Hong
Login: skysomorphic
Kind: user
Location: New York
Company: Choate Rosemary Hall
Repositories: 7
Profile: https://github.com/skysomorphic
Choate '26 | quizbowl dev, python, web
Citation (citations.bib)

% Journal article on approximation by superpositions of a sigmoidal function.
@article{cybenko_approximation_1989,
	author = {Cybenko, George},
	title = {Approximation by superpositions of a sigmoidal function},
	journaltitle = {Mathematics of Control, Signals and Systems},
	shortjournal = {Mathematics of Control, Signals and Systems},
	volume = {2},
	number = {4},
	pages = {303--314},
	date = {1989-12-01},
	issn = {1435-568X},
	doi = {10.1007/BF02551274},
	url = {https://doi.org/10.1007/BF02551274},
	abstract = {In this paper we demonstrate that finite linear combinations of compositions of a fixed, univariate function and a set of affine functionals can uniformly approximate any continuous function of $n$ real variables with support in the unit hypercube; only mild conditions are imposed on the univariate function. Our results settle an open question about representability in the class of single hidden layer neural networks. In particular, we show that arbitrary decision regions can be arbitrarily well approximated by continuous feedforward neural networks with only a single internal, hidden layer and any continuous sigmoidal nonlinearity. The paper discusses approximation properties of other possible types of nonlinearities that might be implemented by artificial neural networks.},
}

% Journal article; the doi field holds the bare identifier (no resolver prefix).
@article{hornik_multilayer_1989,
	author = {Hornik, Kurt and Stinchcombe, Maxwell and White, Halbert},
	title = {Multilayer feedforward networks are universal approximators},
	journaltitle = {Neural Networks},
	volume = {2},
	number = {5},
	pages = {359--366},
	date = {1989},
	issn = {0893-6080},
	doi = {10.1016/0893-6080(89)90020-8},
	url = {https://www.sciencedirect.com/science/article/pii/0893608089900208},
	abstract = {This paper rigorously establishes that standard multilayer feedforward networks with as few as one hidden layer using arbitrary squashing functions are capable of approximating any Borel measurable function from one finite dimensional space to another to any desired degree of accuracy, provided sufficiently many hidden units are available. In this sense, multilayer feedforward networks are a class of universal approximators.},
	keywords = {Back-propagation networks, Feedforward networks, Mapping networks, Network representation capability, Sigma-Pi networks, Squashing functions, Stone-Weierstrass Theorem, Universal approximation},
}

% Survey article in Acta Numerica on MLP approximation theory.
@article{pinkus_approximation_1999,
	author = {Pinkus, Allan},
	title = {Approximation theory of the {MLP} model in neural networks},
	journaltitle = {Acta Numerica},
	volume = {8},
	pages = {143--195},
	date = {1999},
	doi = {10.1017/S0962492900002919},
}

% Conference paper (PMLR volume 125); math in the abstract is written as LaTeX.
@inproceedings{kidger_universal_2020,
	author = {Kidger, Patrick and Lyons, Terry},
	editor = {Abernethy, Jacob and Agarwal, Shivani},
	title = {Universal Approximation with Deep Narrow Networks},
	booktitle = {Proceedings of Thirty Third Conference on Learning Theory},
	series = {Proceedings of Machine Learning Research},
	volume = {125},
	pages = {2306--2327},
	publisher = {{PMLR}},
	date = {2020-07-09},
	url = {https://proceedings.mlr.press/v125/kidger20a.html},
	abstract = {The classical Universal Approximation Theorem holds for neural networks of arbitrary width and bounded depth. Here we consider the natural ‘dual’ scenario for networks of bounded width and arbitrary depth. Precisely, let n be the number of inputs neurons, m be the number of output neurons, and let ρ be any nonaffine continuous function, with a continuous nonzero derivative at some point. Then we show that the class of neural networks of arbitrary depth, width n + m + 2, and activation function ρ, is dense in $C(K; \mathbb{R}^m)$ for $K \subseteq \mathbb{R}^n$ with $K$ compact. This covers every activation function possible to use in practice, and also includes polynomial activation functions, which is unlike the classical version of the theorem, and provides a qualitative difference between deep narrow networks and shallow wide networks. We then consider several extensions of this result. In particular we consider nowhere differentiable activation functions, density in noncompact domains with respect to the $L^p$-norm, and how the width may be reduced to just n + m + 1 for ‘most’ activation functions.},
}

% Wikimedia Commons media page; the double braces keep the corporate author
% from being split into given name and surname.
@misc{commons_filecolored_2025,
	author = {{Wikimedia Commons}},
	title = {File:Colored neural network.svg — Wikimedia Commons, the free media repository},
	date = {2025},
	url = {https://commons.wikimedia.org/w/index.php?title=File:Colored_neural_network.svg&oldid=995727191},
}

% Unpublished slide deck, referenced by URL.
@unpublished{lee_introduction_2021,
	author = {Lee, Hung-Yi},
	title = {Introduction of Machine / Deep Learning},
	date = {2021},
	url = {https://speech.ee.ntu.edu.tw/~hylee/ml/ml2021-course-data/regression%20(v16).pdf},
}

% Recorded talk; the conference name belongs in eventtitle, not location
% (location is a place name).
% NOTE(review): the title looks truncated — confirm against the video.
@unpublished{hutter_legged_2022,
	author = {Hutter, Marco},
	title = {Legged Robots on the way from subterranean},
	eventtitle = {2022 {IEEE} International Conference on Robotics and Automation ({ICRA})},
	date = {2022-05-24},
	url = {https://www.youtube.com/watch?v=XwheB2_dyMQ},
}

% arXiv preprint; the identifier is stored in eprint/eprinttype rather than a free-text note.
@misc{kingma_adam_2017,
	author = {Kingma, Diederik P. and Ba, Jimmy},
	title = {Adam: A Method for Stochastic Optimization},
	date = {2017},
	eprint = {1412.6980},
	eprinttype = {arXiv},
	url = {https://arxiv.org/abs/1412.6980},
}

% arXiv preprint; the identifier is stored in eprint/eprinttype rather than a free-text note.
@misc{liu_variance_2021,
	author = {Liu, Liyuan and Jiang, Haoming and He, Pengcheng and Chen, Weizhu and Liu, Xiaodong and Gao, Jianfeng and Han, Jiawei},
	title = {On the Variance of the Adaptive Learning Rate and Beyond},
	date = {2021},
	eprint = {1908.03265},
	eprinttype = {arXiv},
	url = {https://arxiv.org/abs/1908.03265},
}

% arXiv preprint; the identifier is stored in eprint/eprinttype rather than a free-text note.
@misc{li_visualizing_2018,
	author = {Li, Hao and Xu, Zheng and Taylor, Gavin and Studer, Christoph and Goldstein, Tom},
	title = {Visualizing the Loss Landscape of Neural Nets},
	date = {2018},
	eprint = {1712.09913},
	eprinttype = {arXiv},
	url = {https://arxiv.org/abs/1712.09913},
}

% arXiv preprint; the identifier is stored in eprint/eprinttype rather than a free-text note.
@misc{ioffe_batch_2015,
	author = {Ioffe, Sergey and Szegedy, Christian},
	title = {Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift},
	date = {2015},
	eprint = {1502.03167},
	eprinttype = {arXiv},
	url = {https://arxiv.org/abs/1502.03167},
}

% arXiv preprint; the identifier is stored in eprint/eprinttype rather than a free-text note.
@misc{santurkar_how_2019,
	author = {Santurkar, Shibani and Tsipras, Dimitris and Ilyas, Andrew and Madry, Aleksander},
	title = {How Does Batch Normalization Help Optimization?},
	date = {2019},
	eprint = {1805.11604},
	eprinttype = {arXiv},
	url = {https://arxiv.org/abs/1805.11604},
}

% arXiv preprint; the identifier is stored in eprint/eprinttype rather than a free-text note.
@misc{ioffe_batch_2017,
	author = {Ioffe, Sergey},
	title = {Batch Renormalization: Towards Reducing Minibatch Dependence in Batch-Normalized Models},
	date = {2017},
	eprint = {1702.03275},
	eprinttype = {arXiv},
	url = {https://arxiv.org/abs/1702.03275},
}

% arXiv preprint; the identifier is stored in eprint/eprinttype rather than a free-text note.
% "van Hasselt" is written in "von Last, First" form so the particle stays with the surname.
@misc{hessel_rainbow_2017,
	author = {Hessel, Matteo and Modayil, Joseph and van Hasselt, Hado and Schaul, Tom and Ostrovski, Georg and Dabney, Will and Horgan, Dan and Piot, Bilal and Azar, Mohammad and Silver, David},
	title = {Rainbow: Combining Improvements in Deep Reinforcement Learning},
	date = {2017},
	eprint = {1710.02298},
	eprinttype = {arXiv},
	url = {https://arxiv.org/abs/1710.02298},
}

% arXiv preprint; the identifier is stored in eprint/eprinttype rather than a free-text note.
@misc{pathak_curiosity-driven_2017,
	author = {Pathak, Deepak and Agrawal, Pulkit and Efros, Alexei A. and Darrell, Trevor},
	title = {Curiosity-driven Exploration by Self-supervised Prediction},
	date = {2017},
	eprint = {1705.05363},
	eprinttype = {arXiv},
	url = {https://arxiv.org/abs/1705.05363},
}

% Science Robotics article. The publisher is a proper field instead of a
% labelled note, and the URL uses the current https://doi.org resolver.
@article{hwangbo_learning_2019,
	author = {Hwangbo, Jemin and Lee, Joonho and Dosovitskiy, Alexey and Bellicoso, Dario and Tsounis, Vassilios and Koltun, Vladlen and Hutter, Marco},
	title = {Learning agile and dynamic motor skills for legged robots},
	journaltitle = {Science Robotics},
	volume = {4},
	number = {26},
	date = {2019-01},
	publisher = {American Association for the Advancement of Science ({AAAS})},
	issn = {2470-9476},
	doi = {10.1126/scirobotics.aau5872},
	url = {https://doi.org/10.1126/scirobotics.aau5872},
	file = {PDF:/home/sky/Zotero/storage/6NR5ND5E/Hwangbo et al. - 2019 - Learning agile and dynamic motor skills for legged robots.pdf:application/pdf},
}

% Science Robotics article. The publisher is a proper field instead of a
% labelled note, and the URL uses the current https://doi.org resolver.
@article{lee_learning_2020,
	author = {Lee, Joonho and Hwangbo, Jemin and Wellhausen, Lorenz and Koltun, Vladlen and Hutter, Marco},
	title = {Learning quadrupedal locomotion over challenging terrain},
	journaltitle = {Science Robotics},
	volume = {5},
	number = {47},
	date = {2020-10},
	publisher = {American Association for the Advancement of Science ({AAAS})},
	issn = {2470-9476},
	doi = {10.1126/scirobotics.abc5986},
	url = {https://doi.org/10.1126/scirobotics.abc5986},
	file = {PDF:/home/sky/Zotero/storage/Q5ND2X9Q/Lee et al. - 2020 - Learning quadrupedal locomotion over challenging terrain.pdf:application/pdf},
}

% Frontiers in Robotics and AI article. volume is a plain number, and the
% abstract carries no HTML paragraph tags.
@article{luo_reinforcement_2021,
	author = {Luo, Shuzhen and Androwis, Ghaith and Adamovich, Sergei and Su, Hao and Nunez, Erick and Zhou, Xianlian},
	title = {Reinforcement Learning and Control of a Lower Extremity Exoskeleton for Squat Assistance},
	journaltitle = {Frontiers in Robotics and {AI}},
	volume = {8},
	date = {2021},
	issn = {2296-9144},
	doi = {10.3389/frobt.2021.702845},
	url = {https://www.frontiersin.org/journals/robotics-and-ai/articles/10.3389/frobt.2021.702845},
	abstract = {A significant challenge for the control of a robotic lower extremity rehabilitation exoskeleton is to ensure stability and robustness during programmed tasks or motions, which is crucial for the safety of the mobility-impaired user. Due to various levels of the user’s disability, the human-exoskeleton interaction forces and external perturbations are unpredictable and could vary substantially and cause conventional motion controllers to behave unreliably or the robot to fall down. In this work, we propose a new, reinforcement learning-based, motion controller for a lower extremity rehabilitation exoskeleton, aiming to perform collaborative squatting exercises with efficiency, stability, and strong robustness. Unlike most existing rehabilitation exoskeletons, our exoskeleton has ankle actuation on both sagittal and front planes and is equipped with multiple foot force sensors to estimate center of pressure ({CoP}), an important indicator of system balance. This proposed motion controller takes advantage of the {CoP} information by incorporating it in the state input of the control policy network and adding it to the reward during the learning to maintain a well balanced system state during motions. In addition, we use dynamics randomization and adversary force perturbations including large human interaction forces during the training to further improve control robustness. To evaluate the effectiveness of the learning controller, we conduct numerical experiments with different settings to demonstrate its remarkable ability on controlling the exoskeleton to repetitively perform well balanced and robust squatting motions under strong perturbations and realistic human interaction forces.},
}

% Nature article. Superscript citation markers that had fused into the
% abstract text during export are removed.
@article{luo_experiment-free_2024,
	author = {Luo, Shuzhen and Jiang, Menghan and Zhang, Sainan and Zhu, Junxi and Yu, Shuangyue and Dominguez Silva, Israel and Wang, Tian and Rouse, Elliott and Zhou, Bolei and Yuk, Hyunwoo and Zhou, Xianlian and Su, Hao},
	title = {Experiment-free exoskeleton assistance via learning in simulation},
	journaltitle = {Nature},
	shortjournal = {Nature},
	volume = {630},
	number = {8016},
	pages = {353--359},
	date = {2024-06-01},
	issn = {1476-4687},
	doi = {10.1038/s41586-024-07382-4},
	url = {https://doi.org/10.1038/s41586-024-07382-4},
	abstract = {Exoskeletons have enormous potential to improve human locomotive performance. However, their development and broad dissemination are limited by the requirement for lengthy human tests and handcrafted control laws. Here we show an experiment-free method to learn a versatile control policy in simulation. Our learning-in-simulation framework leverages dynamics-aware musculoskeletal and exoskeleton models and data-driven reinforcement learning to bridge the gap between simulation and reality without human experiments. The learned controller is deployed on a custom hip exoskeleton that automatically generates assistance across different activities with reduced metabolic rates by 24.3\%, 13.1\% and 15.4\% for walking, running and stair climbing, respectively. Our framework may offer a generalizable and scalable strategy for the rapid development and widespread adoption of a variety of assistive robots for both able-bodied and mobility-impaired individuals.},
}

% arXiv preprint; the identifier is stored in eprint/eprinttype rather than a free-text note.
% "Ben Arous" is kept together as the surname.
@misc{choromanska_loss_2015,
	author = {Choromanska, Anna and Henaff, Mikael and Mathieu, Michael and Ben Arous, Gérard and {LeCun}, Yann},
	title = {The Loss Surfaces of Multilayer Networks},
	date = {2015},
	eprint = {1412.0233},
	eprinttype = {arXiv},
	url = {https://arxiv.org/abs/1412.0233},
}

% arXiv preprint; the identifier is stored in eprint/eprinttype rather than a free-text note.
@misc{zhang_understanding_2017,
	author = {Zhang, Chiyuan and Bengio, Samy and Hardt, Moritz and Recht, Benjamin and Vinyals, Oriol},
	title = {Understanding deep learning requires rethinking generalization},
	date = {2017},
	eprint = {1611.03530},
	eprinttype = {arXiv},
	url = {https://arxiv.org/abs/1611.03530},
}

% arXiv preprint; the identifier is stored in eprint/eprinttype rather than a free-text note.
@misc{arpit_closer_2017,
	author = {Arpit, Devansh and Jastrzębski, Stanisław and Ballas, Nicolas and Krueger, David and Bengio, Emmanuel and Kanwal, Maxinder S. and Maharaj, Tegan and Fischer, Asja and Courville, Aaron and Bengio, Yoshua and Lacoste-Julien, Simon},
	title = {A Closer Look at Memorization in Deep Networks},
	date = {2017},
	eprint = {1706.05394},
	eprinttype = {arXiv},
	url = {https://arxiv.org/abs/1706.05394},
}

% arXiv preprint; the identifier is stored in eprint/eprinttype rather than a free-text note.
@misc{ruder_overview_2017,
	author = {Ruder, Sebastian},
	title = {An overview of gradient descent optimization algorithms},
	date = {2017},
	eprint = {1609.04747},
	eprinttype = {arXiv},
	url = {https://arxiv.org/abs/1609.04747},
}

% Online book, referenced by its website URL.
@book{nielsen_neural_2019,
	author = {Nielsen, Michael},
	title = {Neural Networks and Deep Learning},
	date = {2019-12},
	url = {https://neuralnetworksanddeeplearning.com/},
}

% Video referenced by its YouTube URL.
@unpublished{sanderson_backpropagation_2017,
	author = {Sanderson, Grant},
	title = {Backpropagation calculus {\textbar} {DL}4},
	date = {2017-11-03},
	url = {https://www.youtube.com/watch?v=tIeHLnjs5U8},
}

% Wikimedia Commons media page; the double braces keep the corporate author
% from being split into given name and surname.
@misc{commons_filemarkov_2023,
	author = {{Wikimedia Commons}},
	title = {File:Markov diagram v2.svg — Wikimedia Commons, the free media repository},
	date = {2023},
	url = {https://commons.wikimedia.org/w/index.php?title=File:Markov_diagram_v2.svg&oldid=757108625},
}

% Wikimedia Commons media page; the double braces keep the corporate author
% from being split into given name and surname.
@misc{commons_filek-fold_2024,
	author = {{Wikimedia Commons}},
	title = {File:K-fold cross validation {EN}.svg — Wikimedia Commons, the free media repository},
	date = {2024},
	url = {https://commons.wikimedia.org/w/index.php?title=File:K-fold_cross_validation_EN.svg&oldid=932198002},
}

% Textbook; the middle initial carries its period so styles abbreviate names uniformly.
@book{bishop_deep_2023,
	author = {Bishop, Christopher M. and Bishop, Hugh},
	title = {Deep learning: Foundations and concepts},
	publisher = {Springer Nature},
	date = {2023},
	url = {https://www.bishopbook.com/},
}

% Journal article in Machine Learning introducing the REINFORCE algorithms.
@article{williams_simple_1992,
	author = {Williams, Ronald J.},
	title = {Simple statistical gradient-following algorithms for connectionist reinforcement learning},
	journaltitle = {Machine Learning},
	shortjournal = {Machine Learning},
	volume = {8},
	number = {3},
	pages = {229--256},
	date = {1992-05-01},
	issn = {1573-0565},
	doi = {10.1007/BF00992696},
	url = {https://doi.org/10.1007/BF00992696},
	abstract = {This article presents a general class of associative reinforcement learning algorithms for connectionist networks containing stochastic units. These algorithms, called {REINFORCE} algorithms, are shown to make weight adjustments in a direction that lies along the gradient of expected reinforcement in both immediate-reinforcement tasks and certain limited forms of delayed-reinforcement tasks, and they do this without explicitly computing gradient estimates or even storing information from which such estimates could be computed. Specific examples of such algorithms are presented, some of which bear a close relationship to certain existing algorithms while others are novel but potentially interesting in their own right. Also given are results that show how such algorithms can be naturally integrated with backpropagation. We close with a brief discussion of a number of additional issues surrounding the use of such algorithms, including what is known about their limiting behaviors as well as further considerations that might be used to help develop similar but potentially more powerful reinforcement learning algorithms.},
}

% JMLR article describing scikit-learn.
@article{pedregosa_scikit-learn_2011,
	author = {Pedregosa, Fabian and Varoquaux, Gaël and Gramfort, Alexandre and Michel, Vincent and Thirion, Bertrand and Grisel, Olivier and Blondel, Mathieu and Prettenhofer, Peter and Weiss, Ron and Dubourg, Vincent and Vanderplas, Jake and Passos, Alexandre and Cournapeau, David and Brucher, Matthieu and Perrot, Matthieu and Duchesnay, Édouard},
	title = {Scikit-learn: Machine Learning in Python},
	journaltitle = {Journal of Machine Learning Research},
	volume = {12},
	pages = {2825--2830},
	date = {2011},
	url = {https://jmlr.csail.mit.edu/papers/v12/pedregosa11a.html},
}

% Undated, unpublished PDF referenced by URL.
% NOTE(review): location holds an institution name rather than a place — confirm intent.
@unpublished{rao_policy_nodate,
	author = {Rao, Ashwin},
	title = {Policy Gradient Algorithms},
	location = {Stanford University},
	url = {https://web.stanford.edu/~ashlearn/RLForFinanceBook/PolicyGradient.pdf},
}

% arXiv preprint; the identifier is stored in eprint/eprinttype rather than a free-text note.
@misc{schulman_proximal_2017,
	author = {Schulman, John and Wolski, Filip and Dhariwal, Prafulla and Radford, Alec and Klimov, Oleg},
	title = {Proximal Policy Optimization Algorithms},
	date = {2017},
	eprint = {1707.06347},
	eprinttype = {arXiv},
	url = {https://arxiv.org/abs/1707.06347},
}

% Survey article compiling benchmark functions for global optimisation.
@article{jamil_literature_2013,
	author = {Jamil, Momin and Yang, Xin-She},
	title = {A literature survey of benchmark functions for global optimisation problems},
	journaltitle = {International Journal of Mathematical Modelling and Numerical Optimisation},
	volume = {4},
	number = {2},
	pages = {150--194},
	date = {2013},
	doi = {10.1504/IJMMNO.2013.055204},
	url = {https://www.inderscienceonline.com/doi/abs/10.1504/IJMMNO.2013.055204},
	abstract = {Test functions are important to validate and compare the performance of optimisation algorithms. There have been many test or benchmark functions reported in the literature; however, there is no standard list or set of benchmark functions. Ideally, test functions should have diverse properties to be truly useful to test new algorithms in an unbiased way. For this purpose, we have reviewed and compiled a rich set of 175 benchmark functions for unconstrained optimisation problems with diverse properties in terms of modality, separability, and valley landscape. This is by far the most complete set of functions so far in the literature, and it can be expected that this complete set of functions can be used for validation of new optimisation in the future.},
	file = {PDF:/home/sky/Zotero/storage/VKWWALB5/Jamil and Yang - 2013 - A literature survey of benchmark functions for global optimisation problems.pdf:application/pdf},
}

% Textbook. "and others" is the literal et-al. token (a braced {others} is
% parsed as an author named "others"); the city is split out of publisher.
% NOTE(review): the url file name says 2ndEd while date is 1998 — confirm
% which edition is cited. volume/number look like exporter residue — confirm.
@book{sutton_reinforcement_1998,
	author = {Sutton, Richard S. and Barto, Andrew G. and others},
	title = {Reinforcement learning: An introduction},
	volume = {1},
	number = {1},
	publisher = {{MIT} Press},
	location = {Cambridge},
	date = {1998},
	url = {https://web.stanford.edu/class/psych209/Readings/SuttonBartoIPRLBook2ndEd.pdf},
}

% Zenodo record, identified by DOI.
@misc{moon_general_2025,
	author = {Moon, Changil},
	title = {General Overview for Computer Vision},
	publisher = {Zenodo},
	date = {2025-05},
	doi = {10.5281/zenodo.15420905},
	url = {https://doi.org/10.5281/zenodo.15420905},
	file = {PDF:/home/sky/Zotero/storage/M3M87GRH/Moon - 2025 - General Overview for Computer Vision.pdf:application/pdf},
}

% arXiv preprint; the identifier is stored in eprint/eprinttype rather than a free-text note.
@misc{kempka_vizdoom_2016,
	author = {Kempka, Michał and Wydmuch, Marek and Runc, Grzegorz and Toczek, Jakub and Jaśkowski, Wojciech},
	title = {{ViZDoom}: A Doom-based {AI} Research Platform for Visual Reinforcement Learning},
	date = {2016},
	eprint = {1605.02097},
	eprinttype = {arXiv},
	url = {https://arxiv.org/abs/1605.02097},
}

% Lecture series referenced by the author's teaching page.
@misc{silver_lectures_2015,
	author = {Silver, David},
	title = {Lectures on Reinforcement Learning},
	date = {2015},
	url = {https://www.davidsilver.uk/teaching/},
}

% Doklady Akademii Nauk SSSR is a journal, so this is an article: the venue
% goes in journaltitle and the issue in number. The convergence rate in the
% title is braced math so styles cannot recase it.
% NOTE(review): page range 543--547 supplied from the standard citation of
% this paper — confirm against the source.
@article{nesterov_method_1983,
	author = {Nesterov, Yurii},
	title = {A method for unconstrained convex minimization problem with the rate of convergence {$O(1/k^2)$}},
	journaltitle = {Doklady Akademii Nauk {SSSR}},
	volume = {269},
	number = {3},
	pages = {543--547},
	date = {1983},
}

% Conference paper (PMLR volume 37). The conference city is recorded in
% venue; location is reserved for the publisher's place.
@inproceedings{schulman_trust_2015,
	author = {Schulman, John and Levine, Sergey and Abbeel, Pieter and Jordan, Michael and Moritz, Philipp},
	editor = {Bach, Francis and Blei, David},
	title = {Trust Region Policy Optimization},
	booktitle = {Proceedings of the 32nd International Conference on Machine Learning},
	series = {Proceedings of Machine Learning Research},
	volume = {37},
	pages = {1889--1897},
	publisher = {{PMLR}},
	venue = {Lille, France},
	date = {2015-07-07},
	url = {https://proceedings.mlr.press/v37/schulman15.html},
	abstract = {In this article, we describe a method for optimizing control policies, with guaranteed monotonic improvement. By making several approximations to the theoretically-justified scheme, we develop a practical algorithm, called Trust Region Policy Optimization ({TRPO}). This algorithm is effective for optimizing large nonlinear policies such as neural networks. Our experiments demonstrate its robust performance on a wide variety of tasks: learning simulated robotic swimming, hopping, and walking gaits; and playing Atari games using images of the screen as input. Despite its approximations that deviate from the theory, {TRPO} tends to give monotonic improvement, with little tuning of hyperparameters.},
	file = {PDF:/home/sky/Zotero/storage/QK86ZJS6/Schulman et al. - 2015 - Trust Region Policy Optimization.pdf:application/pdf},
}

% Journal article; the doi field holds the bare identifier (no resolver prefix).
@article{apicella_survey_2021,
	author = {Apicella, Andrea and Donnarumma, Francesco and Isgrò, Francesco and Prevete, Roberto},
	title = {A survey on modern trainable activation functions},
	journaltitle = {Neural Networks},
	volume = {138},
	pages = {14--32},
	date = {2021},
	issn = {0893-6080},
	doi = {10.1016/j.neunet.2021.01.026},
	url = {https://www.sciencedirect.com/science/article/pii/S0893608021000344},
	abstract = {In neural networks literature, there is a strong interest in identifying and defining activation functions which can improve neural network performance. In recent years there has been a renovated interest in the scientific community in investigating activation functions which can be trained during the learning process, usually referred to as trainable, learnable or adaptable activation functions. They appear to lead to better network performance. Diverse and heterogeneous models of trainable activation function have been proposed in the literature. In this paper, we present a survey of these models. Starting from a discussion on the use of the term “activation function” in literature, we propose a taxonomy of trainable activation functions, highlight common and distinctive proprieties of recent and past models, and discuss main advantages and limitations of this type of approach. We show that many of the proposed approaches are equivalent to adding neuron layers which use fixed (non-trainable) activation functions and some simple local rule that constrains the corresponding weight layers.},
	keywords = {Activation functions, Learnable activation functions, Machine learning, Neural networks, Trainable activation functions},
}

% arXiv preprint; the identifier is stored in eprint/eprinttype rather than a free-text note.
@misc{nwankpa_activation_2018,
	author = {Nwankpa, Chigozie and Ijomah, Winifred and Gachagan, Anthony and Marshall, Stephen},
	title = {Activation Functions: Comparison of trends in Practice and Research for Deep Learning},
	date = {2018},
	eprint = {1811.03378},
	eprinttype = {arXiv},
	url = {https://arxiv.org/abs/1811.03378},
}

% Journal article; the doi field holds the bare identifier (no resolver prefix).
@article{dubey_activation_2022,
	author = {Dubey, Shiv Ram and Singh, Satish Kumar and Chaudhuri, Bidyut Baran},
	title = {Activation functions in deep learning: A comprehensive survey and benchmark},
	journaltitle = {Neurocomputing},
	volume = {503},
	pages = {92--108},
	date = {2022},
	issn = {0925-2312},
	doi = {10.1016/j.neucom.2022.06.111},
	url = {https://www.sciencedirect.com/science/article/pii/S0925231222008426},
	abstract = {Neural networks have shown tremendous growth in recent years to solve numerous problems. Various types of neural networks have been introduced to deal with different types of problems. However, the main goal of any neural network is to transform the non-linearly separable input data into more linearly separable abstract features using a hierarchy of layers. These layers are combinations of linear and nonlinear functions. The most popular and common non-linearity layers are activation functions ({AFs}), such as Logistic Sigmoid, Tanh, {ReLU}, {ELU}, Swish and Mish. In this paper, a comprehensive overview and survey is presented for {AFs} in neural networks for deep learning. Different classes of {AFs} such as Logistic Sigmoid and Tanh based, {ReLU} based, {ELU} based, and Learning based are covered. Several characteristics of {AFs} such as output range, monotonicity, and smoothness are also pointed out. A performance comparison is also performed among 18 state-of-the-art {AFs} with different networks on different types of data. The insights of {AFs} are presented to benefit the researchers for doing further research and practitioners to select among different choices. The code used for experimental comparison is released at: https://github.com/shivram1987/{ActivationFunctions}.},
	keywords = {Neural networks, Activation Functions, Convolutional neural networks, Deep learning, Overview, Recurrent Neural Networks},
}

% IROS 2016 conference paper on the ANYmal quadruped.
@inproceedings{hutter_anymal_2016,
	author = {Hutter, Marco and Gehring, Christian and Jud, Dominic and Lauber, Andreas and Bellicoso, C. Dario and Tsounis, Vassilios and Hwangbo, Jemin and Bodie, Karen and Fankhauser, Peter and Bloesch, Michael and Diethelm, Remo and Bachmann, Samuel and Melzer, Amir and Hoepflinger, Mark},
	title = {{ANYmal} - a highly mobile and dynamic quadrupedal robot},
	booktitle = {2016 {IEEE}/{RSJ} International Conference on Intelligent Robots and Systems ({IROS})},
	pages = {38--44},
	date = {2016},
	doi = {10.1109/IROS.2016.7758092},
	keywords = {Actuators, Dynamics, Legged locomotion, Robot sensing systems, Torque},
	file = {ANYmal_-_a_highly_mobile_and_dynamic_quadrupedal_robot:/home/sky/Zotero/storage/BPDGIAK8/ANYmal_-_a_highly_mobile_and_dynamic_quadrupedal_robot.pdf:application/pdf},
}

% Unpublished slide deck, referenced by URL.
@unpublished{lee_convolutional_2021,
	author = {Lee, Hung-Yi},
	title = {Convolutional Neural Network ({CNN})},
	date = {2021},
	url = {https://speech.ee.ntu.edu.tw/~hylee/ml/ml2021-course-data/cnn_v4.pdf},
}

% arXiv preprint; the identifier is stored in eprint/eprinttype rather than a free-text note.
@misc{simonyan_very_2015,
	author = {Simonyan, Karen and Zisserman, Andrew},
	title = {Very Deep Convolutional Networks for Large-Scale Image Recognition},
	date = {2015},
	eprint = {1409.1556},
	eprinttype = {arXiv},
	url = {https://arxiv.org/abs/1409.1556},
}

% Dataset website; the entry carries no author or date.
@misc{noauthor_cmu_nodate,
	url = {https://mocap.cs.cmu.edu/},
	title = {{CMU} Graphics Lab Motion Capture Database},
}

% Proceedings of the IEEE article. Author names are stored in full, with
% {LeCun} brace-protected to match its spelling elsewhere in this file.
@article{lecun_gradient-based_1998,
	author = {{LeCun}, Yann and Bottou, Léon and Bengio, Yoshua and Haffner, Patrick},
	title = {Gradient-based learning applied to document recognition},
	journaltitle = {Proceedings of the {IEEE}},
	volume = {86},
	number = {11},
	pages = {2278--2324},
	date = {1998},
	doi = {10.1109/5.726791},
	keywords = {Machine learning, Neural networks, Character recognition, Feature extraction, Hidden Markov models, Multi-layer neural network, Optical character recognition software, Optical computing, Pattern recognition, Principal component analysis},
}

% JMLR article. The journal name is spelled out to match the other JMLR
% entries in this file; the publisher is a proper field, not a labelled note.
@article{srivastava_dropout_2014,
	author = {Srivastava, Nitish and Hinton, Geoffrey and Krizhevsky, Alex and Sutskever, Ilya and Salakhutdinov, Ruslan},
	title = {Dropout: a simple way to prevent neural networks from overfitting},
	journaltitle = {Journal of Machine Learning Research},
	volume = {15},
	number = {1},
	pages = {1929--1958},
	date = {2014-01},
	publisher = {{JMLR}.org},
	issn = {1532-4435},
	abstract = {Deep neural nets with a large number of parameters are very powerful machine learning systems. However, overfitting is a serious problem in such networks. Large networks are also slow to use, making it difficult to deal with overfitting by combining the predictions of many different large neural nets at test time. Dropout is a technique for addressing this problem. The key idea is to randomly drop units (along with their connections) from the neural network during training. This prevents units from co-adapting too much. During training, dropout samples from an exponential number of different "thinned" networks. At test time, it is easy to approximate the effect of averaging the predictions of all these thinned networks by simply using a single unthinned network that has smaller weights. This significantly reduces overfitting and gives major improvements over other regularization methods. We show that dropout improves the performance of neural networks on supervised learning tasks in vision, speech recognition, document classification and computational biology, obtaining state-of-the-art results on many benchmark data sets.},
	keywords = {deep learning, model combination, neural networks, regularization},
}

% NeurIPS proceedings paper (volume 37).
@inproceedings{kalra_why_2024,
	author = {Kalra, Dayal Singh and Barkeshli, Maissam},
	editor = {Globerson, A. and Mackey, L. and Belgrave, D. and Fan, A. and Paquet, U. and Tomczak, J. and Zhang, C.},
	title = {Why Warmup the Learning Rate? Underlying Mechanisms and Improvements},
	booktitle = {Advances in Neural Information Processing Systems},
	volume = {37},
	pages = {111760--111801},
	publisher = {Curran Associates, Inc.},
	date = {2024},
	url = {https://proceedings.neurips.cc/paper_files/paper/2024/file/ca98452d4e9ecbc18c40da2aa0da8b98-Paper-Conference.pdf},
}

% CVPR 2016 conference paper.
% NOTE(review): pages and doi were added from the published record — confirm.
@inproceedings{he_deep_2016,
	author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
	title = {Deep Residual Learning for Image Recognition},
	booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR})},
	pages = {770--778},
	date = {2016-06},
	doi = {10.1109/CVPR.2016.90},
}

% IWQoS 2018 conference paper.
@inproceedings{zhang_improved_2018,
	author = {Zhang, Zijun},
	title = {Improved Adam Optimizer for Deep Neural Networks},
	booktitle = {2018 {IEEE}/{ACM} 26th International Symposium on Quality of Service ({IWQoS})},
	pages = {1--2},
	date = {2018},
	doi = {10.1109/IWQoS.2018.8624183},
	keywords = {Machine learning, Neural networks, Bridges, Conferences, Optimization, Task analysis, Training},
}

% Conference contribution. booktitle is required for this entry type and was
% missing; it is derived from the existing eventtitle, which is kept.
% NOTE(review): confirm the exact proceedings/track name for this paper.
@inproceedings{dozat_incorporating_2016,
	author = {Dozat, Timothy},
	title = {Incorporating Nesterov Momentum into Adam},
	booktitle = {International Conference on Learning Representations ({ICLR}) 2016},
	eventtitle = {International Conference on Learning Representations ({ICLR}) 2016},
	date = {2016-02-18},
}

% Conference paper (PMLR volume 15). The conference city is recorded in
% venue; location is reserved for the publisher's place.
@inproceedings{ross_reduction_2011,
	author = {Ross, Stephane and Gordon, Geoffrey and Bagnell, Drew},
	editor = {Gordon, Geoffrey and Dunson, David and Dudík, Miroslav},
	title = {A Reduction of Imitation Learning and Structured Prediction to No-Regret Online Learning},
	booktitle = {Proceedings of the Fourteenth International Conference on Artificial Intelligence and Statistics},
	series = {Proceedings of Machine Learning Research},
	volume = {15},
	pages = {627--635},
	publisher = {{PMLR}},
	venue = {Fort Lauderdale, {FL}, {USA}},
	date = {2011-04-11},
	url = {https://proceedings.mlr.press/v15/ross11a.html},
	abstract = {Sequential prediction problems such as imitation learning, where future observations depend on previous predictions (actions), violate the common i.i.d. assumptions made in statistical learning. This leads to poor performance in theory and often in practice. Some recent approaches provide stronger guarantees in this setting, but remain somewhat unsatisfactory as they train either non-stationary or stochastic policies and require a large number of iterations. In this paper, we propose a new iterative algorithm, which trains a stationary deterministic policy, that can be seen as a no regret algorithm in an online learning setting. We show that any such no regret algorithm, combined with additional reduction assumptions, must find a policy with good performance under the distribution of observations it induces in such sequential settings. We demonstrate that this new approach outperforms previous approaches on two challenging imitation learning problems and a benchmark sequence labeling problem.},
}

% JMLR survey article on curriculum learning for reinforcement learning.
@article{narvekar_curriculum_2020,
	author = {Narvekar, Sanmit and Peng, Bei and Leonetti, Matteo and Sinapov, Jivko and Taylor, Matthew E. and Stone, Peter},
	title = {Curriculum Learning for Reinforcement Learning Domains: A Framework and Survey},
	journaltitle = {Journal of Machine Learning Research},
	volume = {21},
	number = {181},
	pages = {1--50},
	date = {2020},
	url = {http://jmlr.org/papers/v21/20-212.html},
}
ecosyste.ms

Data

Tools

Indexes

Applications

Experiments

Open Source Science