| Approach | Referred publications |
| --- | --- |
| Markov decision process | [12, 23, 24, 37, 64, 70, 75, 84, 96, 100, 101, 104, 127, 130, 133, 138, 144, 153, 165, 167, 170, 177, 188, 191, 199], [203, 207, 211, 212, 214, 217, 220, 231, 252, 256–259, 263, 264, 272, 274, 281, 291, 309, 313, 320, 340, 343, 346], [369–376] |
| Multiarmed bandit | [61, 66, 102, 198, 351, 377, 378] |
| Dynamic programming | [16, 19, 27, 52, 68, 70, 84, 90, 93, 107, 119, 120, 132, 135, 141, 145, 155, 156, 161, 162, 189, 191, 198, 201, 207], [209, 212, 222, 236, 242, 247, 254, 258, 259, 278, 280, 288, 289, 304, 306, 313, 321, 331, 340, 347, 357, 371, 372, 379, 380] |
| Q-learning | [10, 17, 24, 44, 47, 50, 64, 68, 70, 80, 81, 83, 91, 92, 94, 101, 110, 116, 124, 125, 127, 129, 133, 145, 152, 172, 179], [180, 183, 187, 201, 203, 205, 206, 208, 210, 212, 215, 219, 222–225, 227, 231, 242, 244, 246, 248, 250, 254, 262], [264, 280, 282, 283, 291, 294–296, 321, 326, 327, 343, 347, 353, 356, 366, 367, 372, 374, 381–386] |
| SARSA | [14, 127, 240, 246, 280, 384] |
| Deep Q-network | [17, 47, 83, 99, 125, 133, 190, 210, 254, 291, 294, 347, 387] |
| Deep deterministic policy gradient | [229, 260, 338] |
| Gradient descent | [26, 28, 216, 388] |
| Deep reinforcement learning | [19, 20, 32, 41, 47, 50, 60, 75, 77, 84, 88, 90, 95, 98, 100, 103, 117, 131, 134, 147, 154, 159, 165, 168, 176, 179, 182], [193, 199, 201, 207, 210, 220, 221, 223, 236, 241, 260, 261, 273, 275, 281, 294, 299, 301, 302, 305, 309, 311, 317, 323, 333, 338, 341, 346, 352, 355, 361, 363, 375, 380, 389–391] |
| Actor-critic | [15, 17, 33, 97, 120, 176, 196, 221, 237, 337, 338, 349, 392] |
| Double deep Q-network | [47, 83, 125, 210, 254, 294, 387, 393] |
| Imitation | [226, 265, 355] |
| Multiagent | [32, 60, 70, 77, 103, 145, 163, 168, 173, 175, 176, 188, 195, 200, 209, 218, 219, 225, 231], [245, 251, 263, 267, 280, 289, 323, 330, 338, 344, 361, 367, 377, 394, 395] |
| Distributed | [45, 56, 60, 73, 91, 119, 133, 145, 187, 261, 282, 348, 394] |
| Centralized | [60, 147, 187, 243, 296] |
| Cooperative | [16, 81, 170, 200, 338, 344] |
| Collaborative | [45, 137, 174, 196, 237, 248, 325, 381, 396] |