Deep Reinforcement Learning for UAV Intelligent Mission Planning

<table class="table-group" id="tab2"><tr><td><table class="table"><tr><td class="thead-hr" colspan="1"><hr/></td></tr><tr class="thead"><td class="align_left">Proximal policy optimization algorithm (PPO)</td></tr><tr><td class="thead-hr" colspan="1"><hr/></td></tr><tr><td class="align_left">1. For <i>i</i> = 1 to <i>N</i> do</td></tr><tr><td class="align_left">2. Run policy <span id="M29">\(\pi_{\theta'}\)</span> for <i>T</i> timesteps, collecting <span class="nowrap"><span id="M30">\((s, a, r, s', d)\)</span>;</span></td></tr><tr><td class="align_left">3.
Estimate return <span id="M31">\(R(\tau)\)</span> and advantage <span class="nowrap"><span id="M32">\(\hat{A}_t^{\mathrm{GAE}(\gamma,\lambda)}\)</span>;</span></td></tr><tr><td class="align_left">4. For <i>k</i> = 1 to <i>K</i> do</td></tr><tr><td class="align_left">5. Sample a minibatch from the trajectory and calculate the policy loss and value loss;</td></tr><tr><td class="align_left">6.
Optimize the surrogate loss function <span class="nowrap"><span id="M33">\(J(\theta)\)</span>;</span></td></tr><tr><td class="align_left">7.
Update the policy parameter <span class="nowrap"><span id="M34">\(\theta' \leftarrow \theta\)</span>;</span></td></tr><tr><td class="align_left">8. End for</td></tr><tr><td class="align_left">9. End for</td></tr><tr class="table-tr"><td colspan="1"><hr class="tbody-hr"/></td></tr></table></td></tr></table>

Complexity

tab2

Table 2

Table 2: Deep Reinforcement Learning for UAV Intelligent Mission Planning