Supervised Reinforcement Learning for ULV Path Planning in Complex Warehouse Environment

<table class="algorithm-group"><tr><td><table class="algorithm" id="alg1">
<tr><td colspan="2"><b>Input:</b> Expert data, initial parameters \(\mu\) and <span class="nowrap">\(\theta\);</span></td></tr>
<tr><td colspan="2"><b>for</b> episode \(i = 1\) to \(\psi\) <b>do</b></td></tr>
<tr><td colspan="2"> Update the discriminator \(\mathcal{D}\) by ascending the stochastic gradient;</td></tr>
<tr><td colspan="2"> Update the internal rewards \(w_{\mathrm{in}}\) and external rewards <span class="nowrap">\(w_{\mathrm{ex}}\);</span></td></tr>
<tr><td colspan="2"> Update the value function \(V_{c}^{\rho}\) by <span class="nowrap">\(w_{\sigma}\);</span></td></tr>
<tr><td colspan="2"> Update the policy \(\rho\) of the DRL by <span class="nowrap">\(V_{c}^{\rho}\);</span></td></tr>
<tr><td colspan="2"><b>end</b></td></tr>
</table></td></tr></table>

<div> The Training Procedure of the SDRL.</div>

Wireless Communications and Mobile Computing

alg1

Algorithm 1

Algorithm 1: Supervised Reinforcement Learning for ULV Path Planning in Complex Warehouse Environment