A Video Classification Method Based on Spatiotemporal Detail Attention and Feature Fusion

<table class="table-group" id="tab4"><tr><td><table class="table"><tr><td class="thead-hr" colspan="4"><hr/></td></tr><tr class="thead"><td class="align_left"></td><td class="align_center">Top1</td><td class="align_center">Top5</td><td class="align_center">GFLOPs</td></tr><tr><td class="thead-hr" colspan="4"><hr/></td></tr><tr><td class="align_left">I3D [<a href="/journals/misy/2022/4213335/#B15" target="_blank">15</a>]</td><td class="align_center">71.9</td><td class="align_center">90.1</td><td class="align_center">108</td></tr><tr><td class="align_left">StNet-IRv2 RGB [<a href="/journals/misy/2022/4213335/#B51" target="_blank">51</a>]</td><td class="align_center">79.0</td><td class="align_center">—</td><td class="align_center">—</td></tr><tr><td class="align_left">AttentionNAS [<a href="/journals/misy/2022/4213335/#B5" target="_blank">5</a>]</td><td class="align_center">79.8</td><td class="align_center">94.4</td><td class="align_center">—</td></tr><tr><td class="align_left">LGD-3D R101 [<a href="/journals/misy/2022/4213335/#B52" target="_blank">52</a>]</td><td class="align_center">81.5</td><td class="align_center">95.6</td><td class="align_center">—</td></tr><tr><td class="align_left">SlowFast <span class="nowrap"><svg height="8.55521pt" id="M91" style="vertical-align:-0.2063904pt" version="1.1" viewbox="-0.0498162 -8.34882 32.3604 8.55521" width="32.3604pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"><g transform="matrix(.013,0,0,-0.013,0,0)"><path d="M384 0V27C293 34 287 42 287 114V635C232 613 172 594 109 583V559L157 557C201 555 205 550 205 499V114C205 42 199 34 109 27V0H384Z"></path></g><g transform="matrix(.013,0,0,-0.013,6.24,0)"><path d="M137 343C167 482 260 545 321 574C357 591 397 603 429 609L423 641C382 634 335 622 295 608C189 570 37 457 37 238C37 84 125 -12 242 -12C362 -12 447 89 447 209C447 311 374 393 267 393C247 393 226 386 204 376L137 343ZM227 337C318 337 361 256 361 173C361 105 336 22 258 22C176 22 126 120 126 
240C126 266 127 291 132 310C155 323 189 337 227 337Z"></path></g><g transform="matrix(.013,0,0,-0.013,15.386,0)"><path d="M471 153C471 170 463 194 452 212C400 220 373 229 322 255C373 281 400 290 452 298C463 316 471 339 471 357C456 366 431 371 410 370C377 329 356 310 308 279C311 336 317 364 336 413C326 432 310 451 294 459C279 451 262 432 252 413C271 364 277 336 280 279C232 310 211 329 178 370C157 371 132 367 117 357C117 340 125 316 136 298C188 290 215 281 266 255C215 229 188 220 136 212C125 194 117 171 117 153C132 144 157 139 178 140C211 181 232 200 280 231C277 174 271 146 252 97C262 78 278 59 294 51C309 59 326 78 336 97C317 146 311 174 308 231C356 200 377 181 410 140C431 139 456 143 471 153Z"></path></g><g transform="matrix(.013,0,0,-0.013,25.922,0)"><path d="M249 635C141 635 70 555 70 471C70 401 114 353 179 316C143 294 106 267 90 252C68 231 45 202 45 157C45 50 130 -12 237 -12C322 -12 435 52 435 169C435 256 372 304 303 343C349 374 375 398 383 407C401 429 411 458 411 487C411 569 344 635 249 635ZM238 603C285 603 337 567 337 482C337 422 310 385 276 358C205 393 145 426 145 500C145 552 179 603 238 603ZM248 20C183 20 125 70 125 163C125 218 158 268 206 300C284 261 355 217 355 143C355 66 308 20 248 20Z"></path></g></svg>,</span> R101 [<a href="/journals/misy/2022/4213335/#B3" target="_blank">3</a>]</td><td class="align_center">81.1</td><td class="align_center">95.1</td><td class="align_center">213</td></tr><tr><td class="align_left">SlowFast <span class="nowrap"><svg height="8.55521pt" id="M92" style="vertical-align:-0.2063904pt" version="1.1" viewbox="-0.0498162 -8.34882 32.3604 8.55521" width="32.3604pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"><g transform="matrix(.013,0,0,-0.013,0,0)"><path d="M384 0V27C293 34 287 42 287 114V635C232 613 172 594 109 583V559L157 557C201 555 205 550 205 499V114C205 42 199 34 109 27V0H384Z"></path></g><g transform="matrix(.013,0,0,-0.013,6.24,0)"><path d="M137 343C167 482 260 545 321 574C357 591 397 603 
429 609L423 641C382 634 335 622 295 608C189 570 37 457 37 238C37 84 125 -12 242 -12C362 -12 447 89 447 209C447 311 374 393 267 393C247 393 226 386 204 376L137 343ZM227 337C318 337 361 256 361 173C361 105 336 22 258 22C176 22 126 120 126 240C126 266 127 291 132 310C155 323 189 337 227 337Z"></path></g><g transform="matrix(.013,0,0,-0.013,15.386,0)"><path d="M471 153C471 170 463 194 452 212C400 220 373 229 322 255C373 281 400 290 452 298C463 316 471 339 471 357C456 366 431 371 410 370C377 329 356 310 308 279C311 336 317 364 336 413C326 432 310 451 294 459C279 451 262 432 252 413C271 364 277 336 280 279C232 310 211 329 178 370C157 371 132 367 117 357C117 340 125 316 136 298C188 290 215 281 266 255C215 229 188 220 136 212C125 194 117 171 117 153C132 144 157 139 178 140C211 181 232 200 280 231C277 174 271 146 252 97C262 78 278 59 294 51C309 59 326 78 336 97C317 146 311 174 308 231C356 200 377 181 410 140C431 139 456 143 471 153Z"></path></g><g transform="matrix(.013,0,0,-0.013,25.922,0)"><path d="M249 635C141 635 70 555 70 471C70 401 114 353 179 316C143 294 106 267 90 252C68 231 45 202 45 157C45 50 130 -12 237 -12C322 -12 435 52 435 169C435 256 372 304 303 343C349 374 375 398 383 407C401 429 411 458 411 487C411 569 344 635 249 635ZM238 603C285 603 337 567 337 482C337 422 310 385 276 358C205 393 145 426 145 500C145 552 179 603 238 603ZM248 20C183 20 125 70 125 163C125 218 158 268 206 300C284 261 355 217 355 143C355 66 308 20 248 20Z"></path></g></svg>,</span> R101+NL [<a href="/journals/misy/2022/4213335/#B3" target="_blank">3</a>]</td><td class="align_center">81.8</td><td class="align_center">95.1</td><td class="align_center">234</td></tr><tr><td class="align_left">TSN [<a href="/journals/misy/2022/4213335/#B1" target="_blank">1</a>]</td><td class="align_center">71.7</td><td class="align_center">90.6</td><td class="align_center">33</td></tr><tr><td class="align_left">TSM [<a href="/journals/misy/2022/4213335/#B2" target="_blank">2</a>]</td><td 
class="align_center">75.6</td><td class="align_center">92.1</td><td class="align_center">65</td></tr><tr><td class="align_left">VCM-SDD <span class="nowrap"><svg height="8.69875pt" id="M93" style="vertical-align:-0.3499298pt" version="1.1" viewbox="-0.0498162 -8.34882 26.097 8.69875" width="26.097pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"><g transform="matrix(.013,0,0,-0.013,0,0)"><path d="M249 635C141 635 70 555 70 471C70 401 114 353 179 316C143 294 106 267 90 252C68 231 45 202 45 157C45 50 130 -12 237 -12C322 -12 435 52 435 169C435 256 372 304 303 343C349 374 375 398 383 407C401 429 411 458 411 487C411 569 344 635 249 635ZM238 603C285 603 337 567 337 482C337 422 310 385 276 358C205 393 145 426 145 500C145 552 179 603 238 603ZM248 20C183 20 125 70 125 163C125 218 158 268 206 300C284 261 355 217 355 143C355 66 308 20 248 20Z"></path></g><g transform="matrix(.013,0,0,-0.013,9.145,0)"><path d="M528 54L331 254L528 455L492 493L294 291L96 493L60 455L257 254L60 54L96 16L294 217L492 16L528 54Z"></path></g><g transform="matrix(.013,0,0,-0.013,19.682,0)"><path d="M137 343C167 482 260 545 321 574C357 591 397 603 429 609L423 641C382 634 335 622 295 608C189 570 37 457 37 238C37 84 125 -12 242 -12C362 -12 447 89 447 209C447 311 374 393 267 393C247 393 226 386 204 376L137 343ZM227 337C318 337 361 256 361 173C361 105 336 22 258 22C176 22 126 120 126 240C126 266 127 291 132 310C155 323 189 337 227 337Z"></path></g></svg>,</span> R101_NP</td><td class="align_center">79.6</td><td class="align_center">94.3</td><td class="align_center">46.8</td></tr><tr><td class="align_left">VCM-SDD <span class="nowrap"><svg height="8.69875pt" id="M94" style="vertical-align:-0.3499298pt" version="1.1" viewbox="-0.0498162 -8.34882 26.097 8.69875" width="26.097pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"><g transform="matrix(.013,0,0,-0.013,0,0)"><path d="M249 635C141 635 70 555 70 471C70 401 114 353 179 316C143 294 106 267 90 
252C68 231 45 202 45 157C45 50 130 -12 237 -12C322 -12 435 52 435 169C435 256 372 304 303 343C349 374 375 398 383 407C401 429 411 458 411 487C411 569 344 635 249 635ZM238 603C285 603 337 567 337 482C337 422 310 385 276 358C205 393 145 426 145 500C145 552 179 603 238 603ZM248 20C183 20 125 70 125 163C125 218 158 268 206 300C284 261 355 217 355 143C355 66 308 20 248 20Z"></path></g><g transform="matrix(.013,0,0,-0.013,9.145,0)"><path d="M528 54L331 254L528 455L492 493L294 291L96 493L60 455L257 254L60 54L96 16L294 217L492 16L528 54Z"></path></g><g transform="matrix(.013,0,0,-0.013,19.682,0)"><path d="M137 343C167 482 260 545 321 574C357 591 397 603 429 609L423 641C382 634 335 622 295 608C189 570 37 457 37 238C37 84 125 -12 242 -12C362 -12 447 89 447 209C447 311 374 393 267 393C247 393 226 386 204 376L137 343ZM227 337C318 337 361 256 361 173C361 105 336 22 258 22C176 22 126 120 126 240C126 266 127 291 132 310C155 323 189 337 227 337Z"></path></g></svg>,</span> R101</td><td class="align_center">80.4</td><td class="align_center">94.7</td><td class="align_center">46.8</td></tr><tr><td class="align_left">VCM-SDD <span class="nowrap"><svg height="8.69875pt" id="M95" style="vertical-align:-0.3499298pt" version="1.1" viewbox="-0.0498162 -8.34882 32.3604 8.69875" width="32.3604pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"><g transform="matrix(.013,0,0,-0.013,0,0)"><path d="M384 0V27C293 34 287 42 287 114V635C232 613 172 594 109 583V559L157 557C201 555 205 550 205 499V114C205 42 199 34 109 27V0H384Z"></path></g><g transform="matrix(.013,0,0,-0.013,6.24,0)"><path d="M137 343C167 482 260 545 321 574C357 591 397 603 429 609L423 641C382 634 335 622 295 608C189 570 37 457 37 238C37 84 125 -12 242 -12C362 -12 447 89 447 209C447 311 374 393 267 393C247 393 226 386 204 376L137 343ZM227 337C318 337 361 256 361 173C361 105 336 22 258 22C176 22 126 120 126 240C126 266 127 291 132 310C155 323 189 337 227 337Z"></path></g><g 
transform="matrix(.013,0,0,-0.013,15.386,0)"><path d="M528 54L331 254L528 455L492 493L294 291L96 493L60 455L257 254L60 54L96 16L294 217L492 16L528 54Z"></path></g><g transform="matrix(.013,0,0,-0.013,25.922,0)"><path d="M137 343C167 482 260 545 321 574C357 591 397 603 429 609L423 641C382 634 335 622 295 608C189 570 37 457 37 238C37 84 125 -12 242 -12C362 -12 447 89 447 209C447 311 374 393 267 393C247 393 226 386 204 376L137 343ZM227 337C318 337 361 256 361 173C361 105 336 22 258 22C176 22 126 120 126 240C126 266 127 291 132 310C155 323 189 337 227 337Z"></path></g></svg>,</span> R101_NP</td><td class="align_center">81.3</td><td class="align_center">94.9</td><td class="align_center">46.8</td></tr><tr><td class="align_left">VCM-SDD <span class="nowrap"><svg height="8.69875pt" id="M96" style="vertical-align:-0.3499298pt" version="1.1" viewbox="-0.0498162 -8.34882 32.3604 8.69875" width="32.3604pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"><g transform="matrix(.013,0,0,-0.013,0,0)"><path d="M384 0V27C293 34 287 42 287 114V635C232 613 172 594 109 583V559L157 557C201 555 205 550 205 499V114C205 42 199 34 109 27V0H384Z"></path></g><g transform="matrix(.013,0,0,-0.013,6.24,0)"><path d="M137 343C167 482 260 545 321 574C357 591 397 603 429 609L423 641C382 634 335 622 295 608C189 570 37 457 37 238C37 84 125 -12 242 -12C362 -12 447 89 447 209C447 311 374 393 267 393C247 393 226 386 204 376L137 343ZM227 337C318 337 361 256 361 173C361 105 336 22 258 22C176 22 126 120 126 240C126 266 127 291 132 310C155 323 189 337 227 337Z"></path></g><g transform="matrix(.013,0,0,-0.013,15.386,0)"><path d="M528 54L331 254L528 455L492 493L294 291L96 493L60 455L257 254L60 54L96 16L294 217L492 16L528 54Z"></path></g><g transform="matrix(.013,0,0,-0.013,25.922,0)"><path d="M137 343C167 482 260 545 321 574C357 591 397 603 429 609L423 641C382 634 335 622 295 608C189 570 37 457 37 238C37 84 125 -12 242 -12C362 -12 447 89 447 209C447 311 374 393 267 393C247 393 226 
386 204 376L137 343ZM227 337C318 337 361 256 361 173C361 105 336 22 258 22C176 22 126 120 126 240C126 266 127 291 132 310C155 323 189 337 227 337Z"></path></g></svg>,</span> R101</td><td class="align_center">81.9</td><td class="align_center">95.3</td><td class="align_center">46.8</td></tr><tr class="table-tr"><td colspan="4"><hr class="tbody-hr"/></td></tr></table></td></tr></table>

<div>Comparison of this algorithm with other methods on the Kinetics-600 dataset.</div>

Mobile Information Systems

tab4

Table 4

Table 4: A Video Classification Method Based on Spatiotemporal Detail Attention and Feature Fusion