mirror of
https://github.com/nyanmisaka/ffmpeg-rockchip.git
synced 2026-01-24 02:20:56 +01:00
lavc/aarch64: Fix addp overflow in ff_pred16x16_plane_neon_10
The mismatch between neon and C functions can be reproduced using the following bitstream and command line on aarch64 devices.

    wget https://streams.videolan.org/ffmpeg/incoming/replay_intra_pred_16x16.h264
    ./ffmpeg -cpuflags 0 -threads 1 -i replay_intra_pred_16x16.h264 -f framemd5 -y md5_ref
    ./ffmpeg -threads 1 -i replay_intra_pred_16x16.h264 -f framemd5 -y md5_neon

Signed-off-by: Bin Peng <pengbin@visionular.com>
(cherry picked from commit 3115c0c0e6c27c689a02a7267dcf8e61fa2ac425)
This commit is contained in:
parent
71007e6c12
commit
b660192fb8
1 changed file with 6 additions and 7 deletions
|
|
@@ -489,10 +489,10 @@ function ff_pred16x16_plane_neon_10, export=1
|
|||
mul v2.8h, v2.8h, v0.8h
|
||||
mul v3.8h, v3.8h, v0.8h
|
||||
addp v2.8h, v2.8h, v3.8h
|
||||
addp v2.8h, v2.8h, v2.8h
|
||||
addp v2.4h, v2.4h, v2.4h
|
||||
sshll v3.4s, v2.4h, #2
|
||||
saddw v2.4s, v3.4s, v2.4h
|
||||
saddlp v2.4s, v2.8h
|
||||
addp v2.4s, v2.4s, v2.4s
|
||||
shl v3.4s, v2.4s, #2
|
||||
add v2.4s, v3.4s, v2.4s
|
||||
rshrn v4.4h, v2.4s, #6
|
||||
trn2 v5.4h, v4.4h, v4.4h
|
||||
add v2.4h, v4.4h, v5.4h
|
||||
|
|
@@ -506,14 +506,13 @@ function ff_pred16x16_plane_neon_10, export=1
|
|||
sxtl v6.4s, v5.4h // c
|
||||
|
||||
mov v0.h[0], wzr
|
||||
mul v0.8h, v0.8h, v4.h[0]
|
||||
dup v16.4s, v2.s[0]
|
||||
dup v17.4s, v2.s[0]
|
||||
dup v2.8h, v4.h[0] // b
|
||||
dup v3.4s, v6.s[0] // c
|
||||
sshll v2.4s, v2.4h, #3 // b * 8
|
||||
saddw v16.4s, v16.4s, v0.4h
|
||||
saddw2 v17.4s, v17.4s, v0.8h
|
||||
smlal v16.4s, v0.4h, v4.h[0]
|
||||
smlal2 v17.4s, v0.8h, v4.h[0]
|
||||
sub v3.4s, v3.4s, v2.4s
|
||||
|
||||
mov w3, #16
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue