网上找到一个C语言程序,其中嵌入了下边这段汇编语言,看不懂啊,还请各位达人帮忙转换下,非常感谢!!!
_asm {
push eax
push ebx
push ecx
push edx
push edi
mov eax, puc_out
mov ebx, puc_y
mov ecx, puc_u
mov edx, puc_v
mov edi, horiz_count
horiz_loop:
movd mm2, [ecx]
pxor mm7, mm7
movd mm3, [edx]
punpcklbw mm2, mm7 ; mm2 = __u3__u2__u1__u0
movq mm0, [ebx] ; mm0 = y7y6y5y4y3y2y1y0
punpcklbw mm3, mm7 ; mm3 = __v3__v2__v1__v0
movq mm1, mmw_0x00ff ; mm1 = 00ff00ff00ff00ff
psubusb mm0, mmb_0x10 ; mm0 -= 16
psubw mm2, mmw_0x0080 ; mm2 -= 128
pand mm1, mm0 ; mm1 = __y6__y4__y2__y0
psubw mm3, mmw_0x0080 ; mm3 -= 128
psllw mm1, 3 ; mm1 *= 8
psrlw mm0, 8 ; mm0 = __y7__y5__y3__y1
psllw mm2, 3 ; mm2 *= 8
pmulhw mm1, mmw_mult_Y ; mm1 *= luma coeff
psllw mm0, 3 ; mm0 *= 8
psllw mm3, 3 ; mm3 *= 8
movq mm5, mm3 ; mm5 = mm3 = v
pmulhw mm5, mmw_mult_V_R ; mm5 = red chroma
movq mm4, mm2 ; mm4 = mm2 = u
pmulhw mm0, mmw_mult_Y ; mm0 *= luma coeff
movq mm7, mm1 ; even luma part
pmulhw mm2, mmw_mult_U_G ; mm2 *= u green coeff
paddsw mm7, mm5 ; mm7 = luma + chroma __r6__r4__r2__r0
pmulhw mm3, mmw_mult_V_G ; mm3 *= v green coeff
packuswb mm7, mm7 ; mm7 = r6r4r2r0r6r4r2r0
pmulhw mm4, mmw_mult_U_B ; mm4 = blue chroma
paddsw mm5, mm0 ; mm5 = luma + chroma __r7__r5__r3__r1
packuswb mm5, mm5 ; mm5 = r7r5r3r1r7r5r3r1
paddsw mm2, mm3 ; mm2 = green chroma
movq mm3, mm1 ; mm3 = __y6__y4__y2__y0
movq mm6, mm1 ; mm6 = __y6__y4__y2__y0
paddsw mm3, mm4 ; mm3 = luma + chroma __b6__b4__b2__b0
paddsw mm6, mm2 ; mm6 = luma + chroma __g6__g4__g2__g0
punpcklbw mm7, mm5 ; mm7 = r7r6r5r4r3r2r1r0
paddsw mm2, mm0 ; odd luma part plus chroma part __g7__g5__g3__g1
packuswb mm6, mm6 ; mm6 = g6g4g2g0g6g4g2g0
packuswb mm2, mm2 ; mm2 = g7g5g3g1g7g5g3g1
packuswb mm3, mm3 ; mm3 = b6b4b2b0b6b4b2b0
paddsw mm4, mm0 ; odd luma part plus chroma part __b7__b5__b3__b1
packuswb mm4, mm4 ; mm4 = b7b5b3b1b7b5b3b1
punpcklbw mm6, mm2 ; mm6 = g7g6g5g4g3g2g1g0
punpcklbw mm3, mm4 ; mm3 = b7b6b5b4b3b2b1b0
/* 32-bit shuffle.... */
pxor mm0, mm0 ; mm0 = 0 -- required: mm0 still holds odd luma and is used as the zero operand of the unpack below
movq mm1, mm6 ; mm1 = g7g6g5g4g3g2g1g0
punpcklbw mm1, mm0 ; mm1 = __g3__g2__g1__g0
movq mm0, mm3 ; mm0 = b7b6b5b4b3b2b1b0
punpcklbw mm0, mm7 ; mm0 = r3b3r2b2r1b1r0b0
movq mm2, mm0 ; mm2 = r3b3r2b2r1b1r0b0
punpcklbw mm0, mm1 ; mm0 = __r1g1b1__r0g0b0
punpckhbw mm2, mm1 ; mm2 = __r3g3b3__r2g2b2
/* 24-bit shuffle and save... */
movd [eax], mm0 ; eax[0] = __r0g0b0
psrlq mm0, 32 ; mm0 = __r1g1b1
movd 3[eax], mm0 ; eax[3] = __r1g1b1
movd 6[eax], mm2 ; eax[6] = __r2g2b2
psrlq mm2, 32 ; mm2 = __r3g3b3
movd 9[eax], mm2 ; eax[9] = __r3g3b3
/* 32-bit shuffle.... */
pxor mm0, mm0 ; mm0 = 0 -- required: mm0 still holds __r1g1b1 and is used as the zero operand of the unpack below
movq mm1, mm6 ; mm1 = g7g6g5g4g3g2g1g0
punpckhbw mm1, mm0 ; mm1 = __g7__g6__g5__g4
movq mm0, mm3 ; mm0 = b7b6b5b4b3b2b1b0