Hello!

On Tue, 5 Jul 2005, Andy Polyakov via RT wrote:

>
> >>>Sorry, I forgot to mention that I use the 20050627 snapshot, and
> >>>./config -g shared zlib works fine.


> Yet I find it a bit too puzzling... After I replied, I came to realize
> that no-sse2/386 shouldn't actually affect the referred test
> procedure... Indeed, even though there are SSE2 instructions present in
> bn_mul_add_words, they operate on mmx registers and therefore don't
> actually require kernel support. Kernel support is required for SSE2
> instructions operating on 128-bit xmm registers, but not on 64-bit mmx
> ones, and therefore if CPUID said SSE2 is available, then
> bn_mul_add_words should have worked... So if you have time, I'd still
> appreciate it if you could collect and send over the disassembly output
> in the vicinity of the segmentation fault, as well as info registers...
> Once again, this is optional. A.


(gdb) bt
#0 0x281330a1 in bn_mul_add_words () from ./libcrypto.so.0.9.8
#1 0x0806038c in ?? ()
#2 0x28133009 in bn_sqr_normal (r=0x282966c4, a=0x24, n=37, tmp=0x28132c51)
at bn_sqr.c:182
#3 0x28132c51 in BN_sqr (r=0x805578c, a=0x28066000, ctx=0x28132abc)
at bn_sqr.c:132
#4 0x0804b58f in test_sqr (bp=0x80610c0, ctx=0x8061080) at bntest.c:691
#5 0x08049d14 in main (argc=671507456, argv=0xbfbffb4c) at bntest.c:215
(gdb) info r
eax 0x30281d48 807935304
ecx 0x8 8
edx 0x8061248 134615624
ebx 0x8061248 134615624
esp 0xbfbff870 0xbfbff870
ebp 0x89890000 0x89890000
esi 0x0 0
edi 0x806038c 134611852
eip 0x281330a1 0x281330a1
eflags 0x206 518
cs 0x1f 31
ss 0x2f 47
ds 0x2f 47
es 0x2f 47
fs 0x2f 47
gs 0x2f 47
(gdb) disassemble
Dump of assembler code for function bn_mul_add_words:
0x28133070 : push %ebp
0x28133071 : push %ebx
0x28133072 : push %esi
0x28133073 : push %edi
0x28133074 : xor %esi,%esi
0x28133076 : mov 0x14(%esp,1),%edi
0x2813307a : mov 0x1c(%esp,1),%ecx
0x2813307e : mov 0x18(%esp,1),%ebx
0x28133082 : and $0xfffffff8,%ecx
0x28133085 : mov 0x20(%esp,1),%ebp
0x28133089 : push %ecx
0x2813308a : je 0x28133259
0x28133090 : call 0x28133095
0x28133095 : pop %eax
0x28133096 : add $0x163630,%eax
0x2813309b : mov 0x2cc(%eax),%eax
0x281330a1 : btl $0x1a,(%eax)
0x281330a5 : jae 0x2813318f
0x281330ab : movd %ebp,%mm0
0x281330ae : pxor %mm1,%mm1
0x281330b1 : movd (%edi),%mm3
0x281330b4 : paddq %mm3,%mm1
0x281330b7 : movd (%ebx),%mm2
0x281330ba : pmuludq %mm0,%mm2
0x281330bd : movd 0x4(%ebx),%mm4
0x281330c1 : pmuludq %mm0,%mm4
0x281330c4 : movd 0x8(%ebx),%mm6
0x281330c8 : pmuludq %mm0,%mm6
0x281330cb : movd 0xc(%ebx),%mm7
0x281330cf : pmuludq %mm0,%mm7
0x281330d2 : paddq %mm2,%mm1
0x281330d5 : movd 0x4(%edi),%mm3
0x281330d9 : paddq %mm4,%mm3
0x281330dc : movd 0x8(%edi),%mm5
0x281330e0 : paddq %mm6,%mm5
0x281330e3 : movd 0xc(%edi),%mm4
0x281330e7 : paddq %mm4,%mm7
0x281330ea : movd %mm1,(%edi)
0x281330ed : movd 0x10(%ebx),%mm2
0x281330f1 : pmuludq %mm0,%mm2
0x281330f4 : psrlq $0x20,%mm1
0x281330f8 : movd 0x14(%ebx),%mm4
0x281330fc : pmuludq %mm0,%mm4
0x281330ff : paddq %mm3,%mm1
0x28133102 : movd 0x18(%ebx),%mm6
0x28133106 : pmuludq %mm0,%mm6
0x28133109 : movd %mm1,0x4(%edi)
0x2813310d : psrlq $0x20,%mm1
0x28133111 : movd 0x1c(%ebx),%mm3
0x28133115 : add $0x20,%ebx
0x28133118 : pmuludq %mm0,%mm3
0x2813311b : paddq %mm5,%mm1
0x2813311e : movd 0x10(%edi),%mm5
0x28133122 : paddq %mm5,%mm2
0x28133125 : movd %mm1,0x8(%edi)
0x28133129 : psrlq $0x20,%mm1
0x2813312d : paddq %mm7,%mm1
0x28133130 : movd 0x14(%edi),%mm5
0x28133134 : paddq %mm5,%mm4
0x28133137 : movd %mm1,0xc(%edi)
0x2813313b : psrlq $0x20,%mm1
0x2813313f : paddq %mm2,%mm1
0x28133142 : movd 0x18(%edi),%mm5
0x28133146 : paddq %mm5,%mm6
0x28133149 : movd %mm1,0x10(%edi)
0x2813314d : psrlq $0x20,%mm1
0x28133151 : paddq %mm4,%mm1
0x28133154 : movd 0x1c(%edi),%mm5
0x28133158 : paddq %mm5,%mm3
0x2813315b : movd %mm1,0x14(%edi)
0x2813315f : psrlq $0x20,%mm1
0x28133163 : paddq %mm6,%mm1
0x28133166 : movd %mm1,0x18(%edi)
0x2813316a : psrlq $0x20,%mm1
0x2813316e : paddq %mm3,%mm1
0x28133171 : movd %mm1,0x1c(%edi)
0x28133175 : add $0x20,%edi
0x28133178 : psrlq $0x20,%mm1
0x2813317c : sub $0x8,%ecx
0x2813317f : jne 0x281330b1
0x28133185 : movd %mm1,%esi
0x28133188 : emms
0x2813318a : jmp 0x28133259
0x2813318f : mov %ecx,(%esp,1)
0x28133192 : mov (%ebx),%eax
0x28133194 : mul %ebp
0x28133196 : add %esi,%eax
0x28133198 : mov (%edi),%esi
0x2813319a : adc $0x0,%edx
0x2813319d : add %esi,%eax
0x2813319f : adc $0x0,%edx
0x281331a2 : mov %eax,(%edi)
0x281331a4 : mov %edx,%esi
0x281331a6 : mov 0x4(%ebx),%eax
0x281331a9 : mul %ebp
0x281331ab : add %esi,%eax
0x281331ad : mov 0x4(%edi),%esi
0x281331b0 : adc $0x0,%edx
0x281331b3 : add %esi,%eax
0x281331b5 : adc $0x0,%edx
0x281331b8 : mov %eax,0x4(%edi)
0x281331bb : mov %edx,%esi
0x281331bd : mov 0x8(%ebx),%eax
0x281331c0 : mul %ebp
0x281331c2 : add %esi,%eax
0x281331c4 : mov 0x8(%edi),%esi
0x281331c7 : adc $0x0,%edx
0x281331ca : add %esi,%eax
0x281331cc : adc $0x0,%edx
0x281331cf : mov %eax,0x8(%edi)
0x281331d2 : mov %edx,%esi
0x281331d4 : mov 0xc(%ebx),%eax
0x281331d7 : mul %ebp
0x281331d9 : add %esi,%eax
0x281331db : mov 0xc(%edi),%esi
0x281331de : adc $0x0,%edx
0x281331e1 : add %esi,%eax
0x281331e3 : adc $0x0,%edx
0x281331e6 : mov %eax,0xc(%edi)
0x281331e9 : mov %edx,%esi
0x281331eb : mov 0x10(%ebx),%eax
0x281331ee : mul %ebp
0x281331f0 : add %esi,%eax
0x281331f2 : mov 0x10(%edi),%esi
0x281331f5 : adc $0x0,%edx
0x281331f8 : add %esi,%eax
0x281331fa : adc $0x0,%edx
0x281331fd : mov %eax,0x10(%edi)
0x28133200 : mov %edx,%esi
0x28133202 : mov 0x14(%ebx),%eax
0x28133205 : mul %ebp
0x28133207 : add %esi,%eax
0x28133209 : mov 0x14(%edi),%esi
0x2813320c : adc $0x0,%edx
0x2813320f : add %esi,%eax
0x28133211 : adc $0x0,%edx
0x28133214 : mov %eax,0x14(%edi)
0x28133217 : mov %edx,%esi
0x28133219 : mov 0x18(%ebx),%eax
0x2813321c : mul %ebp
0x2813321e : add %esi,%eax
0x28133220 : mov 0x18(%edi),%esi
0x28133223 : adc $0x0,%edx
0x28133226 : add %esi,%eax
0x28133228 : adc $0x0,%edx
0x2813322b : mov %eax,0x18(%edi)
0x2813322e : mov %edx,%esi
0x28133230 : mov 0x1c(%ebx),%eax
0x28133233 : mul %ebp
0x28133235 : add %esi,%eax
0x28133237 : mov 0x1c(%edi),%esi
0x2813323a : adc $0x0,%edx
0x2813323d : add %esi,%eax
0x2813323f : adc $0x0,%edx
0x28133242 : mov %eax,0x1c(%edi)
0x28133245 : mov %edx,%esi
0x28133247 : mov (%esp,1),%ecx
0x2813324a : add $0x20,%ebx
0x2813324d : add $0x20,%edi
0x28133250 : sub $0x8,%ecx
0x28133253 : jne 0x2813318f
0x28133259 : mov 0x20(%esp,1),%ecx
0x2813325d : and $0x7,%ecx
0x28133260 : jne 0x28133270
0x28133262 : jmp 0x28133324
0x28133267 : mov %esi,%esi
0x28133269 : lea 0x0(%edi,1),%edi
0x28133270 : mov (%ebx),%eax
0x28133272 : mul %ebp
0x28133274 : add %esi,%eax
0x28133276 : mov (%edi),%esi
0x28133278 : adc $0x0,%edx
0x2813327b : add %esi,%eax
0x2813327d : adc $0x0,%edx
0x28133280 : dec %ecx
0x28133281 : mov %eax,(%edi)
0x28133283 : mov %edx,%esi
0x28133285 : je 0x28133324
0x2813328b : mov 0x4(%ebx),%eax
0x2813328e : mul %ebp
0x28133290 : add %esi,%eax
0x28133292 : mov 0x4(%edi),%esi
0x28133295 : adc $0x0,%edx
0x28133298 : add %esi,%eax
0x2813329a : adc $0x0,%edx
0x2813329d : dec %ecx
0x2813329e : mov %eax,0x4(%edi)
0x281332a1 : mov %edx,%esi
0x281332a3 : je 0x28133324
0x281332a5 : mov 0x8(%ebx),%eax
0x281332a8 : mul %ebp
0x281332aa : add %esi,%eax
0x281332ac : mov 0x8(%edi),%esi
0x281332af : adc $0x0,%edx
0x281332b2 : add %esi,%eax
0x281332b4 : adc $0x0,%edx
0x281332b7 : dec %ecx
0x281332b8 : mov %eax,0x8(%edi)
0x281332bb : mov %edx,%esi
0x281332bd : je 0x28133324
0x281332bf : mov 0xc(%ebx),%eax
0x281332c2 : mul %ebp
0x281332c4 : add %esi,%eax
0x281332c6 : mov 0xc(%edi),%esi
0x281332c9 : adc $0x0,%edx
0x281332cc : add %esi,%eax
0x281332ce : adc $0x0,%edx
0x281332d1 : dec %ecx
0x281332d2 : mov %eax,0xc(%edi)
0x281332d5 : mov %edx,%esi
0x281332d7 : je 0x28133324
0x281332d9 : mov 0x10(%ebx),%eax
0x281332dc : mul %ebp
0x281332de : add %esi,%eax
0x281332e0 : mov 0x10(%edi),%esi
0x281332e3 : adc $0x0,%edx
0x281332e6 : add %esi,%eax
0x281332e8 : adc $0x0,%edx
0x281332eb : dec %ecx
0x281332ec : mov %eax,0x10(%edi)
0x281332ef : mov %edx,%esi
0x281332f1 : je 0x28133324
0x281332f3 : mov 0x14(%ebx),%eax
0x281332f6 : mul %ebp
0x281332f8 : add %esi,%eax
0x281332fa : mov 0x14(%edi),%esi
0x281332fd : adc $0x0,%edx
0x28133300 : add %esi,%eax
0x28133302 : adc $0x0,%edx
0x28133305 : dec %ecx
0x28133306 : mov %eax,0x14(%edi)
0x28133309 : mov %edx,%esi
0x2813330b : je 0x28133324
0x2813330d : mov 0x18(%ebx),%eax
0x28133310 : mul %ebp
0x28133312 : add %esi,%eax
0x28133314 : mov 0x18(%edi),%esi
0x28133317 : adc $0x0,%edx
0x2813331a : add %esi,%eax
0x2813331c : adc $0x0,%edx
0x2813331f : mov %eax,0x18(%edi)
0x28133322 : mov %edx,%esi
0x28133324 : mov %esi,%eax
0x28133326 : pop %ecx
0x28133327 : pop %edi
0x28133328 : pop %esi
0x28133329 : pop %ebx
0x2813332a : pop %ebp
0x2813332b : ret
0x2813332c : lea 0x0(%esi,1),%esi
End of assembler dump.

Thank you!

--
SY, Dmitry Belyavsky (ICQ UIN 11116575)

______________________________________________________________________
OpenSSL Project http://www.openssl.org
Development Mailing List openssl-dev@openssl.org
Automated List Manager majordomo@openssl.org