| | |
How many registers can I use in an Intel i7?
![]() |
•
•
Join Date: May 2009
Posts: 5
Reputation:
Solved Threads: 0
Hi,
How many 64-bit registers can I use inside an Intel i7 CPU for storage purposes, to feed them later into the XMM registers? I currently use only XMM0-15, MM0-7 and R8-15. I know I can use RAX, RBX, RCX, RDX and the eight registers inside the FPU (ST0-ST7), but what others can I use? Can I use the stack registers? Thanks in advance.
I attach my application code if needed.
How many 64-bit registers can I use inside an Intel i7 CPU for storage purposes, to feed them later into the XMM registers? I currently use only XMM0-15, MM0-7 and R8-15. I know I can use RAX, RBX, RCX, RDX and the eight registers inside the FPU (ST0-ST7), but what others can I use? Can I use the stack registers? Thanks in advance.
I attach my application code if needed.
Assembly Syntax (Toggle Plain Text)
/////////////////////////////////////////// pipe_line_math.h #include <stdio.h> #include <stdlib.h> #include <time.h> void pipe_mult_ushort(ushort *data,ushort *rands) { __asm__ __volatile__(".intel_syntax noprefix\n\t" //// in this section we pull as much data as we can into the CPU //// to minimize the DRAM delay and store it where we can "movdqa xmm0,[edi]\n\t" // load xmm0 & xmm1 "movdqa xmm1,[esi]\n\t" "movdqa xmm2,0x10[edi]\n\t" // load xmm2 & xmm3 "movdqa xmm3,0x10[esi]\n\t" "movdqa xmm4,0x20[edi]\n\t" // load xmm4 & xmm5 "movdqa xmm5,0x20[esi]\n\t" "movdqa xmm6,0x30[edi]\n\t" // load xmm6 & xmm7 "movdqa xmm7,0x30[esi]\n\t" "movdqa xmm8,0x40[edi]\n\t" // load xmm8 & xmm9 "movdqa xmm9,0x40[esi]\n\t" "movdqa xmm10,0x50[edi]\n\t" // load xmm10 & xmm11 "movdqa xmm11,0x50[esi]\n\t" "movdqa xmm12,0x60[edi]\n\t" // load xmm12 & xmm13 "movdqa xmm13,0x60[esi]\n\t" "movdqa xmm14,0x70[edi]\n\t" // load xmm14 & xmm15 "movdqa xmm15,0x70[esi]\n\t" "movq mm0,0x80[edi]\n\t" // load mmx0 "movq mm1,0x80[esi]\n\t" "movq mm2,0x88[edi]\n\t" "movq mm3,0x88[esi]\n\t" "movq mm4,0x90[edi]\n\t" "movq mm5,0x90[esi]\n\t" "movq mm6,0x98[edi]\n\t" "movq mm7,0x98[esi]\n\t" "movq r8,0xA0[edi]\n\t" // store some in extended 64bit registers "movq r9,0xA0[esi]\n\t" "movq r10,0xA8[edi]\n\t" "movq r11,0xA8[esi]\n\t" "movq r12,0xB0[edi]\n\t" "movq r13,0xB0[esi]\n\t" "movq r14,0xB8[edi]\n\t" "movq r15,0xB8[esi]\n\t" // all available registers were data can be stored were filled, proceed with calcs now // calc xmms first "pmullw xmm0,xmm1\n\t" // calc xmm0 "pmullw xmm2,xmm3\n\t" // calc xmm2 "pmullw xmm4,xmm5\n\t" // calc xmm4 "pmullw xmm6,xmm7\n\t" // calc xmm6 "pmullw xmm8,xmm9\n\t" // calc xmm8 "pmullw xmm10,xmm11\n\t" // calc xmm10 "pmullw xmm12,xmm13\n\t" // calc xmm12 "pmullw xmm14,xmm15\n\t" // calc xmm14 // calc mms second "pmullw mm0,mm1\n\t" // calc mm0 "pmullw mm2,mm3\n\t" // calc mm0 "pmullw mm4,mm5\n\t" // calc mm0 "pmullw mm6,mm7\n\t" // calc mm0 // send xmm values to memory "movdqa 
[edi],xmm0\n\t" // xmm0 -> memory "movdqa 0x10[edi],xmm2\n\t" // xmm2 -> memory "movdqa 0x20[edi],xmm4\n\t" // xmm4 -> memory "movdqa 0x30[edi],xmm6\n\t" // xmm6 -> memory "movdqa 0x40[edi],xmm8\n\t" // xmm8 -> memory "movdqa 0x50[edi],xmm10\n\t" // xmm10 -> memory "movdqa 0x60[edi],xmm12\n\t" // xmm12 -> memory "movdqa 0x70[edi],xmm14\n\t" // xmm14 -> memory // send mm values to memory "movq 0x80[edi],mm0\n\t" // mm0 -> memory "movq 0x88[edi],mm2\n\t" // mm2 -> memory "movq 0x90[edi],mm4\n\t" // mm4 -> memory "movq 0x98[edi],mm6\n\t" // mm6 -> memory // xmms & mms are free now // load mms from 'r's "movq mm0,r8\n\t" // move saved 'r' to mm "movq mm1,r9\n\t" // move saved 'r' to mm "movq mm2,r10\n\t" // move saved 'r' to mm "movq mm3,r11\n\t" // move saved 'r' to mm "movq mm4,r12\n\t" // move saved 'r' to mm "movq mm5,r13\n\t" // move saved 'r' to mm "movq mm6,r14\n\t" // move saved 'r' to mm "movq mm7,r15\n\t" // move saved 'r' to mm // calc mms "pmullw mm0,mm1\n\t" // calc mms copied from 'r's "pmullw mm2,mm3\n\t" // calc mms copied from 'r's "pmullw mm4,mm5\n\t" // calc mms copied from 'r's "pmullw mm6,mm7\n\t" // calc mms copied from 'r's // send mm values to memory "movq 0xA0[edi],mm0\n\t" // mm0 -> memory "movq 0xA8[edi],mm2\n\t" // mm2 -> memory "movq 0xB0[edi],mm4\n\t" // mm4 -> memory "movq 0xB8[edi],mm6\n\t" // mm6 -> memory : : "D" (data) ,"S" (rands) : "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7", "xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15", "mm0","mm1","mm2","mm3","mm4","mm5","mm6","mm7", "r8","r9","r10","r11","r12","r13","r14","r15", "memory"); } /////////////////////////////////////////// #include <stdio.h> #include <stdlib.h> #include <time.h> #include "pipe_line_math.h" #define _ARRAY_SIZE_ 256*256*256*24 #define _ELTS_PER_PIPE_ 112 ushort __attribute__ ((aligned (16))) rands[_ARRAY_SIZE_]; ushort __attribute__ ((aligned (16))) data[_ARRAY_SIZE_]; struct timespec tspec1; struct timespec tspec2; main() { ulong i,max; 
double diff; for (i=0;i<_ARRAY_SIZE_;i++) { /// fill with any data rands[i]=i%4; data[i]=i*2+i; } max=_ARRAY_SIZE_/_ELTS_PER_PIPE_; clock_gettime(CLOCK_REALTIME,&tspec1); for (i=0;i<max;i=i+_ELTS_PER_PIPE_) pipe_mult_ushort(&data[i],&rands[i]); for (i=0;i<max;i=i+_ELTS_PER_PIPE_) pipe_mult_ushort(&data[i],&rands[i]); // one more time clock_gettime(CLOCK_REALTIME,&tspec2); diff=((double)tspec2.tv_sec+(double)tspec2.tv_nsec/1000000000.0)-((double)tspec1.tv_sec+tspec1.tv_nsec/1000000000.0); printf("time pipeline multiply:\nstart: %d:%d\n end: %d:%d ; total diff: %f\n",tspec1.tv_sec,tspec1.tv_nsec,tspec2.tv_sec,tspec2.tv_nsec,diff); printf("sample data:\n"); for (i=0;i<64;i++) { printf("%d,",data[i]); if (!((i+1)%16)) printf("\n"); } return(0); }
For 64-bit specific information, look here:
http://www.turboirc.com/asm/
http://www.vikaskumar.org/wiki/index...86-64_Tutorial
http://www.x86-64.org/
http://www.viva64.com/links/64-bit-development/
http://www.turboirc.com/asm/
http://www.vikaskumar.org/wiki/index...86-64_Tutorial
http://www.x86-64.org/
http://www.viva64.com/links/64-bit-development/
while (CPU is present) {some assembly required}
![]() |
Similar Threads
- Help me convert this Intel Assembly Instruction to GCC Compatible Inline asm (AT&T)! (Assembly)
- Registers problem (Assembly)
- Intel stack (Assembly)
- My Intel Graphics Card SUCKS (Monitors, Displays and Video Cards)
- AMD or Intel... (Motherboards, CPUs and RAM)
- problems with my intel 82845 chipset (Motherboards, CPUs and RAM)
- RH9 + XP + VMWare + Intel (*nix Software)
- Belkin F5D6020 PCMCIA with Intel AnyPoint Problems (Troubleshooting Dead Machines)
Other Threads in the Assembly Forum
- Previous Thread: i7 instruction costs
- Next Thread: fistp to general purpose reg ?
| Thread Tools | Search this Thread |





