943,580 Members | Top Members by Rank

Ad:
  • Assembly Discussion Thread
  • Unsolved
  • Views: 2530
  • Assembly RSS
Jun 2nd, 2009
0

How many registers can I use on an Intel i7?

Expand Post »
Hi,
How many 64-bit registers can I use inside an Intel i7 CPU as temporary storage, so that I can feed their contents into XMM registers later? I currently use only XMM0-15, MM0-7, and R8-15. I know I can also use RAX, RBX, RCX, RDX and the eight registers inside the FPU (ST0-ST7), but what others can I use? Can I use the stack registers? Thanks in advance.

My application code is attached below in case it is needed.
Assembly Syntax (Toggle Plain Text)
  1. ///////////////////////////////////////////
  2. pipe_line_math.h
  3. #include <stdio.h>
  4. #include <stdlib.h>
  5. #include <time.h>
  6.  
  7. void pipe_mult_ushort(ushort *data,ushort *rands)
  8. {
  9. __asm__ __volatile__(".intel_syntax noprefix\n\t"
  10. //// in this section we pull as much data as we can into the CPU
  11. //// to minimize the DRAM delay and store it where we can
  12.  
  13. "movdqa xmm0,[edi]\n\t" // load xmm0 & xmm1
  14. "movdqa xmm1,[esi]\n\t"
  15.  
  16. "movdqa xmm2,0x10[edi]\n\t" // load xmm2 & xmm3
  17. "movdqa xmm3,0x10[esi]\n\t"
  18.  
  19. "movdqa xmm4,0x20[edi]\n\t" // load xmm4 & xmm5
  20. "movdqa xmm5,0x20[esi]\n\t"
  21.  
  22. "movdqa xmm6,0x30[edi]\n\t" // load xmm6 & xmm7
  23. "movdqa xmm7,0x30[esi]\n\t"
  24.  
  25. "movdqa xmm8,0x40[edi]\n\t" // load xmm8 & xmm9
  26. "movdqa xmm9,0x40[esi]\n\t"
  27.  
  28. "movdqa xmm10,0x50[edi]\n\t" // load xmm10 & xmm11
  29. "movdqa xmm11,0x50[esi]\n\t"
  30.  
  31. "movdqa xmm12,0x60[edi]\n\t" // load xmm12 & xmm13
  32. "movdqa xmm13,0x60[esi]\n\t"
  33.  
  34. "movdqa xmm14,0x70[edi]\n\t" // load xmm14 & xmm15
  35. "movdqa xmm15,0x70[esi]\n\t"
  36.  
  37. "movq mm0,0x80[edi]\n\t" // load mmx0
  38. "movq mm1,0x80[esi]\n\t"
  39. "movq mm2,0x88[edi]\n\t"
  40. "movq mm3,0x88[esi]\n\t"
  41. "movq mm4,0x90[edi]\n\t"
  42. "movq mm5,0x90[esi]\n\t"
  43. "movq mm6,0x98[edi]\n\t"
  44. "movq mm7,0x98[esi]\n\t"
  45.  
  46. "movq r8,0xA0[edi]\n\t" // store some in extended 64bit registers
  47. "movq r9,0xA0[esi]\n\t"
  48. "movq r10,0xA8[edi]\n\t"
  49. "movq r11,0xA8[esi]\n\t"
  50. "movq r12,0xB0[edi]\n\t"
  51. "movq r13,0xB0[esi]\n\t"
  52. "movq r14,0xB8[edi]\n\t"
  53. "movq r15,0xB8[esi]\n\t"
  54.  
  55. // all available registers were data can be stored were filled, proceed with calcs now
  56. // calc xmms first
  57. "pmullw xmm0,xmm1\n\t" // calc xmm0
  58. "pmullw xmm2,xmm3\n\t" // calc xmm2
  59. "pmullw xmm4,xmm5\n\t" // calc xmm4
  60. "pmullw xmm6,xmm7\n\t" // calc xmm6
  61. "pmullw xmm8,xmm9\n\t" // calc xmm8
  62. "pmullw xmm10,xmm11\n\t" // calc xmm10
  63. "pmullw xmm12,xmm13\n\t" // calc xmm12
  64. "pmullw xmm14,xmm15\n\t" // calc xmm14
  65.  
  66. // calc mms second
  67. "pmullw mm0,mm1\n\t" // calc mm0
  68. "pmullw mm2,mm3\n\t" // calc mm0
  69. "pmullw mm4,mm5\n\t" // calc mm0
  70. "pmullw mm6,mm7\n\t" // calc mm0
  71.  
  72. // send xmm values to memory
  73. "movdqa [edi],xmm0\n\t" // xmm0 -> memory
  74. "movdqa 0x10[edi],xmm2\n\t" // xmm2 -> memory
  75. "movdqa 0x20[edi],xmm4\n\t" // xmm4 -> memory
  76. "movdqa 0x30[edi],xmm6\n\t" // xmm6 -> memory
  77. "movdqa 0x40[edi],xmm8\n\t" // xmm8 -> memory
  78. "movdqa 0x50[edi],xmm10\n\t" // xmm10 -> memory
  79. "movdqa 0x60[edi],xmm12\n\t" // xmm12 -> memory
  80. "movdqa 0x70[edi],xmm14\n\t" // xmm14 -> memory
  81.  
  82. // send mm values to memory
  83. "movq 0x80[edi],mm0\n\t" // mm0 -> memory
  84. "movq 0x88[edi],mm2\n\t" // mm2 -> memory
  85. "movq 0x90[edi],mm4\n\t" // mm4 -> memory
  86. "movq 0x98[edi],mm6\n\t" // mm6 -> memory
  87.  
  88. // xmms & mms are free now
  89. // load mms from 'r's
  90. "movq mm0,r8\n\t" // move saved 'r' to mm
  91. "movq mm1,r9\n\t" // move saved 'r' to mm
  92. "movq mm2,r10\n\t" // move saved 'r' to mm
  93. "movq mm3,r11\n\t" // move saved 'r' to mm
  94. "movq mm4,r12\n\t" // move saved 'r' to mm
  95. "movq mm5,r13\n\t" // move saved 'r' to mm
  96. "movq mm6,r14\n\t" // move saved 'r' to mm
  97. "movq mm7,r15\n\t" // move saved 'r' to mm
  98. // calc mms
  99. "pmullw mm0,mm1\n\t" // calc mms copied from 'r's
  100. "pmullw mm2,mm3\n\t" // calc mms copied from 'r's
  101. "pmullw mm4,mm5\n\t" // calc mms copied from 'r's
  102. "pmullw mm6,mm7\n\t" // calc mms copied from 'r's
  103. // send mm values to memory
  104. "movq 0xA0[edi],mm0\n\t" // mm0 -> memory
  105. "movq 0xA8[edi],mm2\n\t" // mm2 -> memory
  106. "movq 0xB0[edi],mm4\n\t" // mm4 -> memory
  107. "movq 0xB8[edi],mm6\n\t" // mm6 -> memory
  108.  
  109. :
  110. : "D" (data) ,"S" (rands)
  111. : "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7",
  112. "xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15",
  113. "mm0","mm1","mm2","mm3","mm4","mm5","mm6","mm7",
  114. "r8","r9","r10","r11","r12","r13","r14","r15",
  115. "memory");
  116.  
  117. }
  118.  
  119.  
  120. ///////////////////////////////////////////
  121. #include <stdio.h>
  122. #include <stdlib.h>
  123. #include <time.h>
  124.  
  125. #include "pipe_line_math.h"
  126.  
  127. #define _ARRAY_SIZE_ 256*256*256*24
  128. #define _ELTS_PER_PIPE_ 112
  129. ushort __attribute__ ((aligned (16))) rands[_ARRAY_SIZE_];
  130. ushort __attribute__ ((aligned (16))) data[_ARRAY_SIZE_];
  131.  
  132. struct timespec tspec1;
  133. struct timespec tspec2;
  134.  
  135.  
  136. main() {
  137. ulong i,max;
  138. double diff;
  139.  
  140. for (i=0;i<_ARRAY_SIZE_;i++) { /// fill with any data
  141. rands[i]=i%4;
  142. data[i]=i*2+i;
  143. }
  144.  
  145. max=_ARRAY_SIZE_/_ELTS_PER_PIPE_;
  146. clock_gettime(CLOCK_REALTIME,&tspec1);
  147. for (i=0;i<max;i=i+_ELTS_PER_PIPE_) pipe_mult_ushort(&data[i],&rands[i]);
  148. for (i=0;i<max;i=i+_ELTS_PER_PIPE_) pipe_mult_ushort(&data[i],&rands[i]); // one more time
  149. clock_gettime(CLOCK_REALTIME,&tspec2);
  150. diff=((double)tspec2.tv_sec+(double)tspec2.tv_nsec/1000000000.0)-((double)tspec1.tv_sec+tspec1.tv_nsec/1000000000.0);
  151. printf("time pipeline multiply:\nstart: %d:%d\n end: %d:%d ; total diff: %f\n",tspec1.tv_sec,tspec1.tv_nsec,tspec2.tv_sec,tspec2.tv_nsec,diff);
  152. printf("sample data:\n");
  153. for (i=0;i<64;i++) {
  154. printf("%d,",data[i]);
  155. if (!((i+1)%16)) printf("\n");
  156.  
  157. }
  158.  
  159. return(0);
  160. }
Similar Threads
Reputation Points: 10
Solved Threads: 0
Newbie Poster
nuliknol is offline Offline
10 posts
since May 2009
Jun 3rd, 2009
0

Re: how many registers can i use in intel i7

Reputation Points: 99
Solved Threads: 5
Junior Poster
Evenbit is offline Offline
140 posts
since Mar 2005

This thread is more than three months old

No one has posted to this discussion for at least three months. Please let old threads die and do not reply to them unless you feel you have something new and valuable to contribute that absolutely must be added to make the discussion complete. Otherwise, please start a new thread in this forum instead.
Message:
Previous Thread in Assembly Forum Timeline: i7 instruction costs
Next Thread in Assembly Forum Timeline: fistp to general purpose reg ?





About Us | Contact Us | Advertise | Acceptable Use Policy
Forum Index | Build Custom RSS Feed


Follow us on Twitter


© 2011 DaniWeb® LLC