Translate

Unlocking the Power: Compiler Optimization and Testing in C/C++ vs Python

Compiler optimization and testing:
After compiler optimization, C/C++ performance improves dramatically; some steps run so fast that they have to be executed 10 billion times before a measurable result is obtained. Cxxdroid uses Clang with only the -O3 flag, which automatically uses the accelerated instruction set. Termux uses g++, which supports a large number of flags, and initial tests show that it is much faster than Cxxdroid. However, there are still many areas that need to be researched and tested.
Under the same optimization conditions, Python's performance cannot be compared with C's, because its many high-level types working together incur a significant cost.

#include "mylib.h"
using namespace std;
/**
 * Runs a fixed series of timing loops over myChar, std::string, std::vector,
 * std::unordered_map and malloc'ed C arrays, printing the elapsed processor
 * time and throughput of each, then launches the Python comparison script.
 *
 * The "get" loops read a value that is never used afterwards, so an
 * optimizing compiler is free to remove them; a near-zero time for those
 * loops may reflect that rather than real memory reads.
 *
 * @param ts  Iterations of every "set"/append loop and element count of the
 *            two C arrays. The "100X" read loops run 100 * ts iterations.
 * @return 0 once every benchmark has run, 1 if the C arrays could not be
 *         allocated.
 */
int testPerformance (int ts = 10000 * 1000){
//	print_ulimit_a();

	// ts is an int, so the count is printed with %d.
	printf("Testing Performance for %d Millions times:\n",ts/1000/1000);

	// Iteration count of the "100X" read loops, computed in 64 bits:
	// ts*100 evaluated as int overflows for the 10000*10000 that main() passes.
	const long long reads = 100LL * ts;

	// Processor time consumed since `begin`, in seconds.
	auto secondsSince = [](clock_t begin) -> long double {
		return static_cast<long double>(clock() - begin) / CLOCKS_PER_SEC;
	};

	// Allocate the raw arrays first so a failure can bail out before any work.
	char *cst=static_cast<char*>(malloc(ts*sizeof(char)));
	int *ls1=static_cast<int*>(malloc(ts*sizeof(int)));
	if (ts > 0 && (cst == nullptr || ls1 == nullptr))
	{
		fprintf(stderr,"testPerformance: cannot allocate arrays for %d elements\n",ts);
		free(cst);
		free(ls1);
		return 1;
	}

	clock_t start;
	long double total_time;
	unordered_map<int,long double> umap;
	vector<int> vlist;
	string strpp;
	myChar mychar;
	char testC[]="123456";

	// --- myChar: append the 6-character sample ts times ---
	start = clock();
	int lenC=strlen(testC);
	for (long long i = 0; i <ts; i++)
	{
		mychar.cat(testC,lenC);
	}
	total_time = secondsSince(start);
	printf("myChar cat : %Lf secs,%Lf  M/sec\n", total_time, ts / total_time / 1000.0 / 1000);
	charsView(mychar,10,32);

	// --- myChar: indexed reads, 100 * ts of them ---
	start = clock();
	for (long long i = 0; i <reads; i++)
	{
		char temps=mychar.chars[i%ts];
	}
	total_time = secondsSince(start);
	// Throughput is based on the reads actually performed (100 * ts).
	printf("myChar get for 100X %d M times : %Lf secs,%Lf  M/sec\n", ts/1000/1000,total_time, reads / total_time / 1000.0 / 1000);

	// --- std::string: append ---
	start = clock();
	for (long long i = 0; i <ts; i++)
	{
		strpp+=testC;
	}
	total_time = secondsSince(start);
	printf("C++ string add ;%Lf secs,%Lf  M/sec\n", total_time, ts / total_time / 1000.0 / 1000);

	// --- std::string: bounds-checked reads ---
	start = clock();
	for (long long i = 0; i <ts; i++)
	{
		char temps=strpp.at(i);
	}
	total_time = secondsSince(start);
	printf("C++ string get ;%Lf secs,%Lf  M/sec\n", total_time, ts / total_time / 1000.0 / 1000);

	// --- std::vector: push_back ---
	start = clock();
	for (long long i = 0; i <ts; i++)
	{
		vlist.push_back(i);
	}
	total_time = secondsSince(start);
	printf("STD::vector push back ;%Lf secs,%Lf  M/sec\n", total_time, ts / total_time / 1000.0 / 1000);

	// --- std::vector: indexed reads, 100 * ts of them ---
	start = clock();
	for (long long i = 0; i <reads; i++)
	{
		int temp=vlist[i%ts];
	}
	total_time = secondsSince(start);
	printf("STD::vector get for 100X %d M times : %Lf secs,%Lf  M/sec\n", ts/1000/1000,total_time, reads / total_time / 1000.0 / 1000);

	// --- std::unordered_map: insert ---
	start = clock();
	for (long long i = 0; i <ts; i++)
	{
		umap[i]=i;
	}
	total_time = secondsSince(start);
	printf("STD::unordered_map set :%Lf secs,%Lf  M/sec\n", total_time, ts / total_time / 1000.0 / 1000);

	// --- std::unordered_map: lookup ---
	start = clock();
	for (long long i = 0; i <ts; i++)
	{
		long double temp=umap[i];
	}
	total_time = secondsSince(start);
	printf("STD::unordered_map get :%Lf secs,%Lf  M/sec\n", total_time, ts / total_time / 1000.0 / 1000.0);

	// --- C int array: store ---
	start = clock();
	for (long long i = 0; i <ts; i++)
	{
		ls1[i] = i;
	}
	total_time = secondsSince(start);
	printf("C int array set :%Lf secs,%Lf  M/sec\n", total_time, ts / total_time / 1000.0/ 1000);

	// --- C int array: reads, 100 * ts of them ---
	start = clock();
	for (long long i = 0; i <reads; i++)
	{
		int v = ls1[i%ts];
	}
	total_time = secondsSince(start);
	printf("C int array 100X %d M times : %Lf secs,%Lf  M/sec\n", ts/1000/1000,total_time, reads / total_time / 1000.0 / 1000);

	// --- C char array: store ---
	start = clock();
	for (long long i = 0; i <ts; i++)
	{
		cst[i]=i%256;
	}
	total_time = secondsSince(start);
	printf("C char array set : %Lf secs,%Lf  M/sec\n", total_time, ts / total_time / 1000.0 / 1000);

	// --- C char array: reads, 100 * ts of them ---
	start = clock();
	for (long long i = 0; i <reads; i++)
	{
		char v = cst[i%ts];
	}
	total_time = secondsSince(start);
	printf("C char array 100X %d M times : %Lf secs,%Lf  M/sec\n", ts/1000/1000,total_time, reads / total_time / 1000.0 / 1000);

	// NOTE(review): assumes myChar does not release `chars` itself — confirm against mylib.h.
	free(mychar.chars);
	free(ls1);
	free(cst);

	printf("Testing Python 3.1.1 ,run loop for 1,000,000 Times:\n");
	system("python test2.py");
	return 0;
}
	

/**
 * Entry point: runs the benchmark suite with 100 million iterations, then
 * times a download of one file from localhost:8000 into a myChar and prints
 * a sample of the received bytes.
 */
int main()
{
	// 10000*10000 = 100 million iterations per benchmark loop.
	testPerformance(10000*10000);

	// Candidate download targets; only url2 is fetched below.
	char url[]="http://localhost:8000/static/test.mp3";
	char url1[]="http://localhost:8000/static/chat.js";
	char url2[]="http://localhost:8000/static/in.mp4";
	char *uri=url2;

	printf("Testing download file from  localhost and write to File with myChar:\n");

	// clock() ticks before the download, kept as long double so the
	// division below is done in floating point.
	long double began=clock();
	myChar uc=readURL(uri);
	long double elapsed=(clock()-began)/CLOCKS_PER_SEC;

	printf("Downloaed %s in %Lf secs,%Lf MB/sec\n",uri,elapsed,uc.length/elapsed/1000/1000);
	charsView(uc,10,16);

	/*
	Disabled: time writing the downloaded bytes to a file.

	began=clock();
	char path[]="o.mp4";
	writeToFile(path,uc.chars,uc.length);
	elapsed=(clock()-began)/CLOCKS_PER_SEC;
	printf("Data written  to File %s  in %Lf secs,%Lf MB/sec\n",path,elapsed,uc.length/elapsed/1000/1000);
	*/

	free(uc.chars);
	return 0;
}

The C language is incredibly fast! 😱
If the same task was done in Python, it would take days to get the same results.
The test.cpp:


#include "mylib.h"

using namespace std;

/**
 * Runs a fixed series of timing loops — a myChar substring search, then
 * myChar, std::string, std::vector, std::unordered_map and malloc'ed C
 * arrays — printing the elapsed processor time and throughput of each, then
 * launches the Python comparison script.
 *
 * The "get" loops read a value that is never used afterwards, so an
 * optimizing compiler is free to remove them; a near-zero time for those
 * loops may reflect that rather than real memory reads.
 *
 * @param ts  Iterations of the search loop and of every "set"/append loop,
 *            and element count of the two C arrays. The "100X" read loops
 *            run 100 * ts iterations.
 * @return 0 once every benchmark has run, 1 if the C arrays could not be
 *         allocated.
 */
int testPerformance (int ts = 10000 * 1000){
//	print_ulimit_a();

	// ts is an int, so the count is printed with %d.
	printf("Testing Performance for %d Millions times:\n",ts/1000/1000);

	// Iteration count of the "100X" read loops, computed in 64 bits:
	// ts*100 evaluated as int overflows for the 10000*10000 that main() passes.
	const long long reads = 100LL * ts;

	// Processor time consumed since `begin`, in seconds.
	auto secondsSince = [](clock_t begin) -> long double {
		return static_cast<long double>(clock() - begin) / CLOCKS_PER_SEC;
	};

	// Allocate the raw arrays first so a failure can bail out before any work.
	char *cst=static_cast<char*>(malloc(ts*sizeof(char)));
	int *ls1=static_cast<int*>(malloc(ts*sizeof(int)));
	if (ts > 0 && (cst == nullptr || ls1 == nullptr))
	{
		fprintf(stderr,"testPerformance: cannot allocate arrays for %d elements\n",ts);
		free(cst);
		free(ls1);
		return 1;
	}

	clock_t start;
	long double total_time;
	unordered_map<int,long double> umap;
	vector<int> vlist;
	string strpp;
	myChar mychar;
	char testC[]="123456";

	// --- myChar: substring search ---
	// Build "Hello world " repeated 1000*1000 times followed by "!".
	myChar s;
	s.cat("Hello ");
	s.cat("world ");
	myChar s1=s*1000*1000;
	s1.cat("!");

	// Reference answer from strstr, stored as offset + 1 for comparison with myChar::find.
	char *p1=strstr(s1.chars,"Hello world !");
	long long p1p=(long long)(p1-s1.chars+1);

	printf("muChar for search test:\n");
	printf("\n");
	charsView(s1,20,11);

	start=clock();
	for (long long i =0;i<ts;i++){
		if (p1p!=s1.find("Hello world !")){
			// Report the same lookup that failed; both values are printed as long long.
			printf("Test failed: %lld,,%lld\n",p1p,static_cast<long long>(s1.find("Hello world !")));
			break;
		}
	}
	total_time = secondsSince(start);
	printf("myChar search  str with length=%lld for %d Millions times : %Lf secs,%Lf  Billions/sec\n", static_cast<long long>(s1.length),ts/1000/1000,total_time, ts / total_time / 1000.0 / 1000/1000);

	// --- myChar: append the 6-character sample ts times ---
	start = clock();
	int lenC=strlen(testC);
	for (long long i = 0; i <ts; i++)
	{
		mychar.cat(testC,lenC);
	}
	total_time = secondsSince(start);
	printf("myChar cat : %Lf secs,%Lf  M/sec\n", total_time, ts / total_time / 1000.0 / 1000);
	charsView(mychar,10,32);

	// --- myChar: indexed reads, 100 * ts of them ---
	start = clock();
	for (long long i = 0; i <reads; i++)
	{
		char temps=mychar.chars[i%ts];
	}
	total_time = secondsSince(start);
	// Throughput is based on the reads actually performed (100 * ts).
	printf("myChar get for 100X %d M times : %Lf secs,%Lf  Billions/sec\n", ts/1000/1000,total_time, reads / total_time / 1000.0 / 1000/1000);

	// --- std::string: append ---
	start = clock();
	for (long long i = 0; i <ts; i++)
	{
		strpp+=testC;
	}
	total_time = secondsSince(start);
	printf("C++ string add ;%Lf secs,%Lf  M/sec\n", total_time, ts / total_time / 1000.0 / 1000);

	// --- std::string: bounds-checked reads ---
	start = clock();
	for (long long i = 0; i <ts; i++)
	{
		char temps=strpp.at(i);
	}
	total_time = secondsSince(start);
	printf("C++ string get ;%Lf secs,%Lf  M/sec\n", total_time, ts / total_time / 1000.0 / 1000);

	// --- std::vector: push_back ---
	start = clock();
	for (long long i = 0; i <ts; i++)
	{
		vlist.push_back(i);
	}
	total_time = secondsSince(start);
	printf("STD::vector push back ;%Lf secs,%Lf  M/sec\n", total_time, ts / total_time / 1000.0 / 1000);

	// --- std::vector: indexed reads, 100 * ts of them ---
	start = clock();
	for (long long i = 0; i <reads; i++)
	{
		int temp=vlist[i%ts];
	}
	total_time = secondsSince(start);
	printf("STD::vector get for 100X %d M times : %Lf secs,%Lf  Billions/sec\n", ts/1000/1000,total_time, reads / total_time / 1000.0 / 1000/1000);

	// --- std::unordered_map: insert ---
	start = clock();
	for (long long i = 0; i <ts; i++)
	{
		umap[i]=i;
	}
	total_time = secondsSince(start);
	printf("STD::unordered_map set :%Lf secs,%Lf  M/sec\n", total_time, ts / total_time / 1000.0 / 1000);

	// --- std::unordered_map: lookup ---
	start = clock();
	for (long long i = 0; i <ts; i++)
	{
		long double temp=umap[i];
	}
	total_time = secondsSince(start);
	printf("STD::unordered_map get :%Lf secs,%Lf  M/sec\n", total_time, ts / total_time / 1000.0 / 1000.0);

	// --- C int array: store ---
	start = clock();
	for (long long i = 0; i <ts; i++)
	{
		ls1[i] = i;
	}
	total_time = secondsSince(start);
	printf("C int array set :%Lf secs,%Lf  Billions/sec\n", total_time, ts / total_time / 1000.0/ 1000/1000);

	// --- C int array: reads, 100 * ts of them ---
	start = clock();
	for (long long i = 0; i <reads; i++)
	{
		int v = ls1[i%ts];
	}
	total_time = secondsSince(start);
	printf("C int array 100X %d M times : %Lf secs,%Lf  Billions/sec\n", ts/1000/1000,total_time, reads / total_time / 1000.0 / 1000/1000);

	// --- C char array: store ---
	start = clock();
	for (long long i = 0; i <ts; i++)
	{
		cst[i]=i%256;
	}
	total_time = secondsSince(start);
	printf("C char array set : %Lf secs,%Lf  Billions/sec\n", total_time, ts / total_time / 1000.0 /1000/ 1000);

	// --- C char array: reads, 100 * ts of them ---
	start = clock();
	for (long long i = 0; i <reads; i++)
	{
		char v = cst[i%ts];
	}
	total_time = secondsSince(start);
	printf("C char array 100X %d M times : %Lf secs,%Lf  Billions/sec\n", ts/1000/1000,total_time, reads / total_time / 1000.0 /1000/ 1000);

	// mychar.chars is deliberately not freed here (the call is disabled);
	// presumably myChar releases its own buffer — confirm against mylib.h.
	//free(mychar.chars);
	free(ls1);
	free(cst);

	printf("Testing Python 3.1.1 ,run loop for 1,000,000 Times:\n");
	system("python test2.py");
	return 0;
}

		

/**
 * Entry point: runs the benchmark suite with 100 million iterations, then
 * times a download of one file from localhost:8000 into a myChar and prints
 * a sample of the received bytes.
 */
int main()
{
	// 10000*10000 = 100 million iterations per benchmark loop.
	testPerformance(10000*10000);

	// Candidate download targets; only url2 is fetched below.
	char url[]="http://localhost:8000/static/test.mp3";
	char url1[]="http://localhost:8000/static/chat.js";
	char url2[]="http://localhost:8000/static/in.mp4";
	char *uri=url2;

	printf("Testing download file from  localhost and write to File with myChar:\n");

	// clock() ticks before the download, kept as long double so the
	// division below is done in floating point.
	long double began=clock();
	myChar uc=readURL(uri);
	long double elapsed=(clock()-began)/CLOCKS_PER_SEC;

	printf("Downloaed %s in %Lf secs,%Lf MB/sec\n",uri,elapsed,uc.length/elapsed/1000/1000);
	charsView(uc,20,16);

	/*
	Disabled: time writing the downloaded bytes to a file.

	began=clock();
	char path[]="o.mp4";
	writeToFile(path,uc.chars,uc.length);
	elapsed=(clock()-began)/CLOCKS_PER_SEC;
	printf("Data written  to File %s  in %Lf secs,%Lf MB/sec\n",path,elapsed,uc.length/elapsed/1000/1000);
	//*/

	// The buffer is not freed explicitly in this version.
	//free(uc.chars);

	return 0;
}

The test results on Termux:


(fast) brian@localhost:/sdcard/Documents/Cxxdroid$

Testing Performance for 100 Millions times:

muChar for search test:

Size :12.000000 M,   20 sample slices with witdth=11:

Hello world  -...- Hello world  -...- Hello world  -...- Hello world  -...- Hello world  -...- Hello world  -...- Hello world  -...- Hello world  -...- Hello world  -...- Hello world  -...- Hello world  -...- Hello world  -...- Hello world  -...- Hello world  -...- Hello world  -...- Hello world  -...- Hello world  -...- Hello world  -...- Hello world  -...- Hello world  -...- ![0][0][0][0][0][0][0][0][0][0]  -...-

myChar search  str with length=12000001 for 100 Millions times : 0.002188 secs,45.703839  Billions/sec

myChar cat : 0.323255 secs,309.353297  M/sec

Size :600.000000 M,   10 sample slices with witdth=32:

12345612345612345612345612345612  -...- 12345612345612345612345612345612  -...- 12345612345612345612345612345612  -...- 12345612345612345612345612345612  -...- 12345612345612345612345612345612  -...- 12345612345612345612345612345612  -...- 12345612345612345612345612345612  -...- 12345612345612345612345612345612  -...- 12345612345612345612345612345612  -...- 12345612345612345612345612345612  -...-

myChar get for 100X 100 M times : 0.000001 secs,100000.000000  Billions/sec

C++ string add ;0.918199 secs,108.908853  M/sec

C++ string get ;0.022508 secs,4442.864759  M/sec

STD::vector push back ;0.421409 secs,237.299156  M/sec

STD::vector get for 100X 100 M times : 0.000000 secs,inf  Billions/sec

STD::unordered_map set :9.055981 secs,11.042426  M/sec

STD::unordered_map get :0.368186 secs,271.601853  M/sec

C int array set :0.167584 secs,0.596716  Billions/sec

C int array 100X 100 M times : 0.000001 secs,100000.000000  Billions/sec

C char array set : 0.047238 secs,2.116940  Billions/sec

C char array 100X 100 M times : 0.000001 secs,100000.000000  Billions/sec

Testing Python 3.1.1 ,run loop for 1,000,000 Times:

Run  testPythonStrAdd  loop for  1000000 times in 0.039954 secs, 25.028816 M/secs

Run  testPythonStrGet  loop for  1000000 times in 0.035021 secs, 28.554242 M/secs

Run  testNumpyArrayPut  loop for  1000000 times in 0.074087 secs, 13.497619 M/secs

Run  testNumpyArrayGet  loop for  1000000 times in 0.106379 secs, 9.400370 M/secs

Run  testPythonListAppend  loop for  1000000 times in 0.051809 secs, 19.301728 M/secs

Run  testPythonListGet  loop for  1000000 times in 0.026272 secs, 38.062907 M/secs

Run  testPythonDictSet  loop for  1000000 times in 0.814699 secs, 1.227448 M/secs

Run  testPythonDictGet  loop for  1000000 times in 0.586921 secs, 1.703806 M/secs

Run  testFileDictSet  loop for  1000000 times in 4.468566 secs, 0.223785 M/secs

Run  testFileDictGet  loop for  1000000 times in 4.197674 secs, 0.238227 M/secs

Testing download file from  localhost and write to File with myChar:

Downloaed http://localhost:8000/static/in.mp4 in 0.969424 secs,188.828987 MB/sec

Size :183.055000 M,   20 sample slices with witdth=16:

[0][0][0][18]ftypmp42[0][0][0][0]  -...- [F7][D][C6][B1]m[9F][CC][BE][93][82][E3]<[E9]vb[BB]  -...- [6][9E][D1][A1][A9][DA][D3][7]A[19][A6][FE][D1][C8][8A][F7]  -...- [7F][E1][E]:[A4][C0][D5]~[13][CF][B3][BA][A6]q[C9][A2]  -...- [D8][83]D=[A5][CB][98]Qw[C4][EA]c[AC])[91][F4]  -...- .n%"[E7]8]%[10]>[CD]r[DF][7].[B7]  -...- [E0][EE]K[6][B9][1F][10][FC][D6][[9E]([B1][1B][9E][E2]  -...- [AB][EE][E4]65[C5][B1]8[EC][99][97]~[E2][EF]0t  -...- [B1][DF][83][EB][E3][D6]xXv[CF][F8][86][F9][D4]T!  -...- [80][F2]<k[E7]^[9C][7]@[DF]_[9F][1B][D3]qI  -...- w[C7][AD][4]qq[A0]p[1E][9B][8C]5J#[E4][CF]  -...-  ;;[D5][8C][E1][15][FC])[D6][DF][A3]=[DA]Q[80]  -...- [DE][BA][8B][E5][8A]J[E5][E5]g6[9C]0^[AB][D1]/  -...- '[96]%P[B5]~[EF]g[FA][C0]5p[87][97]'j  -...- pqT[B0][CC]:[F7][1C][B2]+Q[88][FE]f b  -...- v[82][1F]wY[B3][DF]C%[8F][0]m^[19][FB][98]  -...-  [F9][1A]+[D3][D9][DA][9B]?[90][CF][1F][DC]l[C6]q  -...- [D9][AF]J[93][E3]a[E]H@8[83][18][F2][DF]#C  -...- [F2][9B]Zq[9B]a[E8]][BC][9D][18][FB][AE]Sk[D1]  -...- P[CE][83][D][CD][94]Y[B4][E0][8E][C][B7][CF][B][F4]6  -...- Rs[16]d[AA]c[C4][B2][13]#[A9]o[0][0][0][0]  -...-

沒有留言:

發佈留言