import time

# perf_counter is a monotonic, high-resolution clock intended for measuring
# short intervals; time.time() can be too coarse for sub-second timings.
start = time.perf_counter()
# Running total of the loop.
total = 0
# Add up the 1.5 million integers 0..1499999 one at a time in pure Python.
for item in range(0, 1500000):
    total = total + item
print('sum is:' + str(total))
end = time.perf_counter()
print(end - start)
# Expected sum: 1124999250000
# Reported run time: 0.14 Seconds
1.
2.
3.
4.
5.
6.
7.
8.
9.
10.
11.
12.
13.
14.
15.
16.
使用向量化
import time

import numpy as np

# perf_counter is a monotonic, high-resolution clock; the interval measured
# here is only a few milliseconds.
start = time.perf_counter()
# Vectorized sum: np.arange builds the integers 0..1499999 as one array and
# np.sum adds them up inside NumPy instead of in a Python loop.
# dtype=np.int64 is explicit because the total (1124999250000) does not fit
# in 32 bits, and NumPy's default integer is 32-bit on some platforms.
vectorized_total = np.sum(np.arange(1500000, dtype=np.int64))
print(vectorized_total)
end = time.perf_counter()
print(end - start)
## Expected sum: 1124999250000
## Reported run time: 0.008 Seconds
import numpy as np
import pandas as pd

# Sample data: 5,000,000 rows by 4 columns ('a', 'b', 'c', 'd') filled with
# random integers drawn from 0 (inclusive) to 50 (exclusive).
df = pd.DataFrame(
    np.random.randint(low=0, high=50, size=(5000000, 4)),
    columns=tuple('abcd'),
)
# Notebook-style echo of the frame's dimensions.
df.shape
# (5000000, 4)
# Notebook-style echo of the first rows.
df.head()
1.
2.
3.
4.
5.
6.
7.
8.
我们将创建一个新的列'ratio',以找到列'd'和'c'的比率。
使用循环
import time

start = time.time()
# Row-by-row pass over the DataFrame via iterrows (the slow baseline).
for row_label, record in df.iterrows():
    # 'ratio' is column 'd' divided by column 'c', scaled by 100; writing to
    # it cell by cell adds the new column to the frame.
    quotient = record["d"] / record["c"]
    df.at[row_label, 'ratio'] = 100 * quotient
end = time.time()
print(end - start)
### Reported run time: 109 Seconds
import time

start = time.time()
# Row-by-row pass over the DataFrame via iterrows (the slow baseline).
for row_label, record in df.iterrows():
    # Choose the value for column 'e' from the row's 'a' value:
    #   a == 0       -> d
    #   0 < a <= 25  -> b - c
    #   otherwise    -> b + c
    if record.a == 0:
        new_value = record.d
    elif 0 < record.a <= 25:
        new_value = record.b - record.c
    else:
        new_value = record.b + record.c
    df.at[row_label, 'e'] = new_value
end = time.time()
print(end - start)
### Time taken: 177 seconds
>>> import numpy as np
>>> # 设置 m 的初始值
>>> m = np.random.rand(1,5)
array([[0.49976103,0.33991827,0.60596021,0.78518515,0.5540753]])
>>> # 500万行的输入值
>>> x = np.random.rand(5000000,5)
1.
2.
3.
4.
5.
6.
使用循环
import time

import numpy as np


def weighted_row_sums_loop(x, m):
    """Return the weighted sum of every row of *x*, computed with Python loops.

    This is the deliberately slow, non-vectorized baseline: for each row ``i``
    it accumulates ``x[i][j] * m[0][j]`` over all columns ``j``.

    Args:
        x: 2-D array of input values, one sample per row.
        m: 2-D array whose first row holds one weight per column of *x*.

    Returns:
        1-D float array with one weighted sum per row of *x*.
    """
    n_rows, n_cols = x.shape
    # The output buffer must exist before the loop writes into it by index.
    zer = np.zeros(n_rows)
    for i in range(n_rows):
        total = 0
        for j in range(n_cols):
            total = total + x[i][j] * m[0][j]
        zer[i] = total
    return zer


if __name__ == "__main__":
    # Initial weights: one row of 5 random values.
    m = np.random.rand(1, 5)
    # Input values: 5 million rows of 5 random values each.
    x = np.random.rand(5000000, 5)
    # process_time counts CPU time of this process, excluding sleep.
    tic = time.process_time()
    zer = weighted_row_sums_loop(x, m)
    toc = time.process_time()
    print("Computation time = " + str((toc - tic)) + "seconds")
    #### Reported: Computation time = 28.228 seconds