Activation |
{'data': (1024, 1024), 'act_type': 'relu'} |
2384978.25 |
0.7228 |
0.865 |
Activation |
{'data': (1024, 1024), 'act_type': 'sigmoid'} |
2389172.5 |
1.0898 |
0.5484 |
Activation |
{'data': (1024, 1024), 'act_type': 'softrelu'} |
2393366.75 |
2.2916 |
1.4594 |
Activation |
{'data': (1024, 1024), 'act_type': 'softsign'} |
2397561.25 |
0.3866 |
0.673 |
Activation |
{'data': (1024, 1024), 'act_type': 'tanh'} |
2401755.5 |
2.7828 |
0.635 |
BatchNorm |
{'data': (32, 3, 256, 256), 'gamma': (3,), 'beta': (3,), 'moving_mean': (3,), 'moving_var': (3,), 'eps': 1e-08, 'axis': 1} |
681461.125 |
115.42 |
63.3374 |
BatchNorm |
{'data': (32, 3, 10000, 10), 'gamma': (3,), 'beta': (3,), 'moving_mean': (3,), 'moving_var': (3,), 'eps': 1e-08, 'axis': 1} |
719861.125 |
201.6882 |
110.1308 |
BilinearSampler |
{'data': (32, 2, 256, 256), 'grid': (32, 2, 256, 256)} |
16777.2168 |
316.345 |
344.09 |
BlockGrad |
{'data': (1024, 1024)} |
4194.3042 |
0.3772 |
--- |
BlockGrad |
{'data': (10000, 1)} |
20.0 |
0.0464 |
--- |
BlockGrad |
{'data': (10000, 100)} |
2000.0 |
0.3654 |
--- |
CTCLoss |
{'data': (1024, 100, 100), 'label': (100, 100)} |
2774390.5 |
79.762 |
--- |
Convolution |
{'data': (32, 3, 64, 64), 'weight': (1, 3, 3, 3), 'bias': (1,), 'kernel': (3, 3), 'stride': (1, 1), 'dilate': (1, 1), 'pad': (0, 0), 'num_filter': 1, 'layout': 'NCHW'} |
2630710.25 |
21.6526 |
46.2242 |
Correlation |
{'data1': (32, 3, 256, 256), 'data2': (32, 3, 256, 256), 'kernel_size': 3, 'max_displacement': 2, 'stride1': 2, 'stride2': 2} |
806192.75 |
36433.4375 |
66967.7266 |
Correlation |
{'data1': (32, 3, 10000, 10), 'data2': (32, 3, 10000, 10), 'kernel_size': 3, 'max_displacement': 2, 'stride1': 2, 'stride2': 2} |
821940.0625 |
25530.0332 |
43059.7461 |
Custom |
{'args': [(1024, 1024)], 'op_type': 'CustomAddOne'} |
3090566.0 |
0.0156 |
0.024 |
Custom |
{'args': [(10000, 1)], 'op_type': 'CustomAddOne'} |
3090566.0 |
0.0152 |
0.0242 |
Custom |
{'args': [(10000, 10)], 'op_type': 'CustomAddOne'} |
3090566.0 |
0.015 |
0.0236 |
Deconvolution |
{'data': (32, 3, 64, 64), 'weight': (3, 1, 3, 3), 'bias': (1,), 'kernel': (3, 3), 'stride': (1, 1), 'pad': (0, 0), 'num_filter': 1, 'no_bias': False, 'layout': 'NCHW'} |
2733708.75 |
45.7484 |
50.2232 |
Dropout |
{'data': (32, 3, 256, 256), 'p': 1, 'mode': 'always', 'axes': [0, 1]} |
872533.8125 |
6.0944 |
5.1082 |
Dropout |
{'data': (10000, 10), 'p': 1, 'mode': 'always', 'axes': [0, 1]} |
860350.9375 |
0.2554 |
0.2298 |
ElementWiseSum |
{'args': (1024, 1024)} |
2097.1521 |
0.263 |
--- |
Embedding |
{'data': (1024, 1024), 'weight': (3, 4), 'input_dim': 3, 'output_dim': 4, 'dtype': 'float32', 'sparse_grad': False} |
885516.75 |
0.7202 |
--- |
Embedding |
{'data': (10000, 1), 'weight': (3, 4), 'input_dim': 3, 'output_dim': 4, 'dtype': 'int32', 'sparse_grad': False} |
877288.125 |
0.0738 |
--- |
Embedding |
{'data': (10000, 100), 'weight': (3, 4), 'input_dim': 3, 'output_dim': 4, 'dtype': 'float32', 'sparse_grad': False} |
893208.125 |
0.8232 |
--- |
FullyConnected |
{'data': (32, 3, 256, 256), 'weight': (64, 196608), 'bias': (64,), 'num_hidden': 64, 'flatten': True} |
885224.5 |
1.834 |
4.1142 |
FullyConnected |
{'data': (32, 3, 10000, 10), 'weight': (64, 10), 'bias': (64,), 'num_hidden': 64, 'flatten': False} |
1130984.5 |
73.6848 |
227.4092 |
GridGenerator |
{'data': (32, 2, 256, 256), 'transform_type': 'warp', 'target_shape': (256, 6)} |
8650.752 |
53.4568 |
37.5954 |
GridGenerator |
{'data': (256, 6), 'transform_type': 'affine', 'target_shape': (256, 6)} |
1582.08 |
0.7358 |
0.233 |
GroupNorm |
{'data': (32, 3, 256, 256), 'gamma': (1,), 'beta': (1,), 'num_groups': 1, 'eps': 1e-08} |
1181316.375 |
56.5254 |
772.9122 |
GroupNorm |
{'data': (32, 10, 10000, 10), 'gamma': (10,), 'beta': (10,), 'num_groups': 10, 'eps': 1e-08} |
1431097.125 |
367.3502 |
1481.5076 |
InstanceNorm |
{'data': (32, 3, 256, 256), 'gamma': (3,), 'beta': (3,), 'eps': 1e-08} |
1417429.625 |
75.6394 |
946.3446 |
InstanceNorm |
{'data': (32, 3, 10000, 10), 'gamma': (3,), 'beta': (3,), 'eps': 1e-08} |
1455830.0 |
114.426 |
1387.3988 |
L2Normalization |
{'data': (32, 3, 256, 256), 'eps': 1e-08, 'mode': 'channel'} |
1514550.25 |
98.5754 |
70.6352 |
L2Normalization |
{'data': (32, 3, 256, 256), 'eps': 1e-08, 'mode': 'instance'} |
1539716.125 |
389.3556 |
37.6872 |
L2Normalization |
{'data': (32, 3, 256, 256), 'eps': 1e-08, 'mode': 'spatial'} |
1564882.125 |
254.9056 |
54.3488 |
LRN |
{'data': (32, 3, 256, 256), 'alpha': 0.001, 'beta': 0.2, 'nsize': 3} |
1640379.625 |
55.5114 |
106.595 |
LRN |
{'data': (32, 3, 10000, 10), 'alpha': 0.001, 'beta': 0.2, 'nsize': 3} |
1697979.625 |
75.2792 |
151.8046 |
LayerNorm |
{'data': (32, 3, 256, 256), 'gamma': (32,), 'beta': (32,), 'axis': 0, 'eps': 1e-08} |
1749884.125 |
70.3072 |
110.3524 |
LayerNorm |
{'data': (32, 3, 10000, 10), 'gamma': (32,), 'beta': (32,), 'axis': 0, 'eps': 1e-08} |
1788697.625 |
94.5508 |
135.7664 |
LeakyReLU |
{'data': (1024, 1024), 'act_type': 'leaky'} |
2408047.0 |
0.6562 |
0.8422 |
LeakyReLU |
{'data': (1024, 1024), 'act_type': 'elu'} |
2412241.25 |
1.1406 |
0.8146 |
LeakyReLU |
{'data': (1024, 1024), 'act_type': 'selu'} |
2416435.5 |
1.1348 |
0.7706 |
LeakyReLU |
{'data': (1024, 1024), 'act_type': 'gelu'} |
2420629.75 |
1.276 |
2.518 |
LinearRegressionOutput |
{'data': (32, 3, 256, 256), 'label': (32, 3, 256, 256), 'grad_scale': 0.5} |
1813863.5 |
1.671 |
--- |
LinearRegressionOutput |
{'data': (32, 3, 10000, 10), 'label': (32, 3, 10000, 10), 'grad_scale': 0.5} |
1852263.5 |
5.7176 |
--- |
LogisticRegressionOutput |
{'data': (32, 3, 256, 256), 'label': (32, 3, 256, 256), 'grad_scale': 0.5} |
1858229.375 |
6.4698 |
--- |
LogisticRegressionOutput |
{'data': (32, 3, 10000, 10), 'label': (32, 3, 10000, 10), 'grad_scale': 0.5} |
1896629.375 |
12.3032 |
--- |
MAERegressionOutput |
{'data': (32, 3, 256, 256), 'label': (32, 3, 256, 256), 'grad_scale': 0.5} |
1902595.125 |
1.2198 |
--- |
MAERegressionOutput |
{'data': (32, 3, 10000, 10), 'label': (32, 3, 10000, 10), 'grad_scale': 0.5} |
1940995.125 |
5.4376 |
--- |
MakeLoss |
{'data': (1024, 1024), 'grad_scale': 0.5, 'normalization': 'batch'} |
2760202.0 |
0.8252 |
0.533 |
MakeLoss |
{'data': (10000, 1), 'grad_scale': 0.5, 'normalization': 'batch'} |
2758144.75 |
0.0852 |
0.0522 |
MakeLoss |
{'data': (10000, 100), 'grad_scale': 0.5, 'normalization': 'batch'} |
2762124.75 |
0.7328 |
0.536 |
Pooling |
{'data': (32, 3, 64, 64), 'kernel': (3, 3), 'pool_type': 'sum', 'global_pool': 1, 'stride': (1, 1), 'pad': (0, 0)} |
2613117.0 |
2.1902 |
4.168 |
RNN |
{'data': (1024, 4, 4), 'parameters': (21,), 'state': (1, 4, 1), 'mode': 'gru', 'state_size': 1, 'num_layers': 1} |
631129.4375 |
4.9598 |
13.34 |
ROIPooling |
{'data': (32, 3, 64, 64), 'rois': (32, 5), 'pooled_size': (2, 2), 'spatial_scale': 0.5} |
2613129.25 |
0.2276 |
0.2412 |
SVMOutput |
{'data': (32, 3, 256, 256), 'label': (32, 3, 256), 'margin': 0.5, 'regularization_coefficient': 0.5} |
1959543.875 |
15.6004 |
626.5028 |
SVMOutput |
{'data': (32, 3, 10000, 10), 'label': (32, 3, 10000), 'margin': 0.5, 'regularization_coefficient': 0.5} |
1985361.0 |
34.6542 |
989.042 |
SequenceLast |
{'data': (1024, 1024), 'axis': 0} |
3090572.25 |
0.0806 |
--- |
SequenceLast |
{'data': (10000, 1), 'axis': 0} |
3090570.25 |
0.0662 |
--- |
SequenceLast |
{'data': (10000, 100), 'axis': 0} |
3090570.5 |
0.07 |
--- |
SequenceMask |
{'data': (1024, 1024), 'axis': 0} |
3098959.0 |
0.8606 |
0.8282 |
SequenceMask |
{'data': (10000, 1), 'axis': 0} |
3098999.0 |
0.141 |
0.076 |
SequenceMask |
{'data': (10000, 100), 'axis': 0} |
3102979.0 |
0.7812 |
0.8404 |
SequenceReverse |
{'data': (1024, 1024), 'axis': 0} |
3111367.5 |
3.2912 |
3.4404 |
SequenceReverse |
{'data': (10000, 1), 'axis': 0} |
3111407.5 |
0.08 |
0.068 |
SequenceReverse |
{'data': (10000, 100), 'axis': 0} |
3115407.5 |
3.7258 |
3.6228 |
Softmax |
{'data': (1024, 1024), 'label': (1024, 1024), 'grad_scale': 0.5, 'normalization': 'batch'} |
2426921.25 |
1.896 |
--- |
Softmax |
{'data': (10000, 1), 'label': (10000, 1), 'grad_scale': 0.5, 'normalization': 'batch'} |
2424864.25 |
0.0936 |
--- |
Softmax |
{'data': (10000, 100), 'label': (10000, 100), 'grad_scale': 0.5, 'normalization': 'batch'} |
2428844.25 |
1.8644 |
--- |
SoftmaxActivation |
{'data': (1024, 1024)} |
2435232.75 |
1.8788 |
5.9668 |
SoftmaxActivation |
{'data': (10000, 1)} |
2435272.75 |
0.0892 |
0.137 |
SoftmaxActivation |
{'data': (10000, 100)} |
2439272.75 |
1.836 |
5.7544 |
SoftmaxOutput |
{'data': (32, 3, 256, 256), 'label': (32, 3, 256), 'grad_scale': 0.5, 'normalization': 'batch'} |
2003909.75 |
11.9566 |
20.0664 |
SoftmaxOutput |
{'data': (32, 3, 10000, 10), 'label': (32, 3, 10000), 'grad_scale': 0.5, 'normalization': 'batch'} |
2029726.75 |
21.7382 |
32.387 |
SpatialTransformer |
{'data': (32, 3, 256, 6), 'loc': (32, 6), 'target_shape': (32, 6), 'transform_type': 'affine', 'sampler_type': 'bilinear'} |
2010730.25 |
1.6454 |
1.7422 |
SpatialTransformer |
{'data': (256, 3, 10000, 6), 'loc': (256, 6), 'target_shape': (256, 6), 'transform_type': 'affine', 'sampler_type': 'bilinear'} |
2017063.25 |
93.8736 |
123.4996 |
UpSampling |
{'args': (32, 3, 256, 256), 'scale': 2, 'sample_type': 'nearest'} |
3029126.0 |
56.0206 |
14.326 |
UpSampling |
{'args': (32, 3, 10000, 1), 'scale': 4, 'sample_type': 'nearest'} |
3090566.0 |
31.3164 |
4.368 |
abs |
{'data': (1024, 1024)} |
2097.1521 |
0.3822 |
0.7348 |
abs |
{'data': (10000, 1)} |
20.0 |
0.0594 |
0.047 |
abs |
{'data': (10000, 100)} |
2000.0 |
0.3562 |
3.898 |
adam_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'mean': (1024, 1024), 'var': (1024, 1024), 'lr': 0.1, 'beta1': 0.1, 'beta2': 0.1, 'epsilon': 1e-08, 'wd': 0.1, 'rescale_grad': 0.4, 'lazy_update': 0} |
2637001.75 |
1.4958 |
--- |
adam_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'mean': (10000, 1), 'var': (10000, 1), 'lr': 0.5, 'beta1': 0.5, 'beta2': 0.5, 'epsilon': 1e-08, 'wd': 0.5, 'rescale_grad': 0.4, 'lazy_update': 0} |
2634944.5 |
0.072 |
--- |
adam_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'mean': (10000, 100), 'var': (10000, 100), 'lr': 0.9, 'beta1': 0.9, 'beta2': 0.9, 'epsilon': 1e-08, 'wd': 0.9, 'rescale_grad': 0.4, 'lazy_update': 0} |
2638924.5 |
1.691 |
--- |
add_n |
{'args': [(1024, 1024)]} |
2827799.5 |
0.0852 |
--- |
add_n |
{'args': [(10000, 1)]} |
2827799.5 |
0.0908 |
--- |
add_n |
{'args': [(10000, 10)]} |
2827799.5 |
0.088 |
--- |
all_finite |
{'data': (1024, 1024)} |
3115407.5 |
0.513 |
--- |
all_finite |
{'data': (10000, 1)} |
3115407.5 |
0.0512 |
--- |
all_finite |
{'data': (10000, 100)} |
3115407.5 |
0.4422 |
--- |
amp_cast |
{'data': (1024, 1024), 'dtype': 'float32'} |
4194.3042 |
10.4834 |
10.5186 |
amp_cast |
{'data': (10000, 1), 'dtype': 'int32'} |
40.0 |
0.1928 |
0.1788 |
amp_cast |
{'data': (10000, 100), 'dtype': 'float32'} |
4000.0 |
13.9772 |
13.9576 |
amp_multicast |
{'args': [(1024, 1024)], 'num_outputs': 1} |
0.004 |
0.2156 |
0.2026 |
amp_multicast |
{'args': [(10000, 1)], 'num_outputs': 1} |
0.004 |
0.2292 |
0.181 |
arccos |
{'data': (1024, 1024)} |
2097.1521 |
1.6562 |
1.3386 |
arccos |
{'data': (10000, 1)} |
20.0 |
0.1452 |
0.1288 |
arccos |
{'data': (10000, 100)} |
2000.0 |
8.8986 |
5.639 |
arccosh |
{'data': (1024, 1024)} |
2097.1521 |
1.1394 |
1.3064 |
arccosh |
{'data': (10000, 1)} |
20.0 |
0.0616 |
0.069 |
arccosh |
{'data': (10000, 100)} |
2000.0 |
0.9422 |
1.1244 |
arcsin |
{'data': (1024, 1024)} |
2097.1521 |
1.2086 |
0.9062 |
arcsin |
{'data': (10000, 1)} |
20.0 |
0.077 |
0.085 |
arcsin |
{'data': (10000, 100)} |
2000.0 |
0.9846 |
0.7116 |
arcsinh |
{'data': (1024, 1024)} |
2097.1521 |
1.6842 |
0.555 |
arcsinh |
{'data': (10000, 1)} |
20.0 |
0.1352 |
0.0658 |
arcsinh |
{'data': (10000, 100)} |
2000.0 |
1.583 |
0.4958 |
arctan |
{'data': (1024, 1024)} |
2097.1521 |
1.1826 |
0.3882 |
arctan |
{'data': (10000, 1)} |
20.0 |
0.0782 |
0.063 |
arctan |
{'data': (10000, 100)} |
2000.0 |
1.1172 |
0.3486 |
arctanh |
{'data': (1024, 1024)} |
2097.1521 |
1.545 |
0.453 |
arctanh |
{'data': (10000, 1)} |
20.0 |
0.0818 |
0.0614 |
arctanh |
{'data': (10000, 100)} |
2000.0 |
1.4678 |
0.3426 |
argmax |
{'data': (1024, 1024), 'axis': 0} |
587563.5625 |
64.056 |
--- |
argmax |
{'data': (10000, 1), 'axis': 0} |
587561.5 |
0.5038 |
--- |
argmax |
{'data': (10000, 100), 'axis': 0} |
587561.875 |
47.991 |
--- |
argmax_channel |
{'data': (1024, 1024)} |
2.048 |
1.1966 |
--- |
argmax_channel |
{'data': (10000, 1)} |
20.0 |
0.1198 |
--- |
argmax_channel |
{'data': (10000, 100)} |
20.0 |
1.1084 |
--- |
argmin |
{'data': (1024, 1024), 'axis': 0} |
587565.8125 |
65.7178 |
--- |
argmin |
{'data': (10000, 1), 'axis': 0} |
587563.75 |
0.4894 |
--- |
argmin |
{'data': (10000, 100), 'axis': 0} |
587564.125 |
48.31 |
--- |
argsort |
{'data': (1024, 1024), 'axis': 0, 'dtype': 'float32'} |
593855.4375 |
139.7702 |
--- |
argsort |
{'data': (10000, 1), 'axis': 0, 'dtype': 'int32'} |
591798.25 |
4.1624 |
--- |
argsort |
{'data': (10000, 100), 'axis': 0, 'dtype': 'float32'} |
595778.25 |
139.1118 |
--- |
batch_dot |
{'lhs': (32, 1024, 1024), 'rhs': (32, 1024, 1024)} |
134217.7344 |
108.0773 |
--- |
batch_dot |
{'lhs': (32, 1000, 10), 'rhs': (32, 1000, 10), 'transpose_b': True} |
128000.0 |
29.1314 |
--- |
batch_dot |
{'lhs': (32, 1000, 1), 'rhs': (32, 100, 1000), 'transpose_a': True, 'transpose_b': True} |
6.4 |
1.0532 |
--- |
broadcast_add |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.3246 |
0.462 |
broadcast_add |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.1634 |
0.2326 |
broadcast_add |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.0758 |
0.0672 |
broadcast_div |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.352 |
0.7318 |
broadcast_div |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.1008 |
0.1728 |
broadcast_div |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.0806 |
0.0774 |
broadcast_equal |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.364 |
--- |
broadcast_equal |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.1016 |
--- |
broadcast_equal |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.072 |
--- |
broadcast_greater |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.5948 |
--- |
broadcast_greater |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.1164 |
--- |
broadcast_greater |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.0752 |
--- |
broadcast_greater_equal |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.545 |
--- |
broadcast_greater_equal |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.119 |
--- |
broadcast_greater_equal |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.073 |
--- |
broadcast_hypot |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.7428 |
1.1842 |
broadcast_hypot |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.133 |
0.2014 |
broadcast_hypot |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.0782 |
0.0878 |
broadcast_lesser |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.5408 |
--- |
broadcast_lesser |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.118 |
--- |
broadcast_lesser |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.0726 |
--- |
broadcast_lesser_equal |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.5238 |
--- |
broadcast_lesser_equal |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.117 |
--- |
broadcast_lesser_equal |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.0736 |
--- |
broadcast_logical_and |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.3152 |
--- |
broadcast_logical_and |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.0986 |
--- |
broadcast_logical_and |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.0886 |
--- |
broadcast_logical_or |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.2852 |
--- |
broadcast_logical_or |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.0962 |
--- |
broadcast_logical_or |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.0728 |
--- |
broadcast_logical_xor |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.353 |
--- |
broadcast_logical_xor |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.1 |
--- |
broadcast_logical_xor |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.0738 |
--- |
broadcast_maximum |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.6752 |
1.1148 |
broadcast_maximum |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.129 |
0.1938 |
broadcast_maximum |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.0778 |
0.08 |
broadcast_minimum |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.6984 |
1.1264 |
broadcast_minimum |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.1392 |
0.1816 |
broadcast_minimum |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.0792 |
0.0796 |
broadcast_minus |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.2866 |
--- |
broadcast_minus |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.0958 |
--- |
broadcast_minus |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.0708 |
--- |
broadcast_mod |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
1.927 |
0.614 |
broadcast_mod |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.261 |
0.1556 |
broadcast_mod |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.091 |
0.0774 |
broadcast_mul |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.3386 |
0.6104 |
broadcast_mul |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.098 |
0.138 |
broadcast_mul |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.076 |
0.0768 |
broadcast_not_equal |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.3224 |
--- |
broadcast_not_equal |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.1526 |
--- |
broadcast_not_equal |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.0732 |
--- |
broadcast_plus |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.2888 |
--- |
broadcast_plus |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.0956 |
--- |
broadcast_plus |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.0732 |
--- |
broadcast_power |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
2.3882 |
4.8424 |
broadcast_power |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.3156 |
0.5312 |
broadcast_power |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.0988 |
0.1278 |
broadcast_sub |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.3428 |
0.3946 |
broadcast_sub |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.1146 |
0.1008 |
broadcast_sub |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.0756 |
0.067 |
cast |
{'data': (1024, 1024), 'dtype': 'float32'} |
2097.1521 |
--- |
9.0584 |
cast |
{'data': (10000, 1), 'dtype': 'int32'} |
20.0 |
--- |
0.1752 |
cast |
{'data': (10000, 100), 'dtype': 'float32'} |
2000.0 |
--- |
9.3458 |
cast_storage |
{'data': (1024, 1024), 'stype': 'default'} |
3121699.0 |
0.3788 |
--- |
cast_storage |
{'data': (10000, 1), 'stype': 'csr'} |
3119802.0 |
0.1914 |
--- |
cast_storage |
{'data': (10000, 100), 'stype': 'row_sparse'} |
3123782.0 |
1.4672 |
--- |
cbrt |
{'data': (1024, 1024)} |
2097.1521 |
1.75 |
0.568 |
cbrt |
{'data': (10000, 1)} |
20.0 |
0.1388 |
0.1018 |
cbrt |
{'data': (10000, 100)} |
2000.0 |
1.6354 |
0.531 |
ceil |
{'data': (1024, 1024)} |
2097.1521 |
0.4062 |
--- |
ceil |
{'data': (10000, 1)} |
20.0 |
0.0482 |
--- |
ceil |
{'data': (10000, 100)} |
2000.0 |
0.3962 |
--- |
choose_element_0index |
{'data': (1024, 1024), 'index': (1, 1024), 'axis': 0} |
2.048 |
0.0712 |
--- |
choose_element_0index |
{'data': (10000, 1), 'index': (1, 1), 'axis': 0} |
0.002 |
0.0664 |
--- |
choose_element_0index |
{'data': (10000, 100), 'index': (1, 100), 'axis': 0} |
0.2 |
0.0676 |
--- |
clip |
{'data': (1024, 1024), 'a_min': 0.1, 'a_max': 0.9} |
3130130.5 |
0.6922 |
0.7268 |
clip |
{'data': (10000, 1), 'a_min': 0.1, 'a_max': 0.9} |
3130170.5 |
0.0574 |
0.0464 |
clip |
{'data': (10000, 100), 'a_min': 0.1, 'a_max': 0.9} |
3134150.5 |
0.6488 |
0.6738 |
col2im |
{'data': (32, 64, 256), 'output_size': (64, 16, 1), 'kernel': (1, 1, 1), 'stride': (2, 2, 2)} |
2029646.125 |
166.2192 |
162.1108 |
col2im |
{'data': (32, 64, 256), 'output_size': (32, 8, 1), 'kernel': (1, 1, 1), 'stride': (1, 1, 1)} |
2031743.25 |
175.4904 |
155.4888 |
cos |
{'data': (1024, 1024)} |
2097.1521 |
0.9848 |
1.1088 |
cos |
{'data': (10000, 1)} |
20.0 |
0.0616 |
0.0554 |
cos |
{'data': (10000, 100)} |
2000.0 |
0.9174 |
1.0262 |
cosh |
{'data': (1024, 1024)} |
2097.1521 |
1.2668 |
2.0346 |
cosh |
{'data': (10000, 1)} |
20.0 |
0.1184 |
0.0736 |
cosh |
{'data': (10000, 100)} |
2000.0 |
1.1884 |
1.8682 |
ctc_loss |
{'data': (1024, 100, 100), 'label': (100, 100)} |
2821565.25 |
85.396 |
--- |
cumsum |
{'a': (1024, 1024), 'axis': 0, 'dtype': 'float32'} |
3140442.0 |
0.997 |
--- |
cumsum |
{'a': (1024, 1024), 'axis': 0, 'dtype': 'int32'} |
3142539.25 |
1.0286 |
--- |
cumsum |
{'a': (1024, 1024), 'axis': 0, 'dtype': 'float32'} |
3144636.25 |
1.024 |
--- |
degrees |
{'data': (1024, 1024)} |
2097.1521 |
0.4248 |
0.439 |
degrees |
{'data': (10000, 1)} |
20.0 |
0.0746 |
0.0662 |
degrees |
{'data': (10000, 100)} |
2000.0 |
0.3282 |
0.3908 |
depth_to_space |
{'data': (1, 4, 2, 4), 'block_size': 2} |
600037.0625 |
0.0542 |
--- |
depth_to_space |
{'data': (10, 25, 10, 100), 'block_size': 5} |
601037.0 |
1.5644 |
--- |
dot |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
2.3748 |
5.0332 |
dot |
{'lhs': (1000, 10), 'rhs': (1000, 10), 'transpose_b': True} |
2000.0 |
0.284 |
0.3936 |
dot |
{'lhs': (1000, 1), 'rhs': (100, 1000), 'transpose_a': True, 'transpose_b': True} |
0.2 |
0.2288 |
0.1886 |
elemwise_add |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.2972 |
--- |
elemwise_add |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.0982 |
--- |
elemwise_add |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.1844 |
--- |
elemwise_div |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.296 |
--- |
elemwise_div |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.1128 |
--- |
elemwise_div |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.0862 |
--- |
elemwise_mul |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.304 |
--- |
elemwise_mul |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.0918 |
--- |
elemwise_mul |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.071 |
--- |
elemwise_sub |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.3036 |
--- |
elemwise_sub |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.224 |
--- |
elemwise_sub |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.078 |
--- |
erf |
{'data': (1024, 1024)} |
2097.1521 |
1.1984 |
0.8734 |
erf |
{'data': (10000, 1)} |
20.0 |
0.0782 |
0.0616 |
erf |
{'data': (10000, 100)} |
2000.0 |
1.1964 |
0.8418 |
erfinv |
{'data': (1024, 1024)} |
2097.1521 |
4.2082 |
1.4468 |
erfinv |
{'data': (10000, 1)} |
20.0 |
0.0902 |
0.0522 |
erfinv |
{'data': (10000, 100)} |
2000.0 |
4.0388 |
1.3642 |
exp |
{'data': (1024, 1024)} |
2097.1521 |
1.4502 |
--- |
exp |
{'data': (10000, 1)} |
20.0 |
0.082 |
--- |
exp |
{'data': (10000, 100)} |
2000.0 |
1.2132 |
--- |
expm1 |
{'data': (1024, 1024)} |
2097.1521 |
1.4518 |
1.1002 |
expm1 |
{'data': (10000, 1)} |
20.0 |
0.0636 |
0.0932 |
expm1 |
{'data': (10000, 100)} |
2000.0 |
1.3678 |
1.046 |
fill_element_0index |
{'lhs': (1024, 1024), 'mhs': (1024,), 'rhs': (1024,)} |
3148830.5 |
0.7684 |
--- |
fill_element_0index |
{'lhs': (10000, 1), 'mhs': (10000,), 'rhs': (10000,)} |
3146773.5 |
0.0364 |
--- |
fill_element_0index |
{'lhs': (10000, 100), 'mhs': (10000,), 'rhs': (10000,)} |
3150753.5 |
0.7308 |
--- |
fix |
{'data': (1024, 1024)} |
2097.1521 |
0.9952 |
--- |
fix |
{'data': (10000, 1)} |
20.0 |
0.059 |
--- |
fix |
{'data': (10000, 100)} |
2000.0 |
0.9328 |
--- |
flatten |
{'data': (1024, 1024)} |
2097.1521 |
0.3774 |
--- |
flatten |
{'data': (10000, 1)} |
20.0 |
0.0404 |
--- |
flatten |
{'data': (10000, 100)} |
2000.0 |
0.389 |
--- |
flip |
{'data': (1024, 1024), 'axis': 0} |
606828.4375 |
2.9712 |
--- |
flip |
{'data': (10000, 1), 'axis': 0} |
604771.3125 |
0.085 |
--- |
flip |
{'data': (10000, 100), 'axis': 0} |
608751.3125 |
2.8574 |
--- |
floor |
{'data': (1024, 1024)} |
2097.1521 |
0.3952 |
--- |
floor |
{'data': (10000, 1)} |
20.0 |
0.046 |
--- |
floor |
{'data': (10000, 100)} |
2000.0 |
0.3772 |
--- |
ftml_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'd': (1024, 1024), 'v': (1024, 1024), 'z': (1024, 1024), 'lr': 0.1, 'beta1': 0.1, 'beta2': 0.1, 'epsilon': 1e-08, 't': 1, 'wd': 0.1, 'rescale_grad': 0.4, 'clip_grad': -1.0} |
2643216.0 |
2.788 |
--- |
ftml_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'd': (10000, 1), 'v': (10000, 1), 'z': (10000, 1), 'lr': 0.5, 'beta1': 0.5, 'beta2': 0.5, 'epsilon': 1e-08, 't': 1, 'wd': 0.5, 'rescale_grad': 0.4, 'clip_grad': -1.0} |
2641158.75 |
0.1028 |
--- |
ftml_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'd': (10000, 100), 'v': (10000, 100), 'z': (10000, 100), 'lr': 0.9, 'beta1': 0.9, 'beta2': 0.9, 'epsilon': 1e-08, 't': 1, 'wd': 0.9, 'rescale_grad': 0.4, 'clip_grad': -1.0} |
2645138.75 |
2.9468 |
--- |
ftrl_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'z': (1024, 1024), 'n': (1024, 1024), 'lr': 0.1, 'wd': 0.1, 'rescale_grad': 0.4} |
2649430.25 |
3.4112 |
--- |
ftrl_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'z': (10000, 1), 'n': (10000, 1), 'lr': 0.5, 'wd': 0.5, 'rescale_grad': 0.4} |
2647373.0 |
0.173 |
--- |
ftrl_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'z': (10000, 100), 'n': (10000, 100), 'lr': 0.9, 'wd': 0.9, 'rescale_grad': 0.4} |
2651353.0 |
3.2252 |
--- |
gamma |
{'data': (1024, 1024)} |
2097.1521 |
4.0916 |
9.2046 |
gamma |
{'data': (10000, 1)} |
20.0 |
0.0986 |
0.132 |
gamma |
{'data': (10000, 100)} |
2000.0 |
3.8948 |
8.7606 |
gammaln |
{'data': (1024, 1024)} |
2097.1521 |
33.7642 |
5.6866 |
gammaln |
{'data': (10000, 1)} |
20.0 |
0.3732 |
0.0958 |
gammaln |
{'data': (10000, 100)} |
2000.0 |
32.1908 |
5.3906 |
gather_nd |
{'data': (1024, 1024), 'indices': (1, 1)} |
617589.0 |
0.0622 |
--- |
gather_nd |
{'data': (10000, 1), 'indices': (1, 1)} |
617587.0 |
0.0592 |
--- |
gather_nd |
{'data': (10000, 100), 'indices': (1, 1)} |
617587.375 |
0.0586 |
--- |
hard_sigmoid |
{'data': (1024, 1024)} |
2447661.25 |
0.529 |
0.5338 |
hard_sigmoid |
{'data': (10000, 1)} |
2447701.25 |
0.0678 |
0.0586 |
hard_sigmoid |
{'data': (10000, 100)} |
2451701.25 |
0.5096 |
0.5496 |
identity |
{'data': (1024, 1024)} |
2097.1521 |
0.3722 |
--- |
identity |
{'data': (10000, 1)} |
20.0 |
0.0402 |
--- |
identity |
{'data': (10000, 100)} |
2000.0 |
0.3758 |
--- |
im2col |
{'data': (32, 3, 256, 256), 'kernel': (3,), 'stride': (1,), 'dilate': (1,), 'pad': (1,)} |
2031137.125 |
7.9486 |
7.9328 |
im2col |
{'data': (32, 3, 10000, 10), 'kernel': (3, 3), 'stride': (1, 1), 'dilate': (1, 1), 'pad': (1, 1)} |
2376589.75 |
723.5026 |
640.7732 |
khatri_rao |
{'args': [(32, 32), (32, 32)]} |
0.008 |
0.1252 |
--- |
khatri_rao |
{'args': [(64, 64), (64, 64)]} |
0.008 |
0.121 |
--- |
lamb_update_phase1 |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'mean': (1024, 1024), 'var': (1024, 1024), 'beta1': 0.1, 'beta2': 0.1, 'epsilon': 1e-08, 't': 1, 'wd': 0.1, 'rescale_grad': 0.4} |
2651450.25 |
2.1784 |
--- |
lamb_update_phase1 |
{'weight': (10000, 1), 'grad': (10000, 1), 'mean': (10000, 1), 'var': (10000, 1), 'beta1': 0.5, 'beta2': 0.5, 'epsilon': 1e-08, 't': 1, 'wd': 0.5, 'rescale_grad': 0.4} |
2651490.25 |
0.0866 |
--- |
lamb_update_phase1 |
{'weight': (10000, 100), 'grad': (10000, 100), 'mean': (10000, 100), 'var': (10000, 100), 'beta1': 0.9, 'beta2': 0.9, 'epsilon': 1e-08, 't': 1, 'wd': 0.9, 'rescale_grad': 0.4} |
2655470.25 |
2.496 |
--- |
lamb_update_phase2 |
{'weight': (1024, 1024), 'g': (1024, 1024), 'r1': (1, 1024), 'r2': (1, 1024), 'lr': 0.1} |
2659761.75 |
0.7232 |
--- |
lamb_update_phase2 |
{'weight': (10000, 1), 'g': (10000, 1), 'r1': (1, 1), 'r2': (1, 1), 'lr': 0.5} |
2657704.5 |
0.1018 |
--- |
lamb_update_phase2 |
{'weight': (10000, 100), 'g': (10000, 100), 'r1': (1, 100), 'r2': (1, 100), 'lr': 0.9} |
2661684.5 |
0.7038 |
--- |
linalg_det |
{'A': (1024, 1024)} |
3175943.5 |
5.9172 |
19.5508 |
linalg_extractdiag |
{'A': (1024, 1024)} |
3175951.5 |
0.0566 |
0.2288 |
linalg_extracttrian |
{'A': (1024, 1024)} |
3180150.0 |
1.1282 |
1.216 |
linalg_gelqf |
{'A': (1024, 1024)} |
3192733.0 |
27.5024 |
--- |
linalg_gemm |
{'A': (1024, 1024), 'B': (1024, 1024), 'C': (1024, 1024), 'axis': 0} |
3196927.25 |
3.2726 |
5.8474 |
linalg_gemm2 |
{'A': (1024, 1024), 'B': (1024, 1024), 'axis': 0} |
3205315.75 |
2.257 |
4.7474 |
linalg_inverse |
{'A': (1024, 1024)} |
3213704.5 |
13.4066 |
5.2376 |
linalg_makediag |
{'A': (1024, 1024)} |
11803639.0 |
528.0402 |
9.9892 |
linalg_maketrian |
{'A': (1024, 1035)} |
11820228.0 |
3.2908 |
2.8376 |
linalg_potrf |
{'A': [[1, 0], [0, 1]]} |
3171745.0 |
0.0538 |
--- |
linalg_potrf |
{'A': [[2, -1, 0], [-1, 2, -1], [0, -1, 2]]} |
3171745.0 |
0.0516 |
--- |
linalg_potri |
{'A': (1024, 1024)} |
11828616.0 |
11.3698 |
7.07 |
linalg_slogdet |
{'A': (1024, 1024)} |
11832815.0 |
6.5328 |
--- |
linalg_sumlogdiag |
{'A': (1024, 1024)} |
11832815.0 |
0.1038 |
0.9028 |
linalg_syrk |
{'A': (1024, 1024)} |
11841203.0 |
3.2556 |
4.3508 |
linalg_trmm |
{'A': (1024, 1024), 'B': (1024, 1024)} |
11849592.0 |
2.3468 |
4.6616 |
linalg_trsm |
{'A': (1024, 1024), 'B': (1024, 1024)} |
11857981.0 |
2.0618 |
4.5612 |
log |
{'data': (1024, 1024)} |
2097.1521 |
1.2138 |
0.4834 |
log |
{'data': (10000, 1)} |
20.0 |
0.0674 |
0.0482 |
log |
{'data': (10000, 100)} |
2000.0 |
1.1498 |
0.4456 |
log10 |
{'data': (1024, 1024)} |
2097.1521 |
1.3328 |
0.4118 |
log10 |
{'data': (10000, 1)} |
20.0 |
0.0642 |
0.0466 |
log10 |
{'data': (10000, 100)} |
2000.0 |
1.2618 |
0.4046 |
log1p |
{'data': (1024, 1024)} |
2097.1521 |
1.398 |
0.473 |
log1p |
{'data': (10000, 1)} |
20.0 |
0.0612 |
0.0478 |
log1p |
{'data': (10000, 100)} |
2000.0 |
1.3154 |
0.4492 |
log2 |
{'data': (1024, 1024)} |
2097.1521 |
2.9748 |
2.0772 |
log2 |
{'data': (10000, 1)} |
20.0 |
0.132 |
0.1216 |
log2 |
{'data': (10000, 100)} |
2000.0 |
1.9858 |
1.2906 |
log_softmax |
{'data': (1024, 1024), 'axis': 0, 'dtype': 'float16'} |
2452847.0 |
4.536 |
3.4868 |
log_softmax |
{'data': (10000, 1), 'axis': 0, 'dtype': 'float32'} |
2451838.5 |
0.654 |
0.4032 |
log_softmax |
{'data': (10000, 100), 'axis': 0, 'dtype': 'float64'} |
2459838.5 |
4.3886 |
4.1634 |
logical_not |
{'data': (1024, 1024)} |
2097.1521 |
0.3424 |
--- |
logical_not |
{'data': (10000, 1)} |
20.0 |
0.0492 |
--- |
logical_not |
{'data': (10000, 100)} |
2000.0 |
0.3182 |
--- |
make_loss |
{'data': (1024, 1024)} |
2097.1521 |
0.3642 |
--- |
make_loss |
{'data': (10000, 1)} |
20.0 |
0.0408 |
--- |
make_loss |
{'data': (10000, 100)} |
2000.0 |
0.4446 |
--- |
max |
{'data': (1024, 1024), 'axis': 0} |
587490.0625 |
3.3924 |
5.2954 |
max |
{'data': (10000, 1), 'axis': 0} |
587490.0625 |
0.8132 |
0.11 |
max |
{'data': (10000, 100), 'axis': 0} |
587490.5 |
3.9074 |
5.5152 |
max_axis |
{'data': (1024, 1024), 'axis': 0} |
587496.625 |
3.3214 |
--- |
max_axis |
{'data': (10000, 1), 'axis': 0} |
587494.5625 |
0.8142 |
--- |
max_axis |
{'data': (10000, 100), 'axis': 0} |
587495.0 |
3.8072 |
--- |
mean |
{'data': (1024, 1024), 'axis': 0} |
587503.0 |
3.8338 |
20.7358 |
mean |
{'data': (10000, 1), 'axis': 0} |
587503.0 |
0.6252 |
0.2456 |
mean |
{'data': (10000, 100), 'axis': 0} |
587503.375 |
3.3154 |
19.4032 |
min |
{'data': (1024, 1024), 'axis': 0} |
587511.5625 |
3.3796 |
5.4618 |
min |
{'data': (10000, 1), 'axis': 0} |
587511.5625 |
0.8098 |
0.1218 |
min |
{'data': (10000, 100), 'axis': 0} |
587512.0 |
3.8844 |
5.1848 |
min_axis |
{'data': (1024, 1024), 'axis': 0} |
587518.125 |
3.3462 |
--- |
min_axis |
{'data': (10000, 1), 'axis': 0} |
587516.0625 |
0.7914 |
--- |
min_axis |
{'data': (10000, 100), 'axis': 0} |
587516.5 |
3.8136 |
--- |
moments |
{'data': (1024, 1024), 'axes': [0, 1]} |
11857981.0 |
117.5186 |
--- |
moments |
{'data': (10000, 1), 'axes': [0, 1]} |
11857981.0 |
1.3184 |
--- |
moments |
{'data': (10000, 100), 'axes': [0, 1]} |
11857981.0 |
128.3984 |
--- |
mp_nag_mom_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'mom': (1024, 1024), 'weight32': (1024, 1024), 'lr': 0.1, 'wd': 0.1, 'rescale_grad': 0.4} |
2665976.0 |
1.2092 |
--- |
mp_nag_mom_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'mom': (10000, 1), 'weight32': (10000, 1), 'lr': 0.5, 'wd': 0.5, 'rescale_grad': 0.4} |
2663918.75 |
0.0652 |
--- |
mp_nag_mom_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'mom': (10000, 100), 'weight32': (10000, 100), 'lr': 0.9, 'wd': 0.9, 'rescale_grad': 0.4} |
2667898.75 |
1.1104 |
--- |
mp_sgd_mom_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'mom': (1024, 1024), 'weight32': (1024, 1024), 'lr': 0.1, 'wd': 0.1, 'rescale_grad': 0.4, 'lazy_update': 0} |
2672190.25 |
0.9882 |
--- |
mp_sgd_mom_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'mom': (10000, 1), 'weight32': (10000, 1), 'lr': 0.5, 'wd': 0.5, 'rescale_grad': 0.4, 'lazy_update': 0} |
2670133.25 |
0.0678 |
--- |
mp_sgd_mom_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'mom': (10000, 100), 'weight32': (10000, 100), 'lr': 0.9, 'wd': 0.9, 'rescale_grad': 0.4, 'lazy_update': 0} |
2674113.25 |
0.9142 |
--- |
mp_sgd_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'weight32': (1024, 1024), 'lr': 0.1, 'wd': 0.1, 'rescale_grad': 0.4, 'lazy_update': 0} |
2678404.5 |
0.7132 |
--- |
mp_sgd_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'weight32': (10000, 1), 'lr': 0.5, 'wd': 0.5, 'rescale_grad': 0.4, 'lazy_update': 0} |
2676347.5 |
0.0616 |
--- |
mp_sgd_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'weight32': (10000, 100), 'lr': 0.9, 'wd': 0.9, 'rescale_grad': 0.4, 'lazy_update': 0} |
2680327.5 |
0.688 |
--- |
multi_all_finite |
{'args': [(1024, 1024)], 'num_arrays': 1} |
2827799.5 |
0.0698 |
--- |
multi_all_finite |
{'args': [(10000, 1)], 'num_arrays': 1} |
2827799.5 |
0.0626 |
--- |
multi_all_finite |
{'args': [(10000, 10)], 'num_arrays': 1} |
2827799.5 |
0.0576 |
--- |
multi_lars |
{'lrs': (1024, 1024), 'weights_sum_sq': (1024, 1024), 'grads_sum_sq': (1024, 1024), 'wds': (1024, 1024), 'eta': 0.5, 'eps': 1e-08, 'rescale_grad': 0.4} |
3155045.0 |
1.324 |
--- |
multi_lars |
{'lrs': (10000, 1), 'weights_sum_sq': (10000, 1), 'grads_sum_sq': (10000, 1), 'wds': (10000, 1), 'eta': 0.5, 'eps': 1e-08, 'rescale_grad': 0.4} |
3152987.75 |
0.0646 |
--- |
multi_lars |
{'lrs': (10000, 100), 'weights_sum_sq': (10000, 100), 'grads_sum_sq': (10000, 100), 'wds': (10000, 100), 'eta': 0.5, 'eps': 1e-08, 'rescale_grad': 0.4} |
3156967.75 |
1.2024 |
--- |
multi_mp_sgd_mom_update |
{'args0': '<NDArray 5x5 @cpu(0)>', 'args1': '<NDArray 5x5 @cpu(0)>', 'args2': '<NDArray 5x5 @cpu(0)>', 'args3': '<NDArray 5x5 @cpu(0)>', 'lrs': 0.1, 'wds': 0.2, 'out': '<NDArray 5x5 @cpu(0)>'} |
--- |
0.0735 |
--- |
multi_mp_sgd_update |
{'args0': '<NDArray 5x5 @cpu(0)>', 'args1': '<NDArray 5x5 @cpu(0)>', 'args2': '<NDArray 5x5 @cpu(0)>', 'lrs': 0.1, 'wds': 0.2, 'out': '<NDArray 5x5 @cpu(0)>'} |
--- |
0.05 |
--- |
multi_sgd_mom_update |
{'args0': '<NDArray 5x5 @cpu(0)>', 'args1': '<NDArray 5x5 @cpu(0)>', 'args2': '<NDArray 5x5 @cpu(0)>', 'lrs': 0.1, 'wds': 0.2, 'out': '<NDArray 5x5 @cpu(0)>'} |
--- |
0.0649 |
--- |
multi_sgd_update |
{'args0': '<NDArray 5x5 @cpu(0)>', 'args1': '<NDArray 5x5 @cpu(0)>', 'lrs': 0.1, 'wds': 0.2, 'out': '<NDArray 5x5 @cpu(0)>'} |
--- |
0.049 |
--- |
multi_sum_sq |
{'args': [(1024, 1024)], 'num_arrays': 1} |
2827799.5 |
0.0626 |
--- |
multi_sum_sq |
{'args': [(10000, 1)], 'num_arrays': 1} |
2827799.5 |
0.0496 |
--- |
multi_sum_sq |
{'args': [(10000, 10)], 'num_arrays': 1} |
2827799.5 |
0.0678 |
--- |
nag_mom_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'mom': (1024, 1024), 'lr': 0.1, 'wd': 0.1, 'rescale_grad': 0.4} |
2684619.0 |
1.0912 |
--- |
nag_mom_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'mom': (10000, 1), 'lr': 0.5, 'wd': 0.5, 'rescale_grad': 0.4} |
2682561.75 |
0.0634 |
--- |
nag_mom_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'mom': (10000, 100), 'lr': 0.9, 'wd': 0.9, 'rescale_grad': 0.4} |
2686541.75 |
0.9898 |
--- |
nanprod |
{'data': (1024, 1024), 'axis': 0} |
587520.375 |
3.7316 |
5.61 |
nanprod |
{'data': (10000, 1), 'axis': 0} |
587520.375 |
0.862 |
0.1084 |
nanprod |
{'data': (10000, 100), 'axis': 0} |
587520.75 |
4.0956 |
5.4066 |
nansum |
{'data': (1024, 1024), 'axis': 0} |
587528.9375 |
3.633 |
5.6082 |
nansum |
{'data': (10000, 1), 'axis': 0} |
587529.0 |
0.8506 |
0.1206 |
nansum |
{'data': (10000, 100), 'axis': 0} |
587529.375 |
3.648 |
5.2664 |
negative |
{'data': (1024, 1024)} |
2097.1521 |
0.3028 |
--- |
negative |
{'data': (10000, 1)} |
20.0 |
0.0574 |
--- |
negative |
{'data': (10000, 100)} |
2000.0 |
0.2932 |
--- |
norm |
{'data': (1024, 1024), 'axis': 0} |
587537.5625 |
3.726 |
5.2912 |
norm |
{'data': (10000, 1), 'axis': 0} |
587537.5625 |
0.8956 |
0.1078 |
norm |
{'data': (10000, 100), 'axis': 0} |
587537.9375 |
3.8384 |
5.1258 |
one_hot |
{'indices': (1, 1), 'depth': 0, 'dtype': 'float32'} |
--- |
0.0294 |
--- |
one_hot |
{'indices': (1, 1), 'depth': 0, 'dtype': 'int32'} |
--- |
0.0306 |
--- |
one_hot |
{'indices': (1, 1), 'depth': 0, 'dtype': 'float32'} |
--- |
0.0298 |
--- |
ones_like |
{'data': (1024, 1024)} |
2097.1521 |
0.19 |
--- |
ones_like |
{'data': (10000, 1)} |
20.0 |
0.0458 |
--- |
ones_like |
{'data': (10000, 100)} |
2000.0 |
0.186 |
--- |
pick |
{'data': (1024, 1024), 'index': (1, 1024), 'axis': 0} |
617593.3125 |
0.0746 |
9.2106 |
pick |
{'data': (10000, 1), 'index': (1, 1), 'axis': 0} |
617591.25 |
0.068 |
0.1952 |
pick |
{'data': (10000, 100), 'index': (1, 100), 'axis': 0} |
617591.6875 |
0.071 |
8.7896 |
preloaded_multi_mp_sgd_mom_update |
{'args0': '<NDArray 5x5 @cpu(0)>', 'args1': '<NDArray 5x5 @cpu(0)>', 'args2': '<NDArray 5x5 @cpu(0)>', 'args3': '<NDArray 5x5 @cpu(0)>', 'args4': '<NDArray 1 @cpu(0)>', 'args5': '<NDArray 1 @cpu(0)>', 'out': '<NDArray 5x5 @cpu(0)>'} |
--- |
0.0583 |
--- |
preloaded_multi_mp_sgd_update |
{'args0': '<NDArray 5x5 @cpu(0)>', 'args1': '<NDArray 5x5 @cpu(0)>', 'args2': '<NDArray 5x5 @cpu(0)>', 'args3': '<NDArray 1 @cpu(0)>', 'args4': '<NDArray 1 @cpu(0)>', 'out': '<NDArray 5x5 @cpu(0)>'} |
--- |
0.0474 |
--- |
preloaded_multi_sgd_mom_update |
{'args0': '<NDArray 5x5 @cpu(0)>', 'args1': '<NDArray 5x5 @cpu(0)>', 'args2': '<NDArray 5x5 @cpu(0)>', 'args3': '<NDArray 1 @cpu(0)>', 'args4': '<NDArray 1 @cpu(0)>', 'out': '<NDArray 5x5 @cpu(0)>'} |
--- |
0.0479 |
--- |
preloaded_multi_sgd_update |
{'args0': '<NDArray 5x5 @cpu(0)>', 'args1': '<NDArray 5x5 @cpu(0)>', 'args4': '<NDArray 1 @cpu(0)>', 'args5': '<NDArray 1 @cpu(0)>', 'out': '<NDArray 5x5 @cpu(0)>'} |
--- |
0.0515 |
--- |
prod |
{'data': (1024, 1024), 'axis': 0} |
587546.1875 |
3.291 |
5.398 |
prod |
{'data': (10000, 1), 'axis': 0} |
587546.1875 |
0.7594 |
0.1114 |
prod |
{'data': (10000, 100), 'axis': 0} |
587546.5625 |
3.7298 |
5.253 |
radians |
{'data': (1024, 1024)} |
2097.1521 |
0.3608 |
0.4092 |
radians |
{'data': (10000, 1)} |
20.0 |
0.0566 |
0.0502 |
radians |
{'data': (10000, 100)} |
2000.0 |
0.3328 |
0.4018 |
random_exponential |
{'shape': (1024, 1024), 'dtype': 'float16'} |
1048.576 |
12.9418 |
--- |
random_exponential |
{'shape': (10000, 1), 'dtype': 'float32'} |
20.0 |
0.2164 |
--- |
random_exponential |
{'shape': (10000, 100), 'dtype': 'float64'} |
4000.0 |
12.127 |
--- |
random_gamma |
{'shape': (1024, 1024), 'dtype': 'float16'} |
1048.576 |
39.3618 |
--- |
random_gamma |
{'shape': (10000, 1), 'dtype': 'float32'} |
20.0 |
0.4488 |
--- |
random_gamma |
{'shape': (10000, 100), 'dtype': 'float64'} |
4837.3179 |
46.6734 |
--- |
random_generalized_negative_binomial |
{'shape': (1024, 1024), 'dtype': 'float16'} |
3983.0459 |
44.0244 |
--- |
random_generalized_negative_binomial |
{'shape': (10000, 1), 'dtype': 'float32'} |
2974.47 |
0.5508 |
--- |
random_generalized_negative_binomial |
{'shape': (10000, 100), 'dtype': 'float64'} |
10954.4697 |
41.6416 |
--- |
random_negative_binomial |
{'k': 1, 'p': 1, 'shape': (1024, 1024), 'dtype': 'float16'} |
10100.1982 |
35.816 |
--- |
random_negative_binomial |
{'k': 1, 'p': 1, 'shape': (10000, 1), 'dtype': 'float32'} |
9091.6221 |
0.4622 |
--- |
random_negative_binomial |
{'k': 1, 'p': 1, 'shape': (10000, 100), 'dtype': 'float64'} |
17071.6211 |
33.9036 |
--- |
random_normal |
{'shape': (1024, 1024), 'dtype': 'float16'} |
16217.3496 |
20.4122 |
--- |
random_normal |
{'shape': (10000, 1), 'dtype': 'float32'} |
15208.7744 |
0.2602 |
--- |
random_normal |
{'shape': (10000, 100), 'dtype': 'float64'} |
23188.7734 |
19.3882 |
--- |
random_pdf_dirichlet |
{'sample': (2,), 'alpha': [0.0, 2.5]} |
19188.7793 |
0.0716 |
--- |
random_pdf_exponential |
{'sample': (2,), 'lam': [1.0, 8.5]} |
19188.7891 |
0.07 |
--- |
random_pdf_gamma |
{'sample': (2,), 'alpha': [0.0, 2.5], 'beta': [1.0, 0.7]} |
19188.7988 |
0.0744 |
--- |
random_pdf_generalized_negative_binomial |
{'sample': (2,), 'mu': [2.0, 2.5], 'alpha': [0.0, 2.5]} |
19188.8066 |
0.0744 |
--- |
random_pdf_negative_binomial |
{'sample': (2,), 'k': [20, 49], 'p': [0.4, 0.77]} |
19188.8145 |
0.0744 |
--- |
random_pdf_normal |
{'sample': (2,), 'mu': [2.0, 2.5], 'sigma': [1.0, 3.7]} |
19188.8223 |
0.0732 |
--- |
random_pdf_poisson |
{'sample': (2,), 'lam': [1.0, 8.5]} |
19188.8301 |
0.1466 |
--- |
random_pdf_uniform |
{'sample': (2,), 'low': [0.0, 2.5], 'high': [1.0, 3.7]} |
19188.8379 |
0.0706 |
--- |
random_poisson |
{'shape': (1024, 1024), 'dtype': 'float16'} |
22334.5625 |
14.8466 |
--- |
random_poisson |
{'shape': (10000, 1), 'dtype': 'float32'} |
21325.9863 |
0.2366 |
--- |
random_poisson |
{'shape': (10000, 100), 'dtype': 'float64'} |
29305.9863 |
14.1912 |
--- |
random_randint |
{'low': 0, 'high': 5, 'shape': (1024, 1024), 'dtype': 'int32'} |
31597.4414 |
3.1232 |
--- |
random_randint |
{'low': 0, 'high': 5, 'shape': (10000, 1), 'dtype': 'int64'} |
29580.2891 |
0.1406 |
--- |
random_randint |
{'low': 0, 'high': 5, 'shape': (10000, 100), 'dtype': 'int32'} |
33540.2891 |
3.0268 |
--- |
random_uniform |
{'low': 0, 'high': 5, 'shape': (1024, 1024), 'dtype': 'float16'} |
34686.0195 |
8.9082 |
--- |
random_uniform |
{'low': 0, 'high': 5, 'shape': (10000, 1), 'dtype': 'float32'} |
33677.4414 |
0.1992 |
--- |
random_uniform |
{'low': 0, 'high': 5, 'shape': (10000, 100), 'dtype': 'float64'} |
41657.4414 |
8.2722 |
--- |
ravel_multi_index |
{'data': (2, 1024), 'shape': (1024, 1024)} |
617597.625 |
0.0718 |
--- |
ravel_multi_index |
{'data': (2, 1024), 'shape': (10000, 1)} |
617599.6875 |
0.0724 |
--- |
ravel_multi_index |
{'data': (2, 1024), 'shape': (10000, 100)} |
617601.6875 |
0.0714 |
--- |
rcbrt |
{'data': (1024, 1024)} |
2097.1521 |
1.836 |
1.8906 |
rcbrt |
{'data': (10000, 1)} |
20.0 |
0.0786 |
0.0912 |
rcbrt |
{'data': (10000, 100)} |
2000.0 |
1.6758 |
1.7738 |
reciprocal |
{'data': (1024, 1024)} |
2097.1521 |
0.3438 |
0.5528 |
reciprocal |
{'data': (10000, 1)} |
20.0 |
0.0534 |
0.047 |
reciprocal |
{'data': (10000, 100)} |
2000.0 |
0.3366 |
0.5098 |
relu |
{'data': (1024, 1024)} |
2097.1521 |
0.7492 |
0.8506 |
relu |
{'data': (10000, 1)} |
20.0 |
0.0568 |
0.0552 |
relu |
{'data': (10000, 100)} |
2000.0 |
0.6692 |
0.779 |
reset_arrays |
{'args': [(1024, 1024)], 'num_arrays': 1} |
--- |
0.0384 |
--- |
reset_arrays |
{'args': [(10000, 1)], 'num_arrays': 1} |
--- |
0.0362 |
--- |
reset_arrays |
{'args': [(10000, 10)], 'num_arrays': 1} |
--- |
0.0342 |
--- |
reshape_like |
{'lhs': (1024, 1024), 'rhs': (1024, 1024)} |
2097.1521 |
0.4662 |
--- |
reshape_like |
{'lhs': (10000, 10), 'rhs': (10000, 10)} |
200.0 |
0.1112 |
--- |
reshape_like |
{'lhs': (10000, 1), 'rhs': (10000, 1)} |
20.0 |
0.0642 |
--- |
rint |
{'data': (1024, 1024)} |
2097.1521 |
0.9964 |
--- |
rint |
{'data': (10000, 1)} |
20.0 |
0.0598 |
--- |
rint |
{'data': (10000, 100)} |
2000.0 |
0.9402 |
--- |
rmsprop_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'n': (1024, 1024), 'lr': 0.1, 'gamma1': 0.1, 'epsilon': 1e-08, 'wd': 0.1, 'rescale_grad': 0.4} |
2690833.25 |
1.4616 |
--- |
rmsprop_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'n': (10000, 1), 'lr': 0.5, 'gamma1': 0.5, 'epsilon': 1e-08, 'wd': 0.5, 'rescale_grad': 0.4} |
2688776.0 |
0.07 |
--- |
rmsprop_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'n': (10000, 100), 'lr': 0.9, 'gamma1': 0.9, 'epsilon': 1e-08, 'wd': 0.9, 'rescale_grad': 0.4} |
2692756.0 |
1.608 |
--- |
rmspropalex_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'n': (1024, 1024), 'g': (1024, 1024), 'delta': (1024, 1024), 'lr': 0.1, 'gamma1': 0.1, 'gamma2': 0.1, 'epsilon': 1e-08, 'wd': 0.1, 'rescale_grad': 0.4} |
2694950.5 |
2.069 |
--- |
rmspropalex_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'n': (10000, 1), 'g': (10000, 1), 'delta': (10000, 1), 'lr': 0.5, 'gamma1': 0.5, 'gamma2': 0.5, 'epsilon': 1e-08, 'wd': 0.5, 'rescale_grad': 0.4} |
2692893.25 |
0.0808 |
--- |
rmspropalex_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'n': (10000, 100), 'g': (10000, 100), 'delta': (10000, 100), 'lr': 0.9, 'gamma1': 0.9, 'gamma2': 0.9, 'epsilon': 1e-08, 'wd': 0.9, 'rescale_grad': 0.4} |
2696873.25 |
1.9606 |
--- |
round |
{'data': (1024, 1024)} |
4194.3042 |
0.8192 |
--- |
round |
{'data': (10000, 1)} |
20.0 |
0.0792 |
--- |
round |
{'data': (10000, 100)} |
2000.0 |
0.8664 |
--- |
rsqrt |
{'data': (1024, 1024)} |
2097.1521 |
0.8992 |
1.0464 |
rsqrt |
{'data': (10000, 1)} |
20.0 |
0.0598 |
0.0612 |
rsqrt |
{'data': (10000, 100)} |
2000.0 |
0.914 |
1.16 |
sample_exponential |
{'lam': [1.0, 8.5], 'shape': (1024, 1024), 'dtype': 'float16'} |
43948.8984 |
18.9886 |
--- |
sample_exponential |
{'lam': [1.0, 8.5], 'shape': (10000, 1), 'dtype': 'float32'} |
41931.7461 |
0.2754 |
--- |
sample_exponential |
{'lam': [1.0, 8.5], 'shape': (10000, 100), 'dtype': 'float64'} |
57891.7461 |
17.5774 |
--- |
sample_gamma |
{'alpha': [0.0, 2.5], 'shape': (1024, 1024), 'dtype': 'float16', 'beta': [1.0, 0.7]} |
56183.2031 |
72.7822 |
--- |
sample_gamma |
{'alpha': [0.0, 2.5], 'shape': (10000, 1), 'dtype': 'float32', 'beta': [1.0, 0.7]} |
54126.0508 |
1.0362 |
--- |
sample_gamma |
{'alpha': [0.0, 2.5], 'shape': (10000, 100), 'dtype': 'float64', 'beta': [1.0, 0.7]} |
70126.0469 |
77.2066 |
--- |
sample_generalized_negative_binomial |
{'mu': [2.0, 2.5], 'shape': (1024, 1024), 'dtype': 'float16', 'alpha': [0.0, 2.5]} |
68417.5078 |
132.5426 |
--- |
sample_generalized_negative_binomial |
{'mu': [2.0, 2.5], 'shape': (10000, 1), 'dtype': 'float32', 'alpha': [0.0, 2.5]} |
66400.3516 |
1.4492 |
--- |
sample_generalized_negative_binomial |
{'mu': [2.0, 2.5], 'shape': (10000, 100), 'dtype': 'float64', 'alpha': [0.0, 2.5]} |
82360.3516 |
157.3978 |
--- |
sample_multinomial |
{'data': (32, 32), 'shape': (1024, 1024), 'dtype': 'float16'} |
443459.0938 |
6482.2856 |
--- |
sample_multinomial |
{'data': (32, 32), 'shape': (10000, 1), 'dtype': 'float32'} |
411184.6875 |
60.138 |
--- |
sample_multinomial |
{'data': (32, 32), 'shape': (10000, 100), 'dtype': 'float64'} |
666544.6875 |
6201.6147 |
--- |
sample_negative_binomial |
{'k': [20, 49], 'shape': (1024, 1024), 'dtype': 'float16', 'p': [0.4, 0.77]} |
544836.125 |
583.3652 |
--- |
sample_negative_binomial |
{'k': [20, 49], 'shape': (10000, 1), 'dtype': 'float32', 'p': [0.4, 0.77]} |
542819.0 |
5.7008 |
--- |
sample_negative_binomial |
{'k': [20, 49], 'shape': (10000, 100), 'dtype': 'float64', 'p': [0.4, 0.77]} |
558779.0 |
555.5234 |
--- |
sample_normal |
{'mu': [2.0, 2.5], 'shape': (1024, 1024), 'dtype': 'float16', 'sigma': [1.0, 3.7]} |
557070.4375 |
56.7754 |
--- |
sample_normal |
{'mu': [2.0, 2.5], 'shape': (10000, 1), 'dtype': 'float32', 'sigma': [1.0, 3.7]} |
555053.3125 |
0.68 |
--- |
sample_normal |
{'mu': [2.0, 2.5], 'shape': (10000, 100), 'dtype': 'float64', 'sigma': [1.0, 3.7]} |
571013.3125 |
49.8232 |
--- |
sample_poisson |
{'lam': [1.0, 8.5], 'shape': (1024, 1024), 'dtype': 'float16'} |
569304.75 |
143.0912 |
--- |
sample_poisson |
{'lam': [1.0, 8.5], 'shape': (10000, 1), 'dtype': 'float32'} |
567287.5625 |
1.5108 |
--- |
sample_poisson |
{'lam': [1.0, 8.5], 'shape': (10000, 100), 'dtype': 'float64'} |
583247.5625 |
126.2726 |
--- |
sample_uniform |
{'low': [0.0, 2.5], 'shape': (1024, 1024), 'dtype': 'float16', 'high': [1.0, 3.7]} |
581539.0625 |
24.4044 |
--- |
sample_uniform |
{'low': [0.0, 2.5], 'shape': (10000, 1), 'dtype': 'float32', 'high': [1.0, 3.7]} |
579521.875 |
0.2776 |
--- |
sample_uniform |
{'low': [0.0, 2.5], 'shape': (10000, 100), 'dtype': 'float64', 'high': [1.0, 3.7]} |
595481.875 |
22.6432 |
--- |
sgd_mom_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'mom': (1024, 1024), 'lr': 0.1, 'wd': 0.1, 'rescale_grad': 0.4, 'lazy_update': 0} |
2701164.75 |
0.7962 |
--- |
sgd_mom_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'mom': (10000, 1), 'lr': 0.5, 'wd': 0.5, 'rescale_grad': 0.4, 'lazy_update': 0} |
2699107.5 |
0.0606 |
--- |
sgd_mom_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'mom': (10000, 100), 'lr': 0.9, 'wd': 0.9, 'rescale_grad': 0.4, 'lazy_update': 0} |
2703087.5 |
0.749 |
--- |
sgd_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'lr': 0.1, 'wd': 0.1, 'rescale_grad': 0.4, 'lazy_update': 0} |
2707379.0 |
0.5756 |
--- |
sgd_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'lr': 0.5, 'wd': 0.5, 'rescale_grad': 0.4, 'lazy_update': 0} |
2705321.75 |
0.0546 |
--- |
sgd_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'lr': 0.9, 'wd': 0.9, 'rescale_grad': 0.4, 'lazy_update': 0} |
2709301.75 |
0.5606 |
--- |
shape_array |
{'data': (1024, 1024)} |
0.008 |
0.0524 |
--- |
shape_array |
{'data': (10000, 1)} |
0.008 |
0.0536 |
--- |
shape_array |
{'data': (10000, 100)} |
0.008 |
0.0528 |
--- |
shuffle |
{'data': (1024, 1024)} |
2097.1521 |
1.027 |
--- |
shuffle |
{'data': (10000, 1)} |
20.0 |
0.8646 |
--- |
shuffle |
{'data': (10000, 100)} |
2000.0 |
1.714 |
--- |
sigmoid |
{'data': (1024, 1024)} |
2097.1521 |
0.9368 |
0.3078 |
sigmoid |
{'data': (10000, 1)} |
20.0 |
0.0746 |
0.0608 |
sigmoid |
{'data': (10000, 100)} |
2000.0 |
0.8692 |
0.2952 |
sign |
{'data': (1024, 1024)} |
2097.1521 |
0.5408 |
0.2894 |
sign |
{'data': (10000, 1)} |
20.0 |
0.07 |
0.0602 |
sign |
{'data': (10000, 100)} |
2000.0 |
0.5158 |
0.2646 |
signsgd_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'lr': 0.1, 'wd': 0.1, 'rescale_grad': 0.4} |
2713593.25 |
0.634 |
--- |
signsgd_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'lr': 0.5, 'wd': 0.5, 'rescale_grad': 0.4} |
2711536.25 |
0.053 |
--- |
signsgd_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'lr': 0.9, 'wd': 0.9, 'rescale_grad': 0.4} |
2715516.25 |
0.5916 |
--- |
signum_update |
{'weight': (1024, 1024), 'grad': (1024, 1024), 'mom': (1024, 1024), 'lr': 0.1, 'wd': 0.1, 'rescale_grad': 0.4} |
2719807.5 |
0.9946 |
--- |
signum_update |
{'weight': (10000, 1), 'grad': (10000, 1), 'mom': (10000, 1), 'lr': 0.5, 'wd': 0.5, 'rescale_grad': 0.4} |
2717750.5 |
0.0678 |
--- |
signum_update |
{'weight': (10000, 100), 'grad': (10000, 100), 'mom': (10000, 100), 'lr': 0.9, 'wd': 0.9, 'rescale_grad': 0.4} |
2721730.5 |
0.9248 |
--- |
sin |
{'data': (1024, 1024)} |
2097.1521 |
0.8578 |
0.738 |
sin |
{'data': (10000, 1)} |
20.0 |
0.0736 |
0.0648 |
sin |
{'data': (10000, 100)} |
2000.0 |
0.8424 |
0.7242 |
sinh |
{'data': (1024, 1024)} |
2097.1521 |
1.635 |
0.9536 |
sinh |
{'data': (10000, 1)} |
20.0 |
0.0824 |
0.0674 |
sinh |
{'data': (10000, 100)} |
2000.0 |
1.581 |
0.925 |
size_array |
{'data': (1024, 1024)} |
0.004 |
0.0562 |
--- |
size_array |
{'data': (10000, 1)} |
0.004 |
0.055 |
--- |
size_array |
{'data': (10000, 100)} |
0.004 |
0.0548 |
--- |
slice |
{'data': (1024, 1024), 'begin': 0, 'end': 1} |
617607.875 |
0.115 |
0.2404 |
slice |
{'data': (10000, 1), 'begin': 0, 'end': 1} |
617607.875 |
0.0692 |
0.058 |
slice |
{'data': (10000, 100), 'begin': 0, 'end': 1} |
617608.25 |
0.0734 |
0.202 |
slice_axis |
{'data': (1024, 1024), 'axis': 0, 'begin': 0, 'end': 1} |
617616.4375 |
0.1322 |
0.6256 |
slice_axis |
{'data': (10000, 1), 'axis': 0, 'begin': 0, 'end': 1} |
617616.4375 |
0.0546 |
0.0558 |
slice_axis |
{'data': (10000, 100), 'axis': 0, 'begin': 0, 'end': 1} |
617616.875 |
0.0622 |
0.5092 |
slice_like |
{'data': (1024, 1024), 'shape_like': (100, 100), 'axes': [0, 1]} |
617696.875 |
0.0962 |
0.2756 |
slice_like |
{'data': (10000, 1), 'shape_like': (10, 1), 'axes': [0, 1]} |
617696.875 |
0.0856 |
0.0704 |
slice_like |
{'data': (10000, 100), 'shape_like': (100, 10), 'axes': [0, 1]} |
617700.875 |
0.0822 |
0.2112 |
smooth_l1 |
{'data': (1024, 1024)} |
2827856.5 |
0.9642 |
1.0984 |
smooth_l1 |
{'data': (10000, 1)} |
2825799.5 |
0.058 |
0.0666 |
smooth_l1 |
{'data': (10000, 100)} |
2829799.5 |
0.9352 |
0.995 |
softmax |
{'data': (1024, 1024), 'axis': 0, 'dtype': 'float16'} |
2462984.25 |
4.9242 |
4.9294 |
softmax |
{'data': (10000, 1), 'axis': 0, 'dtype': 'float32'} |
2461975.75 |
0.6576 |
0.2452 |
softmax |
{'data': (10000, 100), 'axis': 0, 'dtype': 'float64'} |
2469975.75 |
5.29 |
4.056 |
softmax_cross_entropy |
{'data': (1024, 1024), 'label': (1024,)} |
2827799.5 |
2.0616 |
--- |
softmin |
{'data': (1024, 1024), 'axis': 0, 'dtype': 'float16'} |
2473121.5 |
4.7208 |
4.697 |
softmin |
{'data': (10000, 1), 'axis': 0, 'dtype': 'float32'} |
2473161.5 |
0.677 |
0.2462 |
softmin |
{'data': (10000, 100), 'axis': 0, 'dtype': 'float64'} |
2481161.5 |
4.3982 |
4.0266 |
softsign |
{'data': (1024, 1024)} |
2097.1521 |
0.3686 |
0.4132 |
softsign |
{'data': (10000, 1)} |
20.0 |
0.0684 |
0.0622 |
softsign |
{'data': (10000, 100)} |
2000.0 |
0.3358 |
0.3952 |
sort |
{'data': (1024, 1024), 'axis': 0} |
604264.0 |
186.6978 |
--- |
sort |
{'data': (10000, 1), 'axis': 0} |
598052.5625 |
4.414 |
--- |
sort |
{'data': (10000, 100), 'axis': 0} |
606032.5625 |
133.1322 |
--- |
space_to_depth |
{'data': (1, 4, 2, 4), 'block_size': 2} |
606751.5 |
0.1712 |
--- |
space_to_depth |
{'data': (10, 25, 10, 100), 'block_size': 5} |
607751.4375 |
2.5286 |
--- |
sqrt |
{'data': (1024, 1024)} |
2097.1521 |
1.0564 |
0.3612 |
sqrt |
{'data': (10000, 1)} |
20.0 |
0.2086 |
0.0882 |
sqrt |
{'data': (10000, 100)} |
2000.0 |
0.7928 |
0.2664 |
square |
{'data': (1024, 1024)} |
2097.1521 |
0.3532 |
0.3282 |
square |
{'data': (10000, 1)} |
20.0 |
0.069 |
0.061 |
square |
{'data': (10000, 100)} |
2000.0 |
0.3334 |
0.3196 |
squeeze |
{'data': (1, 1024, 1024), 'axis': 0} |
3163356.25 |
0.4602 |
0.6294 |
squeeze |
{'data': (32, 1, 256, 256), 'axis': 1} |
3171745.0 |
1.2596 |
1.0216 |
stop_gradient |
{'data': (1024, 1024)} |
2097.1521 |
0.3934 |
--- |
stop_gradient |
{'data': (10000, 1)} |
20.0 |
0.0638 |
--- |
stop_gradient |
{'data': (10000, 100)} |
2000.0 |
0.4262 |
--- |
sum |
{'data': (1024, 1024), 'axis': 0} |
587554.75 |
3.785 |
5.28 |
sum |
{'data': (10000, 1), 'axis': 0} |
587554.75 |
0.9076 |
0.0968 |
sum |
{'data': (10000, 100), 'axis': 0} |
587555.1875 |
3.8986 |
4.9982 |
sum_axis |
{'data': (1024, 1024), 'axis': 0} |
587561.3125 |
3.8026 |
--- |
sum_axis |
{'data': (10000, 1), 'axis': 0} |
587559.25 |
0.9064 |
--- |
sum_axis |
{'data': (10000, 100), 'axis': 0} |
587559.625 |
3.8454 |
--- |
swapaxes |
{'data': (1024, 1024), 'dim1': 0, 'dim2': 1} |
611445.75 |
5.7426 |
--- |
swapaxes |
{'data': (10000, 1), 'dim1': 0, 'dim2': 1} |
609388.5625 |
0.1074 |
--- |
swapaxes |
{'data': (10000, 100), 'dim1': 0, 'dim2': 1} |
613368.5625 |
5.3814 |
--- |
take |
{'a': (1024, 1024), 'indices': (1, 1), 'axis': 0} |
617709.0625 |
0.0578 |
0.5558 |
tan |
{'data': (1024, 1024)} |
2097.1521 |
1.3864 |
0.3176 |
tan |
{'data': (10000, 1)} |
20.0 |
0.079 |
0.0662 |
tan |
{'data': (10000, 100)} |
2000.0 |
1.305 |
0.3012 |
tanh |
{'data': (1024, 1024)} |
2097.1521 |
1.52 |
0.3584 |
tanh |
{'data': (10000, 1)} |
20.0 |
0.0826 |
0.0596 |
tanh |
{'data': (10000, 100)} |
2000.0 |
1.4784 |
0.315 |
topk |
{'data': (1024, 1024), 'axis': 0, 'k': 1, 'dtype': 'float32'} |
600038.6875 |
125.058 |
--- |
topk |
{'data': (10000, 1), 'axis': 0, 'k': 1, 'dtype': 'int32'} |
600036.6875 |
1.3998 |
--- |
topk |
{'data': (10000, 100), 'axis': 0, 'k': 1, 'dtype': 'float32'} |
600037.0625 |
115.3784 |
--- |
transpose |
{'data': (1024, 1024), 'axes': [0, 1]} |
617660.0 |
0.407 |
--- |
transpose |
{'data': (10000, 1), 'axes': [0, 1]} |
615602.875 |
0.0782 |
--- |
transpose |
{'data': (10000, 100), 'axes': [0, 1]} |
619582.875 |
1.0166 |
--- |
trunc |
{'data': (1024, 1024)} |
2097.1521 |
0.513 |
--- |
trunc |
{'data': (10000, 1)} |
20.0 |
0.0678 |
--- |
trunc |
{'data': (10000, 100)} |
2000.0 |
0.5132 |
--- |
where |
{'condition': (1024,), 'x': (1024, 1024), 'y': (1024, 1024)} |
626097.6875 |
0.8682 |
1.5826 |
where |
{'condition': (10000,), 'x': (10000, 1), 'y': (10000, 1)} |
626137.6875 |
0.0738 |
0.059 |
where |
{'condition': (10000,), 'x': (10000, 100), 'y': (10000, 100)} |
630137.6875 |
0.8326 |
1.5166 |
zeros_like |
{'data': (1024, 1024)} |
2097.1521 |
0.1942 |
--- |
zeros_like |
{'data': (10000, 1)} |
20.0 |
0.0604 |
--- |
zeros_like |
{'data': (10000, 100)} |
2000.0 |
0.1856 |
--- |