Operator: aten._log_softmax.default
cnt: 1, ((T([128, 1000], f16), 1, False), {})
Operator: aten._log_softmax_backward_data.default
cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16), 1, f16), {})
Operator: aten._softmax.default
cnt: 4, ((T([128, 4, 196, 196], f16), -1, False), {})
cnt: 1, ((T([128, 8, 49, 196], f16), -1, False), {})
cnt: 4, ((T([128, 8, 49, 49], f16), -1, False), {})
cnt: 1, ((T([128, 16, 16, 49], f16), -1, False), {})
cnt: 4, ((T([128, 12, 16, 16], f16), -1, False), {})
Operator: aten._softmax_backward_data.default
cnt: 4, ((T([128, 12, 16, 16], f16), T([128, 12, 16, 16], f16), -1, f16), {})
cnt: 1, ((T([128, 16, 16, 49], f16), T([128, 16, 16, 49], f16), -1, f16), {})
cnt: 4, ((T([128, 8, 49, 49], f16), T([128, 8, 49, 49], f16), -1, f16), {})
cnt: 1, ((T([128, 8, 49, 196], f16), T([128, 8, 49, 196], f16), -1, f16), {})
cnt: 4, ((T([128, 4, 196, 196], f16), T([128, 4, 196, 196], f16), -1, f16), {})
Operator: aten._unsafe_view.default
cnt: 8, ((T([128, 196, 256], f16), [128, 196, 256]), {})
cnt: 4, ((T([128, 4, 196, 16], f16), [512, 196, 16]), {})
cnt: 4, ((T([128, 4, 16, 196], f16), [512, 16, 196]), {})
cnt: 4, ((T([512, 196, 196], f16), [128, 4, 196, 196]), {})
cnt: 8, ((T([128, 4, 196, 32], f16), [512, 196, 32]), {})
cnt: 4, ((T([512, 196, 32], f16), [128, 4, 196, 32]), {})
cnt: 4, ((T([128, 196, 4, 32], f16), [128, 196, 128]), {})
cnt: 8, ((T([25088, 128], f16), [128, 196, 128]), {})
cnt: 1, ((T([128, 196, 640], f16), [128, 196, 640]), {})
cnt: 1, ((T([128, 7, 7, 128], f16), [128, 49, 128]), {})
cnt: 1, ((T([6272, 128], f16), [128, 49, 128]), {})
cnt: 5, ((T([128, 8, 49, 16], f16), [1024, 49, 16]), {})
cnt: 1, ((T([128, 8, 16, 196], f16), [1024, 16, 196]), {})
cnt: 1, ((T([1024, 49, 196], f16), [128, 8, 49, 196]), {})
cnt: 1, ((T([128, 8, 196, 64], f16), [1024, 196, 64]), {})
cnt: 1, ((T([1024, 49, 64], f16), [128, 8, 49, 64]), {})
cnt: 1, ((T([128, 49, 8, 64], f16), [128, 49, 512]), {})
cnt: 10, ((T([6272, 256], f16), [128, 49, 256]), {})
cnt: 9, ((T([6272, 512], f16), [128, 49, 512]), {})
cnt: 4, ((T([128, 8, 16, 49], f16), [1024, 16, 49]), {})
cnt: 4, ((T([1024, 49, 49], f16), [128, 8, 49, 49]), {})
cnt: 8, ((T([128, 8, 49, 32], f16), [1024, 49, 32]), {})
cnt: 4, ((T([1024, 49, 32], f16), [128, 8, 49, 32]), {})
cnt: 4, ((T([128, 49, 8, 32], f16), [128, 49, 256]), {})
cnt: 1, ((T([6272, 1280], f16), [128, 49, 1280]), {})
cnt: 1, ((T([128, 4, 4, 256], f16), [128, 16, 256]), {})
cnt: 1, ((T([2048, 256], f16), [128, 16, 256]), {})
cnt: 1, ((T([128, 16, 16, 16], f16), [2048, 16, 16]), {})
cnt: 1, ((T([128, 16, 16, 49], f16), [2048, 16, 49]), {})
cnt: 1, ((T([2048, 16, 49], f16), [128, 16, 16, 49]), {})
cnt: 1, ((T([128, 16, 49, 64], f16), [2048, 49, 64]), {})
cnt: 1, ((T([2048, 16, 64], f16), [128, 16, 16, 64]), {})
cnt: 1, ((T([128, 16, 16, 64], f16), [128, 16, 1024]), {})
cnt: 10, ((T([2048, 384], f16), [128, 16, 384]), {})
cnt: 9, ((T([2048, 768], f16), [128, 16, 768]), {})
cnt: 8, ((T([128, 12, 16, 16], f16), [1536, 16, 16]), {})
cnt: 4, ((T([1536, 16, 16], f16), [128, 12, 16, 16]), {})
cnt: 8, ((T([128, 12, 16, 32], f16), [1536, 16, 32]), {})
cnt: 4, ((T([1536, 16, 32], f16), [128, 12, 16, 32]), {})
cnt: 4, ((T([128, 16, 12, 32], f16), [128, 16, 384]), {})
cnt: 1, ((T([128, 16, 16, 64], f16), [2048, 16, 64]), {})
cnt: 1, ((T([128, 16, 16, 16], f16), [128, 16, 256]), {})
cnt: 1, ((T([128, 8, 49, 64], f16), [1024, 49, 64]), {})
cnt: 1, ((T([128, 49, 8, 16], f16), [128, 49, 128]), {})
Operator: aten.add.Tensor
cnt: 4, ((T([128, 4, 196, 196], f16), T([4, 196, 196], f16)), {})
cnt: 8, ((T([128, 196, 128], f16, stride=(25088, 1, 196)), T([128, 196, 128], f16)), {})
cnt: 1, ((T([128, 8, 49, 196], f16), T([8, 49, 196], f16)), {})
cnt: 19, ((T([128, 49, 256], f16), T([128, 49, 256], f16)), {})
cnt: 4, ((T([128, 8, 49, 49], f16), T([8, 49, 49], f16)), {})
cnt: 1, ((T([128, 16, 16, 49], f16), T([16, 16, 49], f16)), {})
cnt: 18, ((T([128, 16, 384], f16), T([128, 16, 384], f16)), {})
cnt: 4, ((T([128, 12, 16, 16], f16), T([12, 16, 16], f16)), {})
cnt: 1, ((T([128, 1000], f16), T([128, 1000], f16)), {})
cnt: 1, ((T([128, 384], f16), T([128, 384], f16)), {})
cnt: 9, ((T([128, 196, 128], f16), T([128, 196, 128], f16)), {})
Operator: aten.add_.Tensor
cnt: 64, ((T([], i64), 1), {})
Operator: aten.addmm.default
cnt: 2, ((T([1000], f16), T([128, 384], f16), T([384, 1000], f16, stride=(1, 384))), {})
Operator: aten.bmm.default
cnt: 8, ((T([128, 196, 128], f16, stride=(25088, 1, 196)), T([128, 128, 256], f16, stride=(0, 1, 128))), {})
cnt: 4, ((T([512, 196, 16], f16), T([512, 16, 196], f16)), {})
cnt: 4, ((T([512, 196, 196], f16), T([512, 196, 32], f16)), {})
cnt: 1, ((T([128, 196, 128], f16, stride=(25088, 1, 196)), T([128, 128, 640], f16, stride=(0, 1, 128))), {})
cnt: 1, ((T([1024, 49, 16], f16), T([1024, 16, 196], f16)), {})
cnt: 1, ((T([1024, 49, 196], f16), T([1024, 196, 64], f16)), {})
cnt: 4, ((T([1024, 49, 16], f16), T([1024, 16, 49], f16)), {})
cnt: 4, ((T([1024, 49, 49], f16), T([1024, 49, 32], f16)), {})
cnt: 1, ((T([2048, 16, 16], f16), T([2048, 16, 49], f16)), {})
cnt: 1, ((T([2048, 16, 49], f16), T([2048, 49, 64], f16)), {})
cnt: 4, ((T([1536, 16, 16], f16), T([1536, 16, 16], f16)), {})
cnt: 4, ((T([1536, 16, 16], f16), T([1536, 16, 32], f16)), {})
cnt: 4, ((T([1536, 16, 16], f16, stride=(256, 1, 16)), T([1536, 16, 32], f16)), {})
cnt: 4, ((T([1536, 16, 32], f16), T([1536, 32, 16], f16, stride=(512, 1, 32))), {})
cnt: 4, ((T([1536, 16, 16], f16, stride=(256, 1, 16)), T([1536, 16, 16], f16)), {})
cnt: 4, ((T([1536, 16, 16], f16), T([1536, 16, 16], f16, stride=(256, 1, 16))), {})
cnt: 1, ((T([2048, 49, 16], f16, stride=(784, 1, 49)), T([2048, 16, 64], f16)), {})
cnt: 1, ((T([2048, 16, 64], f16), T([2048, 64, 49], f16, stride=(3136, 1, 64))), {})
cnt: 1, ((T([2048, 16, 16], f16, stride=(256, 1, 16)), T([2048, 16, 49], f16)), {})
cnt: 1, ((T([2048, 16, 49], f16), T([2048, 49, 16], f16, stride=(784, 1, 49))), {})
cnt: 4, ((T([1024, 49, 49], f16, stride=(2401, 1, 49)), T([1024, 49, 32], f16)), {})
cnt: 4, ((T([1024, 49, 32], f16), T([1024, 32, 49], f16, stride=(1568, 1, 32))), {})
cnt: 4, ((T([1024, 16, 49], f16, stride=(784, 1, 16)), T([1024, 49, 49], f16)), {})
cnt: 4, ((T([1024, 49, 49], f16), T([1024, 49, 16], f16, stride=(784, 1, 49))), {})
cnt: 1, ((T([1024, 196, 49], f16, stride=(9604, 1, 196)), T([1024, 49, 64], f16)), {})
cnt: 1, ((T([1024, 49, 64], f16), T([1024, 64, 196], f16, stride=(12544, 1, 64))), {})
cnt: 1, ((T([1024, 16, 49], f16, stride=(784, 1, 16)), T([1024, 49, 196], f16)), {})
cnt: 1, ((T([1024, 49, 196], f16), T([1024, 196, 16], f16, stride=(3136, 1, 196))), {})
cnt: 1, ((T([128, 128, 196], f16), T([128, 196, 640], f16)), {})
cnt: 1, ((T([128, 196, 640], f16), T([128, 640, 128], f16, stride=(0, 128, 1))), {})
cnt: 8, ((T([128, 128, 196], f16), T([128, 196, 256], f16)), {})
cnt: 8, ((T([128, 196, 256], f16), T([128, 256, 128], f16, stride=(0, 128, 1))), {})
cnt: 4, ((T([512, 196, 196], f16, stride=(38416, 1, 196)), T([512, 196, 32], f16)), {})
cnt: 4, ((T([512, 196, 32], f16), T([512, 32, 196], f16, stride=(6272, 1, 32))), {})
cnt: 4, ((T([512, 16, 196], f16, stride=(3136, 1, 16)), T([512, 196, 196], f16)), {})
cnt: 4, ((T([512, 196, 196], f16), T([512, 196, 16], f16, stride=(3136, 1, 196))), {})
Operator: aten.cat.default
cnt: 4, (([T([128, 16, 12, 16], f16, stride=(3072, 16, 256, 1)), T([128, 16, 12, 16], f16, stride=(3072, 1, 256, 16)), T([128, 16, 12, 32], f16, stride=(6144, 32, 512, 1))], 3), {})
cnt: 1, (([T([128, 49, 16, 16], f16, stride=(12544, 1, 784, 49)), T([128, 49, 16, 64], f16, stride=(50176, 64, 3136, 1))], 3), {})
cnt: 4, (([T([128, 49, 8, 16], f16, stride=(6272, 16, 784, 1)), T([128, 49, 8, 16], f16, stride=(6272, 1, 784, 49)), T([128, 49, 8, 32], f16, stride=(12544, 32, 1568, 1))], 3), {})
cnt: 1, (([T([128, 196, 8, 16], f16, stride=(25088, 1, 3136, 196)), T([128, 196, 8, 64], f16, stride=(100352, 64, 12544, 1))], 3), {})
cnt: 4, (([T([128, 196, 4, 16], f16, stride=(12544, 16, 3136, 1)), T([128, 196, 4, 16], f16, stride=(12544, 1, 3136, 196)), T([128, 196, 4, 32], f16, stride=(25088, 32, 6272, 1))], 3), {})
Operator: aten.clone.default
cnt: 1, ((T([128, 3, 224, 224], f16),), {})
Operator: aten.convolution.default
cnt: 1, ((T([128, 3, 224, 224], f16), T([16, 3, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {})
cnt: 1, ((T([128, 16, 112, 112], f16), T([32, 16, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {})
cnt: 1, ((T([128, 32, 56, 56], f16), T([64, 32, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {})
cnt: 1, ((T([128, 64, 28, 28], f16), T([128, 64, 3, 3], f16), None, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), {})
Operator: aten.convolution_backward.default
cnt: 1, ((T([128, 128, 14, 14], f16, stride=(25088, 1, 1792, 128)), T([128, 64, 28, 28], f16), T([128, 64, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {})
cnt: 1, ((T([128, 64, 28, 28], f16), T([128, 32, 56, 56], f16), T([64, 32, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {})
cnt: 1, ((T([128, 32, 56, 56], f16), T([128, 16, 112, 112], f16), T([32, 16, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]), {})
cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 3, 224, 224], f16), T([16, 3, 3, 3], f16), [0], [2, 2], [1, 1], [1, 1], False, [0, 0], 1, [False, True, False]), {})
Operator: aten.copy_.default
cnt: 1, ((T([128, 3, 224, 224], f16), T([128, 3, 224, 224], f16)), {})
cnt: 1, ((T([640, 128], f16), T([640, 128], f16, stride=(1, 640))), {})
cnt: 8, ((T([256, 128], f16), T([256, 128], f16, stride=(1, 256))), {})
Operator: aten.div.Scalar
cnt: 1, ((T([128, 16, 384], f16, stride=(384, 0, 1)), 16), {})
Operator: aten.div.Tensor
cnt: 2, ((T([128, 1000], f16), 2), {})
Operator: aten.hardswish.default
cnt: 1, ((T([128, 16, 112, 112], f16),), {})
cnt: 1, ((T([128, 32, 56, 56], f16),), {})
cnt: 1, ((T([128, 64, 28, 28], f16),), {})
cnt: 4, ((T([128, 196, 128], f16),), {})
cnt: 4, ((T([128, 196, 256], f16),), {})
cnt: 6, ((T([128, 49, 512], f16),), {})
cnt: 4, ((T([128, 49, 256], f16),), {})
cnt: 1, ((T([128, 16, 1024], f16),), {})
cnt: 5, ((T([128, 16, 768], f16),), {})
cnt: 4, ((T([128, 16, 384], f16),), {})
Operator: aten.hardswish_backward.default
cnt: 5, ((T([128, 16, 768], f16), T([128, 16, 768], f16)), {})
cnt: 4, ((T([128, 16, 384], f16), T([128, 16, 384], f16)), {})
cnt: 1, ((T([128, 16, 1024], f16), T([128, 16, 1024], f16)), {})
cnt: 6, ((T([128, 49, 512], f16), T([128, 49, 512], f16)), {})
cnt: 4, ((T([128, 49, 256], f16), T([128, 49, 256], f16)), {})
cnt: 4, ((T([128, 196, 256], f16), T([128, 196, 256], f16)), {})
cnt: 4, ((T([128, 196, 128], f16), T([128, 196, 128], f16)), {})
cnt: 1, ((T([128, 64, 28, 28], f16), T([128, 64, 28, 28], f16)), {})
cnt: 1, ((T([128, 32, 56, 56], f16), T([128, 32, 56, 56], f16)), {})
cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 16, 112, 112], f16)), {})
Operator: aten.index.Tensor
cnt: 4, ((T([4, 196], f16), [None, T([196, 196], i64)]), {})
cnt: 1, ((T([8, 196], f16), [None, T([49, 196], i64)]), {})
cnt: 4, ((T([8, 49], f16), [None, T([49, 49], i64)]), {})
cnt: 1, ((T([16, 49], f16), [None, T([16, 49], i64)]), {})
cnt: 4, ((T([12, 16], f16), [None, T([16, 16], i64)]), {})
Operator: aten.index_put.default
cnt: 4, ((T([12, 16], f16), [None, T([16, 16], i64)], T([12, 16, 16], f16), True), {})
cnt: 1, ((T([16, 49], f16), [None, T([16, 49], i64)], T([16, 16, 49], f16), True), {})
cnt: 4, ((T([8, 49], f16), [None, T([49, 49], i64)], T([8, 49, 49], f16), True), {})
cnt: 1, ((T([8, 196], f16), [None, T([49, 196], i64)], T([8, 49, 196], f16), True), {})
cnt: 4, ((T([4, 196], f16), [None, T([196, 196], i64)], T([4, 196, 196], f16), True), {})
Operator: aten.lift_fresh_copy.default
cnt: 1, ((T([128], i64),), {})
Operator: aten.mean.dim
cnt: 1, ((T([128, 16, 384], f16), [1]), {})
Operator: aten.mm.default
cnt: 4, ((T([25088, 128], f16), T([128, 128], f16, stride=(1, 128))), {})
cnt: 4, ((T([25088, 256], f16), T([256, 128], f16, stride=(1, 256))), {})
cnt: 1, ((T([6272, 128], f16), T([128, 128], f16, stride=(1, 128))), {})
cnt: 6, ((T([6272, 512], f16), T([512, 256], f16, stride=(1, 512))), {})
cnt: 9, ((T([6272, 256], f16), T([256, 512], f16, stride=(1, 256))), {})
cnt: 4, ((T([6272, 256], f16), T([256, 256], f16, stride=(1, 256))), {})
cnt: 1, ((T([6272, 256], f16), T([256, 1280], f16, stride=(1, 256))), {})
cnt: 1, ((T([2048, 256], f16), T([256, 256], f16, stride=(1, 256))), {})
cnt: 1, ((T([2048, 1024], f16), T([1024, 384], f16, stride=(1, 1024))), {})
cnt: 9, ((T([2048, 384], f16), T([384, 768], f16, stride=(1, 384))), {})
cnt: 5, ((T([2048, 768], f16), T([768, 384], f16, stride=(1, 768))), {})
cnt: 4, ((T([2048, 384], f16), T([384, 384], f16, stride=(1, 384))), {})
cnt: 2, ((T([128, 1000], f16), T([1000, 384], f16)), {})
cnt: 2, ((T([1000, 128], f16, stride=(1, 1000)), T([128, 384], f16)), {})
cnt: 5, ((T([384, 2048], f16, stride=(1, 384)), T([2048, 768], f16)), {})
cnt: 5, ((T([2048, 384], f16), T([384, 768], f16)), {})
cnt: 9, ((T([768, 2048], f16, stride=(1, 768)), T([2048, 384], f16)), {})
cnt: 9, ((T([2048, 768], f16), T([768, 384], f16)), {})
cnt: 4, ((T([384, 2048], f16, stride=(1, 384)), T([2048, 384], f16)), {})
cnt: 4, ((T([2048, 384], f16), T([384, 384], f16)), {})
cnt: 1, ((T([384, 2048], f16, stride=(1, 384)), T([2048, 1024], f16)), {})
cnt: 1, ((T([2048, 384], f16), T([384, 1024], f16)), {})
cnt: 1, ((T([256, 2048], f16, stride=(1, 256)), T([2048, 256], f16)), {})
cnt: 1, ((T([2048, 256], f16), T([256, 256], f16)), {})
cnt: 1, ((T([1280, 6272], f16, stride=(1, 1280)), T([6272, 256], f16)), {})
cnt: 1, ((T([6272, 1280], f16), T([1280, 256], f16)), {})
cnt: 6, ((T([256, 6272], f16, stride=(1, 256)), T([6272, 512], f16)), {})
cnt: 6, ((T([6272, 256], f16), T([256, 512], f16)), {})
cnt: 9, ((T([512, 6272], f16, stride=(1, 512)), T([6272, 256], f16)), {})
cnt: 9, ((T([6272, 512], f16), T([512, 256], f16)), {})
cnt: 4, ((T([256, 6272], f16, stride=(1, 256)), T([6272, 256], f16)), {})
cnt: 4, ((T([6272, 256], f16), T([256, 256], f16)), {})
cnt: 1, ((T([128, 6272], f16, stride=(1, 128)), T([6272, 128], f16)), {})
cnt: 1, ((T([6272, 128], f16), T([128, 128], f16)), {})
cnt: 4, ((T([128, 25088], f16, stride=(1, 128)), T([25088, 256], f16)), {})
cnt: 4, ((T([25088, 128], f16), T([128, 256], f16)), {})
cnt: 4, ((T([128, 25088], f16, stride=(1, 128)), T([25088, 128], f16)), {})
cnt: 4, ((T([25088, 128], f16), T([128, 128], f16)), {})
Operator: aten.mul.Tensor
cnt: 8, ((T([128, 4, 196, 196], f16), 0.25), {})
cnt: 2, ((T([128, 8, 49, 196], f16), 0.25), {})
cnt: 8, ((T([128, 8, 49, 49], f16), 0.25), {})
cnt: 2, ((T([128, 16, 16, 49], f16), 0.25), {})
cnt: 8, ((T([128, 12, 16, 16], f16), 0.25), {})
Operator: aten.native_batch_norm.default
cnt: 1, ((T([128, 16, 112, 112], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f16), True, 0.1, 1e-05), {})
cnt: 1, ((T([128, 32, 56, 56], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f16), True, 0.1, 1e-05), {})
cnt: 1, ((T([128, 64, 28, 28], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f16), True, 0.1, 1e-05), {})
cnt: 1, ((T([128, 128, 14, 14], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {})
cnt: 8, ((T([25088, 256], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {})
cnt: 8, ((T([25088, 128], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {})
cnt: 1, ((T([25088, 640], f16), T([640], f16), T([640], f16), T([640], f16), T([640], f16), True, 0.1, 1e-05), {})
cnt: 1, ((T([6272, 128], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f16), True, 0.1, 1e-05), {})
cnt: 10, ((T([6272, 256], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {})
cnt: 9, ((T([6272, 512], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f16), True, 0.1, 1e-05), {})
cnt: 1, ((T([6272, 1280], f16), T([1280], f16), T([1280], f16), T([1280], f16), T([1280], f16), True, 0.1, 1e-05), {})
cnt: 1, ((T([2048, 256], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f16), True, 0.1, 1e-05), {})
cnt: 10, ((T([2048, 384], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f16), True, 0.1, 1e-05), {})
cnt: 9, ((T([2048, 768], f16), T([768], f16), T([768], f16), T([768], f16), T([768], f16), True, 0.1, 1e-05), {})
cnt: 2, ((T([128, 384], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f16), True, 0.1, 1e-05), {})
Operator: aten.native_batch_norm_backward.default
cnt: 2, ((T([128, 384], f16), T([128, 384], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f32), T([384], f32), True, 1e-05, [True, True, True]), {})
cnt: 10, ((T([2048, 384], f16), T([2048, 384], f16), T([384], f16), T([384], f16), T([384], f16), T([384], f32), T([384], f32), True, 1e-05, [True, True, True]), {})
cnt: 9, ((T([2048, 768], f16), T([2048, 768], f16), T([768], f16), T([768], f16), T([768], f16), T([768], f32), T([768], f32), True, 1e-05, [True, True, True]), {})
cnt: 1, ((T([2048, 256], f16), T([2048, 256], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {})
cnt: 1, ((T([6272, 1280], f16), T([6272, 1280], f16), T([1280], f16), T([1280], f16), T([1280], f16), T([1280], f32), T([1280], f32), True, 1e-05, [True, True, True]), {})
cnt: 10, ((T([6272, 256], f16), T([6272, 256], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {})
cnt: 9, ((T([6272, 512], f16), T([6272, 512], f16), T([512], f16), T([512], f16), T([512], f16), T([512], f32), T([512], f32), True, 1e-05, [True, True, True]), {})
cnt: 1, ((T([6272, 128], f16), T([6272, 128], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {})
cnt: 1, ((T([25088, 640], f16), T([25088, 640], f16), T([640], f16), T([640], f16), T([640], f16), T([640], f32), T([640], f32), True, 1e-05, [True, True, True]), {})
cnt: 8, ((T([25088, 128], f16), T([25088, 128], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {})
cnt: 8, ((T([25088, 256], f16), T([25088, 256], f16), T([256], f16), T([256], f16), T([256], f16), T([256], f32), T([256], f32), True, 1e-05, [True, True, True]), {})
cnt: 1, ((T([128, 128, 14, 14], f16, stride=(25088, 1, 1792, 128)), T([128, 128, 14, 14], f16), T([128], f16), T([128], f16), T([128], f16), T([128], f32), T([128], f32), True, 1e-05, [True, True, True]), {})
cnt: 1, ((T([128, 64, 28, 28], f16), T([128, 64, 28, 28], f16), T([64], f16), T([64], f16), T([64], f16), T([64], f32), T([64], f32), True, 1e-05, [True, True, True]), {})
cnt: 1, ((T([128, 32, 56, 56], f16), T([128, 32, 56, 56], f16), T([32], f16), T([32], f16), T([32], f16), T([32], f32), T([32], f32), True, 1e-05, [True, True, True]), {})
cnt: 1, ((T([128, 16, 112, 112], f16), T([128, 16, 112, 112], f16), T([16], f16), T([16], f16), T([16], f16), T([16], f32), T([16], f32), True, 1e-05, [True, True, True]), {})
Operator: aten.new_empty_strided.default
cnt: 1, ((T([640, 128], f16, stride=(1, 640)), [640, 128], [128, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'})
cnt: 8, ((T([256, 128], f16, stride=(1, 256)), [256, 128], [128, 1]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'})
Operator: aten.new_zeros.default
cnt: 4, ((T([12, 16, 16], f16), [12, 16]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'})
cnt: 1, ((T([16, 16, 49], f16), [16, 49]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'})
cnt: 4, ((T([8, 49, 49], f16), [8, 49]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'})
cnt: 1, ((T([8, 49, 196], f16), [8, 196]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'})
cnt: 4, ((T([4, 196, 196], f16), [4, 196]), {'dtype': f16, 'layout': torch.strided, 'device': 'cuda'})
Operator: aten.nll_loss_backward.default
cnt: 1, ((T([], f16), T([128, 1000], f16), T([128], i64), None, 1, -100, T([], f16)), {})
Operator: aten.nll_loss_forward.default
cnt: 1, ((T([128, 1000], f16), T([128], i64), None, 1, -100), {})
Operator: aten.slice_backward.default
cnt: 4, ((T([12, 16], f16), [12, 16], 0, 0, 9223372036854775807, 1), {})
cnt: 1, ((T([16, 49], f16), [16, 49], 0, 0, 9223372036854775807, 1), {})
cnt: 1, ((T([128, 4, 4, 256], f16), [128, 4, 7, 256], 2, 0, 9223372036854775807, 2), {})
cnt: 1, ((T([128, 4, 7, 256], f16), [128, 7, 7, 256], 1, 0, 9223372036854775807, 2), {})
cnt: 1, ((T([128, 7, 7, 256], f16), [128, 7, 7, 256], 0, 0, 9223372036854775807, 1), {})
cnt: 4, ((T([8, 49], f16), [8, 49], 0, 0, 9223372036854775807, 1), {})
cnt: 1, ((T([8, 196], f16), [8, 196], 0, 0, 9223372036854775807, 1), {})
cnt: 1, ((T([128, 7, 7, 128], f16), [128, 7, 14, 128], 2, 0, 9223372036854775807, 2), {})
cnt: 1, ((T([128, 7, 14, 128], f16), [128, 14, 14, 128], 1, 0, 9223372036854775807, 2), {})
cnt: 1, ((T([128, 14, 14, 128], f16), [128, 14, 14, 128], 0, 0, 9223372036854775807, 1), {})
cnt: 4, ((T([4, 196], f16), [4, 196], 0, 0, 9223372036854775807, 1), {})
Operator: aten.split_with_sizes.default
cnt: 4, ((T([128, 196, 4, 64], f16), [16, 16, 32], 3), {})
cnt: 1, ((T([128, 196, 8, 80], f16), [16, 64], 3), {})
cnt: 4, ((T([128, 49, 8, 64], f16), [16, 16, 32], 3), {})
cnt: 1, ((T([128, 49, 16, 80], f16), [16, 64], 3), {})
cnt: 4, ((T([128, 16, 12, 64], f16), [16, 16, 32], 3), {})
Operator: aten.sum.SymInt
cnt: 2, ((T([128, 1000], f16), [0], True), {})
cnt: 4, ((T([128, 12, 16, 16], f16), [0], True), {})
cnt: 1, ((T([128, 16, 16, 49], f16), [0], True), {})
cnt: 4, ((T([128, 8, 49, 49], f16), [0], True), {})
cnt: 1, ((T([128, 8, 49, 196], f16), [0], True), {})
cnt: 1, ((T([128, 128, 640], f16), [0], True), {})
cnt: 8, ((T([128, 128, 256], f16), [0], True), {})
cnt: 4, ((T([128, 4, 196, 196], f16), [0], True), {})
