# 原文链接

PyTorch 由 4 个主要的包（package）组成：

Torch：类似于 NumPy 的通用数组库，可以将张量类型转换为 GPU 张量（如 torch.cuda.FloatTensor），并在 GPU 上进行计算。

torch.nn：包含常用网络层和损失（成本）函数的神经网络库

1.导入工具

import torch  # arrays on GPU
import torch.autograd as autograd  # build a computational graph
import torch.nn as nn  # neural net library
import torch.nn.functional as F  # most non-linearities are here
import torch.optim as optim  # optimization package

2. torch 张量取代了 numpy 的 ndarray —— 在 GPU 支持下提供线性代数运算

# 2 matrices of size 2x3 stacked into a 3d tensor of size 2x2x3
d = [[[1., 2., 3.], [4., 5., 6.]], [[7., 8., 9.], [11., 12., 13.]]]
d = torch.Tensor(d)  # tensor from a python list
print("shape of the tensor:", d.size())
# -> torch.Size([2, 2, 3])

# the first index is the depth
z = d[0] + d[1]
print("adding up the two matrices of the 3d tensor:", z)
# -> [[ 8., 10., 12.],
#     [15., 17., 19.]]  (a 2x3 FloatTensor)

# a heavily used operation is reshaping of tensors using .view()
print(d.view(2, -1))  # -1 makes torch infer the second dim
# -> [[ 1.,  2.,  3.,  4.,  5.,  6.],
#     [ 7.,  8.,  9., 11., 12., 13.]]  (a 2x6 FloatTensor)

# d is a plain tensor, not a graph node; wrap it in a Variable to track gradients
# (re-declared here so this cell runs on its own; same tensor as the previous section)
d = torch.Tensor([[[1., 2., 3.], [4., 5., 6.]], [[7., 8., 9.], [11., 12., 13.]]])
x = autograd.Variable(d, requires_grad=True)
print("the node's data is the tensor:", x.data.size())
print("the node's gradient is empty at creation:", x.grad)  # None until backward() runs

# do operations on the node to build a computational graph
y = x + 1
z = x + y
s = z.sum()
print(s.grad_fn)  # .creator was renamed .grad_fn in modern PyTorch

# calculate gradients
s.backward()
print("the variable now has gradients:", x.grad)
# z = 2x + 1, so ds/dx = 2 for every element: x.grad is a 2x2x3 tensor of 2s

4.torch.nn包含各种NN层（张量行的线性映射）+（非线性）-->

# linear transformation of a 2x5 matrix into a 2x3 matrix
linear_map = nn.Linear(5, 3)
print("using randomly initialized params:", linear_map.parameters)

# data has 2 examples with 5 features and 3 targets
data = torch.randn(2, 5)  # training data
y = autograd.Variable(torch.randn(2, 3))  # target

# make a node
x = autograd.Variable(data, requires_grad=True)

# applying a transformation to a node creates a computational graph
a = linear_map(x)
z = F.relu(a)
o = F.softmax(z, dim=1)  # dim must be explicit in modern PyTorch
print("output of softmax as a probability distribution:", o.data.view(1, -1))

# loss function
loss_func = nn.MSELoss()  # instantiate loss function
L = loss_func(z, y)  # calculate MSE loss between output and target
print("Loss:", L)

__init__ 函数中必须始终调用父类的 __init__（super().__init__()），然后层的所有参数必须在这里定义为实例变量（self.x）

class Log_reg_classifier(nn.Module):
    """Logistic-regression classifier: a single linear layer followed by log-softmax.

    Maps an in_size-dimensional input to log-probabilities over out_size classes.
    """

    def __init__(self, in_size, out_size):
        super(Log_reg_classifier, self).__init__()  # always call parent's init
        self.linear = nn.Linear(in_size, out_size)  # layer parameters

    def forward(self, vect):
        # dim=-1 makes the class axis explicit (implicit dim is deprecated)
        return F.log_softmax(self.linear(vect), dim=-1)

5.torch.optim也可以做优化—>

# instantiate an optimizer with the model params + learning rate
# (uses linear_map and loss L from the torch.nn section above)
optimizer = optim.SGD(linear_map.parameters(), lr=1e-2)

# epoch loop: we run the following until convergence
optimizer.zero_grad()  # make gradients zero
L.backward(retain_graph=True)  # retain_variables was renamed retain_graph in modern PyTorch
optimizer.step()
print(L)

# define model
model = Log_reg_classifier(10, 2)

# define loss function
loss_func = nn.MSELoss()

# define optimizer
optimizer = optim.SGD(model.parameters(), lr=1e-1)

# the training data must be an iterable of (minibatch, target) pairs; the
# earlier 2x5 `data` tensor cannot be unpacked that way (and has 5 features,
# not the 10 the model expects), so build proper minibatches here
data = [(torch.randn(4, 10), torch.randn(4, 2)) for _ in range(3)]

# send data through the model in minibatches for 10 epochs
for epoch in range(10):
    for minibatch, target in data:
        model.zero_grad()  # pytorch accumulates gradients: zero them for each minibatch
        out = model(minibatch)  # forward pass (Variable wrapping is a no-op in modern PyTorch)
        L = loss_func(out, target)  # calculate loss
        L.backward()  # backward pass: calculate gradients
        optimizer.step()  # make an update step