  • Neural Networks and Deep Learning - Week 4 Lab
    Google ML Bootcamp 2022/Coursera mission 2022. 7. 8. 17:24

    Skipping the data preprocessing and library plumbing, let's look only at the core code.

    The Week 4 lab builds a model that decides whether a photo is of a cat.

    (Example image: a cat.)

    Let's implement this as a model with L layers.

     

    First, let's look at the model to see the overall flow.

    import numpy as np
    # helpers such as initialize_parameters_deep, sigmoid and relu come from the assignment's utility files;
    # L_model_forward, compute_cost, L_model_backward and update_parameters are defined below

    def L_layer_model(X, Y, layers_dims, learning_rate = 0.0075, num_iterations = 3000, print_cost=False):
        """
        Arguments:
        X -- data, numpy array of shape (num_px * num_px * 3, number of examples)
        Y -- true "label" vector (containing 0 if cat, 1 if non-cat), of shape (1, number of examples)
        layers_dims -- list containing the input size and each layer size, of length (number of layers + 1).
        learning_rate -- learning rate of the gradient descent update rule
        num_iterations -- number of iterations of the optimization loop
        print_cost -- if True, it prints the cost every 100 steps
        
        Returns:
        parameters -- parameters learnt by the model. They can then be used to predict.
        """
        
        # for grading and checking results
        np.random.seed(1)
        costs = []
        #---------------------------
        
        # initialize the parameters
        parameters=initialize_parameters_deep(layers_dims)
        
        # optimize the parameters with gradient descent
        for i in range(0, num_iterations):
    
            # 1. Forward propagation
            # the last layer uses sigmoid as its activation; the others use ReLU
            AL, caches = L_model_forward(X,parameters)
            
            # 2. compute cost
            cost=compute_cost(AL,Y)
            
            # 3. Backward propagation.
            grads=L_model_backward(AL,Y,caches)
            
            # 4. Update parameters.
            parameters = update_parameters(parameters,grads,learning_rate)
                    
            # progress output ---------------------------------------------------
            if print_cost and i % 100 == 0 or i == num_iterations - 1:
                print("Cost after iteration {}: {}".format(i, np.squeeze(cost)))
            if i % 100 == 0 or i == num_iterations:
                costs.append(cost)
            #--------------------------------------------------------------------
        return parameters, costs

    Gradient descent repeats four steps: forward propagation, cost computation, backward propagation, and a parameter update.
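
    For context, this is roughly how the model would be driven; a minimal usage sketch, assuming variables train_x and train_y that hold the flattened 64x64x3 images and their labels (the exact hidden-layer sizes are illustrative):

    # train_x: (12288, m) flattened images, train_y: (1, m) labels -- assumed to exist
    layers_dims = [12288, 20, 7, 5, 1]   # input, three hidden layers, one sigmoid output unit
    parameters, costs = L_layer_model(train_x, train_y, layers_dims,
                                      learning_rate=0.0075, num_iterations=2500, print_cost=True)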

    Now let's look at how each function is implemented.
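
    One helper the model calls but that isn't reproduced in this post is initialize_parameters_deep. A minimal sketch of what it presumably looks like, assuming small random weights scaled by 1/sqrt(previous layer size) and zero biases (the scaling factor is an assumption of this sketch; a plain 0.01 factor also works for shallow networks):

    import numpy as np

    def initialize_parameters_deep(layer_dims):
        """Initialize W and b for every layer; layer_dims is e.g. [12288, 20, 7, 5, 1]."""
        parameters = {}
        L = len(layer_dims)
        for l in range(1, L):
            # small random weights; the 1/sqrt(n_prev) scaling is an assumption
            parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1]) / np.sqrt(layer_dims[l-1])
            parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))
        return parameters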

    Forward Propagation

    def L_model_forward(X, parameters):
        """
        Arguments:
        X -- data, numpy array of shape (input size, number of examples)
        parameters -- W and b initialized by initialize_parameters_deep()
        
        Returns:
        AL -- last post-activation value A[L], the output of the sigmoid
        caches -- list of caches containing:
                    every cache of linear_activation_forward() (there are L of them, indexed from 0 to L-1)
        """
    
        caches = []
        A = X
        # each layer contributes a W and a b, so dividing by 2 gives the number of layers
        L = len(parameters) // 2
        
        # layers 1 to L-1 do forward propagation with ReLU
        for l in range(1, L):
            A_prev = A 
            A,cache=linear_activation_forward(A_prev,parameters['W'+str(l)],parameters['b'+str(l)],activation="relu")
            caches.append(cache)
            
        # the last layer uses sigmoid as its activation
        AL,cache=linear_activation_forward(A,parameters['W'+str(L)],parameters['b'+str(L)],activation="sigmoid")
        caches.append(cache)
              
        return AL, caches

     

    def linear_activation_forward(A_prev, W, b, activation):
        """
        Computes the forward step for the given activation function.
        Arguments:
        A_prev -- activations from previous layer (or input data): (size of previous layer, number of examples)
        W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
        b -- bias vector, numpy array of shape (size of the current layer, 1)
        activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"
    
        Returns:
        A -- the output of the activation function, also called the post-activation value 
        cache -- a python tuple containing "linear_cache" and "activation_cache";
                 stored for computing the backward pass efficiently
        """
        
        if activation == "sigmoid":
            Z, linear_cache = linear_forward(A_prev,W,b)
            A,activation_cache = sigmoid(Z)
        elif activation == "relu":
            Z, linear_cache = linear_forward(A_prev,W,b)
            A,activation_cache = relu(Z)
        
        cache = (linear_cache, activation_cache)
    
        return A, cache
    def linear_forward(A, W, b):
        """
        Computes the linear part of forward propagation for a single layer.
        Arguments:
        A -- activations from previous layer (or input data): (size of previous layer, number of examples)
        W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
        b -- bias vector, numpy array of shape (size of the current layer, 1)
    
        Returns:
        Z -- the input of the activation function, also called pre-activation parameter 
        cache -- a python tuple containing "A", "W" and "b" ; stored for computing the backward pass efficiently
        """
        Z=np.dot(W,A)+b
        cache = (A, W, b)
        
        return Z, cache
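
    linear_activation_forward relies on sigmoid and relu, which come from the assignment's helper file and are not shown here. A minimal sketch of them, assuming each returns the activation together with Z as the activation cache:

    import numpy as np

    def sigmoid(Z):
        # activation plus Z kept as the cache for the backward pass
        A = 1 / (1 + np.exp(-Z))
        return A, Z

    def relu(Z):
        A = np.maximum(0, Z)
        return A, Z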

    Compute Cost

    Since this is binary classification, the cross-entropy cost below is used:

    J = -(1/m) * Σ [ y·log(AL) + (1 - y)·log(1 - AL) ]

    def compute_cost(AL, Y):
        """
        Arguments:
        AL -- probability vector corresponding to your label predictions, shape (1, number of examples)
        Y -- true "label" vector (for example: containing 0 if non-cat, 1 if cat), shape (1, number of examples)
    
        Returns:
        cost -- cross-entropy cost
        """
        
        m = Y.shape[1]
    
        cost=-np.sum(Y*np.log(AL)+(1-Y)*np.log(1-AL))/m    
        # convert the numpy array to a plain scalar
        # e.g. [[17]] -> 17
        cost = np.squeeze(cost)
        
        return cost
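
    As a quick numeric check (made-up numbers): predictions of 0.8, 0.9 and 0.4 for labels 1, 1 and 0 give a cost of roughly 0.28.

    import numpy as np

    AL = np.array([[0.8, 0.9, 0.4]])  # predicted probabilities (made up)
    Y = np.array([[1, 1, 0]])         # true labels
    print(compute_cost(AL, Y))        # ~0.2798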

    Backward Propagation

    def L_model_backward(AL, Y, caches):
        """
        Arguments:
        AL -- probability vector, output of the forward propagation (L_model_forward())
        Y -- true "label" vector (containing 0 if non-cat, 1 if cat)
        caches -- list of caches containing:
                    every cache of linear_activation_forward() with "relu" (it's caches[l], for l in range(L-1) i.e l = 0...L-2)
                    the cache of linear_activation_forward() with "sigmoid" (it's caches[L-1])
        
        Returns:
        grads -- A dictionary with the gradients
                 grads["dA" + str(l)] = ... 
                 grads["dW" + str(l)] = ...
                 grads["db" + str(l)] = ... 
        """
        grads = {}
        L = len(caches) # the number of layers
        m = AL.shape[1]
        Y = Y.reshape(AL.shape) # after this line, Y is the same shape as AL
        
        # derivative of the cost with respect to AL
        # obtained by differentiating the cross-entropy loss above
        dAL=-(np.divide(Y,AL)-np.divide(1-Y,1-AL))
        
        # the last layer used sigmoid, so handle it separately
        current_cache=caches[L-1]
        grads["dA"+str(L-1)],grads["dW"+str(L)],grads["db"+str(L)]\=
        	linear_activation_backward(dAL,current_cache,activation="sigmoid")
        
        # the remaining layers are backpropagated through ReLU
        for l in reversed(range(L-1)):
            current_cache=caches[l]
            grads["dA"+str(l)],grads["dW"+str(l+1)],grads["db"+str(l+1)]\=
            	linear_activation_backward(grads["dA"+str(l+1)],current_cache,activation="relu")
            
        return grads
    def linear_activation_backward(dA, cache, activation):
        """
        Computes backward propagation for the given activation function.
        Arguments:
        dA -- post-activation gradient for current layer l 
        cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
        activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"
        
        Returns:
        dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
        dW -- Gradient of the cost with respect to W (current layer l), same shape as W
        db -- Gradient of the cost with respect to b (current layer l), same shape as b
        """
        linear_cache, activation_cache = cache
        
        # dZ = dA * g'(Z), so it depends on the activation function
        if activation == "relu":
            dZ=relu_backward(dA,activation_cache)
            dA_prev,dW,db=linear_backward(dZ,linear_cache)
            
        elif activation == "sigmoid":
            dZ=sigmoid_backward(dA,activation_cache)
            dA_prev,dW,db=linear_backward(dZ,linear_cache)
            
        return dA_prev, dW, db
    def linear_backward(dZ, cache):
        """
        Computes backward propagation for the linear part of a single layer.
        Arguments:
        dZ -- Gradient of the cost with respect to the linear output (of current layer l)
        cache -- tuple of values (A_prev, W, b) coming from the forward propagation in the current layer
    
        Returns:
        dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
        dW -- Gradient of the cost with respect to W (current layer l), same shape as W
        db -- Gradient of the cost with respect to b (current layer l), same shape as b
        """
        A_prev, W, b = cache
        m = A_prev.shape[1]
    
        # these formulas come from differentiating the linear step, as derived earlier in the course
        dW=np.dot(dZ,A_prev.T)/m
        db=np.sum(dZ,axis=1,keepdims=True)/m
        dA_prev=np.dot(W.T,dZ)
        
        return dA_prev, dW, db
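
    relu_backward and sigmoid_backward also come from the helper file. A minimal sketch, assuming the activation cache is simply Z (matching the forward sketch above):

    import numpy as np

    def relu_backward(dA, activation_cache):
        Z = activation_cache
        dZ = np.array(dA, copy=True)
        dZ[Z <= 0] = 0           # g'(Z) = 0 where Z <= 0, 1 elsewhere
        return dZ

    def sigmoid_backward(dA, activation_cache):
        Z = activation_cache
        s = 1 / (1 + np.exp(-Z))
        return dA * s * (1 - s)  # g'(Z) = s * (1 - s) for the sigmoid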

    Update parameters

    def update_parameters(params, grads, learning_rate):
        """
        Applies the gradient-descent update to the parameters.
        Arguments:
        params -- python dictionary containing your parameters 
        grads -- python dictionary containing your gradients, output of L_model_backward
        
        Returns:
        parameters -- python dictionary containing your updated parameters 
                      parameters["W" + str(l)] = ... 
                      parameters["b" + str(l)] = ...
        """
        parameters = params.copy()
        L = len(parameters) // 2  # each layer has a W and a b
    
        for l in range(L):
            parameters["W" + str(l+1)]=parameters["W" + str(l+1)]-learning_rate*grads["dW"+str(l+1)]
            parameters["b" + str(l+1)]=parameters["b" + str(l+1)]-learning_rate*grads["db"+str(l+1)]
       
        return parameters
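
    Once trained, the parameters can be used to classify new images. The assignment ships its own predict helper; the sketch below is just the core idea, thresholding the final sigmoid output at 0.5 (the simplified signature is an assumption):

    def predict(X, parameters):
        # forward pass through the trained network, then threshold at 0.5
        AL, _ = L_model_forward(X, parameters)
        return (AL > 0.5).astype(int)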

     

