Neural Networks and Deep Learning - 4 week Lab (Google ML Bootcamp 2022 / Coursera mission)
Skipping the data preprocessing and library setup, let's look only at the code that does the actual work.
The week 4 lab builds a model that classifies whether a photo shows a cat.
Let's implement this as a model with L layers.
First, let's look at the model function to get the overall flow.
def L_layer_model(X, Y, layers_dims, learning_rate=0.0075, num_iterations=3000, print_cost=False):
    """
    Arguments:
    X -- data, numpy array of shape (num_px * num_px * 3, number of examples)
    Y -- true "label" vector (containing 0 if cat, 1 if non-cat), of shape (1, number of examples)
    layers_dims -- list containing the input size and each layer size, of length (number of layers + 1).
    learning_rate -- learning rate of the gradient descent update rule
    num_iterations -- number of iterations of the optimization loop
    print_cost -- if True, it prints the cost every 100 steps

    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    """
    # For grading and checking results
    np.random.seed(1)
    costs = []
    # ---------------------------

    # Initialize the parameters
    parameters = initialize_parameters_deep(layers_dims)

    # Optimize the parameters with gradient descent
    for i in range(0, num_iterations):
        # 1. Forward propagation
        #    The last layer uses a sigmoid activation, the rest use ReLU
        AL, caches = L_model_forward(X, parameters)

        # 2. Compute cost
        cost = compute_cost(AL, Y)

        # 3. Backward propagation
        grads = L_model_backward(AL, Y, caches)

        # 4. Update parameters
        parameters = update_parameters(parameters, grads, learning_rate)

        # Progress output ---------------------------------------------------
        if print_cost and (i % 100 == 0 or i == num_iterations - 1):
            print("Cost after iteration {}: {}".format(i, np.squeeze(cost)))
        if i % 100 == 0 or i == num_iterations - 1:
            costs.append(cost)
        # --------------------------------------------------------------------

    return parameters, costs
This is gradient descent: it repeats four steps, forward propagation, cost computation, backward propagation, and parameter update. A sketch of the initialization and a training call follows.
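initialize_parameters_deep is provided by the assignment and is not shown in this post. A minimal sketch of an equivalent initialization and of how the model might be trained, assuming train_x and train_y are the already flattened and normalized data (these names and the layer sizes are only illustrative):

import numpy as np

def initialize_parameters_deep(layer_dims):
    # Sketch: small random weights scaled by the previous layer size, zero biases
    # (not the graded code, just one reasonable choice)
    parameters = {}
    L = len(layer_dims)
    for l in range(1, L):
        parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l - 1]) / np.sqrt(layer_dims[l - 1])
        parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))
    return parameters

# Illustrative training call: a 4-layer model on flattened 64x64x3 images
layers_dims = [12288, 20, 7, 5, 1]
# parameters, costs = L_layer_model(train_x, train_y, layers_dims,
#                                   num_iterations=2500, print_cost=True)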
Now let's look at the detailed implementation of each function.
Forward Propagation
def L_model_forward(X, parameters):
    """
    Arguments:
    X -- data, numpy array of shape (input size, number of examples)
    parameters -- W, b initialized by initialize_parameters_deep()

    Returns:
    AL -- the activation A[L] after the sigmoid
    caches -- list of caches containing:
              every cache of linear_activation_forward() (there are L of them, indexed from 0 to L-1)
    """
    caches = []
    A = X
    # Each layer stores W and b, so dividing by 2 gives the number of layers
    L = len(parameters) // 2

    # Layers 1 to L-1 do forward propagation with the ReLU activation
    for l in range(1, L):
        A_prev = A
        A, cache = linear_activation_forward(A_prev, parameters['W' + str(l)], parameters['b' + str(l)], activation="relu")
        caches.append(cache)

    # The last layer uses the sigmoid activation
    AL, cache = linear_activation_forward(A, parameters['W' + str(L)], parameters['b' + str(L)], activation="sigmoid")
    caches.append(cache)

    return AL, caches
def linear_activation_forward(A_prev, W, b, activation):
    """
    Computes the forward step for the given activation function.

    Arguments:
    A_prev -- activations from previous layer (or input data): (size of previous layer, number of examples)
    W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

    Returns:
    A -- the output of the activation function, also called the post-activation value
    cache -- a python tuple containing "linear_cache" and "activation_cache";
             stored for computing the backward pass efficiently
    """
    if activation == "sigmoid":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = sigmoid(Z)
    elif activation == "relu":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = relu(Z)

    cache = (linear_cache, activation_cache)
    return A, cache
def linear_forward(A, W, b):
    """
    Computes the linear part of forward propagation for a single layer.

    Arguments:
    A -- activations from previous layer (or input data): (size of previous layer, number of examples)
    W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)

    Returns:
    Z -- the input of the activation function, also called pre-activation parameter
    cache -- a python tuple containing "A", "W" and "b"; stored for computing the backward pass efficiently
    """
    Z = np.dot(W, A) + b
    cache = (A, W, b)
    return Z, cache
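The sigmoid and relu helpers used by linear_activation_forward are imported from the assignment's utility module and are not part of the graded code. A minimal sketch consistent with how they are called above (each returns the activation and keeps Z as the cache for the backward pass):

def sigmoid(Z):
    # Element-wise sigmoid; Z is cached for the backward pass
    A = 1 / (1 + np.exp(-Z))
    return A, Z

def relu(Z):
    # Element-wise ReLU: max(0, Z); Z is cached for the backward pass
    A = np.maximum(0, Z)
    return A, Z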
Compute Cost
Since this is binary classification, the cross-entropy cost below was used.
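In symbols, the cost implemented in compute_cost is

J = -\frac{1}{m} \sum_{i=1}^{m} \left[ y^{(i)} \log a^{[L](i)} + \left(1 - y^{(i)}\right) \log\left(1 - a^{[L](i)}\right) \right]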
def compute_cost(AL, Y):
    """
    Arguments:
    AL -- probability vector corresponding to your label predictions, shape (1, number of examples)
    Y -- true "label" vector (for example: containing 0 if non-cat, 1 if cat), shape (1, number of examples)

    Returns:
    cost -- cross-entropy cost
    """
    m = Y.shape[1]
    cost = -np.sum(Y * np.log(AL) + (1 - Y) * np.log(1 - AL)) / m

    # Convert to a plain scalar, e.g. [[17]] -> 17
    cost = np.squeeze(cost)
    return cost
Backward Propagation
def L_model_backward(AL, Y, caches):
    """
    Arguments:
    AL -- probability vector, output of the forward propagation (L_model_forward())
    Y -- true "label" vector (containing 0 if non-cat, 1 if cat)
    caches -- list of caches containing:
              every cache of linear_activation_forward() with "relu" (it's caches[l], for l in range(L-1) i.e l = 0...L-2)
              the cache of linear_activation_forward() with "sigmoid" (it's caches[L-1])

    Returns:
    grads -- A dictionary with the gradients
             grads["dA" + str(l)] = ...
             grads["dW" + str(l)] = ...
             grads["db" + str(l)] = ...
    """
    grads = {}
    L = len(caches)  # the number of layers
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)  # after this line, Y is the same shape as AL

    # Derivative of the loss with respect to AL;
    # it can be obtained by differentiating the loss above
    dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # The last layer used a sigmoid, so handle it separately
    current_cache = caches[L - 1]
    grads["dA" + str(L - 1)], grads["dW" + str(L)], grads["db" + str(L)] = \
        linear_activation_backward(dAL, current_cache, activation="sigmoid")

    # The remaining layers are propagated backward through ReLU
    for l in reversed(range(L - 1)):
        current_cache = caches[l]
        grads["dA" + str(l)], grads["dW" + str(l + 1)], grads["db" + str(l + 1)] = \
            linear_activation_backward(grads["dA" + str(l + 1)], current_cache, activation="relu")

    return grads
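The dAL line comes from differentiating the cross-entropy loss for a single example with respect to the prediction a = AL:

\mathcal{L}(a, y) = -\left[ y \log a + (1 - y) \log(1 - a) \right]
\quad\Rightarrow\quad
\frac{\partial \mathcal{L}}{\partial a} = -\left( \frac{y}{a} - \frac{1 - y}{1 - a} \right)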
def linear_activation_backward(dA, cache, activation):
    """
    Computes the backward step for the given activation function.

    Arguments:
    dA -- post-activation gradient for current layer l
    cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    linear_cache, activation_cache = cache

    # dZ = dA * g'(Z), so it depends on the activation function
    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
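relu_backward and sigmoid_backward are also assignment-provided helpers that are not shown in this post. A minimal sketch that matches how they are called here, using dZ = dA * g'(Z) and the cached Z:

def relu_backward(dA, cache):
    # g'(Z) is 1 where Z > 0 and 0 elsewhere
    Z = cache
    dZ = np.array(dA, copy=True)
    dZ[Z <= 0] = 0
    return dZ

def sigmoid_backward(dA, cache):
    # g'(Z) = s * (1 - s), where s = sigmoid(Z)
    Z = cache
    s = 1 / (1 + np.exp(-Z))
    dZ = dA * s * (1 - s)
    return dZ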
def linear_backward(dZ, cache):
    """
    Computes the linear part of backward propagation for a single layer.

    Arguments:
    dZ -- Gradient of the cost with respect to the linear output (of current layer l)
    cache -- tuple of values (A_prev, W, b) coming from the forward propagation in the current layer

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    A_prev, W, b = cache
    m = A_prev.shape[1]

    # These formulas were derived earlier in the course
    dW = np.dot(dZ, A_prev.T) / m
    db = np.sum(dZ, axis=1, keepdims=True) / m
    dA_prev = np.dot(W.T, dZ)

    return dA_prev, dW, db
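The three gradients in linear_backward correspond to the vectorized formulas:

dW^{[l]} = \frac{1}{m} \, dZ^{[l]} A^{[l-1]T}, \qquad
db^{[l]} = \frac{1}{m} \sum_{i=1}^{m} dZ^{[l](i)}, \qquad
dA^{[l-1]} = W^{[l]T} dZ^{[l]}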
Update parameters
def update_parameters(params, grads, learning_rate):
    """
    Applies the gradient descent update to the parameters.

    Arguments:
    params -- python dictionary containing your parameters
    grads -- python dictionary containing your gradients, output of L_model_backward

    Returns:
    parameters -- python dictionary containing your updated parameters
                  parameters["W" + str(l)] = ...
                  parameters["b" + str(l)] = ...
    """
    parameters = params.copy()
    L = len(parameters) // 2  # each layer has W and b

    for l in range(L):
        parameters["W" + str(l + 1)] = parameters["W" + str(l + 1)] - learning_rate * grads["dW" + str(l + 1)]
        parameters["b" + str(l + 1)] = parameters["b" + str(l + 1)] - learning_rate * grads["db" + str(l + 1)]

    return parameters
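The learned parameters can then be used for prediction by running one forward pass and thresholding the sigmoid output at 0.5. The assignment ships its own predict helper; the sketch below only illustrates the idea (predict, X_test, Y_test are illustrative names):

def predict(X, parameters):
    # Forward pass with the learned parameters, then threshold at 0.5
    AL, _ = L_model_forward(X, parameters)
    return (AL > 0.5).astype(int)

# Example: accuracy on a labeled set
# p = predict(X_test, parameters)
# print("Accuracy:", np.mean(p == Y_test))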