Google ML Bootcamp 2022/Coursera mission
Neural Networks and Deep Learning - Week 4 Lab
피준
2022. 7. 8. 17:24
Setting aside the data preprocessing and library code, let's look only at the code we write directly.
The Week 4 lab builds a model that decides whether a photo shows a cat.
Let's implement it as a model with L layers.
First, let's look at the model function to get the overall flow.
import numpy as np

def L_layer_model(X, Y, layers_dims, learning_rate=0.0075, num_iterations=3000, print_cost=False):
    """
    Arguments:
    X -- data, numpy array of shape (num_px * num_px * 3, number of examples)
    Y -- true "label" vector (containing 1 if cat, 0 if non-cat), of shape (1, number of examples)
    layers_dims -- list containing the input size and each layer size, of length (number of layers + 1).
    learning_rate -- learning rate of the gradient descent update rule
    num_iterations -- number of iterations of the optimization loop
    print_cost -- if True, it prints the cost every 100 steps

    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    """
    # For grading and result checking
    np.random.seed(1)
    costs = []
    # ---------------------------

    # Initialize the parameters
    parameters = initialize_parameters_deep(layers_dims)

    # Optimize the parameters with gradient descent
    for i in range(0, num_iterations):
        # 1. Forward propagation
        #    The last layer uses sigmoid as its activation; all other layers use ReLU
        AL, caches = L_model_forward(X, parameters)

        # 2. Compute cost
        cost = compute_cost(AL, Y)

        # 3. Backward propagation
        grads = L_model_backward(AL, Y, caches)

        # 4. Update parameters
        parameters = update_parameters(parameters, grads, learning_rate)

        # Output for checking results ----------------------------------------
        if (print_cost and i % 100 == 0) or i == num_iterations - 1:
            print("Cost after iteration {}: {}".format(i, np.squeeze(cost)))
        if i % 100 == 0 or i == num_iterations - 1:
            costs.append(cost)
        # ---------------------------------------------------------------------

    return parameters, costs
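For context, here is a minimal usage sketch. The variable names train_x and train_y and the layer sizes are assumptions modeled on the assignment's cat dataset (12288 = 64 * 64 * 3 flattened pixels), not part of the graded code:

# Usage sketch -- assumes train_x (shape (12288, m)) and train_y (shape (1, m))
# have already been loaded, flattened, and normalized as in the assignment
layers_dims = [12288, 20, 7, 5, 1]  # input layer, three hidden layers, output layer
parameters, costs = L_layer_model(train_x, train_y, layers_dims,
                                  num_iterations=2500, print_cost=True)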
The model runs gradient descent, repeating four steps: forward propagation, cost computation, backward propagation, and a parameter update.
Let's look at the detailed implementation of each function.
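One helper that is not shown here is initialize_parameters_deep(), which is provided with the assignment. As a rough sketch of what it does (the exact weight scaling used by the course helper may differ):

import numpy as np

def initialize_parameters_deep(layer_dims):
    # One W and one b per layer; weights are scaled down so activations don't blow up
    parameters = {}
    L = len(layer_dims)
    for l in range(1, L):
        parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l - 1]) / np.sqrt(layer_dims[l - 1])
        parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))
    return parameters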
Forward Propagation
def L_model_forward(X, parameters):
    """
    Arguments:
    X -- data, numpy array of shape (input size, number of examples)
    parameters -- W and b initialized by initialize_parameters_deep()

    Returns:
    AL -- A[L], the post-activation value of the sigmoid output layer
    caches -- list of caches containing:
              every cache of linear_activation_forward() (there are L of them, indexed from 0 to L-1)
    """
    caches = []
    A = X
    # parameters holds a W and a b per layer, so dividing by 2 gives the number of layers
    L = len(parameters) // 2

    # Layers 1 to L-1 use ReLU for forward propagation
    for l in range(1, L):
        A_prev = A
        A, cache = linear_activation_forward(A_prev, parameters['W' + str(l)], parameters['b' + str(l)], activation="relu")
        caches.append(cache)

    # The last layer uses sigmoid as its activation
    AL, cache = linear_activation_forward(A, parameters['W' + str(L)], parameters['b' + str(L)], activation="sigmoid")
    caches.append(cache)

    return AL, caches
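In equation form, the forward pass above computes, for layers $l = 1, \dots, L$:

$$A^{[0]} = X,\qquad Z^{[l]} = W^{[l]} A^{[l-1]} + b^{[l]},\qquad A^{[l]} = g^{[l]}\big(Z^{[l]}\big),$$

where $g^{[l]}$ is ReLU for $l < L$ and sigmoid for $l = L$, so AL is $A^{[L]}$.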
def linear_activation_forward(A_prev, W, b, activation):
    """
    Computes one layer's forward step with the given activation function.

    Arguments:
    A_prev -- activations from previous layer (or input data): (size of previous layer, number of examples)
    W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

    Returns:
    A -- the output of the activation function, also called the post-activation value
    cache -- a python tuple containing "linear_cache" and "activation_cache";
             stored for computing the backward pass efficiently
    """
    if activation == "sigmoid":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = sigmoid(Z)
    elif activation == "relu":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = relu(Z)

    cache = (linear_cache, activation_cache)

    return A, cache
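The sigmoid() and relu() functions are helpers provided with the assignment. As a minimal sketch of what they typically compute, with Z returned as the activation cache:

import numpy as np

def sigmoid(Z):
    # Element-wise sigmoid; Z is kept as the activation cache for the backward pass
    A = 1 / (1 + np.exp(-Z))
    return A, Z

def relu(Z):
    # Element-wise ReLU
    A = np.maximum(0, Z)
    return A, Z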
def linear_forward(A, W, b):
    """
    Computes the linear part of forward propagation for a single layer.

    Arguments:
    A -- activations from previous layer (or input data): (size of previous layer, number of examples)
    W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)

    Returns:
    Z -- the input of the activation function, also called pre-activation parameter
    cache -- a python tuple containing "A", "W" and "b"; stored for computing the backward pass efficiently
    """
    Z = np.dot(W, A) + b
    cache = (A, W, b)

    return Z, cache
Compute Cost
Since this is binary classification, the cross-entropy cost below is used.
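Written out for $m$ training examples, this is the cost that compute_cost implements:

$$J = -\frac{1}{m}\sum_{i=1}^{m}\Big[\, y^{(i)}\log a^{[L](i)} + \big(1 - y^{(i)}\big)\log\big(1 - a^{[L](i)}\big) \Big]$$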
def compute_cost(AL, Y):
    """
    Arguments:
    AL -- probability vector corresponding to your label predictions, shape (1, number of examples)
    Y -- true "label" vector (for example: containing 0 if non-cat, 1 if cat), shape (1, number of examples)

    Returns:
    cost -- cross-entropy cost
    """
    m = Y.shape[1]
    cost = -np.sum(Y * np.log(AL) + (1 - Y) * np.log(1 - AL)) / m

    # Convert to a plain numpy scalar, e.g. [[17]] -> 17
    cost = np.squeeze(cost)

    return cost
Backward Propagation
def L_model_backward(AL, Y, caches):
    """
    Arguments:
    AL -- probability vector, output of the forward propagation (L_model_forward())
    Y -- true "label" vector (containing 0 if non-cat, 1 if cat)
    caches -- list of caches containing:
              every cache of linear_activation_forward() with "relu" (it's caches[l], for l in range(L-1) i.e. l = 0...L-2)
              the cache of linear_activation_forward() with "sigmoid" (it's caches[L-1])

    Returns:
    grads -- A dictionary with the gradients
             grads["dA" + str(l)] = ...
             grads["dW" + str(l)] = ...
             grads["db" + str(l)] = ...
    """
    grads = {}
    L = len(caches)  # the number of layers
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)  # after this line, Y is the same shape as AL

    # Derivative of the cost with respect to AL;
    # obtained by differentiating the cross-entropy cost above
    dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # The last layer uses sigmoid, so it is handled separately
    current_cache = caches[L - 1]
    grads["dA" + str(L - 1)], grads["dW" + str(L)], grads["db" + str(L)] = \
        linear_activation_backward(dAL, current_cache, activation="sigmoid")

    # The remaining layers propagate the gradient back through ReLU
    for l in reversed(range(L - 1)):
        current_cache = caches[l]
        grads["dA" + str(l)], grads["dW" + str(l + 1)], grads["db" + str(l + 1)] = \
            linear_activation_backward(grads["dA" + str(l + 1)], current_cache, activation="relu")

    return grads
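The starting gradient dAL comes from differentiating the per-example loss $\mathcal{L}(a^{[L]}, y) = -\big(y\log a^{[L]} + (1 - y)\log(1 - a^{[L]})\big)$ with respect to the output activation:

$$\frac{\partial \mathcal{L}}{\partial a^{[L]}} = -\left(\frac{y}{a^{[L]}} - \frac{1 - y}{1 - a^{[L]}}\right),$$

which is exactly the element-wise expression used above; the $\frac{1}{m}$ factor is applied later inside linear_backward.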
def linear_activation_backward(dA, cache, activation):
    """
    Computes one layer's backward step for the given activation function.

    Arguments:
    dA -- post-activation gradient for current layer l
    cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    linear_cache, activation_cache = cache

    # dZ = dA * g'(Z), so the computation depends on the activation function
    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db
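Here relu_backward() and sigmoid_backward() are also helpers provided with the assignment. A minimal sketch of the standard implementations they correspond to:

import numpy as np

def relu_backward(dA, activation_cache):
    # ReLU derivative is 1 where Z > 0 and 0 otherwise
    Z = activation_cache
    dZ = np.array(dA, copy=True)
    dZ[Z <= 0] = 0
    return dZ

def sigmoid_backward(dA, activation_cache):
    # Sigmoid derivative is s(Z) * (1 - s(Z))
    Z = activation_cache
    s = 1 / (1 + np.exp(-Z))
    return dA * s * (1 - s)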
def linear_backward(dZ, cache):
    """
    Computes the linear part of backward propagation for a single layer.

    Arguments:
    dZ -- Gradient of the cost with respect to the linear output (of current layer l)
    cache -- tuple of values (A_prev, W, b) coming from the forward propagation in the current layer

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    A_prev, W, b = cache
    m = A_prev.shape[1]

    # These formulas follow from the derivatives worked out earlier in the course
    dW = np.dot(dZ, A_prev.T) / m
    db = np.sum(dZ, axis=1, keepdims=True) / m
    dA_prev = np.dot(W.T, dZ)

    return dA_prev, dW, db
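In equation form, for layer $l$ with $m$ examples, linear_backward computes:

$$dW^{[l]} = \frac{1}{m}\, dZ^{[l]} A^{[l-1]\,T},\qquad db^{[l]} = \frac{1}{m}\sum_{i=1}^{m} dZ^{[l](i)},\qquad dA^{[l-1]} = W^{[l]\,T} dZ^{[l]}$$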
Update parameters
def update_parameters(params, grads, learning_rate):
    """
    Applies one gradient descent step to the parameters.

    Arguments:
    params -- python dictionary containing your parameters
    grads -- python dictionary containing your gradients, output of L_model_backward

    Returns:
    parameters -- python dictionary containing your updated parameters
                  parameters["W" + str(l)] = ...
                  parameters["b" + str(l)] = ...
    """
    parameters = params.copy()
    # Two entries (W and b) per layer, so divide by 2 to get the number of layers
    L = len(parameters) // 2

    for l in range(L):
        parameters["W" + str(l + 1)] = parameters["W" + str(l + 1)] - learning_rate * grads["dW" + str(l + 1)]
        parameters["b" + str(l + 1)] = parameters["b" + str(l + 1)] - learning_rate * grads["db" + str(l + 1)]

    return parameters
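In equation form, the update applied to every layer $l$ is:

$$W^{[l]} := W^{[l]} - \alpha\, dW^{[l]},\qquad b^{[l]} := b^{[l]} - \alpha\, db^{[l]},$$

where $\alpha$ is the learning rate.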