In [13]:
import numpy as np
import matplotlib.pyplot as plt
In [14]:
# arbitrary input data drawn from a standard Gaussian distribution
D = np.random.randn(1000, 500)

# 10 hidden layers, each with 500 neurons
hidden = [500]*10

# setting the activation function to the hyperbolic tangent (tanh)
nonlinearities = ['tanh']*len(hidden)

# inline implementations of the two activation functions
act = {'relu': lambda x: np.maximum(0, x), 'tanh': lambda x: np.tanh(x)}
print(act['relu'](3))
print(act['tanh'](3))
print(nonlinearities)
3
0.9950547536867305
['tanh', 'tanh', 'tanh', 'tanh', 'tanh', 'tanh', 'tanh', 'tanh', 'tanh', 'tanh']
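The cells below push the same Gaussian batch through the 10-layer stack under several weight scales and track the per-layer activation statistics. The key intuition: tanh saturates at ±1, and its local gradient 1 - tanh(x)^2 vanishes there, so saturated units pass almost no gradient back. A minimal check of that local gradient, using only the numpy import from above:
In [ ]:
# d/dx tanh(x) = 1 - tanh(x)**2: near 1 around x = 0, essentially 0 once tanh saturates
for x in [0.0, 1.0, 3.0, 10.0]:
    print(f'x = {x:5.1f}: tanh = {np.tanh(x):.4f}, local gradient = {1 - np.tanh(x)**2:.2e}')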
In [15]:
#########################################################
# tanh with small initial weights (std 0.01)
#########################################################

Hs = {}
# simulate the forward pass through the hidden layers
for i in range(len(hidden)):
    # the first layer takes the raw input; later layers take the previous layer's activations
    x = D if i == 0 else Hs[i-1]
    # x.shape = (1000, 500)
    f_in  = x.shape[1]
    # hidden = [500, 500, ...]
    f_out = hidden[i]
    # small-scale weight initialization, 500 by 500 (fully connected)
    w = np.random.randn(f_in, f_out) * 0.01
    # pre-activation output of the layer
    h = np.dot(x, w)
    # apply the activation function
    h = act[nonlinearities[i]](h)
    # save the activations (not the weights) of each hidden layer
    Hs[i] = h

# mean and std of the input layer    
print(f'input layer had mean {np.mean(D)} and std {np.std(D)}')

# means and stds of each hidden layer
layer_means = [np.mean(H) for H in Hs.values()]
layer_stds  = [np.std(H) for H in Hs.values()]

# Plotting the data
plt.figure(figsize=(9, 3))
plt.subplot(121)
plt.plot(list(Hs.keys()), layer_means, 'ob-')
plt.title('layer mean')
plt.subplot(122)
plt.plot(list(Hs.keys()), layer_stds, 'or-')
plt.title('layer std')
plt.figure(figsize=(10, 2))
for i, H in Hs.items():
    plt.subplot(1, len(Hs), i+1)
    # ravel flattens the multi-dimensional activations to 1-D
    # plt.hist(one_dimensional_data, num_bins)
    plt.hist(H.ravel(), 30, range=(-1, 1))
    plt.ylim([0, 300000])
    plt.xlim([-1, 1])
    if i != 0:
        plt.yticks([])
        plt.xticks([])
input layer had mean -0.0009296053185340224 and std 1.0010669172302005
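With weights scaled by 0.01, every matrix multiply shrinks the signal, so the activations (and the gradients that flow back through them) collapse toward zero in the deeper layers. One way to quantify the collapse, reusing Hs from the cell above (the 1e-4 cutoff is an arbitrary choice):
In [ ]:
# fraction of activations that are numerically tiny in each layer
for i, H in Hs.items():
    frac_tiny = np.mean(np.abs(H) < 1e-4)
    print(f'layer {i}: std {np.std(H):.2e}, fraction |h| < 1e-4: {frac_tiny:.2f}')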
In [16]:
#########################################################
# tanh with large initial weights (std 1.0)
#########################################################

Hs = {}
for i in range(len(hidden)):
    x = D if i == 0 else Hs[i-1]
    f_in  = x.shape[1]
    f_out = hidden[i]
    # large initial weights: std 1.0
    w = np.random.randn(f_in, f_out) * 1.0
    h = np.dot(x, w)
    h = act[nonlinearities[i]](h)
    Hs[i] = h   
print(f"input layer had mean {np.mean(D)} and std {np.std(D)}")
layer_means = [np.mean(H) for H in Hs.values()]
layer_stds  = [np.std(H) for H in Hs.values()]

# Plotting the data
plt.figure(figsize=(9, 3))
plt.subplot(121)
plt.plot(list(Hs.keys()), layer_means, 'ob-')
plt.title('layer mean')
plt.subplot(122)
plt.plot(list(Hs.keys()), layer_stds, 'or-')
plt.title('layer std')
plt.figure(figsize=(10, 2))
for i, H in Hs.items():
    plt.subplot(1, len(Hs), i+1)
    # ravel flattens the multi-dimensional activations to 1-D
    # plt.hist(one_dimensional_data, num_bins)
    plt.hist(H.ravel(), 30, range=(-1, 1))
    plt.ylim([0, 300000])
    plt.xlim([-1.1, 1.1])
    if i != 0:
        plt.yticks([])
        plt.xticks([])
input layer had mean -0.0009296053185340224 and std 1.0010669172302005
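With std-1 weights the pre-activations are large, so nearly every tanh unit is pushed into saturation at ±1, where the local gradient is close to zero. A quick saturation count, reusing Hs from the cell above (the 0.99 cutoff is an arbitrary choice):
In [ ]:
# fraction of saturated tanh units per layer
for i, H in Hs.items():
    frac_sat = np.mean(np.abs(H) > 0.99)
    print(f'layer {i}: fraction |h| > 0.99: {frac_sat:.2f}')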
In [18]:
#########################################################
# tanh with Xavier initialization
#########################################################

Hs = {}
for i in range(len(hidden)):
    x = D if i == 0 else Hs[i-1]
    f_in  = x.shape[1]
    f_out = hidden[i]
    # Xavier initialization: std = sqrt(2 / (f_in + f_out))
    w = np.random.randn(f_in, f_out) / np.sqrt((f_in + f_out) / 2)
    h = np.dot(x,w)
    h = act[nonlinearities[i]](h)
    Hs[i] = h   
print(f"input layer had mean {np.mean(D)} and std {np.std(D)}")
layer_means = [np.mean(H) for H in Hs.values()]
layer_stds  = [np.std(H) for H in Hs.values()]

# Plotting the data
plt.figure(figsize=(9, 3))
plt.subplot(121)
plt.plot(list(Hs.keys()), layer_means, 'ob-')
plt.title('layer mean')
plt.subplot(122)
plt.plot(list(Hs.keys()), layer_stds, 'or-')
plt.title('layer std')
plt.figure(figsize=(10, 2))
for i, H in Hs.items():
    plt.subplot(1, len(Hs), i+1)
    # ravel flattens the multi-dimensional activations to 1-D
    # plt.hist(one_dimensional_data, num_bins)
    plt.hist(H.ravel(), 30, range=(-1, 1))
    plt.ylim([0, 60000])
    plt.xlim([-1, 1])
    if i != 0:
        plt.yticks([])
        plt.xticks([])
input layer had mean -0.0009296053185340224 and std 1.0010669172302005
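Why dividing by sqrt(fan) works: for h = w1*x1 + ... + wn*xn with independent zero-mean terms, Var(h) = n * Var(w) * Var(x), so choosing Var(w) = 1/n (or 2/(f_in + f_out) in the averaged Glorot form used above) keeps the variance roughly constant from layer to layer, and the tanh units stay in their near-linear regime. A minimal sketch of that variance argument, assuming only numpy:
In [ ]:
x0 = np.random.randn(1000, 500)
w0 = np.random.randn(500, 500) / np.sqrt(500)   # Var(w) = 1/f_in
print(f'std before: {np.std(x0):.3f}, std after x0 @ w0: {np.std(x0 @ w0):.3f}')  # both close to 1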
In [19]:
#########################################################
# relu with Xavier initialization
#########################################################

Hs = {}
# setting the activation function to ReLU
nonlinearities = ['relu']*len(hidden)

for i in range(len(hidden)):
    x = D if i == 0 else Hs[i-1]
    f_in  = x.shape[1]
    f_out = hidden[i]
    # Xavier initialization: std = sqrt(2 / (f_in + f_out))
    w = np.random.randn(f_in, f_out) / np.sqrt((f_in + f_out) / 2)
    h = np.dot(x,w)
    h = act[nonlinearities[i]](h)
    Hs[i] = h   
print(f"input layer had mean {np.mean(D)} and std {np.std(D)}")
layer_means = [np.mean(H) for H in Hs.values()]
layer_stds  = [np.std(H) for H in Hs.values()]

# Plotting the data
plt.figure(figsize=(9, 3))
plt.subplot(121)
plt.plot(list(Hs.keys()), layer_means, 'ob-')
plt.title('layer mean')
plt.subplot(122)
plt.plot(list(Hs.keys()), layer_stds, 'or-')
plt.title('layer std')
plt.figure(figsize=(10, 2))
for i, H in Hs.items():
    plt.subplot(1, len(Hs), i+1)
    # ravel flattens the multi-dimensional activations to 1-D
    # plt.hist(one_dimensional_data, num_bins)
    plt.hist(H.ravel(), 30, range=(-1, 1))
    plt.ylim([0, 60000])
    plt.xlim([-1, 1])
    if i != 0:
        plt.yticks([])
        plt.xticks([])
input layer had mean -0.0009296053185340224 and std 1.0010669172302005
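ReLU zeroes roughly half of each layer's pre-activations, so under Xavier scaling the activation std shrinks by about a factor of sqrt(1/2) per layer: the distributions collapse toward zero again, just more slowly than in the 0.01 case. The decay ratio can be read off directly, reusing Hs from the cell above:
In [ ]:
stds = [np.std(H) for H in Hs.values()]
for i in range(1, len(stds)):
    print(f'layer {i}: std ratio vs previous layer = {stds[i] / stds[i-1]:.3f}')  # roughly sqrt(1/2) = 0.707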
In [20]:
#########################################################
# relu with He initialization
#########################################################

Hs = {}
# setting the activation function to ReLU
nonlinearities = ['relu']*len(hidden)

for i in range(len(hidden)):
    x = D if i == 0 else Hs[i-1]
    f_in  = x.shape[1]
    f_out = hidden[i]
    # He initialization: std = sqrt(2 / f_in); with f_in == f_out here,
    # dividing by sqrt((f_in + f_out) / 4) is equivalent
    w = np.random.randn(f_in, f_out) / np.sqrt((f_in + f_out) / 4)
    h = np.dot(x,w)
    h = act[nonlinearities[i]](h)
    Hs[i] = h   
print(f"input layer had mean {np.mean(D)} and std {np.std(D)}")
layer_means = [np.mean(H) for H in Hs.values()]
layer_stds  = [np.std(H) for H in Hs.values()]

# Plotting the data
plt.figure(figsize=(9, 3))
plt.subplot(121)
plt.plot(list(Hs.keys()), layer_means, 'ob-')
plt.title('layer mean')
plt.subplot(122)
plt.plot(list(Hs.keys()), layer_stds, 'or-')
plt.title('layer std')
plt.figure(figsize=(10, 2))
for i, H in Hs.items():
    plt.subplot(1, len(Hs), i+1)
    # ravel flattens the multi-dimensional activations to 1-D
    # plt.hist(one_dimensional_data, num_bins)
    plt.hist(H.ravel(), 30, range=(-1, 1))
    plt.ylim([0, 60000])
    plt.xlim([-1, 1])
    if i != 0:
        plt.yticks([])
        plt.xticks([])
input layer had mean -0.0009296053185340224 and std 1.0010669172302005
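He initialization sets Var(w) = 2/f_in, which exactly compensates for the half of the signal that ReLU discards, so the activation statistics stay roughly constant through all 10 layers; this is the standard initialization for ReLU networks. A quick per-layer check, reusing Hs from the cell above:
In [ ]:
for i, H in Hs.items():
    print(f'layer {i}: std {np.std(H):.3f}, fraction of zeros (units off for this batch): {np.mean(H == 0):.2f}')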