In [1]:
Copied!
import numpy as np
np.random.seed(42)
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from scipy.special import softmax
import numpy as np
np.random.seed(42)
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from scipy.special import softmax
In [2]:
Copied!
import os, sys
#sys.path.append("..")
sys.path.insert(1, "/kaggle/input/multitask")
import os, sys
#sys.path.append("..")
sys.path.insert(1, "/kaggle/input/multitask")
In [3]:
Copied!
# Problem dimensions for the synthetic experiments. These mirror the default
# arguments of the generator/simulation functions in this notebook; the visible
# cells pass their own values rather than reading these globals.
n_input = 5      # input covariates per sample
n_hidden = 4     # hidden units of the shared layer
n_tasks = 10     # number of regression tasks
n_clusters = 2   # task clusters (clustering experiment)
n_features = 3   # task-level features (task-dependent prior mean experiment)
n_samples = 200  # NOTE(review): not referenced by any visible cell — confirm it is still needed
Neural Network Model
In [4]:
Copied!
from MultiTask_Algo import MultiTaskNN
from MultiTask_Algo import MultiTaskNN
In [5]:
Copied!
def generate_synthetic_data(n_tasks=10, n_samples_train=50, n_samples_test=300,
                            n_input=5, n_hidden=4, activation='tanh'):
    """Simulate multi-task regression data from a shared one-hidden-layer network.

    All tasks share the input-to-hidden weights ``true_W``; each task draws its
    own hidden-to-output weights ``A`` from a common Gaussian prior
    ``N(true_m, 0.5 * I)``. Responses are ``h(X) @ A`` plus Gaussian noise with
    standard deviation 0.1.

    Parameters
    ----------
    n_tasks : int
        Number of tasks to generate.
    n_samples_train, n_samples_test : int
        Number of train / test samples per task.
    n_input : int
        Number of covariates per sample.
    n_hidden : int
        Number of hidden units (a bias unit is appended on top of these).
    activation : str
        ``'tanh'`` applies ``np.tanh`` to the hidden pre-activations; any other
        value leaves the hidden layer linear.

    Returns
    -------
    train_data, test_data : list of (X, y) tuples
        One ``(X, y)`` pair per task; ``X`` has shape ``(n_samples, n_input)``
        and ``y`` has shape ``(n_samples,)``.

    Notes
    -----
    Draws from the global NumPy random state, so results depend on the current
    ``np.random`` seed.
    """
    # Shared input-to-hidden weights; the extra column multiplies the bias input.
    true_W = np.random.randn(n_hidden, n_input + 1)
    true_Sigma = np.eye(n_hidden + 1) * 0.5
    # Prior mean of the task weights. The reference pattern has 5 entries
    # (n_hidden=4 plus bias); np.resize truncates/cycles it so that other
    # n_hidden values get a mean whose length matches true_Sigma instead of
    # making multivariate_normal fail on a shape mismatch.
    true_m = np.resize(np.array([0.1, 1.5, -1.5, 2, 4]), n_hidden + 1)

    train_data = []
    test_data = []
    for i in range(n_tasks):
        # Covariates
        X_train = np.random.randn(n_samples_train, n_input)
        X_test = np.random.randn(n_samples_test, n_input)

        # Standardise per task; the test set reuses the train statistics.
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        # Hidden activations (constant column appended as the bias input).
        X_train_bias = np.hstack([X_train, np.ones((n_samples_train, 1))])
        X_test_bias = np.hstack([X_test, np.ones((n_samples_test, 1))])
        if activation == 'tanh':
            h_train = np.tanh(np.dot(X_train_bias, true_W.T))
            h_test = np.tanh(np.dot(X_test_bias, true_W.T))
        else:
            h_train = np.dot(X_train_bias, true_W.T)
            h_test = np.dot(X_test_bias, true_W.T)

        # Bias unit of the hidden layer.
        h_train = np.hstack([h_train, np.ones((n_samples_train, 1))])
        h_test = np.hstack([h_test, np.ones((n_samples_test, 1))])

        # Task-specific output weights drawn from the shared prior.
        A = np.random.multivariate_normal(true_m, true_Sigma)

        # Noisy responses.
        y_train = np.dot(h_train, A) + np.random.randn(n_samples_train) * 0.1
        y_test = np.dot(h_test, A) + np.random.randn(n_samples_test) * 0.1

        train_data.append((X_train, y_train))
        test_data.append((X_test, y_test))

    return train_data, test_data
In [6]:
Copied!
def evaluate_model(model, test_data):
    """Return the test MSE of ``model`` averaged over tasks.

    Parameters
    ----------
    model : object
        Must provide ``predict(X, task_index)``.
    test_data : list of (X_test, y_test) tuples
        One entry per task; the list position is passed to ``predict`` as the
        task index.

    Returns
    -------
    dict
        ``{'test_mse': mean over tasks of the per-task mean squared error}``.

    Raises
    ------
    ZeroDivisionError
        If ``test_data`` is empty.
    """
    mse = 0
    for i, (X_test, y_test) in enumerate(test_data):
        y_pred = model.predict(X_test, i)
        mse += np.mean((y_test - y_pred) ** 2)
    # Unweighted average: every task counts equally regardless of its size.
    mse /= len(test_data)
    return {'test_mse': mse}
In [7]:
Copied!
def plot_training_results(model, X_list, y_list):
    """Scatter predictions against true training responses for the first tasks.

    Parameters
    ----------
    model : object
        Must provide ``predict(X, task_index)``.
    X_list, y_list : list of arrays
        Per-task training covariates and responses, in task order.

    Only the first slot of the 2x3 subplot grid is drawn. The grid and figure
    size are kept so that optional diagnostic panels (per-task train MSE,
    heatmaps of the shared and task-specific weights) can be added in the
    remaining slots.
    """
    plt.figure(figsize=(18, 12))

    # Panel 1: predictions vs. true values (first 3 tasks).
    plt.subplot(2, 3, 1)
    # Axis range of the reference diagonal spans the responses of all tasks.
    global_min = min(np.min(y) for y in y_list)
    global_max = max(np.max(y) for y in y_list)
    for i in range(min(3, len(X_list))):
        y_pred = model.predict(X_list[i], i)
        plt.scatter(y_list[i], y_pred, alpha=0.6, label=f'Task {i+1}')
    # Drawn once, after the loop, so the legend carries a single 'Ideal' entry.
    plt.plot([global_min, global_max], [global_min, global_max], 'k--', label='Ideal')
    plt.xlabel('True Values')
    plt.ylabel('Predictions')
    plt.title('Predictions vs True Values (Train)')
    plt.legend()

    plt.tight_layout()
    plt.show()
In [8]:
Copied!
def run_simulations(n_simulations=1, activation='tanh', with_plot=False,
                    n_tasks=10, n_input=5, n_hidden=4):
    """Generate data, fit ``MultiTaskNN`` and report the average test MSE.

    Each simulation draws a fresh synthetic data set, fits a new model on the
    training split and evaluates it on the test split. A simulation is skipped
    when ``model.fit`` returns ``None`` or when any step raises; the error is
    printed and the loop moves on (best effort).

    Parameters
    ----------
    n_simulations : int
        Number of independent repetitions.
    activation : str
        Passed to both the data generator and the model.
    with_plot : bool
        If true, plot the training fit after each successful simulation.
    n_tasks, n_input, n_hidden : int
        Problem dimensions, passed to the generator and the model.

    Returns
    -------
    dict or None
        ``{'avg_test_mse': ..., 'success_rate': ...}`` over the successful
        simulations, or ``None`` when none succeeded.
    """
    results = []
    for sim in range(n_simulations):
        print(f"\nSimulation {sim + 1}/{n_simulations}")
        try:
            # Generate data
            train_data, test_data = generate_synthetic_data(
                n_tasks=n_tasks, n_input=n_input,
                n_hidden=n_hidden, activation=activation
            )

            # Initialize and fit model
            model = MultiTaskNN(
                n_input=n_input,
                n_hidden=n_hidden,
                n_tasks=n_tasks,
                activation=activation
            )
            X_list = [X for X, _ in train_data]
            y_list = [y for _, y in train_data]

            # A None return from fit is treated as a failed fit.
            fit_result = model.fit(X_list, y_list, max_iter=100)
            if fit_result is None:
                print("Skipping simulation due to fitting error")
                continue

            # Evaluate; the result is recorded before plotting so that a
            # plotting failure does not discard a successful simulation.
            metrics = evaluate_model(model, test_data)
            results.append(metrics)
            print(f"Test MSE: {metrics['test_mse']:.4f}")

            if with_plot:
                plot_training_results(model, X_list, y_list)
        except Exception as e:
            print(f"Error in simulation {sim + 1}: {str(e)}")

    if not results:
        print("Warning: All simulations failed")
        return None

    # Aggregate results
    return {
        'avg_test_mse': np.mean([r['test_mse'] for r in results]),
        'success_rate': len(results) / n_simulations
    }
In [9]:
Copied!
# Baseline multi-task model, tanh hidden layer: one simulation with the
# training-fit plot. Run once per cell execution so the RNG stream (and hence
# the reported MSE) is not advanced by a repeated call.
tanh_results = run_simulations(n_simulations=1, activation='tanh', with_plot=True)
print(f"Tanh results: {tanh_results}")
Simulation 1/1 Test MSE: 9.5748
Tanh results: {'avg_test_mse': 9.574803376357206, 'success_rate': 1.0}
In [10]:
Copied!
# Baseline multi-task model, linear hidden layer: one simulation with the
# training-fit plot. Run once per cell execution so the RNG stream (and hence
# the reported MSE) is not advanced by a repeated call.
linear_results = run_simulations(n_simulations=1, activation='linear', with_plot=True)
print(f"linear results: {linear_results}")
Simulation 1/1 Test MSE: 104.7930
linear results: {'avg_test_mse': 104.79300970318968, 'success_rate': 1.0}
Task-dependent Prior Mean
In [11]:
Copied!
from MultiTask_Algo import MultiTaskNNDependentMean
from MultiTask_Algo import MultiTaskNNDependentMean
In [12]:
Copied!
def generate_synthetic_data(n_tasks=10, n_samples_train=50, n_samples_test=300,
                            n_input=5, n_hidden=4, n_features=3, activation='tanh'):
    """Simulate multi-task data whose task-weight prior mean depends on task features.

    All tasks share the input-to-hidden weights ``true_W``. Task ``i`` has a
    random feature vector ``f_i`` and draws its hidden-to-output weights from
    ``N(true_M @ f_i, 0.5 * I)``. Responses are ``h(X) @ A`` plus Gaussian noise
    with standard deviation 0.1.

    Parameters
    ----------
    n_tasks : int
        Number of tasks to generate.
    n_samples_train, n_samples_test : int
        Number of train / test samples per task.
    n_input : int
        Number of covariates per sample.
    n_hidden : int
        Number of hidden units (a bias unit is appended on top of these).
    n_features : int
        Length of each task's feature vector.
    activation : str
        ``'tanh'`` applies ``np.tanh`` to the hidden pre-activations; any other
        value leaves the hidden layer linear.

    Returns
    -------
    train_data, test_data : list of (X, y) tuples
        One ``(X, y)`` pair per task.
    task_features : ndarray of shape (n_tasks, n_features)
        The feature vector of every task.

    Notes
    -----
    Draws from the global NumPy random state. This definition replaces any
    earlier ``generate_synthetic_data`` in the notebook namespace and returns
    three values instead of two.
    """
    # True parameters
    true_W = np.random.randn(n_hidden, n_input + 1)
    true_Sigma = np.eye(n_hidden + 1) * 0.5
    # Maps task features to the prior mean of the task weights.
    true_M = np.random.randn(n_hidden + 1, n_features) * 0.5

    # Random features for each task
    task_features = np.random.randn(n_tasks, n_features)

    train_data = []
    test_data = []
    for i in range(n_tasks):
        # Covariates
        X_train = np.random.randn(n_samples_train, n_input)
        X_test = np.random.randn(n_samples_test, n_input)

        # Standardise per task; the test set reuses the train statistics.
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        # Hidden activations (constant column appended as the bias input).
        X_train_bias = np.hstack([X_train, np.ones((n_samples_train, 1))])
        X_test_bias = np.hstack([X_test, np.ones((n_samples_test, 1))])
        if activation == 'tanh':
            h_train = np.tanh(np.dot(X_train_bias, true_W.T))
            h_test = np.tanh(np.dot(X_test_bias, true_W.T))
        else:
            h_train = np.dot(X_train_bias, true_W.T)
            h_test = np.dot(X_test_bias, true_W.T)

        # Bias unit of the hidden layer.
        h_train = np.hstack([h_train, np.ones((n_samples_train, 1))])
        h_test = np.hstack([h_test, np.ones((n_samples_test, 1))])

        # Task-specific prior mean, then the task weights drawn around it.
        m_i = np.dot(true_M, task_features[i])
        A = np.random.multivariate_normal(m_i, true_Sigma)

        # Noisy responses.
        y_train = np.dot(h_train, A) + np.random.randn(n_samples_train) * 0.1
        y_test = np.dot(h_test, A) + np.random.randn(n_samples_test) * 0.1

        train_data.append((X_train, y_train))
        test_data.append((X_test, y_test))

    return train_data, test_data, task_features
In [13]:
Copied!
def evaluate_model(model, test_data, task_features):
    """Return the task-averaged test MSE together with the task features.

    Parameters
    ----------
    model : object
        Must provide ``predict(X, task_index)``.
    test_data : list of (X_test, y_test) tuples
        One entry per task; the list position is passed to ``predict`` as the
        task index.
    task_features : any
        Returned unchanged under the ``'task_features'`` key; not used in the
        computation.

    Returns
    -------
    dict
        ``{'test_mse': ..., 'task_features': task_features}``.

    Raises
    ------
    ZeroDivisionError
        If ``test_data`` is empty.
    """
    mse = 0
    for i, (X_test, y_test) in enumerate(test_data):
        y_pred = model.predict(X_test, i)
        mse += np.mean((y_test - y_pred) ** 2)
    # Unweighted average: every task counts equally regardless of its size.
    mse /= len(test_data)
    return {
        'test_mse': mse,
        'task_features': task_features
    }
In [14]:
Copied!
def plot_training_results(model, X_list, y_list, task_features):
    """Scatter predictions against true training responses for the first tasks.

    Parameters
    ----------
    model : object
        Must provide ``predict(X, task_index)``.
    X_list, y_list : list of arrays
        Per-task training covariates and responses, in task order.
    task_features : any
        Accepted for call compatibility; not used by the panel drawn here.

    Only the first slot of the 2x3 subplot grid is drawn. The grid and figure
    size are kept so that optional diagnostic panels (per-task train MSE,
    weight heatmaps, task features vs. task weights) can be added in the
    remaining slots.
    """
    plt.figure(figsize=(20, 12))

    # Panel 1: predictions vs. true values (first 3 tasks).
    plt.subplot(2, 3, 1)
    # Axis range of the reference diagonal spans the responses of all tasks.
    global_min = min(np.min(y) for y in y_list)
    global_max = max(np.max(y) for y in y_list)
    for i in range(min(3, len(X_list))):
        y_pred = model.predict(X_list[i], i)
        plt.scatter(y_list[i], y_pred, alpha=0.6, label=f'Task {i+1}')
    # Drawn once, after the loop, so the legend carries a single 'Ideal' entry.
    plt.plot([global_min, global_max], [global_min, global_max], 'k--', label='Ideal')
    plt.xlabel('True Values')
    plt.ylabel('Predictions')
    plt.title('Predictions vs True Values (Train)')
    plt.legend()

    plt.tight_layout()
    plt.show()
In [15]:
Copied!
def run_simulations(n_simulations=1, activation='tanh', with_plot=False,
                    n_tasks=10, n_input=5, n_hidden=4, n_features=3):
    """Generate data, fit ``MultiTaskNNDependentMean`` and report the average test MSE.

    Each simulation draws a fresh synthetic data set with task features, fits a
    new model on the training split and evaluates it on the test split. A
    simulation is skipped when ``model.fit`` returns ``None`` or when any step
    raises; the error is printed and the loop moves on (best effort). Plotting
    errors are reported separately and never discard a recorded result.

    Parameters
    ----------
    n_simulations : int
        Number of independent repetitions.
    activation : str
        Passed to both the data generator and the model.
    with_plot : bool
        If true, plot the training fit after each successful simulation.
    n_tasks, n_input, n_hidden, n_features : int
        Problem dimensions, passed to the generator and the model.

    Returns
    -------
    dict or None
        ``{'avg_test_mse': ..., 'success_rate': ...}`` over the successful
        simulations, or ``None`` when none succeeded.
    """
    results = []
    for sim in range(n_simulations):
        print(f"\nSimulation {sim+1}/{n_simulations}")
        try:
            # Generate data with task features
            train_data, test_data, task_features = generate_synthetic_data(
                n_tasks=n_tasks, n_input=n_input, n_hidden=n_hidden,
                n_features=n_features, activation=activation
            )

            # Initialize and fit model
            model = MultiTaskNNDependentMean(
                n_input=n_input,
                n_hidden=n_hidden,
                n_tasks=n_tasks,
                n_features=n_features,
                activation=activation
            )
            X_list = [X for X, _ in train_data]
            y_list = [y for _, y in train_data]

            # A None return from fit is treated as a failed fit.
            fit_result = model.fit(X_list, y_list, task_features, max_iter=100)
            if fit_result is None:
                print("Skipping simulation due to fitting error")
                continue

            # Evaluate
            metrics = evaluate_model(model, test_data, task_features)
            results.append(metrics)
            print(f"Test MSE: {metrics['test_mse']:.4f}")

            if with_plot:
                try:
                    plot_training_results(model, X_list, y_list, task_features)
                except Exception as e:
                    print(f"Plotting error: {str(e)}")
        except Exception as e:
            print(f"Error in simulation {sim+1}: {str(e)}")

    if not results:
        print("Warning: All simulations failed")
        return None

    # Aggregate results
    return {
        'avg_test_mse': np.mean([r['test_mse'] for r in results]),
        'success_rate': len(results) / n_simulations
    }
In [16]:
Copied!
# Run the task-dependent-mean simulation (tanh hidden layer), reporting any
# error that escapes run_simulations instead of aborting the notebook.
try:
    dependent_mean_results = run_simulations(
        n_simulations=1,
        activation='tanh',
        with_plot=True,
        n_tasks=10,
        n_input=5,
        n_hidden=4,
        n_features=3
    )
    print(f"Dependent Mean tanh results: {dependent_mean_results}")
except Exception as e:
    print(f"Simulation failed with error: {str(e)}")
Simulation 1/1 Test MSE: 4.4064
Dependent Mean tanh results: {'avg_test_mse': 4.406377505021862, 'success_rate': 1.0}
In [17]:
Copied!
# Run the task-dependent-mean simulation (linear hidden layer), reporting any
# error that escapes run_simulations instead of aborting the notebook.
# NOTE(review): this reuses the name `dependent_mean_results`, so the value
# from a preceding tanh run under the same name is overwritten.
try:
    dependent_mean_results = run_simulations(
        n_simulations=1,
        activation='linear',
        with_plot=True,
        n_tasks=10,
        n_input=5,
        n_hidden=4,
        n_features=3
    )
    print(f"Dependent Mean linear results: {dependent_mean_results}")
except Exception as e:
    print(f"Simulation failed with error: {str(e)}")
Simulation 1/1
/kaggle/input/multitask/MultiTask_Algo.py:228: RuntimeWarning: overflow encountered in exp sigma = np.exp(log_sigma) /usr/local/lib/python3.11/dist-packages/scipy/optimize/_numdiff.py:596: RuntimeWarning: invalid value encountered in subtract df = fun(x1) - f0
Test MSE: 28.4530
Dependent Mean linear results: {'avg_test_mse': 28.45303892302573, 'success_rate': 1.0}
Clustering of Tasks
In [18]:
Copied!
from MultiTask_Algo import MultiTaskNNClustering
from MultiTask_Algo import MultiTaskNNClustering
In [19]:
Copied!
def generate_clustered_data(n_tasks=10, n_samples_train=50, n_samples_test=300,
                            n_input=5, n_hidden=4, n_clusters=2, activation='tanh'):
    """Simulate multi-task data whose task weights fall into separated clusters.

    All tasks share the input-to-hidden weights ``true_W``. Task ``i`` belongs
    to cluster ``i % n_clusters`` and draws its hidden-to-output weights from
    ``N(center_k, 0.1 * I)``, where cluster ``k``'s centre is a random vector
    shifted by ``3 * k`` in every coordinate. Responses are ``h(X) @ A`` plus
    Gaussian noise with standard deviation 0.1.

    Parameters
    ----------
    n_tasks : int
        Number of tasks to generate.
    n_samples_train, n_samples_test : int
        Number of train / test samples per task.
    n_input : int
        Number of covariates per sample.
    n_hidden : int
        Number of hidden units (a bias unit is appended on top of these).
    n_clusters : int
        Number of task clusters.
    activation : str
        ``'tanh'`` applies ``np.tanh`` to the hidden pre-activations; any other
        value leaves the hidden layer linear.

    Returns
    -------
    train_data, test_data : list of (X, y) tuples
        One ``(X, y)`` pair per task.
    true_z : ndarray of shape (n_tasks, n_clusters)
        One-hot cluster membership of every task.

    Notes
    -----
    Draws from the global NumPy random state.
    """
    # Shared input-to-hidden weights; the extra column multiplies the bias input.
    true_W = np.random.randn(n_hidden, n_input + 1) * 0.5

    # Distinct cluster centres: each successive cluster is shifted by +3.
    cluster_centers = np.random.randn(n_clusters, n_hidden + 1) * 2
    for k in range(n_clusters):
        cluster_centers[k] += k * 3

    # Round-robin assignment of tasks to clusters, stored one-hot.
    true_z = np.zeros((n_tasks, n_clusters))
    for i in range(n_tasks):
        true_z[i, i % n_clusters] = 1

    # Within-cluster covariance of the task weights (same for every task).
    within_cluster_cov = np.eye(n_hidden + 1) * 0.1

    train_data = []
    test_data = []
    for i in range(n_tasks):
        # Covariates
        X_train = np.random.randn(n_samples_train, n_input)
        X_test = np.random.randn(n_samples_test, n_input)

        # Standardise per task; the test set reuses the train statistics.
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        # Hidden activations (constant column appended as the bias input).
        X_train_bias = np.hstack([X_train, np.ones((n_samples_train, 1))])
        X_test_bias = np.hstack([X_test, np.ones((n_samples_test, 1))])
        if activation == 'tanh':
            h_train = np.tanh(np.dot(X_train_bias, true_W.T))
            h_test = np.tanh(np.dot(X_test_bias, true_W.T))
        else:
            h_train = np.dot(X_train_bias, true_W.T)
            h_test = np.dot(X_test_bias, true_W.T)

        # Bias unit of the hidden layer.
        h_train = np.hstack([h_train, np.ones((n_samples_train, 1))])
        h_test = np.hstack([h_test, np.ones((n_samples_test, 1))])

        # Task weights drawn around the centre of the task's cluster.
        cluster_idx = np.argmax(true_z[i])
        A = np.random.multivariate_normal(cluster_centers[cluster_idx], within_cluster_cov)

        # Noisy responses.
        y_train = np.dot(h_train, A) + np.random.randn(n_samples_train) * 0.1
        y_test = np.dot(h_test, A) + np.random.randn(n_samples_test) * 0.1

        train_data.append((X_train, y_train))
        test_data.append((X_test, y_test))

    return train_data, test_data, true_z
In [20]:
Copied!
def evaluate_clustering_model(model, test_data):
    """Return the task-averaged test MSE plus the model's clustering summaries.

    Parameters
    ----------
    model : object
        Must provide ``predict(X, task_index)``, ``get_cluster_assignments()``
        and ``get_task_similarity()``.
    test_data : list of (X_test, y_test) tuples
        One entry per task; the list position is passed to ``predict`` as the
        task index.

    Returns
    -------
    dict
        ``'test_mse'`` (mean over tasks of the per-task MSE),
        ``'cluster_assignments'`` and ``'task_similarity'`` (whatever the two
        model getters return).

    Raises
    ------
    ZeroDivisionError
        If ``test_data`` is empty.
    """
    mse = 0
    for i, (X_test, y_test) in enumerate(test_data):
        y_pred = model.predict(X_test, i)
        mse += np.mean((y_test - y_pred) ** 2)
    # Unweighted average: every task counts equally regardless of its size.
    mse /= len(test_data)
    return {
        'test_mse': mse,
        'cluster_assignments': model.get_cluster_assignments(),
        'task_similarity': model.get_task_similarity()
    }
In [21]:
Copied!
def plot_clustering_results(model, X_list, y_list):
    """Scatter predictions against true responses for the first tasks.

    Parameters
    ----------
    model : object
        Must provide ``predict(X, task_index)``.
    X_list, y_list : list of arrays
        Per-task covariates and responses, in task order.

    Only the first slot of the 3x3 subplot grid is drawn. The grid and figure
    size are kept so that optional clustering diagnostics (cluster assignments,
    task similarity, cluster centres and probabilities, weight heatmaps,
    responsibilities) can be added in the remaining slots.
    """
    plt.figure(figsize=(20, 15))

    # Panel 1: predictions vs. true values (first 3 tasks).
    plt.subplot(3, 3, 1)
    # Axis range of the reference diagonal spans the responses of all tasks.
    global_min = min(np.min(y) for y in y_list)
    global_max = max(np.max(y) for y in y_list)
    for i in range(min(3, len(X_list))):
        y_pred = model.predict(X_list[i], i)
        plt.scatter(y_list[i], y_pred, alpha=0.6, label=f'Task {i + 1}')
    # Unlabelled reference diagonal (perfect prediction), drawn once.
    plt.plot([global_min, global_max], [global_min, global_max], 'k--')
    plt.xlabel('True Values')
    plt.ylabel('Predictions')
    plt.title('Predictions vs True Values')
    plt.legend()

    plt.tight_layout()
    plt.show()
In [22]:
Copied!
def run_clustering_simulation(n_simulations=1, activation='tanh', with_plot=False,
                              n_tasks=10, n_input=5, n_hidden=4, n_clusters=2):
    """Generate clustered data, fit a MultiTaskNNClustering model and score it, repeatedly.

    Each repetition draws a fresh data set, fits for at most 100 iterations,
    evaluates on the test split and optionally plots the training-set fit.
    A repetition that raises is reported on stdout and left out of the summary.

    Args:
        n_simulations: number of independent repetitions.
        activation: forwarded to both the data generator and the model.
        with_plot: when true, call plot_clustering_results after each fit.
        n_tasks, n_input, n_hidden, n_clusters: problem sizes, forwarded to
            both the data generator and the model.

    Returns:
        None if no repetition succeeded, otherwise a dict with
        'avg_test_mse' (mean test MSE over successful repetitions) and
        'cluster_consistency' (mean agreement between the predicted assignments
        and the round-robin labelling ``i % n_clusters``).
    """
    # The generator and the model take the same size/activation keywords.
    shared_kwargs = dict(n_tasks=n_tasks, n_input=n_input, n_hidden=n_hidden,
                         n_clusters=n_clusters, activation=activation)

    per_run_metrics = []
    for run_number in range(1, n_simulations + 1):
        print(f"\nSimulation {run_number}/{n_simulations}")
        try:
            train_data, test_data, true_z = generate_clustered_data(**shared_kwargs)
            model = MultiTaskNNClustering(**shared_kwargs)

            # Keep the training inputs/targets separately for the plot below.
            X_list = [inputs for inputs, _ in train_data]
            y_list = [targets for _, targets in train_data]

            model.fit(train_data, max_iter=100)

            metrics = evaluate_clustering_model(model, test_data)
            per_run_metrics.append(metrics)
            print(f"Test MSE: {metrics['test_mse']:.4f}")

            if with_plot:
                plot_clustering_results(model, X_list, y_list)
        except Exception as err:
            # Best effort: report the failure and move on to the next repetition.
            print(f"Error in simulation {run_number}: {str(err)}")

    if len(per_run_metrics) == 0:
        print("Warning: All simulations failed")
        return None

    mean_mse = np.mean([run['test_mse'] for run in per_run_metrics])
    reference_labels = np.array([task % n_clusters for task in range(n_tasks)])
    agreement = [np.mean(run['cluster_assignments'] == reference_labels)
                 for run in per_run_metrics]
    return {
        'avg_test_mse': mean_mse,
        'cluster_consistency': np.mean(agreement),
    }
def run_clustering_simulation(n_simulations=1, activation='tanh', with_plot=False,
                              n_tasks=10, n_input=5, n_hidden=4, n_clusters=2):
    """Generate clustered data, fit a MultiTaskNNClustering model and score it, repeatedly.

    Each repetition draws a fresh data set, fits for at most 100 iterations,
    evaluates on the test split and optionally plots the training-set fit.
    A repetition that raises is reported on stdout and left out of the summary.

    Args:
        n_simulations: number of independent repetitions.
        activation: forwarded to both the data generator and the model.
        with_plot: when true, call plot_clustering_results after each fit.
        n_tasks, n_input, n_hidden, n_clusters: problem sizes, forwarded to
            both the data generator and the model.

    Returns:
        None if no repetition succeeded, otherwise a dict with
        'avg_test_mse' (mean test MSE over successful repetitions) and
        'cluster_consistency' (mean agreement between the predicted assignments
        and the round-robin labelling ``i % n_clusters``).
    """
    # The generator and the model take the same size/activation keywords.
    shared_kwargs = dict(n_tasks=n_tasks, n_input=n_input, n_hidden=n_hidden,
                         n_clusters=n_clusters, activation=activation)

    per_run_metrics = []
    for run_number in range(1, n_simulations + 1):
        print(f"\nSimulation {run_number}/{n_simulations}")
        try:
            train_data, test_data, true_z = generate_clustered_data(**shared_kwargs)
            model = MultiTaskNNClustering(**shared_kwargs)

            # Keep the training inputs/targets separately for the plot below.
            X_list = [inputs for inputs, _ in train_data]
            y_list = [targets for _, targets in train_data]

            model.fit(train_data, max_iter=100)

            metrics = evaluate_clustering_model(model, test_data)
            per_run_metrics.append(metrics)
            print(f"Test MSE: {metrics['test_mse']:.4f}")

            if with_plot:
                plot_clustering_results(model, X_list, y_list)
        except Exception as err:
            # Best effort: report the failure and move on to the next repetition.
            print(f"Error in simulation {run_number}: {str(err)}")

    if len(per_run_metrics) == 0:
        print("Warning: All simulations failed")
        return None

    mean_mse = np.mean([run['test_mse'] for run in per_run_metrics])
    reference_labels = np.array([task % n_clusters for task in range(n_tasks)])
    agreement = [np.mean(run['cluster_assignments'] == reference_labels)
                 for run in per_run_metrics]
    return {
        'avg_test_mse': mean_mse,
        'cluster_consistency': np.mean(agreement),
    }
In [23]:
Copied!
# Single tanh run of the clustering model; with_plot=True also draws the fit
clustering_results = run_clustering_simulation(
    n_simulations=1, activation='tanh', with_plot=True,
    n_tasks=10, n_input=5, n_hidden=4, n_clusters=2,
)
# Single tanh run of the clustering model; with_plot=True also draws the fit
clustering_results = run_clustering_simulation(
    n_simulations=1, activation='tanh', with_plot=True,
    n_tasks=10, n_input=5, n_hidden=4, n_clusters=2,
)
Simulation 1/1
100%|██████████| 100/100 [02:29<00:00, 1.49s/it]
Test MSE: 0.0490
In [24]:
Copied!
# run_clustering_simulation returns None when every repetition failed,
# so check before indexing into the summary.
print("\nFinal Results tanh:")
if clustering_results is None:
    print("No successful simulations; nothing to report")
else:
    print(f"Average Test MSE: {clustering_results['avg_test_mse']:.4f}")
    #print(f"Cluster Consistency: {clustering_results['cluster_consistency']:.2%}")
# run_clustering_simulation returns None when every repetition failed,
# so check before indexing into the summary.
print("\nFinal Results tanh:")
if clustering_results is None:
    print("No successful simulations; nothing to report")
else:
    print(f"Average Test MSE: {clustering_results['avg_test_mse']:.4f}")
    #print(f"Cluster Consistency: {clustering_results['cluster_consistency']:.2%}")
Final Results tanh: Average Test MSE: 0.0490
In [25]:
Copied!
# Single linear-activation run of the clustering model; with_plot=True also draws the fit
clustering_results = run_clustering_simulation(
    n_simulations=1, activation='linear', with_plot=True,
    n_tasks=10, n_input=5, n_hidden=4, n_clusters=2,
)
# Single linear-activation run of the clustering model; with_plot=True also draws the fit
clustering_results = run_clustering_simulation(
    n_simulations=1, activation='linear', with_plot=True,
    n_tasks=10, n_input=5, n_hidden=4, n_clusters=2,
)
Simulation 1/1
95%|█████████▌| 95/100 [01:12<00:03, 1.30it/s]
Converged at iteration 95 Test MSE: 0.0114
In [26]:
Copied!
# run_clustering_simulation returns None when every repetition failed,
# so check before indexing into the summary.
print("\nFinal Results linear:")
if clustering_results is None:
    print("No successful simulations; nothing to report")
else:
    print(f"Average Test MSE: {clustering_results['avg_test_mse']:.4f}")
    #print(f"Cluster Consistency: {clustering_results['cluster_consistency']:.2%}")
# run_clustering_simulation returns None when every repetition failed,
# so check before indexing into the summary.
print("\nFinal Results linear:")
if clustering_results is None:
    print("No successful simulations; nothing to report")
else:
    print(f"Average Test MSE: {clustering_results['avg_test_mse']:.4f}")
    #print(f"Cluster Consistency: {clustering_results['cluster_consistency']:.2%}")
Final Results linear: Average Test MSE: 0.0114
Gating of Tasks¶
In [27]:
Copied!
from MultiTask_Algo import MultiTaskNNGating
from MultiTask_Algo import MultiTaskNNGating
In [28]:
Copied!
def generate_gating_data(n_tasks=10, n_samples_train=50, n_samples_test=300,
                         n_input=5, n_hidden=4, n_clusters=2, n_features=3, activation='tanh'):
    """Simulate multi-task regression data whose task clusters depend on task features.

    A shared hidden layer (weights true_W) maps standardised Gaussian inputs to
    hidden features. Each task is assigned to the cluster with the largest
    softmax gate value computed from its task features, draws its output
    weights around that cluster's centre (covariance 0.1 * I), and produces
    responses with additive Gaussian noise of standard deviation 0.1.

    All randomness comes from the global ``np.random`` state.

    Args:
        n_tasks: number of tasks.
        n_samples_train, n_samples_test: samples per task in each split.
        n_input, n_hidden: input and hidden-layer sizes (bias terms are added).
        n_clusters: number of weight clusters.
        n_features: length of each task-feature vector.
        activation: 'tanh' applies tanh to the hidden layer; any other value
            leaves it linear.

    Returns:
        (train_data, test_data, task_features, true_z), where the data lists
        hold one (X, y) pair per task, task_features is (n_tasks, n_features)
        and true_z is the (n_tasks, n_clusters) softmax gate output.
    """
    def hidden_features(X):
        # Shared layer: append a bias column, project, squash if requested,
        # then append the bias unit of the output layer.
        ones = np.ones((X.shape[0], 1))
        projected = np.hstack([X, ones]).dot(true_W.T)
        if activation == 'tanh':
            projected = np.tanh(projected)
        return np.hstack([projected, ones])

    # Shared hidden-layer weights.
    true_W = 0.5 * np.random.randn(n_hidden, n_input + 1)

    # Cluster centres, shifted by 3 per cluster index to keep them apart.
    cluster_centers = 2 * np.random.randn(n_clusters, n_hidden + 1)
    cluster_centers += 3 * np.arange(n_clusters)[:, np.newaxis]

    # Gate: task features -> softmax cluster responsibilities.
    true_U = 1.5 * np.random.randn(n_clusters, n_features)
    task_features = np.random.randn(n_tasks, n_features)
    true_z = softmax(task_features.dot(true_U.T), axis=1)

    weight_cov = np.eye(n_hidden + 1) * 0.1

    train_data, test_data = [], []
    for task in range(n_tasks):
        X_train = np.random.randn(n_samples_train, n_input)
        X_test = np.random.randn(n_samples_test, n_input)

        # Standardise with statistics from this task's training split only.
        scaler = StandardScaler().fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)

        h_train = hidden_features(X_train)
        h_test = hidden_features(X_test)

        # Hard assignment: the cluster with the largest gate value.
        center = cluster_centers[np.argmax(true_z[task])]
        A = np.random.multivariate_normal(center, weight_cov)

        y_train = h_train.dot(A) + 0.1 * np.random.randn(n_samples_train)
        y_test = h_test.dot(A) + 0.1 * np.random.randn(n_samples_test)

        train_data.append((X_train, y_train))
        test_data.append((X_test, y_test))

    return train_data, test_data, task_features, true_z
def generate_gating_data(n_tasks=10, n_samples_train=50, n_samples_test=300,
                         n_input=5, n_hidden=4, n_clusters=2, n_features=3, activation='tanh'):
    """Simulate multi-task regression data whose task clusters depend on task features.

    A shared hidden layer (weights true_W) maps standardised Gaussian inputs to
    hidden features. Each task is assigned to the cluster with the largest
    softmax gate value computed from its task features, draws its output
    weights around that cluster's centre (covariance 0.1 * I), and produces
    responses with additive Gaussian noise of standard deviation 0.1.

    All randomness comes from the global ``np.random`` state.

    Args:
        n_tasks: number of tasks.
        n_samples_train, n_samples_test: samples per task in each split.
        n_input, n_hidden: input and hidden-layer sizes (bias terms are added).
        n_clusters: number of weight clusters.
        n_features: length of each task-feature vector.
        activation: 'tanh' applies tanh to the hidden layer; any other value
            leaves it linear.

    Returns:
        (train_data, test_data, task_features, true_z), where the data lists
        hold one (X, y) pair per task, task_features is (n_tasks, n_features)
        and true_z is the (n_tasks, n_clusters) softmax gate output.
    """
    def hidden_features(X):
        # Shared layer: append a bias column, project, squash if requested,
        # then append the bias unit of the output layer.
        ones = np.ones((X.shape[0], 1))
        projected = np.hstack([X, ones]).dot(true_W.T)
        if activation == 'tanh':
            projected = np.tanh(projected)
        return np.hstack([projected, ones])

    # Shared hidden-layer weights.
    true_W = 0.5 * np.random.randn(n_hidden, n_input + 1)

    # Cluster centres, shifted by 3 per cluster index to keep them apart.
    cluster_centers = 2 * np.random.randn(n_clusters, n_hidden + 1)
    cluster_centers += 3 * np.arange(n_clusters)[:, np.newaxis]

    # Gate: task features -> softmax cluster responsibilities.
    true_U = 1.5 * np.random.randn(n_clusters, n_features)
    task_features = np.random.randn(n_tasks, n_features)
    true_z = softmax(task_features.dot(true_U.T), axis=1)

    weight_cov = np.eye(n_hidden + 1) * 0.1

    train_data, test_data = [], []
    for task in range(n_tasks):
        X_train = np.random.randn(n_samples_train, n_input)
        X_test = np.random.randn(n_samples_test, n_input)

        # Standardise with statistics from this task's training split only.
        scaler = StandardScaler().fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)

        h_train = hidden_features(X_train)
        h_test = hidden_features(X_test)

        # Hard assignment: the cluster with the largest gate value.
        center = cluster_centers[np.argmax(true_z[task])]
        A = np.random.multivariate_normal(center, weight_cov)

        y_train = h_train.dot(A) + 0.1 * np.random.randn(n_samples_train)
        y_test = h_test.dot(A) + 0.1 * np.random.randn(n_samples_test)

        train_data.append((X_train, y_train))
        test_data.append((X_test, y_test))

    return train_data, test_data, task_features, true_z
In [29]:
Copied!
def evaluate_gating_model(model, test_data, true_z):
    """Score a fitted gating model on held-out data and on cluster recovery.

    Cluster indices learned by the model are arbitrary up to a permutation, so
    the clustering accuracy is computed after the best one-to-one matching of
    predicted labels to true labels (a perfect clustering with swapped labels
    scores 1.0, not 0.0).

    Args:
        model: object providing ``predict(X, task_index)``,
            ``get_cluster_assignments()``, ``get_task_similarity()`` and a
            ``z`` attribute.
        test_data: sequence of (X_test, y_test) pairs, one per task.
        true_z: (n_tasks, n_clusters) array; the argmax of each row is taken
            as the task's true cluster.

    Returns:
        dict with 'test_mse' (mean of the per-task MSEs),
        'clustering_accuracy' (permutation-invariant, NaN when there are no
        tasks), 'cluster_assignments', 'task_similarity', 'true_z', 'pred_z'.

    Raises:
        ValueError: if test_data is empty, or the model returns a different
            number of assignments than true_z has rows.
    """
    # Local import: scipy is already a dependency of this notebook.
    from scipy.optimize import linear_sum_assignment

    if len(test_data) == 0:
        raise ValueError("test_data must contain at least one task")

    mse = 0
    for i, (X_test, y_test) in enumerate(test_data):
        y_pred = model.predict(X_test, i)
        mse += np.mean((y_test - y_pred) ** 2)
    mse /= len(test_data)

    true_assignments = np.argmax(true_z, axis=1)
    pred_assignments = model.get_cluster_assignments()
    pred_labels = np.asarray(pred_assignments).astype(int).ravel()
    if pred_labels.shape != true_assignments.shape:
        raise ValueError(
            f"expected {true_assignments.size} cluster assignments, got {pred_labels.size}"
        )

    if true_assignments.size == 0:
        clustering_acc = float('nan')
    else:
        # Contingency table: rows are true labels, columns are predicted labels.
        n_labels = int(max(true_assignments.max(), pred_labels.max())) + 1
        contingency = np.zeros((n_labels, n_labels), dtype=int)
        np.add.at(contingency, (true_assignments, pred_labels), 1)
        # Maximise matched counts by minimising the negated table.
        rows, cols = linear_sum_assignment(-contingency)
        clustering_acc = contingency[rows, cols].sum() / true_assignments.size

    return {
        'test_mse': mse,
        'clustering_accuracy': clustering_acc,
        'cluster_assignments': pred_assignments,
        'task_similarity': model.get_task_similarity(),
        'true_z': true_z,
        'pred_z': model.z
    }
def evaluate_gating_model(model, test_data, true_z):
    """Score a fitted gating model on held-out data and on cluster recovery.

    Cluster indices learned by the model are arbitrary up to a permutation, so
    the clustering accuracy is computed after the best one-to-one matching of
    predicted labels to true labels (a perfect clustering with swapped labels
    scores 1.0, not 0.0).

    Args:
        model: object providing ``predict(X, task_index)``,
            ``get_cluster_assignments()``, ``get_task_similarity()`` and a
            ``z`` attribute.
        test_data: sequence of (X_test, y_test) pairs, one per task.
        true_z: (n_tasks, n_clusters) array; the argmax of each row is taken
            as the task's true cluster.

    Returns:
        dict with 'test_mse' (mean of the per-task MSEs),
        'clustering_accuracy' (permutation-invariant, NaN when there are no
        tasks), 'cluster_assignments', 'task_similarity', 'true_z', 'pred_z'.

    Raises:
        ValueError: if test_data is empty, or the model returns a different
            number of assignments than true_z has rows.
    """
    # Local import: scipy is already a dependency of this notebook.
    from scipy.optimize import linear_sum_assignment

    if len(test_data) == 0:
        raise ValueError("test_data must contain at least one task")

    mse = 0
    for i, (X_test, y_test) in enumerate(test_data):
        y_pred = model.predict(X_test, i)
        mse += np.mean((y_test - y_pred) ** 2)
    mse /= len(test_data)

    true_assignments = np.argmax(true_z, axis=1)
    pred_assignments = model.get_cluster_assignments()
    pred_labels = np.asarray(pred_assignments).astype(int).ravel()
    if pred_labels.shape != true_assignments.shape:
        raise ValueError(
            f"expected {true_assignments.size} cluster assignments, got {pred_labels.size}"
        )

    if true_assignments.size == 0:
        clustering_acc = float('nan')
    else:
        # Contingency table: rows are true labels, columns are predicted labels.
        n_labels = int(max(true_assignments.max(), pred_labels.max())) + 1
        contingency = np.zeros((n_labels, n_labels), dtype=int)
        np.add.at(contingency, (true_assignments, pred_labels), 1)
        # Maximise matched counts by minimising the negated table.
        rows, cols = linear_sum_assignment(-contingency)
        clustering_acc = contingency[rows, cols].sum() / true_assignments.size

    return {
        'test_mse': mse,
        'clustering_accuracy': clustering_acc,
        'cluster_assignments': pred_assignments,
        'task_similarity': model.get_task_similarity(),
        'true_z': true_z,
        'pred_z': model.z
    }
In [30]:
Copied!
def plot_gating_results(model, X_list, y_list, task_features, true_z):
    """Draw the predicted-vs-true scatter for the first (up to) three tasks.

    A 24x16 inch figure is created and only slot 1 of its 3x4 grid is used.
    The other diagnostic panels (true vs predicted assignments, gating weights
    U, cluster centers m, responsibility comparison, task features coloured by
    true / predicted cluster, shared weights W, predicted and true
    responsibilities) are currently disabled, so ``task_features`` and
    ``true_z`` are accepted but not read.

    Args:
        model: object whose ``predict(X, task_index)`` returns predictions for X.
        X_list: per-task inputs; ``X_list[i]`` is handed to ``model.predict``.
        y_list: per-task targets; all of them set the span of the identity line.
        task_features: unused while the feature panels are disabled.
        true_z: unused while the responsibility panels are disabled.
    """
    fig = plt.figure(figsize=(24, 16))
    ax = fig.add_subplot(3, 4, 1)

    # The dashed y = x reference covers the target range of every task,
    # not only the tasks that end up in the scatter.
    lowest = min(np.min(targets) for targets in y_list)
    highest = max(np.max(targets) for targets in y_list)

    n_shown = min(3, len(X_list))
    for task in range(n_shown):
        predicted = model.predict(X_list[task], task)
        ax.scatter(y_list[task], predicted, alpha=0.6, label=f'Task {task + 1}')
    ax.plot([lowest, highest], [lowest, highest], 'k--')

    ax.set_xlabel('True Values')
    ax.set_ylabel('Predictions')
    ax.set_title('Predictions vs True Values')
    ax.legend()

    fig.tight_layout()
    plt.show()
def plot_gating_results(model, X_list, y_list, task_features, true_z):
    """Draw the predicted-vs-true scatter for the first (up to) three tasks.

    A 24x16 inch figure is created and only slot 1 of its 3x4 grid is used.
    The other diagnostic panels (true vs predicted assignments, gating weights
    U, cluster centers m, responsibility comparison, task features coloured by
    true / predicted cluster, shared weights W, predicted and true
    responsibilities) are currently disabled, so ``task_features`` and
    ``true_z`` are accepted but not read.

    Args:
        model: object whose ``predict(X, task_index)`` returns predictions for X.
        X_list: per-task inputs; ``X_list[i]`` is handed to ``model.predict``.
        y_list: per-task targets; all of them set the span of the identity line.
        task_features: unused while the feature panels are disabled.
        true_z: unused while the responsibility panels are disabled.
    """
    fig = plt.figure(figsize=(24, 16))
    ax = fig.add_subplot(3, 4, 1)

    # The dashed y = x reference covers the target range of every task,
    # not only the tasks that end up in the scatter.
    lowest = min(np.min(targets) for targets in y_list)
    highest = max(np.max(targets) for targets in y_list)

    n_shown = min(3, len(X_list))
    for task in range(n_shown):
        predicted = model.predict(X_list[task], task)
        ax.scatter(y_list[task], predicted, alpha=0.6, label=f'Task {task + 1}')
    ax.plot([lowest, highest], [lowest, highest], 'k--')

    ax.set_xlabel('True Values')
    ax.set_ylabel('Predictions')
    ax.set_title('Predictions vs True Values')
    ax.legend()

    fig.tight_layout()
    plt.show()
In [31]:
Copied!
def run_gating_simulation(n_simulations=1, activation='tanh', with_plot=False,
                          n_tasks=10, n_input=5, n_hidden=4,
                          n_clusters=2, n_features=3):
    """Generate gated data, fit a MultiTaskNNGating model and score it, repeatedly.

    Each repetition draws a fresh data set with task features, fits for at most
    100 iterations, evaluates on the test split and optionally plots the
    training-set fit. A repetition that raises is reported on stdout and left
    out of the summary.

    Args:
        n_simulations: number of independent repetitions.
        activation: forwarded to both the data generator and the model.
        with_plot: when true, call plot_gating_results after each fit.
        n_tasks, n_input, n_hidden, n_clusters, n_features: problem sizes,
            forwarded to both the data generator and the model.

    Returns:
        None if no repetition succeeded, otherwise a dict with 'avg_test_mse'
        and 'avg_clustering_acc', each averaged over successful repetitions.
    """
    # The generator and the model take the same size/activation keywords.
    shared_kwargs = dict(n_tasks=n_tasks, n_input=n_input, n_hidden=n_hidden,
                         n_clusters=n_clusters, n_features=n_features,
                         activation=activation)

    per_run_metrics = []
    for run_number in range(1, n_simulations + 1):
        print(f"\nSimulation {run_number}/{n_simulations}")
        try:
            train_data, test_data, task_features, true_z = generate_gating_data(**shared_kwargs)
            model = MultiTaskNNGating(**shared_kwargs)

            # Keep the training inputs/targets separately for the plot below.
            X_list = [inputs for inputs, _ in train_data]
            y_list = [targets for _, targets in train_data]

            model.fit(train_data, task_features, max_iter=100)

            metrics = evaluate_gating_model(model, test_data, true_z)
            per_run_metrics.append(metrics)
            print(f"Test MSE: {metrics['test_mse']:.4f}")

            if with_plot:
                plot_gating_results(model, X_list, y_list, task_features, true_z)
        except Exception as err:
            # Best effort: report the failure and move on to the next repetition.
            print(f"Error in simulation {run_number}: {str(err)}")

    if len(per_run_metrics) == 0:
        print("Warning: All simulations failed")
        return None

    test_mses = [run['test_mse'] for run in per_run_metrics]
    accuracies = [run['clustering_accuracy'] for run in per_run_metrics]
    return {
        'avg_test_mse': np.mean(test_mses),
        'avg_clustering_acc': np.mean(accuracies),
    }
def run_gating_simulation(n_simulations=1, activation='tanh', with_plot=False,
                          n_tasks=10, n_input=5, n_hidden=4,
                          n_clusters=2, n_features=3):
    """Generate gated data, fit a MultiTaskNNGating model and score it, repeatedly.

    Each repetition draws a fresh data set with task features, fits for at most
    100 iterations, evaluates on the test split and optionally plots the
    training-set fit. A repetition that raises is reported on stdout and left
    out of the summary.

    Args:
        n_simulations: number of independent repetitions.
        activation: forwarded to both the data generator and the model.
        with_plot: when true, call plot_gating_results after each fit.
        n_tasks, n_input, n_hidden, n_clusters, n_features: problem sizes,
            forwarded to both the data generator and the model.

    Returns:
        None if no repetition succeeded, otherwise a dict with 'avg_test_mse'
        and 'avg_clustering_acc', each averaged over successful repetitions.
    """
    # The generator and the model take the same size/activation keywords.
    shared_kwargs = dict(n_tasks=n_tasks, n_input=n_input, n_hidden=n_hidden,
                         n_clusters=n_clusters, n_features=n_features,
                         activation=activation)

    per_run_metrics = []
    for run_number in range(1, n_simulations + 1):
        print(f"\nSimulation {run_number}/{n_simulations}")
        try:
            train_data, test_data, task_features, true_z = generate_gating_data(**shared_kwargs)
            model = MultiTaskNNGating(**shared_kwargs)

            # Keep the training inputs/targets separately for the plot below.
            X_list = [inputs for inputs, _ in train_data]
            y_list = [targets for _, targets in train_data]

            model.fit(train_data, task_features, max_iter=100)

            metrics = evaluate_gating_model(model, test_data, true_z)
            per_run_metrics.append(metrics)
            print(f"Test MSE: {metrics['test_mse']:.4f}")

            if with_plot:
                plot_gating_results(model, X_list, y_list, task_features, true_z)
        except Exception as err:
            # Best effort: report the failure and move on to the next repetition.
            print(f"Error in simulation {run_number}: {str(err)}")

    if len(per_run_metrics) == 0:
        print("Warning: All simulations failed")
        return None

    test_mses = [run['test_mse'] for run in per_run_metrics]
    accuracies = [run['clustering_accuracy'] for run in per_run_metrics]
    return {
        'avg_test_mse': np.mean(test_mses),
        'avg_clustering_acc': np.mean(accuracies),
    }
In [32]:
Copied!
# Single tanh run of the gating model; with_plot=True also draws the fit
gating_results = run_gating_simulation(
    n_simulations=1, activation='tanh', with_plot=True,
    n_tasks=10, n_input=5, n_hidden=4, n_clusters=2, n_features=3,
)
# Single tanh run of the gating model; with_plot=True also draws the fit
gating_results = run_gating_simulation(
    n_simulations=1, activation='tanh', with_plot=True,
    n_tasks=10, n_input=5, n_hidden=4, n_clusters=2, n_features=3,
)
Simulation 1/1 Error at iteration 0: index 1 is out of bounds for axis 1 with size 1 Test MSE: 8.5750
In [33]:
Copied!
# run_gating_simulation returns None when every repetition failed,
# so check before indexing into the summary.
print("\nFinal Results tanh:")
if gating_results is None:
    print("No successful simulations; nothing to report")
else:
    print(f"Average Test MSE: {gating_results['avg_test_mse']:.4f}")
    # print(f"Average Clustering Accuracy: {gating_results['avg_clustering_acc']:.2%}")
# run_gating_simulation returns None when every repetition failed,
# so check before indexing into the summary.
print("\nFinal Results tanh:")
if gating_results is None:
    print("No successful simulations; nothing to report")
else:
    print(f"Average Test MSE: {gating_results['avg_test_mse']:.4f}")
    # print(f"Average Clustering Accuracy: {gating_results['avg_clustering_acc']:.2%}")
Final Results tanh: Average Test MSE: 8.5750
In [36]:
Copied!
# Single linear-activation run of the gating model; with_plot=True also draws the fit
gating_results = run_gating_simulation(
    n_simulations=1, activation='linear', with_plot=True,
    n_tasks=10, n_input=5, n_hidden=4, n_clusters=2, n_features=3,
)
# Single linear-activation run of the gating model; with_plot=True also draws the fit
gating_results = run_gating_simulation(
    n_simulations=1, activation='linear', with_plot=True,
    n_tasks=10, n_input=5, n_hidden=4, n_clusters=2, n_features=3,
)
Simulation 1/1 Error at iteration 0: index 1 is out of bounds for axis 1 with size 1 Test MSE: 0.0115
In [37]:
Copied!
# run_gating_simulation returns None when every repetition failed,
# so check before indexing into the summary.
print("\nFinal Results linear:")
if gating_results is None:
    print("No successful simulations; nothing to report")
else:
    print(f"Average Test MSE: {gating_results['avg_test_mse']:.4f}")
    # print(f"Average Clustering Accuracy: {gating_results['avg_clustering_acc']:.2%}")
# run_gating_simulation returns None when every repetition failed,
# so check before indexing into the summary.
print("\nFinal Results linear:")
if gating_results is None:
    print("No successful simulations; nothing to report")
else:
    print(f"Average Test MSE: {gating_results['avg_test_mse']:.4f}")
    # print(f"Average Clustering Accuracy: {gating_results['avg_clustering_acc']:.2%}")
Final Results linear: Average Test MSE: 0.0115