import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Read the training set: two feature columns plus one label column per row
df = pd.read_csv('data_2d_classification.csv')
feature_columns = ['x1', 'x2']
X = df.loc[:, feature_columns].values   # feature matrix, shape (N, 2)
y = df['label'].values[:, np.newaxis]   # labels as a column vector, shape (N, 1)

# Layer sizes of the network: input -> hidden -> output
input_dim, hidden_dim, output_dim = 2, 10, 1

# Parameter initialisation.
# The seed is fixed so every run starts from the same weights; W1 is drawn
# before W2, and that order determines which random numbers each one gets.
np.random.seed(1)
init_scale = 0.1  # shrink the standard-normal draws to small starting weights
W1 = init_scale * np.random.randn(input_dim, hidden_dim)
b1 = np.zeros(shape=(1, hidden_dim))
W2 = init_scale * np.random.randn(hidden_dim, output_dim)
b2 = np.zeros(shape=(1, output_dim))

# Activation function
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    The value is computed from ``exp(-|x|)`` so that inputs with a large
    magnitude never overflow ``exp``; the plain formula overflows (and emits a
    RuntimeWarning) for large negative ``x``.

    Args:
        x: Scalar or array-like of real numbers. Non-floating input is
            converted to float.

    Returns:
        Values in [0, 1] with the same shape as ``x`` (a NumPy scalar when
        ``x`` is a scalar).
    """
    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.floating):
        x = x.astype(float)
    # exp(-|x|) lies in (0, 1], so neither branch below can overflow.
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)) -- same function.
    out = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # np.where yields a 0-d array for scalar input; unwrap it to a scalar.
    return out[()] if out.ndim == 0 else out

def sigmoid_deriv(x):
    """Return the derivative of the sigmoid evaluated at pre-activation ``x``.

    Uses the identity s'(x) = s(x) * (1 - s(x)), where s is ``sigmoid``.
    """
    activation = sigmoid(x)
    complement = 1 - activation
    return activation * complement

def relu(x):
    """Return the element-wise maximum of 0 and ``x`` (rectified linear unit)."""
    floor = 0
    rectified = np.maximum(floor, x)
    return rectified

def relu_deriv(x):
    """Return the ReLU derivative at ``x``: 1.0 where ``x > 0``, else 0.0."""
    is_positive = x > 0
    return is_positive.astype(np.float64)

# Training parameters
epochs = 3000
lr = 0.01
n_samples = X.shape[0]  # loop-invariant; used to average gradients over the batch

# Full-batch gradient descent on the two-layer network
for epoch in range(epochs):
    # Forward pass
    Z1 = X @ W1 + b1
    A1 = relu(Z1)
    Z2 = A1 @ W2 + b2
    A2 = sigmoid(Z2)  # Output, shape (N, 1)

    # Compute loss (Binary Cross-Entropy); the 1e-8 offset keeps log() finite
    # when the output saturates at exactly 0 or 1.
    loss = -np.mean(y * np.log(A2 + 1e-8) + (1 - y) * np.log(1 - A2 + 1e-8))

    # Backward pass
    # For a sigmoid output trained with binary cross-entropy, the chain rule
    # dL/dA2 * sigmoid'(Z2) simplifies to (A2 - y). Multiplying (A2 - y) by
    # sigmoid'(Z2) again would be the gradient of a squared-error loss, not of
    # the cross-entropy computed above.
    dZ2 = A2 - y                    # Shape (N, 1)
    dW2 = A1.T @ dZ2 / n_samples    # Shape (hidden_dim, 1)
    db2 = np.mean(dZ2, axis=0, keepdims=True)

    dA1 = dZ2 @ W2.T                # Shape (N, hidden_dim)
    dZ1 = dA1 * relu_deriv(Z1)      # Shape (N, hidden_dim)
    dW1 = X.T @ dZ1 / n_samples     # Shape (input_dim, hidden_dim)
    db1 = np.mean(dZ1, axis=0, keepdims=True)

    # Update weights
    W1 -= lr * dW1
    b1 -= lr * db1
    W2 -= lr * dW2
    b2 -= lr * db2

    # Optional: print loss every 500 epochs
    if epoch % 500 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

# Decision boundary plotting: build a dense grid that covers the data with a
# margin of 1 on every side, then evaluate the trained network on it.
h = 0.02  # grid spacing along both axes
x_min = X[:, 0].min() - 1
x_max = X[:, 0].max() + 1
y_min = X[:, 1].min() - 1
y_max = X[:, 1].max() + 1
x_ticks = np.arange(x_min, x_max, h)
y_ticks = np.arange(y_min, y_max, h)
xx, yy = np.meshgrid(x_ticks, y_ticks)
# One row per grid point: (x1, x2)
grid = np.c_[xx.reshape(-1), yy.reshape(-1)]

# Forward pass for grid (same layers as in training, using the final weights)
Z1_grid = grid @ W1 + b1
A1_grid = relu(Z1_grid)
Z2_grid = A1_grid @ W2 + b2
A2_grid = sigmoid(Z2_grid)
A2_grid = A2_grid.reshape(xx.shape)  # back to the 2-D layout contour plots expect

# Visualize
# Split the samples by their actual label rather than by row position, so the
# plot stays correct when the CSV is not sorted by class or the classes are
# unbalanced. Assumes labels are encoded as 0 and 1 -- confirm against the CSV.
labels = y.ravel()
X1 = X[labels == 0]
X2 = X[labels == 1]

plt.figure(figsize=(8, 6))
# A2_grid holds the network's sigmoid output on the grid. RdBu_r maps low
# values to blue and high values to red, which matches the scatter colours
# below (Group 0 blue, Group 1 red); plain RdBu would shade them the other way.
plt.contourf(xx, yy, A2_grid, alpha=0.3, levels=np.linspace(0, 1, 11), cmap=plt.cm.RdBu_r)
plt.scatter(X1[:, 0], X1[:, 1], color='blue', label='Group 0', edgecolor='k')
plt.scatter(X2[:, 0], X2[:, 1], color='red', label='Group 1', edgecolor='k')
# The 0.5 level set of the output is the decision boundary
plt.contour(xx, yy, A2_grid, levels=[0.5], linewidths=2, colors='k')
plt.legend()
plt.title('Explicit NN Classification with Boundary')
plt.xlabel('x1')
plt.ylabel('x2')
plt.tight_layout()
plt.show()