import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the (x, y) samples from the CSV file as NumPy arrays.
data = pd.read_csv('linear_regression_data.csv')
x, y = data['x'].values, data['y'].values

# Least-squares fit of y = m*x + c: place x beside a column of ones to form
# the design matrix, then let lstsq solve for the slope m and intercept c.
A = np.column_stack((x, np.ones(len(x))))
solution, *_ = np.linalg.lstsq(A, y, rcond=None)
m, c = solution

# To fit y = m*x + c, we need a matrix A with one row [x_i, 1] per data point.
# For example, for x = 0, 1, ..., 5, A is:
# [[0. 1.]
#  [1. 1.]
#  [2. 1.]
#  [3. 1.]
#  [4. 1.]
#  [5. 1.]]
# The equation y = m*x + c can be written in matrix form as
# A @ [m, c]^T = y, which np.linalg.lstsq solves for [m, c] in the
# least-squares sense.

# Regression line: evaluate the fitted model at every observed x.
y_pred = m * x + c

# Choose the sign separately so a negative intercept is shown as
# "y = 2.00x - 1.50" instead of "y = 2.00x + -1.50" in the legend.
sign = '-' if c < 0 else '+'
fit_label = f'Fit: y = {m:.2f}x {sign} {abs(c):.2f}'

# Plot the raw samples as points and the fitted line on top of them.
plt.scatter(x, y, label='Data points')
plt.plot(x, y_pred, color='red', label=fit_label)
plt.xlabel('x')
plt.ylabel('y')
plt.title('Linear Regression Example (Using np.linalg.lstsq)')
plt.legend()
plt.show()
