{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "b01d6b3e", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "from numpy import sqrt\n", "import matplotlib.pyplot as plt\n", "import scipy.io as sio\n", "import os\n", "import random\n", "import time\n", "import sys\n", "from matplotlib import rcParams\n", "from scipy.linalg import eigh\n", "from sklearn.linear_model import LinearRegression\n", "from scipy import stats\n", "import statsmodels.api as sm\n", "from statsmodels.formula.api import ols\n", "import pandas as pd\n", "import matplotlib as mpl\n", "import seaborn as sns\n", "\n", "\n", "import statsmodels.formula.api as smf\n", "\n", "mpl.rcParams['figure.dpi'] = 300 \n", "plt.rc('text',usetex=True)" ] }, { "cell_type": "markdown", "id": "aea49adc", "metadata": {}, "source": [ "Matrizes relevantes: \n", "\n", "$$\\mathbf{Y}=\\left[\\begin{array}{c}\n", "Y_{1}\\\\\n", "Y_{2}\\\\\n", "\\vdots\\\\\n", "Y_{n}\n", "\\end{array}\\right] \\mbox{(respostas)}, \\; \\mathbb{X}=\\left[\\begin{array}{ccccc}\n", "1 & X_{11} & X_{12} & \\cdots & X_{1,p-1}\\\\\n", "1 & X_{21} & X_{22} & \\cdots & X_{2,p-1}\\\\\n", "\\vdots & \\vdots & \\vdots & \\vdots & \\vdots\\\\\n", "1 & X_{n1} & X_{n2} & \\cdots & X_{n,p-1}\n", "\\end{array}\\right] \\mbox{(dados das covariáveis)},\\;\\mathbb{X}^{T}\\mathbb{X}.$$\n", "\n", "\n", "$$ \\boldsymbol{\\beta}=\\left[\\begin{array}{c}\n", "\\beta_{0}\\\\\n", "\\beta_{1}\\\\\n", "\\vdots\\\\\n", "\\beta_{p-1}\n", "\\end{array}\\right] \\mbox{(coeficientes)},\\;\\boldsymbol{\\varepsilon}=\\left[\\begin{array}{c}\n", "\\varepsilon_{1}\\\\\n", "\\varepsilon_{2}\\\\\n", "\\vdots\\\\\n", "\\varepsilon_{n}\n", "\\end{array}\\right] \\mbox{(erros)}. 
$$\n", "\n", "\n", "Estimadores por mínimos quadrados\n", "\n", "$$ \\boldsymbol{\\hat{\\beta}} = (\\mathbb{X}^{T}\\mathbb{X})^{-1} \\mathbb{X}^{T} \\mathbf{Y} $$\n" ] }, { "cell_type": "code", "execution_count": 2, "id": "184c2c1c", "metadata": {}, "outputs": [], "source": [ "df_sales = pd.read_csv('Advertising.csv')" ] }, { "cell_type": "code", "execution_count": 3, "id": "45ff6200", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
OLS Regression Results
Dep. Variable: sales R-squared: 0.897
Model: OLS Adj. R-squared: 0.896
Method: Least Squares F-statistic: 570.3
Date: Tue, 14 Nov 2023 Prob (F-statistic): 1.58e-96
Time: 07:21:37 Log-Likelihood: -386.18
No. Observations: 200 AIC: 780.4
Df Residuals: 196 BIC: 793.6
Df Model: 3
Covariance Type: nonrobust
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
coef std err t P>|t| [0.025 0.975]
Intercept 2.9389 0.312 9.422 0.000 2.324 3.554
TV 0.0458 0.001 32.809 0.000 0.043 0.049
radio 0.1885 0.009 21.893 0.000 0.172 0.206
newspaper -0.0010 0.006 -0.177 0.860 -0.013 0.011
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Omnibus: 60.414 Durbin-Watson: 2.084
Prob(Omnibus): 0.000 Jarque-Bera (JB): 151.241
Skew: -1.327 Prob(JB): 1.44e-33
Kurtosis: 6.332 Cond. No. 454.


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified." ], "text/plain": [ "\n", "\"\"\"\n", " OLS Regression Results \n", "==============================================================================\n", "Dep. Variable: sales R-squared: 0.897\n", "Model: OLS Adj. R-squared: 0.896\n", "Method: Least Squares F-statistic: 570.3\n", "Date: Tue, 14 Nov 2023 Prob (F-statistic): 1.58e-96\n", "Time: 07:21:37 Log-Likelihood: -386.18\n", "No. Observations: 200 AIC: 780.4\n", "Df Residuals: 196 BIC: 793.6\n", "Df Model: 3 \n", "Covariance Type: nonrobust \n", "==============================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "Intercept 2.9389 0.312 9.422 0.000 2.324 3.554\n", "TV 0.0458 0.001 32.809 0.000 0.043 0.049\n", "radio 0.1885 0.009 21.893 0.000 0.172 0.206\n", "newspaper -0.0010 0.006 -0.177 0.860 -0.013 0.011\n", "==============================================================================\n", "Omnibus: 60.414 Durbin-Watson: 2.084\n", "Prob(Omnibus): 0.000 Jarque-Bera (JB): 151.241\n", "Skew: -1.327 Prob(JB): 1.44e-33\n", "Kurtosis: 6.332 Cond. No. 454.\n", "==============================================================================\n", "\n", "Notes:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", "\"\"\"" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model = smf.ols(formula='sales ~ TV + radio + newspaper', data=df_sales)\n", "\n", "result = model.fit()\n", "\n", "result.summary()" ] }, { "cell_type": "code", "execution_count": 4, "id": "2a43f67d", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0TVradionewspapersales
01230.137.869.222.1
1244.539.345.110.4
2317.245.969.39.3
34151.541.358.518.5
45180.810.858.412.9
..................
19519638.23.713.87.6
19619794.24.98.19.7
197198177.09.36.412.8
198199283.642.066.225.5
199200232.18.68.713.4
\n", "

200 rows × 5 columns

\n", "
" ], "text/plain": [ " Unnamed: 0 TV radio newspaper sales\n", "0 1 230.1 37.8 69.2 22.1\n", "1 2 44.5 39.3 45.1 10.4\n", "2 3 17.2 45.9 69.3 9.3\n", "3 4 151.5 41.3 58.5 18.5\n", "4 5 180.8 10.8 58.4 12.9\n", ".. ... ... ... ... ...\n", "195 196 38.2 3.7 13.8 7.6\n", "196 197 94.2 4.9 8.1 9.7\n", "197 198 177.0 9.3 6.4 12.8\n", "198 199 283.6 42.0 66.2 25.5\n", "199 200 232.1 8.6 8.7 13.4\n", "\n", "[200 rows x 5 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_sales" ] }, { "cell_type": "markdown", "id": "6d252882", "metadata": {}, "source": [ "## Efeitos de novas covariáveis em $R^2$ " ] }, { "cell_type": "code", "execution_count": 35, "id": "8a4890fd", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0TVradionewspapersalesx1x2x3x4x5x6x7x8x9x10
01230.137.869.222.11.047096-0.2417730.3632430.451105-1.4412410.842933-0.3447730.328789-1.109157-1.571567
1244.539.345.110.4-0.0023360.9035122.976487-1.030218-0.288372-0.7193790.9543150.036471-1.580977-0.208567
2317.245.969.39.31.861288-0.851597-0.1778130.3778311.028184-0.738243-0.849071-0.175226-0.2227930.274071
34151.541.358.518.51.0640491.4584110.021797-1.358062-0.608630-0.5164120.432031-1.2282250.099671-0.334514
45180.810.858.412.90.3995761.3147291.1871210.4929000.2868640.240289-0.430977-0.4909170.7342730.825744
................................................
19519638.23.713.87.60.5214041.2175020.8859360.985284-0.1276150.8596461.2860440.526341-1.2962322.070419
19619794.24.98.19.70.7670351.165815-0.4571211.8742450.8365190.8340240.3056720.4000580.698448-0.523424
197198177.09.36.412.8-1.6339341.040581-0.366057-0.829202-0.208192-2.6455960.101832-0.1659560.4477291.931378
198199283.642.066.225.5-0.3276190.321214-0.595611-0.8570441.4175860.4879910.9019930.1383151.8574870.192366
199200232.18.68.713.4-0.063888-0.579758-0.485726-0.197697-0.7372411.397878-0.5312590.452656-0.495069-1.602641
\n", "

200 rows × 15 columns

\n", "
" ], "text/plain": [ " Unnamed: 0 TV radio newspaper sales x1 x2 x3 \\\n", "0 1 230.1 37.8 69.2 22.1 1.047096 -0.241773 0.363243 \n", "1 2 44.5 39.3 45.1 10.4 -0.002336 0.903512 2.976487 \n", "2 3 17.2 45.9 69.3 9.3 1.861288 -0.851597 -0.177813 \n", "3 4 151.5 41.3 58.5 18.5 1.064049 1.458411 0.021797 \n", "4 5 180.8 10.8 58.4 12.9 0.399576 1.314729 1.187121 \n", ".. ... ... ... ... ... ... ... ... \n", "195 196 38.2 3.7 13.8 7.6 0.521404 1.217502 0.885936 \n", "196 197 94.2 4.9 8.1 9.7 0.767035 1.165815 -0.457121 \n", "197 198 177.0 9.3 6.4 12.8 -1.633934 1.040581 -0.366057 \n", "198 199 283.6 42.0 66.2 25.5 -0.327619 0.321214 -0.595611 \n", "199 200 232.1 8.6 8.7 13.4 -0.063888 -0.579758 -0.485726 \n", "\n", " x4 x5 x6 x7 x8 x9 x10 \n", "0 0.451105 -1.441241 0.842933 -0.344773 0.328789 -1.109157 -1.571567 \n", "1 -1.030218 -0.288372 -0.719379 0.954315 0.036471 -1.580977 -0.208567 \n", "2 0.377831 1.028184 -0.738243 -0.849071 -0.175226 -0.222793 0.274071 \n", "3 -1.358062 -0.608630 -0.516412 0.432031 -1.228225 0.099671 -0.334514 \n", "4 0.492900 0.286864 0.240289 -0.430977 -0.490917 0.734273 0.825744 \n", ".. ... ... ... ... ... ... ... 
\n", "195 0.985284 -0.127615 0.859646 1.286044 0.526341 -1.296232 2.070419 \n", "196 1.874245 0.836519 0.834024 0.305672 0.400058 0.698448 -0.523424 \n", "197 -0.829202 -0.208192 -2.645596 0.101832 -0.165956 0.447729 1.931378 \n", "198 -0.857044 1.417586 0.487991 0.901993 0.138315 1.857487 0.192366 \n", "199 -0.197697 -0.737241 1.397878 -0.531259 0.452656 -0.495069 -1.602641 \n", "\n", "[200 rows x 15 columns]" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_sales = pd.read_csv('Advertising.csv')\n", "\n", "num_covars = 10\n", "# 'radio' is the only real predictor; x1..x{num_covars} are pure-noise covariates.\n", "covars = ['radio'] + [f'x{i}' for i in range(1, num_covars + 1)]\n", "\n", "# Seeded generator: the noise columns (and every R^2 computed from them)\n", "# are identical on each Restart & Run All.\n", "rng = np.random.default_rng(0)\n", "for covar in covars[1:]:\n", "    df_sales[covar] = rng.normal(size=len(df_sales))\n", "\n", "df_sales" ] }, { "cell_type": "code", "execution_count": 37, "id": "4ff4ec38", "metadata": { "scrolled": false }, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Fit nested OLS models: the k-th model uses the first k names in `covars`.\n", "# Building each formula from a prefix of the list guarantees that the last\n", "# covariate also enters a fitted model, so `result` ends up holding the\n", "# fit with every covariate.\n", "R2list = []\n", "\n", "for n_terms in range(1, len(covars) + 1):\n", "    eq = 'sales ~ ' + ' + '.join(covars[:n_terms])\n", "    model = smf.ols(formula=eq, data=df_sales)\n", "    result = model.fit()\n", "    R2list.append(result.rsquared)\n", "\n", "fig, ax = plt.subplots()\n", "# x-axis: number of covariates in each fitted model (1 = radio only).\n", "ax.plot(range(1, len(R2list) + 1), R2list, '.-')\n", "ax.spines['right'].set_visible(False)\n", "ax.spines['top'].set_visible(False)\n", "ax.set_xlabel(r'Número de covariáveis', fontsize=13)\n", "ax.set_ylabel(r'$R^2$', fontsize=13)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 38, "id": "b8720920", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " OLS Regression Results \n", "==============================================================================\n", "Dep. Variable: sales R-squared: 0.366\n", "Model: OLS Adj. R-squared: 0.333\n", "Method: Least Squares F-statistic: 10.93\n", "Date: Tue, 14 Nov 2023 Prob (F-statistic): 1.32e-14\n", "Time: 07:25:20 Log-Likelihood: -568.04\n", "No. 
Observations: 200 AIC: 1158.\n", "Df Residuals: 189 BIC: 1194.\n", "Df Model: 10 \n", "Covariance Type: nonrobust \n", "==============================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "Intercept 9.3265 0.568 16.411 0.000 8.205 10.448\n", "radio 0.2037 0.021 9.870 0.000 0.163 0.244\n", "x1 -0.3106 0.310 -1.002 0.318 -0.922 0.301\n", "x2 -0.5436 0.271 -2.009 0.046 -1.077 -0.010\n", "x3 -0.2702 0.309 -0.874 0.383 -0.880 0.340\n", "x4 0.5558 0.326 1.704 0.090 -0.088 1.199\n", "x5 -0.2355 0.311 -0.758 0.449 -0.848 0.377\n", "x6 0.1983 0.310 0.640 0.523 -0.413 0.810\n", "x7 0.0845 0.328 0.258 0.797 -0.563 0.732\n", "x8 -0.3288 0.355 -0.926 0.356 -1.030 0.372\n", "x9 0.0894 0.309 0.290 0.772 -0.520 0.699\n", "==============================================================================\n", "Omnibus: 22.530 Durbin-Watson: 1.915\n", "Prob(Omnibus): 0.000 Jarque-Bera (JB): 28.691\n", "Skew: -0.744 Prob(JB): 5.89e-07\n", "Kurtosis: 4.108 Cond. No. 52.3\n", "==============================================================================\n", "\n", "Notes:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n" ] } ], "source": [ "print(result.summary())" ] }, { "cell_type": "code", "execution_count": null, "id": "dad53ce8", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" } }, "nbformat": 4, "nbformat_minor": 5 }