{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Análise Multivariada e Aprendizado Não-Supervisionado\n", "\n", "por Cibele Russo.\n", "\n", "ICMC USP São Carlos.\n", "\n", "\n", "## Aula 12b: Análise de Correspondência\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#!pip install --user prince" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Fontes: \n", "\n", "- https://codefying.com/2018/12/21/introduction-to-correspondence-analysis/\n", "- https://pypi.org/project/prince/#correspondence-analysis-ca\n", "\n", "Exemplo de análise textual em que trechos de cinco autores são comparados pela frequência das letras. Os cinco autores, os rótulos das amostras (três trechos por autor; os de Thomas Hobbes aparecem como TB) e as letras consideradas são definidos abaixo:" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "from scipy.stats import chi2_contingency\n", "import prince\n", "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "authors = [\"Charles Darwin\", \"Rene Descartes\",\"Thomas Hobbes\", \"Mary Shelley\", \"Mark Twain\"]\n", "initials=['CD1','CD2','CD3','RD1','RD2','RD3','TB1','TB2','TB3','MS1','MS2','MS3','MT1','MT2','MT3']\n", "chars=[\"B\", \"C\", \"D\", \"F\", \"G\", \"H\", \"I\", \"L\", \"M\", \"N\",\"P\", \"R\", \"S\", \"U\", \"W\", \"Y\"]" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "sampleCrosstab=[[34, 37, 44, 27, 19, 39, 74, 44, 27, 61, 12, 65, 69,22, 14, 21],\n", " [18, 33, 47, 24, 14, 38, 66, 41, 36,72, 15, 62, 63, 31, 12, 18],\n", " [32, 43, 36, 12, 21, 51, 75, 33, 23, 60, 24, 68, 85,18, 13, 14],\n", " [13, 31, 55, 29, 15, 62, 74, 43, 28,73, 8, 59, 54, 32, 19, 20],\n", " [8, 28, 34, 24, 17, 68, 75, 34, 25, 70, 16, 56, 72,31, 14, 11], \n", " [9, 34, 43, 25, 18, 68, 84, 25, 32, 76,14, 69, 64, 27, 11, 18],\n", " [15, 20, 28, 18, 19, 65, 82, 34, 29, 89, 11, 47, 
74,18, 22, 17], \n", " [18, 14, 40, 25, 21, 60, 70, 15, 37,80, 15, 65, 68, 21, 25, 9],\n", " [19, 18, 41, 26, 19, 58, 64, 18, 38, 78, 15, 65, 72,20, 20, 11], \n", " [13, 29, 49, 31, 16, 61, 73, 36, 29,69, 13, 63, 58, 18, 20, 25],\n", " [17, 34, 43, 29, 14, 62, 64, 26, 26, 71, 26, 78, 64, 21, 18, 12],\n", " [13, 22, 43, 16, 11, 70, 68, 46, 35,57, 30, 71, 57, 19, 22, 20],\n", " [16, 18, 56, 13, 27, 67, 61, 43, 20, 63, 14, 43, 67,34, 41, 23], \n", " [15, 21, 66, 21, 19, 50, 62, 50, 24, 68, 14, 40, 58, 31, 36, 26],\n", " [19, 17, 70, 12, 28, 53, 72, 39, 22, 71, 11, 40, 67,25, 41, 17]]" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | B | \n", "C | \n", "D | \n", "F | \n", "G | \n", "H | \n", "I | \n", "L | \n", "M | \n", "N | \n", "P | \n", "R | \n", "S | \n", "U | \n", "W | \n", "Y | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CD1 | \n", "34 | \n", "37 | \n", "44 | \n", "27 | \n", "19 | \n", "39 | \n", "74 | \n", "44 | \n", "27 | \n", "61 | \n", "12 | \n", "65 | \n", "69 | \n", "22 | \n", "14 | \n", "21 | \n", "
CD2 | \n", "18 | \n", "33 | \n", "47 | \n", "24 | \n", "14 | \n", "38 | \n", "66 | \n", "41 | \n", "36 | \n", "72 | \n", "15 | \n", "62 | \n", "63 | \n", "31 | \n", "12 | \n", "18 | \n", "
CD3 | \n", "32 | \n", "43 | \n", "36 | \n", "12 | \n", "21 | \n", "51 | \n", "75 | \n", "33 | \n", "23 | \n", "60 | \n", "24 | \n", "68 | \n", "85 | \n", "18 | \n", "13 | \n", "14 | \n", "
RD1 | \n", "13 | \n", "31 | \n", "55 | \n", "29 | \n", "15 | \n", "62 | \n", "74 | \n", "43 | \n", "28 | \n", "73 | \n", "8 | \n", "59 | \n", "54 | \n", "32 | \n", "19 | \n", "20 | \n", "
RD2 | \n", "8 | \n", "28 | \n", "34 | \n", "24 | \n", "17 | \n", "68 | \n", "75 | \n", "34 | \n", "25 | \n", "70 | \n", "16 | \n", "56 | \n", "72 | \n", "31 | \n", "14 | \n", "11 | \n", "
RD3 | \n", "9 | \n", "34 | \n", "43 | \n", "25 | \n", "18 | \n", "68 | \n", "84 | \n", "25 | \n", "32 | \n", "76 | \n", "14 | \n", "69 | \n", "64 | \n", "27 | \n", "11 | \n", "18 | \n", "
TB1 | \n", "15 | \n", "20 | \n", "28 | \n", "18 | \n", "19 | \n", "65 | \n", "82 | \n", "34 | \n", "29 | \n", "89 | \n", "11 | \n", "47 | \n", "74 | \n", "18 | \n", "22 | \n", "17 | \n", "
TB2 | \n", "18 | \n", "14 | \n", "40 | \n", "25 | \n", "21 | \n", "60 | \n", "70 | \n", "15 | \n", "37 | \n", "80 | \n", "15 | \n", "65 | \n", "68 | \n", "21 | \n", "25 | \n", "9 | \n", "
TB3 | \n", "19 | \n", "18 | \n", "41 | \n", "26 | \n", "19 | \n", "58 | \n", "64 | \n", "18 | \n", "38 | \n", "78 | \n", "15 | \n", "65 | \n", "72 | \n", "20 | \n", "20 | \n", "11 | \n", "
MS1 | \n", "13 | \n", "29 | \n", "49 | \n", "31 | \n", "16 | \n", "61 | \n", "73 | \n", "36 | \n", "29 | \n", "69 | \n", "13 | \n", "63 | \n", "58 | \n", "18 | \n", "20 | \n", "25 | \n", "
MS2 | \n", "17 | \n", "34 | \n", "43 | \n", "29 | \n", "14 | \n", "62 | \n", "64 | \n", "26 | \n", "26 | \n", "71 | \n", "26 | \n", "78 | \n", "64 | \n", "21 | \n", "18 | \n", "12 | \n", "
MS3 | \n", "13 | \n", "22 | \n", "43 | \n", "16 | \n", "11 | \n", "70 | \n", "68 | \n", "46 | \n", "35 | \n", "57 | \n", "30 | \n", "71 | \n", "57 | \n", "19 | \n", "22 | \n", "20 | \n", "
MT1 | \n", "16 | \n", "18 | \n", "56 | \n", "13 | \n", "27 | \n", "67 | \n", "61 | \n", "43 | \n", "20 | \n", "63 | \n", "14 | \n", "43 | \n", "67 | \n", "34 | \n", "41 | \n", "23 | \n", "
MT2 | \n", "15 | \n", "21 | \n", "66 | \n", "21 | \n", "19 | \n", "50 | \n", "62 | \n", "50 | \n", "24 | \n", "68 | \n", "14 | \n", "40 | \n", "58 | \n", "31 | \n", "36 | \n", "26 | \n", "
MT3 | \n", "19 | \n", "17 | \n", "70 | \n", "12 | \n", "28 | \n", "53 | \n", "72 | \n", "39 | \n", "22 | \n", "71 | \n", "11 | \n", "40 | \n", "67 | \n", "25 | \n", "41 | \n", "17 | \n", "