EVI%20-%202018/EVI%2004/Modulo2.ipynb

{
 "cells": [
  {
   "attachments": {
    "logpypan.png": {
     "image/png": "iVBORw0KGgoAAAANSUhEUgAAAhkAAABoCAYAAABc++LGAAAAAXNSR0IArs4c6QAAQABJREFUeAHsvXdwXdedJvg95JwzCQJgJkVSJEWJClS2oq3gKFvutt3r6m73TPeWd2erZqZqumr/2K2t3Z3e7Z6uHU9PW227naMsy7KsnChKpJjFHAGCyDkDDw94+33n3XNx38MD8EABoEjdQz7cc8896f5u+H33l04gzAQ/+RTwKeBTwKeATwGfAj4FFpgCSQvcn9+dTwGfAj4FfAr4FPAp4FPAUCDlWqHD8Ng4egZG0c1fV/8o+obGMDAcxJmmbhy/2InmzkEMjgaRnBRAUU46Vi8rwpZVJSgvyEZ+TgaKczNRlJeBotwM5GVnIBC4Vs7cn6dPAZ8CPgV8CvgUuDYpEPg4qktCE5NoaB/AsfouHLvYgXNNvWjuGkDP4ChGxkIIJKeS2kQJQgrhSf4P8Tdp8uYysDwQSEIgKZm71AapzsQEUii3yc9OQ2VxLuoq87F5ZSk215UTkBQiI+2awVvX5p3mz9qngE8BnwI+BT5xFPhYgYxTjT14+eAlvHO0Eedb+kBYELkgBBDhMIEEwQL/8L81I3G2ZmPL1IR5d9fNmL4IPwBKOwwAUbWJEdSWF+L2G5bjgR0rsX1tBZKSfC1ShPD+X58CPgV8CvgU8Clw5RT4WICMfada8cPXTuPdEy0EFskRycTEuAEUkVOLAIUItrCgwdmajS1TbebdXZsROLFEcjJmoz8RwBFITsbkeBDb1lTgqw9swgM3rURysg82LNX8rU8BnwI+BXwK+BSYLwWuKsiQfcXf/+Ywnnu/nrw+CZMTYw6wIPN3sIDNLA7AsOSaGi/JqGImceuGKvxPX7wVG2pLbSV/61PAp4BPAZ8CPgV8CsyDAlcNZJxr7sW/f2YPzrWNUIIw7IALzXyK4S8twHBRjZlDUnI6sjMC+A9P34Endm2YB0n9qj4FfAr4FPAp4FPAp4AocFVAxuXOAfz537+B5p5RAoxRz5XwAAx5f2jXiDAsAHC2ZmPL1Jx5d9dmZlOR2CFtO9smuq8AbTMoYMF/JND4yqdutI38rU8BnwI+BXwK+BTwKZAABZbc6ECeI//bT/YSYIxPBxi0j0hKSZeZBNLlCkLkoP2A8Td1gIDZxAcFLtIwBqL27OdqN3Nf4ckJGoZO4v/6yR68c6TBduhvfQr4FPAp4FPAp4BPgQQosOR+m3uON2PPyU4ybxp2ukmMPolOHwF87vYafPqWlSgryETXwAhePtCAn795GuMhQgi5qU6JLCJ5FyNYMKE6Ntky7bsVI3mzG6/MU5eH5dESojjj//jRm4y78SXG3MhUBT/5FPAp4FPAp4BPAZ8Cc1BgyUHGix80GPfR8ETQmZo4eQApqSn426/chM/dudad8oryPGxbXc5fKf7jM7sxFoxRgbgYwWYWFmBYYBKeHMflrhE8984JfO2Rm9z5+RmfAj4FfAr4FPAp4FNgZgosKciQIOJ8S69xUY1MSQCDMoyUNNx/Y0UUwPBO+cGb6rDvRDN+sfsCJkMCJ5F2bh8mEwNAVGawh/kTqeq2i1fmNohuF3DqMpjXC3vO4OkHtyKF7q5+8ingU2DxKSCbrIGBAfT29hq1aUR1uvjjXisjiD4TfDdlZmaisLAQaWlp06auOv39/YaGigHk03AaiRAKhQwNi4qKkJqqYI/RSTTs6+szP5+G0bTRnr0Ps7KyIBqmpExBi6nc9HYLXjJJGwc9EJFE5u3wb6kkHr65dtbxHr5lFX7+1lnWmWrndoBFBhjsfjIphAstXWjrHsSy0vxZ5+of9CngU2BhKKCX1549e/DSSy+ZDuMx0YUZ6drtZWhoCGvWrMETTzyB2traaSciBrp79268/PLLJtCgT8NpJIJouGHDBjz++OOorq6eVmF8fNzQ8NVXXzU0jAdEpjX6hBUMDg5iy5YtePLJJ1FeXu6e/ZKCDCFo14jTARgCCgoLXpKf5U4qXkZrjiQFCFKoWomAC9vB4gMM+uCYIUeCIXT2DfkgI94F8st8CiwCBQQyzp49i/3795sXWElJySKMcu12OUnx8MmTJ42U4q677ooLMkTDU6dO4eDBg9i2bRvy8vKu3RNehJmLhsePHzfSHtEwHsgQDUVnS8Pi4uJFmMm126Xoc+zYMYyOjuLee++9miAj4jFiJRhWEqG1SC53DGDrqrIZqdzcPYDJMNuHJQlZYoBhZyWsYYe2Zf7Wp4BPgUWjgD5KJHpdu3YtvvzlL2Pr1q2OW/uiDXlNdSzJ8L/8y78YIJY8gxpX4n1JL/Sl/vTTT+OGG25IiIZ61emT7npPkvSkp6fj4sWLM6qSRENJL0TDr371q2YrxuqnCAUE1PScNjY2TiPJkkoyNDplGc4kIhdI1ynACT67+yw+vXMVL/K0OZqCZ98+Q4PRJI89xxJJMNzpRMZL5hz85FPAp8DSUUBAQ0wyOzsbubm5SzfwNTCSQEZGRgaXQJjdTswyyURp2N43giP1PSjISsWWuiKGFJi9/2uAVDNO0dpjzGVrYWmYk5Pj34cx1BTI0H0Y0VREH1xijklGbdIUwJBUYpKeJvvOduC//PYAbTZsnamJfu+PR83CaRGjT5VfHYAxNSM/51PAp8BSUsD/aoxPbUsXu41fK1KaSB3VHA9N4oMzHfj//nAMvz/QiN5B6wk4W+/X7jHRJVHa6CxVN8QQDEPBQQS1FIafDE1mouFVkGREA4zI9eFFngjhuy8ex+Gz7ZRo1KGiOAedvUN46YN67GZsjbBRk6j21QQYkbn795RPAZ8CPgWuVwo0dg7h5SPN2He+C4WMC9Q7FEQ54xb5aYoCI6FhNA80Iic9H8typxuKTtX0c0sOMsSmI6osy7CdrdAk0eGBcx04cJ7BuuiJElBMb8cw1FwqRfJ0k22nAtuXkze78co8db11rJuqurd5VY0HaEy5/8enwOwUkBj76NGjxlBsbGwMcu3asWMHNm7caKzTZ2/tH72aFNAXmRhrR/8oXRv5UvCocJP5TirJT4cM0T3FV3O6Czq2jNvfPdWGo/VdSOUq1MNjIQyMeAMnLuhw8+psku//tsFmtA+1oYDMvSy3CpkpszsMxBtggh+sQ2MD6BzuoBooHcUZpR41frwW08vUx4Xus8jPKEBV7vJ5t5/e4/VbsqQgw6wFYowuLFhwgICH4U+aSKAsiPwn5W0d20YXw5Z58sq6jZzjUWXOcXfj1LGgYkaA4ekrgo5Mr/4fnwKzUUA6yrfffhv/8A//YNzj5BXx7W9/G+vXr/dBxmyE+xgc0xN/5GI3fsnVoXuGxpDiGIrJ5Dw7PQWf3VmDB29chuSk6w9mnGvpx1vHWlCQk45b89KpOgmhj4Dr45BGKT14s/4NvHz+RRRmFmFd0TqsKlyFFQW1qMhdhpy0XHogxrcAGAuNome0Gy39l9HQW48LvRfR2N+AbRXb8OjqJ5wP2sTPkvGp0TrYivbBTmyp2GHGTrz1J6vmkoKMCGkt03a2ZmPLVIN5d9fJXDUJhjuRT9Zd4Z/tR6aAvoblN97a2oqRkRETH0b7M+ktP/KAfgcLRgF9S1yit9ubx5rR3D2CVAdMjPNAUU4attUWOdfx+gIZ/cNBvPlhKxo6B/HEzSsoTQ7jVapNuj8mNhndw1042noIFwkSggzK2NrfiLcuvobi7DKsLFqF1YVrUEvAUZVXjVxKOgRK2ggEmvovob67Huf7zqOZbQaDAwSOKShhu5zUPAITGbXO710vA8eB0SG0DTUTbDRhddH6Bbv/rreOlhZk8DpGPZbmunovLvPurs18fCQYdkbX203gn8/iUEAvIlmkKykfz/J6cUb2e/2oFNA6Sum8dhkpAVdikcwXQDpVCNehAMOApmOXerH7VCvqSnNw35ZlONnYgxA/8HqHuFo2AUfSVT7xjuF2dFHFsbPyZnxm3RMGLJylyuJC9zl8cPl97L20G/mUcKwooE1fdjn6x/pRT4lFF9tN0OYvi5KOSgKQ1QQkNQQj1XkrUJ5bifRAJsMjePlMIndPgMp0qkx6zqO+5yJWUaoy5TmZSPtPTp2l
    }
   },
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# *Introducción a Pandas en los cuadernos de Jupyter*\n",
    "![logpypan.png](attachment:logpypan.png)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### La estructura de Datos `Serie`"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`Arreglo` unidimensional con etiquetas en los ejes (incluidas series de tiempo). Los parámetros de una `Serie` son: `data` (matriz, diccionario o escalar), `index` (arreglo de índices), `dtype` (`numpy.dtype` o `None`) y `copy` (booleano o por defecto `False`)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "Importamos la biblioteca `Pandas`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "pd.Series?"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "Podemos convertir una `lista` en una `serie` y pandas asigna de manera inmediata una lista de índices que empieza en 0."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      Tigre\n",
       "1        Oso\n",
       "2    Camello\n",
       "dtype: object"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "animales = ['Tigre', 'Oso', 'Camello']\n",
    "pd.Series(animales)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    1\n",
       "1    2\n",
       "2    3\n",
       "dtype: int64"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "numeros = [1, 2, 3]\n",
    "pd.Series(numeros)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    Tigre\n",
       "1      Oso\n",
       "2     None\n",
       "dtype: object"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "animales = ['Tigre', 'Oso', None]\n",
    "pd.Series(animales)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "Es importante saber como `NumPy` y `Pandas` manejan los datos faltantes. En Python tenemos el tipo `None` para indicar un dato faltante. Si tenemos una lista de números, `Pandas` automáticamente convierte este valor `None` en un valor designado como `NaN`, que significa `Not a Number`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    1.0\n",
       "1    2.0\n",
       "2    NaN\n",
       "dtype: float64"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "numeros = [1, 2, None]\n",
    "pd.Series(numeros)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "Importamos la biblioteca `NumPy`. También es importante saber que `NaN` no es `None`. Cuando hacemos un test para saber si `NaN` es `NaN` tambien obtendremos `False`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "np.nan == None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.nan == np.nan"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "Se necesita la función especial `isnan` de `NumPy` para chequear la presencia de un `no número` en nuestros datos."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "True\n",
      "True\n",
      "True\n"
     ]
    }
   ],
   "source": [
    "print(np.isnan(np.nan))\n",
    "print(None is None)\n",
    "print(np.nan is np.nan)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "¿Cómo creamos una `serie` en `Pandas`? Podemos utilizar una estructura de datos `diccionario` con sus `claves` y convertirlo en una `serie`, donde los índices de la `serie` son las claves del diccionario."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Béisbol                   Cuba\n",
       "Capoeira                Brasil\n",
       "Corrida de Toros        España\n",
       "Golf                   Escocia\n",
       "Pelota Vasca        País Vasco\n",
       "Rayuela                  Chile\n",
       "Rugby                    Gales\n",
       "Sumo                     Japón\n",
       "dtype: object"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "deportes = {'Capoeira': 'Brasil',\n",
    "          'Rayuela': 'Chile',\n",
    "          'Pelota Vasca': 'País Vasco',\n",
    "          'Béisbol': 'Cuba',\n",
    "           'Rugby': 'Gales',\n",
    "           'Golf': 'Escocia',\n",
    "           'Corrida de Toros': 'España',\n",
    "           'Sumo': 'Japón'}\n",
    "s = pd.Series(deportes)\n",
    "s"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "Luego, podemos chequear la lista de índices con el atributo `.index`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['Béisbol', 'Capoeira', 'Corrida de Toros', 'Golf', 'Pelota Vasca',\n",
       "       'Rayuela', 'Rugby', 'Sumo'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s.index"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "En este otro ejemplo, pasamos directamente una lista con su conjunto de índices para crear la `Serie`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "India        Tigre\n",
       "America        Oso\n",
       "Africa     Camello\n",
       "dtype: object"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s = pd.Series(['Tigre', 'Oso', 'Camello'], index=['India', 'America', 'Africa'])\n",
    "s"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "Aquí tenemos un ejemplo de un elemento nuevo en la lista de índices que no tiene un valor asignado, no existe un país asociado al índice `Natación` y `Pandas` representa este valor faltante con `NaN`. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Capoeira            Brasil\n",
       "Sumo                 Japón\n",
       "Pelota Vasca    País Vasco\n",
       "Natación               NaN\n",
       "dtype: object"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "deportes = {'Capoeira': 'Brasil',\n",
    "          'Rayuela': 'Chile',\n",
    "          'Pelota Vasca': 'País Vasco',\n",
    "          'Béisbol': 'Cuba',\n",
    "           'Rugby': 'Gales',\n",
    "           'Golf': 'Escocia',\n",
    "           'Corrida de Toros': 'España',\n",
    "           'Sumo': 'Japón'}\n",
    "s = pd.Series(deportes, index=['Capoeira', 'Sumo', 'Pelota Vasca',  'Natación'])\n",
    "s"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Búsqueda en una `Serie`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Béisbol                   Cuba\n",
       "Capoeira                Brasil\n",
       "Corrida de Toros        España\n",
       "Golf                   Escocia\n",
       "Pelota Vasca        País Vasco\n",
       "Rayuela                  Chile\n",
       "Rugby                    Gales\n",
       "Sumo                     Japón\n",
       "dtype: object"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "deportes = {'Capoeira': 'Brasil',\n",
    "          'Rayuela': 'Chile',\n",
    "          'Pelota Vasca': 'País Vasco',\n",
    "          'Béisbol': 'Cuba',\n",
    "           'Rugby': 'Gales',\n",
    "           'Golf': 'Escocia',\n",
    "           'Corrida de Toros': 'España',\n",
    "           'Sumo': 'Japón'}\n",
    "s = pd.Series(deportes)\n",
    "s"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "Podemos hacer búsquedas en las `series` por posición de índices o por etiqueta de índices. Si queremos hacer búsqueda por ubicación numérica (empezando desde 0) utilizamos el atributo `iloc`. Si por otra parte, hacemos búqueda por etiqueta de índice entonces usamos el atributo `loc`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'País Vasco'"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s.iloc[4]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'País Vasco'"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s.loc['Pelota Vasca']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "`Pandas` trata de que el código sea más legible. Si le pasamos por parámetro un valor numérico a la `Serie` esta se comportará como si la búsqueda se hace con el atributo `iloc`, si en cambio le pasamos un objeto, hará la búsqueda por etiqueta como con el atributo `loc`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'País Vasco'"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s[4]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'País Vasco'"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s['Pelota Vasca']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "¿Qué pasa cuando tenemos una lista de índices que son enteros?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "99         Brasil\n",
       "100         Chile\n",
       "101    País Vasco\n",
       "102          Cuba\n",
       "103         Gales\n",
       "104       Escocia\n",
       "105        España\n",
       "106         Japón\n",
       "dtype: object"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "deportes = {99: 'Brasil',\n",
    "          100: 'Chile',\n",
    "          101: 'País Vasco',\n",
    "          102: 'Cuba',\n",
    "           103: 'Gales',\n",
    "           104: 'Escocia',\n",
    "           105: 'España',\n",
    "           106: 'Japón'}\n",
    "s = pd.Series(deportes)\n",
    "s"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "Cuando tenemos un caso como este es más seguro utilizar los atributos `iloc` o `loc` según sea el caso. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "ename": "KeyError",
     "evalue": "0",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-19-6188cad29e89>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0ms\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;31m#Esta instrucción no llamará s.iloc[0] como esperaríamos y va a generar un error\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m/anaconda3/lib/python3.6/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m    621\u001b[0m         \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_apply_if_callable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    622\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 623\u001b[0;31m             \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    624\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    625\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mis_scalar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/anaconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_value\u001b[0;34m(self, series, key)\u001b[0m\n\u001b[1;32m   2558\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2559\u001b[0m             return self._engine.get_value(s, k,\n\u001b[0;32m-> 2560\u001b[0;31m                                           tz=getattr(series.dtype, 'tz', None))\n\u001b[0m\u001b[1;32m   2561\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2562\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minferred_type\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'integer'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'boolean'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_value\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_value\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.Int64HashTable.get_item\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.Int64HashTable.get_item\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;31mKeyError\u001b[0m: 0"
     ]
    }
   ],
   "source": [
    "s[0] #Esta instrucción no llamará s.iloc[0] como esperaríamos y va a generar un error"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "s.iloc[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "s.loc[99]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "Ya que sabemos hacer búsquedas en las `Series`, ahora vamos a trabajar con los datos (encontrar valores, resumir los datos o transformarlos)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    105.0\n",
       "1    223.0\n",
       "2      5.0\n",
       "3    102.0\n",
       "4     27.0\n",
       "5   -126.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s = pd.Series([105.00, 223.00, 5, 102.00, 27, -126])\n",
    "s"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "Una forma de trabajar es iterar sobre un conjunto de datos e invocar una operación de interés"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "336.0\n"
     ]
    }
   ],
   "source": [
    "total = 0\n",
    "for elemento in s:\n",
    "    total+=elemento\n",
    "print(total)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "Con `NumPy` podemos tener acceso a las funciones universales binarias o unarias (vectorizadas, cálculos más rápidos). En este ejemplo, `np.sum` hará la suma de todos los elementos en la `serie`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "336.0\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "total = np.sum(s)\n",
    "print(total)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "También podemos generar una `serie` grande de números aleatorios y con el método `.head()` podemos desplegar un encabezado con los 5 primeros elementos de la `serie` y con `len` chequear el tamaño de la misma."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0     42\n",
      "1    872\n",
      "2    956\n",
      "3    330\n",
      "4      9\n",
      "dtype: int64\n",
      "10000\n"
     ]
    }
   ],
   "source": [
    "s = pd.Series(np.random.randint(0,1000,10000))\n",
    "print(s.head())\n",
    "print(len(s))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "Los cuadernos de `Jupyter` tienen **funciones mágicas** que pueden ser útiles. Una de ellas es `%%timeit` que nos servirá para ver cuál de los dos métodos para sumar elementos de una `serie` es más rápido.\n",
    "Basta con tipear el símbolo `%` y la tecla `Tab` para obtener una lista de las **funciones mágicas** de `Jupyter`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "803 µs ± 52.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit -n 100\n",
    "sumar = 0\n",
    "for elemento in s:\n",
    "    sumar+=elemento"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "128 µs ± 21.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit -n 100\n",
    "sumar = np.sum(s)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "`NumPy` y `Pandas` tienen el `broadcasting`, se puede aplicar una operación a cada valor de la `serie` y modificarla."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     44\n",
       "1    874\n",
       "2    958\n",
       "3    332\n",
       "4     11\n",
       "dtype: int64"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s+=2 #Suma 2 a cada elemento de la serie usando broadcasting\n",
    "s.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "Una manera poco eficiente de hacer esto es iterar sobre cada elemento de la `serie` para hacer la suma. El método `.iteritems()` devuelve un iterador sobre los pares `(key, value)` (clave, valor) de un diccionario, en este caso de nuestra `serie` s."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:2: FutureWarning: set_value is deprecated and will be removed in a future release. Please use .at[] or .iat[] accessors instead\n",
      "  \n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "0     46\n",
       "1    876\n",
       "2    960\n",
       "3    334\n",
       "4     13\n",
       "dtype: int64"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "for etiqueta, valor in s.iteritems():\n",
    "    s.set_value(etiqueta, valor+2)\n",
    "s.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%timeit -n 10\n",
    "s = pd.Series(np.random.randint(0,1000,10000))\n",
    "for etiqueta, valor in s.iteritems():\n",
    "    s.loc[etiqueta]= valor+2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%timeit -n 10\n",
    "s = pd.Series(np.random.randint(0,1000,10000))\n",
    "s+=2\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "Podemos agregar elementos a una `serie` de la siguiente forma:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0           1\n",
       "1           2\n",
       "2           3\n",
       "Animal    Oso\n",
       "dtype: object"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "s = pd.Series([1, 2, 3])\n",
    "s.loc['Animal'] = 'Oso'\n",
    "s"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "Este es un ejemplo de una `serie` donde los valores del conjunto de índices no son únicos. Esto hace que las tablas de datos funcionen diferente y es por ello que agregar nuevos elementos debe hacerse con el método `append`, que en primera instancia, no modificará la `serie` sino que devuelve una nueva `serie` con los elementos que se agregaron."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "deportes_originales = pd.Series({'Capoeira': 'Brasil',\n",
    "          'Rayuela': 'Chile',\n",
    "          'Pelota Vasca': 'País Vasco',\n",
    "          'Béisbol': 'Cuba',\n",
    "           'Rugby': 'Gales',\n",
    "           'Golf': 'Escocia',\n",
    "           'Corrida de Toros': 'España',\n",
    "           'Sumo': 'Japón'})\n",
    "paises_que_aman_el_beisbol = pd.Series(['Venezuela',\n",
    "                                      'USA',\n",
    "                                      'Cuba',\n",
    "                                      'Puerto Rico',\n",
    "                                       'Dominicana'], \n",
    "                                   index=['Béisbol',\n",
    "                                          'Béisbol',\n",
    "                                          'Béisbol',\n",
    "                                          'Béisbol',\n",
    "                                          'Béisbol'])\n",
    "todos_los_paises = deportes_originales.append(paises_que_aman_el_beisbol)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Béisbol                   Cuba\n",
       "Capoeira                Brasil\n",
       "Corrida de Toros        España\n",
       "Golf                   Escocia\n",
       "Pelota Vasca        País Vasco\n",
       "Rayuela                  Chile\n",
       "Rugby                    Gales\n",
       "Sumo                     Japón\n",
       "dtype: object"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "deportes_originales"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Béisbol      Venezuela\n",
       "Béisbol            USA\n",
       "Béisbol           Cuba\n",
       "Béisbol    Puerto Rico\n",
       "Béisbol     Dominicana\n",
       "dtype: object"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "paises_que_aman_el_beisbol"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Béisbol                    Cuba\n",
       "Capoeira                 Brasil\n",
       "Corrida de Toros         España\n",
       "Golf                    Escocia\n",
       "Pelota Vasca         País Vasco\n",
       "Rayuela                   Chile\n",
       "Rugby                     Gales\n",
       "Sumo                      Japón\n",
       "Béisbol               Venezuela\n",
       "Béisbol                     USA\n",
       "Béisbol                    Cuba\n",
       "Béisbol             Puerto Rico\n",
       "Béisbol              Dominicana\n",
       "dtype: object"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "todos_los_paises"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Béisbol           Cuba\n",
       "Béisbol      Venezuela\n",
       "Béisbol            USA\n",
       "Béisbol           Cuba\n",
       "Béisbol    Puerto Rico\n",
       "Béisbol     Dominicana\n",
       "dtype: object"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "todos_los_paises.loc['Béisbol']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "### La estructura de datos `DataFrame`"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "El `DataFrame` o Tabla de Datos es el corazón de la biblioteca `Pandas`. Es el objeto primario para el análisis de datos. Es una especie de **arreglo bidimensional** con etiquetas en los ejes. En este ejemplo, crearemos tres diccionarios que serán luego las filas de nuestro `DataFrame`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Artículo comprado</th>\n",
       "      <th>Costo</th>\n",
       "      <th>Nombre</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Tienda 1</th>\n",
       "      <td>Libro</td>\n",
       "      <td>1200</td>\n",
       "      <td>Adelis</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Tienda 1</th>\n",
       "      <td>Raspberry pi 3</td>\n",
       "      <td>15000</td>\n",
       "      <td>Miguel</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Tienda 2</th>\n",
       "      <td>Balón</td>\n",
       "      <td>5000</td>\n",
       "      <td>Jaime</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Artículo comprado  Costo  Nombre\n",
       "Tienda 1             Libro   1200  Adelis\n",
       "Tienda 1    Raspberry pi 3  15000  Miguel\n",
       "Tienda 2             Balón   5000   Jaime"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "compra_1 = pd.Series({'Nombre': 'Adelis',\n",
    "                        'Artículo comprado': 'Libro',\n",
    "                        'Costo': 1200})\n",
    "compra_2 = pd.Series({'Nombre': 'Miguel',\n",
    "                        'Artículo comprado': 'Raspberry pi 3',\n",
    "                        'Costo': 15000})\n",
    "compra_3 = pd.Series({'Nombre': 'Jaime',\n",
    "                        'Artículo comprado': 'Balón',\n",
    "                        'Costo': 5000})\n",
    "df = pd.DataFrame([compra_1, compra_2, compra_3], index=['Tienda 1', 'Tienda 1', 'Tienda 2'])\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "En un `DataFrame` también se puede extraer información usando los atributos `loc` y `iloc`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Artículo comprado    Balón\n",
       "Costo                 5000\n",
       "Nombre               Jaime\n",
       "Name: Tienda 2, dtype: object"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc['Tienda 2']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "También podemos chequear el tipo de dato usando la función `type` de Python."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.series.Series"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(df.loc['Tienda 2'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "En los `DataFrame` también se pueden tener listas de índices no únicos. En el ejemplo, hay dos índices con el mismo nombre `Tienda 1`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Artículo comprado</th>\n",
       "      <th>Costo</th>\n",
       "      <th>Nombre</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Tienda 1</th>\n",
       "      <td>Libro</td>\n",
       "      <td>1200</td>\n",
       "      <td>Adelis</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Tienda 1</th>\n",
       "      <td>Raspberry pi 3</td>\n",
       "      <td>15000</td>\n",
       "      <td>Miguel</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Artículo comprado  Costo  Nombre\n",
       "Tienda 1             Libro   1200  Adelis\n",
       "Tienda 1    Raspberry pi 3  15000  Miguel"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc['Tienda 1']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "También podemos seleccionar columnas agregando un parámetro extra al atributo `loc`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Tienda 1     1200\n",
       "Tienda 1    15000\n",
       "Name: Costo, dtype: int64"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc['Tienda 1', 'Costo']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "Usar el atributo `.T` para obtener la transpuesta del `DataFrame` o Tabla de Datos."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Tienda 1</th>\n",
       "      <th>Tienda 1</th>\n",
       "      <th>Tienda 2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Artículo comprado</th>\n",
       "      <td>Libro</td>\n",
       "      <td>Raspberry pi 3</td>\n",
       "      <td>Balón</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Costo</th>\n",
       "      <td>1200</td>\n",
       "      <td>15000</td>\n",
       "      <td>5000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Nombre</th>\n",
       "      <td>Adelis</td>\n",
       "      <td>Miguel</td>\n",
       "      <td>Jaime</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  Tienda 1        Tienda 1 Tienda 2\n",
       "Artículo comprado    Libro  Raspberry pi 3    Balón\n",
       "Costo                 1200           15000     5000\n",
       "Nombre              Adelis          Miguel    Jaime"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.T"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "Usando `.T.loc[]` se puede seleccionar una columna usando como parámetro la etiqueta de su nombre. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Tienda 1     1200\n",
       "Tienda 1    15000\n",
       "Tienda 2     5000\n",
       "Name: Costo, dtype: object"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.T.loc['Costo']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Tienda 1     1200\n",
       "Tienda 1    15000\n",
       "Tienda 2     5000\n",
       "Name: Costo, dtype: int64"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['Costo']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Tienda 1     1200\n",
       "Tienda 1    15000\n",
       "Name: Costo, dtype: int64"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc['Tienda 1']['Costo']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "`loc` también tiene soporte para rebanar o seleccionar del `DataFrame` con la notación `[]`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Nombre</th>\n",
       "      <th>Costo</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Tienda 1</th>\n",
       "      <td>Adelis</td>\n",
       "      <td>1200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Tienda 1</th>\n",
       "      <td>Miguel</td>\n",
       "      <td>15000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Tienda 2</th>\n",
       "      <td>Jaime</td>\n",
       "      <td>5000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          Nombre  Costo\n",
       "Tienda 1  Adelis   1200\n",
       "Tienda 1  Miguel  15000\n",
       "Tienda 2   Jaime   5000"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[:,['Nombre', 'Costo']]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "También podemos eliminar datos del `DataFrame` con la función `drop()`. Esta función toma un solo parámetro que es el índice del conjunto de datos que deseamos eliminar. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Artículo comprado</th>\n",
       "      <th>Costo</th>\n",
       "      <th>Nombre</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Tienda 2</th>\n",
       "      <td>Balón</td>\n",
       "      <td>5000</td>\n",
       "      <td>Jaime</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Artículo comprado  Costo Nombre\n",
       "Tienda 2             Balón   5000  Jaime"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.drop('Tienda 1')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "Podemos ver que nuestro `DataFrame` original sigue intacto. Solo hicimos una **extracción** de información."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Artículo comprado</th>\n",
       "      <th>Costo</th>\n",
       "      <th>Nombre</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Tienda 1</th>\n",
       "      <td>Libro</td>\n",
       "      <td>1200</td>\n",
       "      <td>Adelis</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Tienda 1</th>\n",
       "      <td>Raspberry pi 3</td>\n",
       "      <td>15000</td>\n",
       "      <td>Miguel</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Tienda 2</th>\n",
       "      <td>Balón</td>\n",
       "      <td>5000</td>\n",
       "      <td>Jaime</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Artículo comprado  Costo  Nombre\n",
       "Tienda 1             Libro   1200  Adelis\n",
       "Tienda 1    Raspberry pi 3  15000  Miguel\n",
       "Tienda 2             Balón   5000   Jaime"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "También podemos hacer una copia del `DataFrame` con la función `copy()` para guardar la extracción de información. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Artículo comprado</th>\n",
       "      <th>Costo</th>\n",
       "      <th>Nombre</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Tienda 2</th>\n",
       "      <td>Balón</td>\n",
       "      <td>5000</td>\n",
       "      <td>Jaime</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Artículo comprado  Costo Nombre\n",
       "Tienda 2             Balón   5000  Jaime"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "copiar_df = df.copy()\n",
    "copiar_df = copiar_df.drop('Tienda 1')\n",
    "copiar_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "copiar_df.drop?"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "Podemos eliminar una columna de manera sencilla, usando simplemente la palabra clave `del` y el índice o nombre de la comuna. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Artículo comprado</th>\n",
       "      <th>Nombre</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Tienda 2</th>\n",
       "      <td>Balón</td>\n",
       "      <td>Jaime</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Artículo comprado Nombre\n",
       "Tienda 2             Balón  Jaime"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "del copiar_df['Costo']\n",
    "copiar_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "Finalmente, es muy sencillo agregar una columna al `DataFrame`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Artículo comprado</th>\n",
       "      <th>Costo</th>\n",
       "      <th>Nombre</th>\n",
       "      <th>Ubicación</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Tienda 1</th>\n",
       "      <td>Libro</td>\n",
       "      <td>1200</td>\n",
       "      <td>Adelis</td>\n",
       "      <td>Venezuela</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Tienda 1</th>\n",
       "      <td>Raspberry pi 3</td>\n",
       "      <td>15000</td>\n",
       "      <td>Miguel</td>\n",
       "      <td>Chile</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Tienda 2</th>\n",
       "      <td>Balón</td>\n",
       "      <td>5000</td>\n",
       "      <td>Jaime</td>\n",
       "      <td>Argentina</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Artículo comprado  Costo  Nombre  Ubicación\n",
       "Tienda 1             Libro   1200  Adelis  Venezuela\n",
       "Tienda 1    Raspberry pi 3  15000  Miguel      Chile\n",
       "Tienda 2             Balón   5000   Jaime  Argentina"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['Ubicación'] = ['Venezuela', 'Chile', 'Argentina']\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "### Lectura de un `DataFrame`"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "Usemos `!cat` para leer un archivo de formato `CSV`. Nota: `!cat` funciona para **Linux** y **Mac** pero puede no funcionar para **Windows** :("
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15\r\n",
      ",№ Summer,01 !,02 !,03 !,Total,№ Winter,01 !,02 !,03 !,Total,№ Games,01 !,02 !,03 !,Combined total\r\n",
      "Afghanistan (AFG),13,0,0,2,2,0,0,0,0,0,13,0,0,2,2\r\n",
      "Algeria (ALG),12,5,2,8,15,3,0,0,0,0,15,5,2,8,15\r\n",
      "Argentina (ARG),23,18,24,28,70,18,0,0,0,0,41,18,24,28,70\r\n",
      "Armenia (ARM),5,1,2,9,12,6,0,0,0,0,11,1,2,9,12\r\n",
      "Australasia (ANZ) [ANZ],2,3,4,5,12,0,0,0,0,0,2,3,4,5,12\r\n",
      "Australia (AUS) [AUS] [Z],25,139,152,177,468,18,5,3,4,12,43,144,155,181,480\r\n",
      "Austria (AUT),26,18,33,35,86,22,59,78,81,218,48,77,111,116,304\r\n",
      "Azerbaijan (AZE),5,6,5,15,26,5,0,0,0,0,10,6,5,15,26\r\n",
      "Bahamas (BAH),15,5,2,5,12,0,0,0,0,0,15,5,2,5,12\r\n",
      "Bahrain (BRN),8,0,0,1,1,0,0,0,0,0,8,0,0,1,1\r\n",
      "Barbados (BAR) [BAR],11,0,0,1,1,0,0,0,0,0,11,0,0,1,1\r\n",
      "Belarus (BLR),5,12,24,39,75,6,6,4,5,15,11,18,28,44,90\r\n",
      "Belgium (BEL),25,37,52,53,142,20,1,1,3,5,45,38,53,56,147\r\n",
      "Bermuda (BER),17,0,0,1,1,7,0,0,0,0,24,0,0,1,1\r\n",
      "Bohemia (BOH) [BOH] [Z],3,0,1,3,4,0,0,0,0,0,3,0,1,3,4\r\n",
      "Botswana (BOT),9,0,1,0,1,0,0,0,0,0,9,0,1,0,1\r\n",
      "Brazil (BRA),21,23,30,55,108,7,0,0,0,0,28,23,30,55,108\r\n",
      "British West Indies (BWI) [BWI],1,0,0,2,2,0,0,0,0,0,1,0,0,2,2\r\n",
      "Bulgaria (BUL) [H],19,51,85,78,214,19,1,2,3,6,38,52,87,81,220\r\n",
      "Burundi (BDI),5,1,0,0,1,0,0,0,0,0,5,1,0,0,1\r\n",
      "Cameroon (CMR),13,3,1,1,5,1,0,0,0,0,14,3,1,1,5\r\n",
      "Canada (CAN),25,59,99,121,279,22,62,56,52,170,47,121,155,173,449\r\n",
      "Chile (CHI) [I],22,2,7,4,13,16,0,0,0,0,38,2,7,4,13\r\n",
      "China (CHN) [CHN],9,201,146,126,473,10,12,22,19,53,19,213,168,145,526\r\n",
      "Colombia (COL),18,2,6,11,19,1,0,0,0,0,19,2,6,11,19\r\n",
      "Costa Rica (CRC),14,1,1,2,4,6,0,0,0,0,20,1,1,2,4\r\n",
      "Ivory Coast (CIV) [CIV],12,0,1,0,1,0,0,0,0,0,12,0,1,0,1\r\n",
      "Croatia (CRO),6,6,7,10,23,7,4,6,1,11,13,10,13,11,34\r\n",
      "Cuba (CUB) [Z],19,72,67,70,209,0,0,0,0,0,19,72,67,70,209\r\n",
      "Cyprus (CYP),9,0,1,0,1,10,0,0,0,0,19,0,1,0,1\r\n",
      "Czech Republic (CZE) [CZE],5,14,15,15,44,6,7,9,8,24,11,21,24,23,68\r\n",
      "Czechoslovakia (TCH) [TCH],16,49,49,45,143,16,2,8,15,25,32,51,57,60,168\r\n",
      "Denmark (DEN) [Z],26,43,68,68,179,13,0,1,0,1,39,43,69,68,180\r\n",
      "Djibouti (DJI) [B],7,0,0,1,1,0,0,0,0,0,7,0,0,1,1\r\n",
      "Dominican Republic (DOM),13,3,2,1,6,0,0,0,0,0,13,3,2,1,6\r\n",
      "Ecuador (ECU),13,1,1,0,2,0,0,0,0,0,13,1,1,0,2\r\n",
      "Egypt (EGY) [EGY] [Z],21,7,9,10,26,1,0,0,0,0,22,7,9,10,26\r\n",
      "Eritrea (ERI),4,0,0,1,1,0,0,0,0,0,4,0,0,1,1\r\n",
      "Estonia (EST),11,9,9,15,33,9,4,2,1,7,20,13,11,16,40\r\n",
      "Ethiopia (ETH),12,21,7,17,45,2,0,0,0,0,14,21,7,17,45\r\n",
      "Finland (FIN),24,101,84,117,302,22,42,62,57,161,46,143,146,174,463\r\n",
      "France (FRA) [O] [P] [Z],27,202,223,246,671,22,31,31,47,109,49,233,254,293,780\r\n",
      "Gabon (GAB),9,0,1,0,1,0,0,0,0,0,9,0,1,0,1\r\n",
      "Georgia (GEO),5,6,5,14,25,6,0,0,0,0,11,6,5,14,25\r\n",
      "Germany (GER) [GER] [Z],15,174,182,217,573,11,78,78,53,209,26,252,260,270,782\r\n",
      "United Team of Germany (EUA) [EUA],3,28,54,36,118,3,8,6,5,19,6,36,60,41,137\r\n",
      "East Germany (GDR) [GDR],5,153,129,127,409,6,39,36,35,110,11,192,165,162,519\r\n",
      "West Germany (FRG) [FRG],5,56,67,81,204,6,11,15,13,39,11,67,82,94,243\r\n",
      "Ghana (GHA) [GHA],13,0,1,3,4,1,0,0,0,0,14,0,1,3,4\r\n",
      "Great Britain (GBR) [GBR] [Z],27,236,272,272,780,22,10,4,12,26,49,246,276,284,806\r\n",
      "Greece (GRE) [Z],27,30,42,39,111,18,0,0,0,0,45,30,42,39,111\r\n",
      "Grenada (GRN),8,1,0,0,1,0,0,0,0,0,8,1,0,0,1\r\n",
      "Guatemala (GUA),13,0,1,0,1,1,0,0,0,0,14,0,1,0,1\r\n",
      "Guyana (GUY) [GUY],16,0,0,1,1,0,0,0,0,0,16,0,0,1,1\r\n",
      "Haiti (HAI) [J],14,0,1,1,2,0,0,0,0,0,14,0,1,1,2\r\n",
      "Hong Kong (HKG) [HKG],15,1,1,1,3,4,0,0,0,0,19,1,1,1,3\r\n",
      "Hungary (HUN),25,167,144,165,476,22,0,2,4,6,47,167,146,169,482\r\n",
      "Iceland (ISL),19,0,2,2,4,17,0,0,0,0,36,0,2,2,4\r\n",
      "India (IND) [F],23,9,6,11,26,9,0,0,0,0,32,9,6,11,26\r\n",
      "Indonesia (INA),14,6,10,11,27,0,0,0,0,0,14,6,10,11,27\r\n",
      "Iran (IRI) [K],15,15,20,25,60,10,0,0,0,0,25,15,20,25,60\r\n",
      "Iraq (IRQ),13,0,0,1,1,0,0,0,0,0,13,0,0,1,1\r\n",
      "Ireland (IRL),20,9,8,12,29,6,0,0,0,0,26,9,8,12,29\r\n",
      "Israel (ISR),15,1,1,5,7,6,0,0,0,0,21,1,1,5,7\r\n",
      "Italy (ITA) [M] [S],26,198,166,185,549,22,37,34,43,114,48,235,200,228,663\r\n",
      "Jamaica (JAM) [JAM],16,17,30,20,67,7,0,0,0,0,23,17,30,20,67\r\n",
      "Japan (JPN),21,130,126,142,398,20,10,17,18,45,41,140,143,160,443\r\n",
      "Kazakhstan (KAZ),5,16,17,19,52,6,1,3,3,7,11,17,20,22,59\r\n",
      "Kenya (KEN),13,25,32,29,86,3,0,0,0,0,16,25,32,29,86\r\n",
      "North Korea (PRK),9,14,12,21,47,8,0,1,1,2,17,14,13,22,49\r\n",
      "South Korea (KOR),16,81,82,80,243,17,26,17,10,53,33,107,99,90,296\r\n",
      "Kuwait (KUW),12,0,0,2,2,0,0,0,0,0,12,0,0,2,2\r\n",
      "Kyrgyzstan (KGZ),5,0,1,2,3,6,0,0,0,0,11,0,1,2,3\r\n",
      "Latvia (LAT),10,3,11,5,19,10,0,4,3,7,20,3,15,8,26\r\n",
      "Lebanon (LIB),16,0,2,2,4,16,0,0,0,0,32,0,2,2,4\r\n",
      "Liechtenstein (LIE),16,0,0,0,0,18,2,2,5,9,34,2,2,5,9\r\n",
      "Lithuania (LTU),8,6,5,10,21,8,0,0,0,0,16,6,5,10,21\r\n",
      "Luxembourg (LUX) [O],22,1,1,0,2,8,0,2,0,2,30,1,3,0,4\r\n",
      "Macedonia (MKD),5,0,0,1,1,5,0,0,0,0,10,0,0,1,1\r\n",
      "Malaysia (MAS) [MAS],12,0,3,3,6,0,0,0,0,0,12,0,3,3,6\r\n",
      "Mauritius (MRI),8,0,0,1,1,0,0,0,0,0,8,0,0,1,1\r\n",
      "Mexico (MEX),22,13,21,28,62,8,0,0,0,0,30,13,21,28,62\r\n",
      "Moldova (MDA),5,0,2,5,7,6,0,0,0,0,11,0,2,5,7\r\n",
      "Mongolia (MGL),12,2,9,13,24,13,0,0,0,0,25,2,9,13,24\r\n",
      "Montenegro (MNE),2,0,1,0,1,2,0,0,0,0,4,0,1,0,1\r\n",
      "Morocco (MAR),13,6,5,11,22,6,0,0,0,0,19,6,5,11,22\r\n",
      "Mozambique (MOZ),9,1,0,1,2,0,0,0,0,0,9,1,0,1,2\r\n",
      "Namibia (NAM),6,0,4,0,4,0,0,0,0,0,6,0,4,0,4\r\n",
      "Netherlands (NED) [Z],25,77,85,104,266,20,37,38,35,110,45,114,123,139,376\r\n",
      "Netherlands Antilles (AHO) [AHO] [I],13,0,1,0,1,2,0,0,0,0,15,0,1,0,1\r\n",
      "New Zealand (NZL) [NZL],22,42,18,39,99,15,0,1,0,1,37,42,19,39,100\r\n",
      "Niger (NIG),11,0,0,1,1,0,0,0,0,0,11,0,0,1,1\r\n",
      "Nigeria (NGR),15,3,8,12,23,0,0,0,0,0,15,3,8,12,23\r\n",
      "Norway (NOR) [Q],24,56,49,43,148,22,118,111,100,329,46,174,160,143,477\r\n",
      "Pakistan (PAK),16,3,3,4,10,2,0,0,0,0,18,3,3,4,10\r\n",
      "Panama (PAN),16,1,0,2,3,0,0,0,0,0,16,1,0,2,3\r\n",
      "Paraguay (PAR),11,0,1,0,1,1,0,0,0,0,12,0,1,0,1\r\n",
      "Peru (PER) [L],17,1,3,0,4,2,0,0,0,0,19,1,3,0,4\r\n",
      "Philippines (PHI),20,0,2,7,9,4,0,0,0,0,24,0,2,7,9\r\n",
      "Poland (POL),20,64,82,125,271,22,6,7,7,20,42,70,89,132,291\r\n",
      "Portugal (POR),23,4,8,11,23,7,0,0,0,0,30,4,8,11,23\r\n",
      "Puerto Rico (PUR),17,0,2,6,8,6,0,0,0,0,23,0,2,6,8\r\n",
      "Qatar (QAT),8,0,0,4,4,0,0,0,0,0,8,0,0,4,4\r\n",
      "Romania (ROU),20,88,94,119,301,20,0,0,1,1,40,88,94,120,302\r\n",
      "Russia (RUS) [RUS],5,132,121,142,395,6,49,40,35,124,11,181,161,177,519\r\n",
      "Russian Empire (RU1) [RU1],3,1,4,3,8,0,0,0,0,0,3,1,4,3,8\r\n",
      "Soviet Union (URS) [URS],9,395,319,296,1010,9,78,57,59,194,18,473,376,355,1204\r\n",
      "Unified Team (EUN) [EUN],1,45,38,29,112,1,9,6,8,23,2,54,44,37,135\r\n",
      "Saudi Arabia (KSA),10,0,1,2,3,0,0,0,0,0,10,0,1,2,3\r\n",
      "Senegal (SEN),13,0,1,0,1,5,0,0,0,0,18,0,1,0,1\r\n",
      "Serbia (SRB) [SRB],3,1,2,4,7,2,0,0,0,0,5,1,2,4,7\r\n",
      "Serbia and Montenegro (SCG) [SCG],3,2,4,3,9,3,0,0,0,0,6,2,4,3,9\r\n",
      "Singapore (SIN),15,0,2,2,4,0,0,0,0,0,15,0,2,2,4\r\n",
      "Slovakia (SVK) [SVK],5,7,9,8,24,6,2,2,1,5,11,9,11,9,29\r\n",
      "Slovenia (SLO),6,4,6,9,19,7,2,4,9,15,13,6,10,18,34\r\n",
      "South Africa (RSA),18,23,26,27,76,6,0,0,0,0,24,23,26,27,76\r\n",
      "Spain (ESP) [Z],22,37,59,35,131,19,1,0,1,2,41,38,59,36,133\r\n",
      "Sri Lanka (SRI) [SRI],16,0,2,0,2,0,0,0,0,0,16,0,2,0,2\r\n",
      "Sudan (SUD),11,0,1,0,1,0,0,0,0,0,11,0,1,0,1\r\n",
      "Suriname (SUR) [E],11,1,0,1,2,0,0,0,0,0,11,1,0,1,2\r\n",
      "Sweden (SWE) [Z],26,143,164,176,483,22,50,40,54,144,48,193,204,230,627\r\n",
      "Switzerland (SUI),27,47,73,65,185,22,50,40,48,138,49,97,113,113,323\r\n",
      "Syria (SYR),12,1,1,1,3,0,0,0,0,0,12,1,1,1,3\r\n",
      "Chinese Taipei (TPE) [TPE] [TPE2],13,2,7,12,21,11,0,0,0,0,24,2,7,12,21\r\n",
      "Tajikistan (TJK),5,0,1,2,3,4,0,0,0,0,9,0,1,2,3\r\n",
      "Tanzania (TAN) [TAN],12,0,2,0,2,0,0,0,0,0,12,0,2,0,2\r\n",
      "Thailand (THA),15,7,6,11,24,3,0,0,0,0,18,7,6,11,24\r\n",
      "Togo (TOG),9,0,0,1,1,1,0,0,0,0,10,0,0,1,1\r\n",
      "Tonga (TGA),8,0,1,0,1,1,0,0,0,0,9,0,1,0,1\r\n",
      "Trinidad and Tobago (TRI) [TRI],16,2,5,11,18,3,0,0,0,0,19,2,5,11,18\r\n",
      "Tunisia (TUN),13,3,3,4,10,0,0,0,0,0,13,3,3,4,10\r\n",
      "Turkey (TUR),21,39,25,24,88,16,0,0,0,0,37,39,25,24,88\r\n",
      "Uganda (UGA),14,2,3,2,7,0,0,0,0,0,14,2,3,2,7\r\n",
      "Ukraine (UKR),5,33,27,55,115,6,2,1,4,7,11,35,28,59,122\r\n",
      "United Arab Emirates (UAE),8,1,0,0,1,0,0,0,0,0,8,1,0,0,1\r\n",
      "United States (USA) [P] [Q] [R] [Z],26,976,757,666,2399,22,96,102,84,282,48,1072,859,750,2681\r\n",
      "Uruguay (URU),20,2,2,6,10,1,0,0,0,0,21,2,2,6,10\r\n",
      "Uzbekistan (UZB),5,5,5,10,20,6,1,0,0,1,11,6,5,10,21\r\n",
      "Venezuela (VEN),17,2,2,8,12,4,0,0,0,0,21,2,2,8,12\r\n",
      "Vietnam (VIE),14,0,2,0,2,0,0,0,0,0,14,0,2,0,2\r\n",
      "Virgin Islands (ISV),11,0,1,0,1,7,0,0,0,0,18,0,1,0,1\r\n",
      "Yugoslavia (YUG) [YUG],16,26,29,28,83,14,0,3,1,4,30,26,32,29,87\r\n",
      "Independent Olympic Participants (IOP) [IOP],1,0,1,2,3,0,0,0,0,0,1,0,1,2,3\r\n",
      "Zambia (ZAM) [ZAM],12,0,1,1,2,0,0,0,0,0,12,0,1,1,2\r\n",
      "Zimbabwe (ZIM) [ZIM],12,3,4,1,8,1,0,0,0,0,13,3,4,1,8\r\n",
      "Mixed team (ZZX) [ZZX],3,8,5,4,17,0,0,0,0,0,3,8,5,4,17\r\n",
      "Totals,27,4809,4775,5130,14714,22,959,958,948,2865,49,5768,5733,6078,17579\r\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "hola\r\n"
     ]
    }
   ],
   "source": [
    "!cat olympics.csv"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "Pero ... no hay que preocuparse mucho por eso! Podemos leer este archivo en formato `CSV` en un `DataFrame` usando la función `read_csv`. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "      <th>10</th>\n",
       "      <th>11</th>\n",
       "      <th>12</th>\n",
       "      <th>13</th>\n",
       "      <th>14</th>\n",
       "      <th>15</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>№ Summer</td>\n",
       "      <td>01 !</td>\n",
       "      <td>02 !</td>\n",
       "      <td>03 !</td>\n",
       "      <td>Total</td>\n",
       "      <td>№ Winter</td>\n",
       "      <td>01 !</td>\n",
       "      <td>02 !</td>\n",
       "      <td>03 !</td>\n",
       "      <td>Total</td>\n",
       "      <td>№ Games</td>\n",
       "      <td>01 !</td>\n",
       "      <td>02 !</td>\n",
       "      <td>03 !</td>\n",
       "      <td>Combined total</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Afghanistan (AFG)</td>\n",
       "      <td>13</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>13</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Algeria (ALG)</td>\n",
       "      <td>12</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>8</td>\n",
       "      <td>15</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>15</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>8</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Argentina (ARG)</td>\n",
       "      <td>23</td>\n",
       "      <td>18</td>\n",
       "      <td>24</td>\n",
       "      <td>28</td>\n",
       "      <td>70</td>\n",
       "      <td>18</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>41</td>\n",
       "      <td>18</td>\n",
       "      <td>24</td>\n",
       "      <td>28</td>\n",
       "      <td>70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Armenia (ARM)</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "      <td>12</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>11</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   0         1     2     3     4      5         6     7     8  \\\n",
       "0                NaN  № Summer  01 !  02 !  03 !  Total  № Winter  01 !  02 !   \n",
       "1  Afghanistan (AFG)        13     0     0     2      2         0     0     0   \n",
       "2      Algeria (ALG)        12     5     2     8     15         3     0     0   \n",
       "3    Argentina (ARG)        23    18    24    28     70        18     0     0   \n",
       "4      Armenia (ARM)         5     1     2     9     12         6     0     0   \n",
       "\n",
       "      9     10       11    12    13    14              15  \n",
       "0  03 !  Total  № Games  01 !  02 !  03 !  Combined total  \n",
       "1     0      0       13     0     0     2               2  \n",
       "2     0      0       15     5     2     8              15  \n",
       "3     0      0       41    18    24    28              70  \n",
       "4     0      0       11     1     2     9              12  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "df = pd.read_csv('olympics.csv')\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "Aquí podemos ignorar la primera fila del `DataFrame` para dejar más limpia la tabla de información no relevante."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>№ Summer</th>\n",
       "      <th>01 !</th>\n",
       "      <th>02 !</th>\n",
       "      <th>03 !</th>\n",
       "      <th>Total</th>\n",
       "      <th>№ Winter</th>\n",
       "      <th>01 !.1</th>\n",
       "      <th>02 !.1</th>\n",
       "      <th>03 !.1</th>\n",
       "      <th>Total.1</th>\n",
       "      <th>№ Games</th>\n",
       "      <th>01 !.2</th>\n",
       "      <th>02 !.2</th>\n",
       "      <th>03 !.2</th>\n",
       "      <th>Combined total</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Afghanistan (AFG)</th>\n",
       "      <td>13</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>13</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Algeria (ALG)</th>\n",
       "      <td>12</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>8</td>\n",
       "      <td>15</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>15</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>8</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Argentina (ARG)</th>\n",
       "      <td>23</td>\n",
       "      <td>18</td>\n",
       "      <td>24</td>\n",
       "      <td>28</td>\n",
       "      <td>70</td>\n",
       "      <td>18</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>41</td>\n",
       "      <td>18</td>\n",
       "      <td>24</td>\n",
       "      <td>28</td>\n",
       "      <td>70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Armenia (ARM)</th>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "      <td>12</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>11</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Australasia (ANZ) [ANZ]</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>12</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                         № Summer  01 !  02 !  03 !  Total  № Winter  01 !.1  \\\n",
       "Afghanistan (AFG)              13     0     0     2      2         0       0   \n",
       "Algeria (ALG)                  12     5     2     8     15         3       0   \n",
       "Argentina (ARG)                23    18    24    28     70        18       0   \n",
       "Armenia (ARM)                   5     1     2     9     12         6       0   \n",
       "Australasia (ANZ) [ANZ]         2     3     4     5     12         0       0   \n",
       "\n",
       "                         02 !.1  03 !.1  Total.1  № Games  01 !.2  02 !.2  \\\n",
       "Afghanistan (AFG)             0       0        0       13       0       0   \n",
       "Algeria (ALG)                 0       0        0       15       5       2   \n",
       "Argentina (ARG)               0       0        0       41      18      24   \n",
       "Armenia (ARM)                 0       0        0       11       1       2   \n",
       "Australasia (ANZ) [ANZ]       0       0        0        2       3       4   \n",
       "\n",
       "                         03 !.2  Combined total  \n",
       "Afghanistan (AFG)             2               2  \n",
       "Algeria (ALG)                 8              15  \n",
       "Argentina (ARG)              28              70  \n",
       "Armenia (ARM)                 9              12  \n",
       "Australasia (ANZ) [ANZ]       5              12  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv('olympics.csv', index_col = 0, skiprows=1)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "El atributo `.columns` nos permite ver el nombre de las comlumnas del `DataFrame` y el atributo `.rename` modificar el nombre."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['№ Summer', '01 !', '02 !', '03 !', 'Total', '№ Winter', '01 !.1',\n",
       "       '02 !.1', '03 !.1', 'Total.1', '№ Games', '01 !.2', '02 !.2', '03 !.2',\n",
       "       'Combined total'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th># Summer</th>\n",
       "      <th>Gold</th>\n",
       "      <th>Silver</th>\n",
       "      <th>Bronze</th>\n",
       "      <th>Total</th>\n",
       "      <th># Winter</th>\n",
       "      <th>Gold.1</th>\n",
       "      <th>Silver.1</th>\n",
       "      <th>Bronze.1</th>\n",
       "      <th>Total.1</th>\n",
       "      <th># Games</th>\n",
       "      <th>Gold.2</th>\n",
       "      <th>Silver.2</th>\n",
       "      <th>Bronze.2</th>\n",
       "      <th>Combined total</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Afghanistan (AFG)</th>\n",
       "      <td>13</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>13</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Algeria (ALG)</th>\n",
       "      <td>12</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>8</td>\n",
       "      <td>15</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>15</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>8</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Argentina (ARG)</th>\n",
       "      <td>23</td>\n",
       "      <td>18</td>\n",
       "      <td>24</td>\n",
       "      <td>28</td>\n",
       "      <td>70</td>\n",
       "      <td>18</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>41</td>\n",
       "      <td>18</td>\n",
       "      <td>24</td>\n",
       "      <td>28</td>\n",
       "      <td>70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Armenia (ARM)</th>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "      <td>12</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>11</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Australasia (ANZ) [ANZ]</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>12</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                         # Summer  Gold  Silver  Bronze  Total  # Winter  \\\n",
       "Afghanistan (AFG)              13     0       0       2      2         0   \n",
       "Algeria (ALG)                  12     5       2       8     15         3   \n",
       "Argentina (ARG)                23    18      24      28     70        18   \n",
       "Armenia (ARM)                   5     1       2       9     12         6   \n",
       "Australasia (ANZ) [ANZ]         2     3       4       5     12         0   \n",
       "\n",
       "                         Gold.1  Silver.1  Bronze.1  Total.1  # Games  Gold.2  \\\n",
       "Afghanistan (AFG)             0         0         0        0       13       0   \n",
       "Algeria (ALG)                 0         0         0        0       15       5   \n",
       "Argentina (ARG)               0         0         0        0       41      18   \n",
       "Armenia (ARM)                 0         0         0        0       11       1   \n",
       "Australasia (ANZ) [ANZ]       0         0         0        0        2       3   \n",
       "\n",
       "                         Silver.2  Bronze.2  Combined total  \n",
       "Afghanistan (AFG)               0         2               2  \n",
       "Algeria (ALG)                   2         8              15  \n",
       "Argentina (ARG)                24        28              70  \n",
       "Armenia (ARM)                   2         9              12  \n",
       "Australasia (ANZ) [ANZ]         4         5              12  "
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "for col in df.columns:\n",
    "    if col[:2]=='01':\n",
    "        df.rename(columns={col:'Gold' + col[4:]}, inplace=True)\n",
    "    if col[:2]=='02':\n",
    "        df.rename(columns={col:'Silver' + col[4:]}, inplace=True)\n",
    "    if col[:2]=='03':\n",
    "        df.rename(columns={col:'Bronze' + col[4:]}, inplace=True)\n",
    "    if col[:1]=='№':\n",
    "        df.rename(columns={col:'#' + col[1:]}, inplace=True) \n",
    "\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Haciendo búsquedas en un `DataFrame`"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "Podemos buscar en el `DataFrame` con una **máscara Booleana** qué países tienen (`True`) o no (`False`) una medalla de oro."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Afghanistan (AFG)                               False\n",
       "Algeria (ALG)                                    True\n",
       "Argentina (ARG)                                  True\n",
       "Armenia (ARM)                                    True\n",
       "Australasia (ANZ) [ANZ]                          True\n",
       "Australia (AUS) [AUS] [Z]                        True\n",
       "Austria (AUT)                                    True\n",
       "Azerbaijan (AZE)                                 True\n",
       "Bahamas (BAH)                                    True\n",
       "Bahrain (BRN)                                   False\n",
       "Barbados (BAR) [BAR]                            False\n",
       "Belarus (BLR)                                    True\n",
       "Belgium (BEL)                                    True\n",
       "Bermuda (BER)                                   False\n",
       "Bohemia (BOH) [BOH] [Z]                         False\n",
       "Botswana (BOT)                                  False\n",
       "Brazil (BRA)                                     True\n",
       "British West Indies (BWI) [BWI]                 False\n",
       "Bulgaria (BUL) [H]                               True\n",
       "Burundi (BDI)                                    True\n",
       "Cameroon (CMR)                                   True\n",
       "Canada (CAN)                                     True\n",
       "Chile (CHI) [I]                                  True\n",
       "China (CHN) [CHN]                                True\n",
       "Colombia (COL)                                   True\n",
       "Costa Rica (CRC)                                 True\n",
       "Ivory Coast (CIV) [CIV]                         False\n",
       "Croatia (CRO)                                    True\n",
       "Cuba (CUB) [Z]                                   True\n",
       "Cyprus (CYP)                                    False\n",
       "                                                ...  \n",
       "Sri Lanka (SRI) [SRI]                           False\n",
       "Sudan (SUD)                                     False\n",
       "Suriname (SUR) [E]                               True\n",
       "Sweden (SWE) [Z]                                 True\n",
       "Switzerland (SUI)                                True\n",
       "Syria (SYR)                                      True\n",
       "Chinese Taipei (TPE) [TPE] [TPE2]                True\n",
       "Tajikistan (TJK)                                False\n",
       "Tanzania (TAN) [TAN]                            False\n",
       "Thailand (THA)                                   True\n",
       "Togo (TOG)                                      False\n",
       "Tonga (TGA)                                     False\n",
       "Trinidad and Tobago (TRI) [TRI]                  True\n",
       "Tunisia (TUN)                                    True\n",
       "Turkey (TUR)                                     True\n",
       "Uganda (UGA)                                     True\n",
       "Ukraine (UKR)                                    True\n",
       "United Arab Emirates (UAE)                       True\n",
       "United States (USA) [P] [Q] [R] [Z]              True\n",
       "Uruguay (URU)                                    True\n",
       "Uzbekistan (UZB)                                 True\n",
       "Venezuela (VEN)                                  True\n",
       "Vietnam (VIE)                                   False\n",
       "Virgin Islands (ISV)                            False\n",
       "Yugoslavia (YUG) [YUG]                           True\n",
       "Independent Olympic Participants (IOP) [IOP]    False\n",
       "Zambia (ZAM) [ZAM]                              False\n",
       "Zimbabwe (ZIM) [ZIM]                             True\n",
       "Mixed team (ZZX) [ZZX]                           True\n",
       "Totals                                           True\n",
       "Name: Gold, Length: 147, dtype: bool"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['Gold'] > 0"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "La función `.where()` toma una máscara booleana como condición en el argumento, la aplica al `DataFrame`, y devuelve un `DataFrame` de la misma forma. En nuestro ejemplo, reemplaza con `NaN` los casos `False` y con su valor original, los casos `True`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th># Summer</th>\n",
       "      <th>Gold</th>\n",
       "      <th>Silver</th>\n",
       "      <th>Bronze</th>\n",
       "      <th>Total</th>\n",
       "      <th># Winter</th>\n",
       "      <th>Gold.1</th>\n",
       "      <th>Silver.1</th>\n",
       "      <th>Bronze.1</th>\n",
       "      <th>Total.1</th>\n",
       "      <th># Games</th>\n",
       "      <th>Gold.2</th>\n",
       "      <th>Silver.2</th>\n",
       "      <th>Bronze.2</th>\n",
       "      <th>Combined total</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Afghanistan (AFG)</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Algeria (ALG)</th>\n",
       "      <td>12.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>15.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Argentina (ARG)</th>\n",
       "      <td>23.0</td>\n",
       "      <td>18.0</td>\n",
       "      <td>24.0</td>\n",
       "      <td>28.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>18.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>41.0</td>\n",
       "      <td>18.0</td>\n",
       "      <td>24.0</td>\n",
       "      <td>28.0</td>\n",
       "      <td>70.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Armenia (ARM)</th>\n",
       "      <td>5.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>12.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Australasia (ANZ) [ANZ]</th>\n",
       "      <td>2.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>12.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                         # Summer  Gold  Silver  Bronze  Total  # Winter  \\\n",
       "Afghanistan (AFG)             NaN   NaN     NaN     NaN    NaN       NaN   \n",
       "Algeria (ALG)                12.0   5.0     2.0     8.0   15.0       3.0   \n",
       "Argentina (ARG)              23.0  18.0    24.0    28.0   70.0      18.0   \n",
       "Armenia (ARM)                 5.0   1.0     2.0     9.0   12.0       6.0   \n",
       "Australasia (ANZ) [ANZ]       2.0   3.0     4.0     5.0   12.0       0.0   \n",
       "\n",
       "                         Gold.1  Silver.1  Bronze.1  Total.1  # Games  Gold.2  \\\n",
       "Afghanistan (AFG)           NaN       NaN       NaN      NaN      NaN     NaN   \n",
       "Algeria (ALG)               0.0       0.0       0.0      0.0     15.0     5.0   \n",
       "Argentina (ARG)             0.0       0.0       0.0      0.0     41.0    18.0   \n",
       "Armenia (ARM)               0.0       0.0       0.0      0.0     11.0     1.0   \n",
       "Australasia (ANZ) [ANZ]     0.0       0.0       0.0      0.0      2.0     3.0   \n",
       "\n",
       "                         Silver.2  Bronze.2  Combined total  \n",
       "Afghanistan (AFG)             NaN       NaN             NaN  \n",
       "Algeria (ALG)                 2.0       8.0            15.0  \n",
       "Argentina (ARG)              24.0      28.0            70.0  \n",
       "Armenia (ARM)                 2.0       9.0            12.0  \n",
       "Australasia (ANZ) [ANZ]       4.0       5.0            12.0  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "only_gold = df.where(df['Gold'] > 0)\n",
    "only_gold.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "Podemos contar cuántas países hay medallas de oro hay en total con `count()`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "100"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "only_gold['Gold'].count()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Si contamos sobre los datos originales, veremos que hay 147 países. Cuenta los países para los cuales la máscara Booleana dio `False` >.<"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "147"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['Gold'].count()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "Podemos establecer otro tipo de condiciones para hacer búsquedas más complejas. Por ejemplo, buscar la cantidad de países que han ganado medalla de oro alguna vez."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "101"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df[(df['Gold'] > 0) | (df['Gold.1'] > 0)])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br>\n",
    "Buscar qué países han ganado sólo medallas de oro en Invierno y nunca en Verano."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th># Summer</th>\n",
       "      <th>Gold</th>\n",
       "      <th>Silver</th>\n",
       "      <th>Bronze</th>\n",
       "      <th>Total</th>\n",
       "      <th># Winter</th>\n",
       "      <th>Gold.1</th>\n",
       "      <th>Silver.1</th>\n",
       "      <th>Bronze.1</th>\n",
       "      <th>Total.1</th>\n",
       "      <th># Games</th>\n",
       "      <th>Gold.2</th>\n",
       "      <th>Silver.2</th>\n",
       "      <th>Bronze.2</th>\n",
       "      <th>Combined total</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Liechtenstein (LIE)</th>\n",
       "      <td>16</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>18</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>9</td>\n",
       "      <td>34</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                     # Summer  Gold  Silver  Bronze  Total  # Winter  Gold.1  \\\n",
       "Liechtenstein (LIE)        16     0       0       0      0        18       2   \n",
       "\n",
       "                     Silver.1  Bronze.1  Total.1  # Games  Gold.2  Silver.2  \\\n",
       "Liechtenstein (LIE)         2         5        9       34       2         2   \n",
       "\n",
       "                     Bronze.2  Combined total  \n",
       "Liechtenstein (LIE)         5               9  "
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[(df['Gold.1'] > 0) & (df['Gold'] == 0)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}