{ "cells": [ { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "from numpy import nan\n", "from pandas import DataFrame, Series" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [], "source": [ "data = Series(['one', 'two', np.nan, 'four'])" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 one\n", "1 two\n", "2 NaN\n", "3 four\n", "dtype: object" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 False\n", "1 False\n", "2 True\n", "3 False\n", "dtype: bool" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Null values are easy to find: isnull() marks each one with True.\n", "data.isnull()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 one\n", "1 two\n", "3 four\n", "dtype: object" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Null values are just as easy to remove.\n", "data.dropna()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Now look at the DataFrame case.\n", "dframe = DataFrame([\n", "    [1, 2, 3],\n", "    [np.nan, 5, 6],\n", "    [7, np.nan, 9],\n", "    [np.nan, np.nan, np.nan],\n", "])" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
0123
1NaN56
27NaN9
3NaNNaNNaN
\n", "
" ], "text/plain": [ " 0 1 2\n", "0 1 2 3\n", "1 NaN 5 6\n", "2 7 NaN 9\n", "3 NaN NaN NaN" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dframe" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# With no arguments, dropna() discards every row that holds at least one NaN.\n", "clean_dframe = dframe.dropna()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
0123
\n", "
" ], "text/plain": [ " 0 1 2\n", "0 1 2 3" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "clean_dframe" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
0123
1NaN56
27NaN9
\n", "
" ], "text/plain": [ " 0 1 2\n", "0 1 2 3\n", "1 NaN 5 6\n", "2 7 NaN 9" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# The removal rule can be chosen: how='all' drops only rows that are entirely NaN.\n", "dframe.dropna(how='all')" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0
1
2
3
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: []\n", "Index: [0, 1, 2, 3]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# The axis can be chosen as well: axis=1 drops columns instead of rows.\n", "# Every column holds at least one null value, so no columns survive.\n", "dframe.dropna(axis=1)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123
0123NaN
12NaN56
2NaN7NaN9
31NaNNaNNaN
\n", "
" ], "text/plain": [ " 0 1 2 3\n", "0 1 2 3 NaN\n", "1 2 NaN 5 6\n", "2 NaN 7 NaN 9\n", "3 1 NaN NaN NaN" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# A threshold can also be set; build a second frame to demonstrate it.\n", "# np.nan is used here, as everywhere else in this notebook, rather than a bare `nan`.\n", "dframe2 = DataFrame([\n", "    [1, 2, 3, np.nan],\n", "    [2, np.nan, 5, 6],\n", "    [np.nan, 7, np.nan, 9],\n", "    [1, np.nan, np.nan, np.nan],\n", "])\n", "dframe2" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123
0123NaN
12NaN56
2NaN7NaN9
\n", "
" ], "text/plain": [ " 0 1 2 3\n", "0 1 2 3 NaN\n", "1 2 NaN 5 6\n", "2 NaN 7 NaN 9" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Keep only the rows that have at least 2 non-null values.\n", "dframe2.dropna(thresh=2)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123
0123NaN
12NaN56
\n", "
" ], "text/plain": [ " 0 1 2 3\n", "0 1 2 3 NaN\n", "1 2 NaN 5 6" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Keep only the rows that have at least 3 non-null values.\n", "dframe2.dropna(thresh=3)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123
01231
12156
21719
31111
\n", "
" ], "text/plain": [ " 0 1 2 3\n", "0 1 2 3 1\n", "1 2 1 5 6\n", "2 1 7 1 9\n", "3 1 1 1 1" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Null values can be filled in: here every one is replaced with 1.\n", "dframe2.fillna(1)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123
01233
12156
20729
31123
\n", "
" ], "text/plain": [ " 0 1 2 3\n", "0 1 2 3 3\n", "1 2 1 5 6\n", "2 0 7 2 9\n", "3 1 1 2 3" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# The fill value can differ per column: the dict maps column label -> fill value.\n", "dframe2.fillna({0: 0, 1: 1, 2: 2, 3: 3})" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123
0123NaN
12NaN56
2NaN7NaN9
31NaNNaNNaN
\n", "
" ], "text/plain": [ " 0 1 2 3\n", "0 1 2 3 NaN\n", "1 2 NaN 5 6\n", "2 NaN 7 NaN 9\n", "3 1 NaN NaN NaN" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dframe2" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# With inplace=True, fillna() modifies the original DataFrame itself.\n", "# Equivalent in effect to: dframe2 = dframe2.fillna(0)\n", "dframe2.fillna(0, inplace=True)" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123
01230
12056
20709
31000
\n", "
" ], "text/plain": [ " 0 1 2 3\n", "0 1 2 3 0\n", "1 2 0 5 6\n", "2 0 7 0 9\n", "3 1 0 0 0" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dframe2" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.4.3" } }, "nbformat": 4, "nbformat_minor": 0 }