In [2]:
import numpy as np
from pandas import Series, DataFrame
import pandas as pd
from numpy.random import randn
In [3]:
# Let's build a simple Series: four integers labelled 'A' through 'D'.
ser1 = Series(data=[1, 2, 3, 4], index=list('ABCD'))
In [4]:
# Display the Series created above.
ser1
Out[4]:
A    1
B    2
C    3
D    4
dtype: int64
In [5]:
# reindex returns a Series conformed to a new set of index labels;
# here the labels 'E' and 'F' are added to the existing 'A'..'D'.
ser2 = ser1.reindex(index=list('ABCDEF'))
In [7]:
# Labels that were not in the original index (E, F) are automatically filled
# with NaN; as the output shows, the dtype becomes float64 as a result.
ser2
Out[7]:
A     1
B     2
C     3
D     4
E   NaN
F   NaN
dtype: float64
In [8]:
# Values for newly added index labels can also be filled in.
# Only the label that is new in this call ('G') receives fill_value;
# 'E' and 'F' already exist in ser2, so their NaN values are kept as-is.
ser2.reindex(['A','B','C','D','E','F','G'],fill_value=0)
Out[8]:
A     1
B     2
C     3
D     4
E   NaN
F   NaN
G     0
dtype: float64
In [10]:
# A Series of country names on a sparse integer index (0, 5, 10).
ser3 = Series(data=['USA', 'Mexico', 'Canada'], index=[0, 5, 10])

ser3
Out[10]:
0        USA
5     Mexico
10    Canada
dtype: object
In [11]:
# 'ffill' is short for forward fill: each label missing from ser3 takes the
# value of the nearest preceding label (0-4 -> USA, 5-9 -> Mexico, 10-14 -> Canada).
ser3.reindex(range(15),method='ffill')
Out[11]:
0        USA
1        USA
2        USA
3        USA
4        USA
5     Mexico
6     Mexico
7     Mexico
8     Mexico
9     Mexico
10    Canada
11    Canada
12    Canada
13    Canada
14    Canada
dtype: object
In [13]:
# Next, consider reindexing along both rows and columns.
# Build a 5x5 DataFrame by reshaping 25 random draws.
# Note that the row labels skip 'C'.
dframe = DataFrame(
    data=randn(25).reshape(5, 5),
    index=list('ABDEF'),
    columns=['col1', 'col2', 'col3', 'col4', 'col5'],
)
dframe
Out[13]:
col1 col2 col3 col4 col5
A 0.650892 1.571965 1.726236 -0.423697 0.110843
B 1.037495 0.037039 -1.368314 -1.041347 1.337007
D -1.289793 0.652301 0.548770 0.636829 -0.349049
E -0.573696 0.108592 0.863125 -0.109940 -0.577262
F 1.160227 -0.546492 1.172700 0.071298 1.222122
In [18]:
# Row 'C' was forgotten above; reindexing with the full label list
# inserts it as a row of NaN.
new_index = list('ABCDEF')
dframe2 = dframe.reindex(index=new_index)
dframe2
Out[18]:
col1 col2 col3 col4 col5
A 0.650892 1.571965 1.726236 -0.423697 0.110843
B 1.037495 0.037039 -1.368314 -1.041347 1.337007
C NaN NaN NaN NaN NaN
D -1.289793 0.652301 0.548770 0.636829 -0.349049
E -0.573696 0.108592 0.863125 -0.109940 -0.577262
F 1.160227 -0.546492 1.172700 0.071298 1.222122
In [16]:
# The same kind of operation works on columns: 'col6' does not exist yet,
# so it is added as a column of NaN.
new_columns = ['col1','col2','col3','col4','col5','col6']

dframe2.reindex(columns=new_columns)
Out[16]:
col1 col2 col3 col4 col5 col6
A 0.650892 1.571965 1.726236 -0.423697 0.110843 NaN
B 1.037495 0.037039 -1.368314 -1.041347 1.337007 NaN
C NaN NaN NaN NaN NaN NaN
D -1.289793 0.652301 0.548770 0.636829 -0.349049 NaN
E -0.573696 0.108592 0.863125 -0.109940 -0.577262 NaN
F 1.160227 -0.546492 1.172700 0.071298 1.222122 NaN
In [17]:
# The ix indexer can be used to reindex rows and columns quickly.
# NOTE(review): .ix was deprecated in pandas 0.20 and removed in 1.0 --
# confirm the pandas version before relying on it.
# Show the original frame (no row 'C', no column 'col6') for comparison.
dframe
Out[17]:
col1 col2 col3 col4 col5
A 0.650892 1.571965 1.726236 -0.423697 0.110843
B 1.037495 0.037039 -1.368314 -1.041347 1.337007
D -1.289793 0.652301 0.548770 0.636829 -0.349049
E -0.573696 0.108592 0.863125 -0.109940 -0.577262
F 1.160227 -0.546492 1.172700 0.071298 1.222122
In [20]:
# Reindex rows and columns in one call. The original used dframe.ix[...],
# but .ix was deprecated in pandas 0.20 and removed in 1.0. .loc is not a
# drop-in replacement because it raises KeyError for missing labels.
# reindex() gives the same result: missing row 'C' and column 'col6' are NaN.
dframe.reindex(index=new_index, columns=new_columns)
Out[20]:
col1 col2 col3 col4 col5 col6
A 0.650892 1.571965 1.726236 -0.423697 0.110843 NaN
B 1.037495 0.037039 -1.368314 -1.041347 1.337007 NaN
C NaN NaN NaN NaN NaN NaN
D -1.289793 0.652301 0.548770 0.636829 -0.349049 NaN
E -0.573696 0.108592 0.863125 -0.109940 -0.577262 NaN
F 1.160227 -0.546492 1.172700 0.071298 1.222122 NaN
In [ ]: