In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
In [3]:
# Load the red-wine quality table; the fields in this CSV are separated by
# semicolons, so the delimiter is given explicitly.
dframe_wine = pd.read_csv('winequality-red.csv', delimiter=';')
# Preview the first five rows.
dframe_wine.head(n=5)
Out[3]:
fixed acidity volatile acidity citric acid residual sugar chlorides free sulfur dioxide total sulfur dioxide density pH sulphates alcohol quality
0 7.4 0.70 0.00 1.9 0.076 11 34 0.9978 3.51 0.56 9.4 5
1 7.8 0.88 0.00 2.6 0.098 25 67 0.9968 3.20 0.68 9.8 5
2 7.8 0.76 0.04 2.3 0.092 15 54 0.9970 3.26 0.65 9.8 5
3 11.2 0.28 0.56 1.9 0.075 17 60 0.9980 3.16 0.58 9.8 6
4 7.4 0.70 0.00 1.9 0.076 11 34 0.9978 3.51 0.56 9.4 5
In [4]:
def ranker(df):
    """Return a copy of ``df`` with a 1-based positional rank column.

    The rank is simply the row position (1, 2, ..., len(df)) in the order the
    rows arrive, so the caller must sort ``df`` beforehand for the rank to be
    meaningful.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to number; typically one group handed over by ``groupby().apply``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index and columns as ``df`` plus
        ``alc_content_rank``. The input frame is left untouched.
    """
    # Work on a copy: functions passed to groupby().apply() must not mutate
    # the group they receive.
    ranked = df.copy()
    ranked['alc_content_rank'] = np.arange(1, len(ranked) + 1)
    return ranked
In [6]:
# Sort by alcohol content, highest first. DataFrame.sort() has been removed
# from pandas; sort_values() is its replacement and returns a new sorted frame,
# so no in-place mutation is needed.
dframe_wine = dframe_wine.sort_values('alcohol', ascending=False)
# Number the rows 1..n inside each quality group, following the sorted order
# (rank 1 = highest alcohol within that quality). cumcount() gives the same
# numbering as applying ranker() to every group, while keeping the original
# row index and all existing columns in place.
dframe_wine['alc_content_rank'] = dframe_wine.groupby('quality').cumcount() + 1
In [7]:
# Preview the first five rows of the ranked table.
dframe_wine.head(n=5)
Out[7]:
fixed acidity volatile acidity citric acid residual sugar chlorides free sulfur dioxide total sulfur dioxide density pH sulphates alcohol quality alc_content_rank
652 15.9 0.36 0.65 7.5 0.096 22 71 0.99760 2.98 0.84 14.9 5 1
588 5.0 0.42 0.24 2.0 0.060 19 50 0.99170 3.72 0.74 14.0 8 1
142 5.2 0.34 0.00 1.8 0.050 27 63 0.99160 3.68 0.79 14.0 6 1
144 5.2 0.34 0.00 1.8 0.050 27 63 0.99160 3.68 0.79 14.0 6 2
1270 5.0 0.38 0.01 1.6 0.048 26 60 0.99084 3.70 0.75 14.0 6 3
In [9]:
# Count how many wines fall into each quality score.
quality_scores = dframe_wine['quality']
num_of_qual = quality_scores.value_counts()
num_of_qual
Out[9]:
5    681
6    638
7    199
4     53
8     18
3     10
dtype: int64
In [13]:
# For each quality score, pick the wine with the highest alcohol content
# (alc_content_rank == 1) and list the picks in ascending order of quality.
# sort_values() replaces DataFrame.sort(), which has been removed from pandas.
dframe_wine[dframe_wine['alc_content_rank'] == 1].sort_values('quality')
Out[13]:
fixed acidity volatile acidity citric acid residual sugar chlorides free sulfur dioxide total sulfur dioxide density pH sulphates alcohol quality alc_content_rank
899 8.3 1.02 0.02 3.4 0.084 6 11 0.99892 3.48 0.49 11.0 3 1
45 4.6 0.52 0.15 2.1 0.054 8 65 0.99340 3.90 0.56 13.1 4 1
652 15.9 0.36 0.65 7.5 0.096 22 71 0.99760 2.98 0.84 14.9 5 1
142 5.2 0.34 0.00 1.8 0.050 27 63 0.99160 3.68 0.79 14.0 6 1
821 4.9 0.42 0.00 2.1 0.048 16 42 0.99154 3.71 0.74 14.0 7 1
588 5.0 0.42 0.24 2.0 0.060 19 50 0.99170 3.72 0.74 14.0 8 1