import numpy as np
import pandas as pd
from pandas import Series,DataFrame
# サンプルデータは、次のURLからダウンロードできます。
url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/'
# 保存したデータを読み込みます。
dframe_wine = pd.read_csv('winequality-red.csv',sep=';')
dframe_wine.head()
fixed acidity | volatile acidity | citric acid | residual sugar | chlorides | free sulfur dioxide | total sulfur dioxide | density | pH | sulphates | alcohol | quality | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 7.4 | 0.70 | 0.00 | 1.9 | 0.076 | 11 | 34 | 0.9978 | 3.51 | 0.56 | 9.4 | 5 |
1 | 7.8 | 0.88 | 0.00 | 2.6 | 0.098 | 25 | 67 | 0.9968 | 3.20 | 0.68 | 9.8 | 5 |
2 | 7.8 | 0.76 | 0.04 | 2.3 | 0.092 | 15 | 54 | 0.9970 | 3.26 | 0.65 | 9.8 | 5 |
3 | 11.2 | 0.28 | 0.56 | 1.9 | 0.075 | 17 | 60 | 0.9980 | 3.16 | 0.58 | 9.8 | 6 |
4 | 7.4 | 0.70 | 0.00 | 1.9 | 0.076 | 11 | 34 | 0.9978 | 3.51 | 0.56 | 9.4 | 5 |
# 平均アルコール度数
dframe_wine['alcohol'].mean()
10.422983114446502
# 関数を定義します。
def max_to_min(arr):
return arr.max() - arr.min()
# ワインのqualityでまとめます。
wino = dframe_wine.groupby('quality')
wino.describe()
alcohol | chlorides | citric acid | density | fixed acidity | free sulfur dioxide | pH | residual sugar | sulphates | total sulfur dioxide | volatile acidity | ||
---|---|---|---|---|---|---|---|---|---|---|---|---|
quality | ||||||||||||
3 | count | 10.000000 | 10.000000 | 10.000000 | 10.000000 | 10.000000 | 10.000000 | 10.000000 | 10.000000 | 10.000000 | 10.000000 | 10.000000 |
mean | 9.955000 | 0.122500 | 0.171000 | 0.997464 | 8.360000 | 11.000000 | 3.398000 | 2.635000 | 0.570000 | 24.900000 | 0.884500 | |
std | 0.818009 | 0.066241 | 0.250664 | 0.002002 | 1.770875 | 9.763879 | 0.144052 | 1.401596 | 0.122020 | 16.828877 | 0.331256 | |
min | 8.400000 | 0.061000 | 0.000000 | 0.994710 | 6.700000 | 3.000000 | 3.160000 | 1.200000 | 0.400000 | 9.000000 | 0.440000 | |
25% | 9.725000 | 0.079000 | 0.005000 | 0.996150 | 7.150000 | 5.000000 | 3.312500 | 1.875000 | 0.512500 | 12.500000 | 0.647500 | |
50% | 9.925000 | 0.090500 | 0.035000 | 0.997565 | 7.500000 | 6.000000 | 3.390000 | 2.100000 | 0.545000 | 15.000000 | 0.845000 | |
75% | 10.575000 | 0.143000 | 0.327500 | 0.998770 | 9.875000 | 14.500000 | 3.495000 | 3.100000 | 0.615000 | 42.500000 | 1.010000 | |
max | 11.000000 | 0.267000 | 0.660000 | 1.000800 | 11.600000 | 34.000000 | 3.630000 | 5.700000 | 0.860000 | 49.000000 | 1.580000 | |
4 | count | 53.000000 | 53.000000 | 53.000000 | 53.000000 | 53.000000 | 53.000000 | 53.000000 | 53.000000 | 53.000000 | 53.000000 | 53.000000 |
mean | 10.265094 | 0.090679 | 0.174151 | 0.996542 | 7.779245 | 12.264151 | 3.381509 | 2.694340 | 0.596415 | 36.245283 | 0.693962 | |
std | 0.934776 | 0.076192 | 0.201030 | 0.001575 | 1.626624 | 9.025926 | 0.181441 | 1.789436 | 0.239391 | 27.583374 | 0.220110 | |
min | 9.000000 | 0.045000 | 0.000000 | 0.993400 | 4.600000 | 3.000000 | 2.740000 | 1.300000 | 0.330000 | 7.000000 | 0.230000 | |
25% | 9.600000 | 0.067000 | 0.030000 | 0.995650 | 6.800000 | 6.000000 | 3.300000 | 1.900000 | 0.490000 | 14.000000 | 0.530000 | |
50% | 10.000000 | 0.080000 | 0.090000 | 0.996500 | 7.500000 | 11.000000 | 3.370000 | 2.100000 | 0.560000 | 26.000000 | 0.670000 | |
75% | 11.000000 | 0.089000 | 0.270000 | 0.997450 | 8.400000 | 15.000000 | 3.500000 | 2.800000 | 0.600000 | 49.000000 | 0.870000 | |
max | 13.100000 | 0.610000 | 1.000000 | 1.001000 | 12.500000 | 41.000000 | 3.900000 | 12.900000 | 2.000000 | 119.000000 | 1.130000 | |
5 | count | 681.000000 | 681.000000 | 681.000000 | 681.000000 | 681.000000 | 681.000000 | 681.000000 | 681.000000 | 681.000000 | 681.000000 | 681.000000 |
mean | 9.899706 | 0.092736 | 0.243686 | 0.997104 | 8.167254 | 16.983847 | 3.304949 | 2.528855 | 0.620969 | 56.513950 | 0.577041 | |
std | 0.736521 | 0.053707 | 0.180003 | 0.001589 | 1.563988 | 10.955446 | 0.150618 | 1.359753 | 0.171062 | 36.993116 | 0.164801 | |
min | 8.500000 | 0.039000 | 0.000000 | 0.992560 | 5.000000 | 3.000000 | 2.880000 | 1.200000 | 0.370000 | 6.000000 | 0.180000 | |
25% | 9.400000 | 0.074000 | 0.090000 | 0.996200 | 7.100000 | 9.000000 | 3.200000 | 1.900000 | 0.530000 | 26.000000 | 0.460000 | |
50% | 9.700000 | 0.081000 | 0.230000 | 0.997000 | 7.800000 | 15.000000 | 3.300000 | 2.200000 | 0.580000 | 47.000000 | 0.580000 | |
75% | 10.200000 | 0.094000 | 0.360000 | 0.997900 | 8.900000 | 23.000000 | 3.400000 | 2.600000 | 0.660000 | 84.000000 | 0.670000 | |
max | 14.900000 | 0.611000 | 0.790000 | 1.003150 | 15.900000 | 68.000000 | 3.740000 | 15.500000 | 1.980000 | 155.000000 | 1.330000 | |
6 | count | 638.000000 | 638.000000 | 638.000000 | 638.000000 | 638.000000 | 638.000000 | 638.000000 | 638.000000 | 638.000000 | 638.000000 | 638.000000 |
mean | 10.629519 | 0.084956 | 0.273824 | 0.996615 | 8.347179 | 15.711599 | 3.318072 | 2.477194 | 0.675329 | 40.869906 | 0.497484 | |
std | 1.049639 | 0.039563 | 0.195108 | 0.002000 | 1.797849 | 9.940911 | 0.153995 | 1.441576 | 0.158650 | 25.038250 | 0.160962 | |
min | 8.400000 | 0.034000 | 0.000000 | 0.990070 | 4.700000 | 1.000000 | 2.860000 | 0.900000 | 0.400000 | 6.000000 | 0.160000 | |
25% | 9.800000 | 0.068250 | 0.090000 | 0.995402 | 7.000000 | 8.000000 | 3.220000 | 1.900000 | 0.580000 | 23.000000 | 0.380000 | |
50% | 10.500000 | 0.078000 | 0.260000 | 0.996560 | 7.900000 | 14.000000 | 3.320000 | 2.200000 | 0.640000 | 35.000000 | 0.490000 | |
75% | 11.300000 | 0.088000 | 0.430000 | 0.997893 | 9.400000 | 21.000000 | 3.410000 | 2.500000 | 0.750000 | 54.000000 | 0.600000 | |
max | 14.000000 | 0.415000 | 0.780000 | 1.003690 | 14.300000 | 72.000000 | 4.010000 | 15.400000 | 1.950000 | 165.000000 | 1.040000 | |
7 | count | 199.000000 | 199.000000 | 199.000000 | 199.000000 | 199.000000 | 199.000000 | 199.000000 | 199.000000 | 199.000000 | 199.000000 | 199.000000 |
mean | 11.465913 | 0.076588 | 0.375176 | 0.996104 | 8.872362 | 14.045226 | 3.290754 | 2.720603 | 0.741256 | 35.020101 | 0.403920 | |
std | 0.961933 | 0.029456 | 0.194432 | 0.002176 | 1.992483 | 10.175255 | 0.150101 | 1.371509 | 0.135639 | 33.191206 | 0.145224 | |
min | 9.200000 | 0.012000 | 0.000000 | 0.990640 | 4.900000 | 3.000000 | 2.920000 | 1.200000 | 0.390000 | 7.000000 | 0.120000 | |
25% | 10.800000 | 0.062000 | 0.305000 | 0.994765 | 7.400000 | 6.000000 | 3.200000 | 2.000000 | 0.650000 | 17.500000 | 0.300000 | |
50% | 11.500000 | 0.073000 | 0.400000 | 0.995770 | 8.800000 | 11.000000 | 3.280000 | 2.300000 | 0.740000 | 27.000000 | 0.370000 | |
75% | 12.100000 | 0.087000 | 0.490000 | 0.997360 | 10.100000 | 18.000000 | 3.380000 | 2.750000 | 0.830000 | 43.000000 | 0.485000 | |
max | 14.000000 | 0.358000 | 0.760000 | 1.003200 | 15.600000 | 54.000000 | 3.780000 | 8.900000 | 1.360000 | 289.000000 | 0.915000 | |
8 | count | 18.000000 | 18.000000 | 18.000000 | 18.000000 | 18.000000 | 18.000000 | 18.000000 | 18.000000 | 18.000000 | 18.000000 | 18.000000 |
mean | 12.094444 | 0.068444 | 0.391111 | 0.995212 | 8.566667 | 13.277778 | 3.267222 | 2.577778 | 0.767778 | 33.444444 | 0.423333 | |
std | 1.224011 | 0.011678 | 0.199526 | 0.002378 | 2.119656 | 11.155613 | 0.200640 | 1.295038 | 0.115379 | 25.433240 | 0.144914 | |
min | 9.800000 | 0.044000 | 0.030000 | 0.990800 | 5.000000 | 3.000000 | 2.880000 | 1.400000 | 0.630000 | 12.000000 | 0.260000 | |
25% | 11.325000 | 0.062000 | 0.302500 | 0.994175 | 7.250000 | 6.000000 | 3.162500 | 1.800000 | 0.690000 | 16.000000 | 0.335000 | |
50% | 12.150000 | 0.070500 | 0.420000 | 0.994940 | 8.250000 | 7.500000 | 3.230000 | 2.100000 | 0.740000 | 21.500000 | 0.370000 | |
75% | 12.875000 | 0.075500 | 0.530000 | 0.997200 | 10.225000 | 16.500000 | 3.350000 | 2.600000 | 0.820000 | 43.000000 | 0.472500 | |
max | 14.000000 | 0.086000 | 0.720000 | 0.998800 | 12.600000 | 42.000000 | 3.720000 | 6.400000 | 1.100000 | 88.000000 | 0.850000 |
wino.agg(max_to_min)
fixed acidity | volatile acidity | citric acid | residual sugar | chlorides | free sulfur dioxide | total sulfur dioxide | density | pH | sulphates | alcohol | |
---|---|---|---|---|---|---|---|---|---|---|---|
quality | |||||||||||
3 | 4.9 | 1.140 | 0.66 | 4.5 | 0.206 | 31 | 40 | 0.00609 | 0.47 | 0.46 | 2.6 |
4 | 7.9 | 0.900 | 1.00 | 11.6 | 0.565 | 38 | 112 | 0.00760 | 1.16 | 1.67 | 4.1 |
5 | 10.9 | 1.150 | 0.79 | 14.3 | 0.572 | 65 | 149 | 0.01059 | 0.86 | 1.61 | 6.4 |
6 | 9.6 | 0.880 | 0.78 | 14.5 | 0.381 | 71 | 159 | 0.01362 | 1.15 | 1.55 | 5.6 |
7 | 10.7 | 0.795 | 0.76 | 7.7 | 0.346 | 51 | 282 | 0.01256 | 0.86 | 0.97 | 4.8 |
8 | 7.6 | 0.590 | 0.69 | 5.0 | 0.042 | 39 | 76 | 0.00800 | 0.84 | 0.47 | 4.2 |
# agg関数には文字列も渡せます。
wino.agg('mean')
fixed acidity | volatile acidity | citric acid | residual sugar | chlorides | free sulfur dioxide | total sulfur dioxide | density | pH | sulphates | alcohol | |
---|---|---|---|---|---|---|---|---|---|---|---|
quality | |||||||||||
3 | 8.360000 | 0.884500 | 0.171000 | 2.635000 | 0.122500 | 11.000000 | 24.900000 | 0.997464 | 3.398000 | 0.570000 | 9.955000 |
4 | 7.779245 | 0.693962 | 0.174151 | 2.694340 | 0.090679 | 12.264151 | 36.245283 | 0.996542 | 3.381509 | 0.596415 | 10.265094 |
5 | 8.167254 | 0.577041 | 0.243686 | 2.528855 | 0.092736 | 16.983847 | 56.513950 | 0.997104 | 3.304949 | 0.620969 | 9.899706 |
6 | 8.347179 | 0.497484 | 0.273824 | 2.477194 | 0.084956 | 15.711599 | 40.869906 | 0.996615 | 3.318072 | 0.675329 | 10.629519 |
7 | 8.872362 | 0.403920 | 0.375176 | 2.720603 | 0.076588 | 14.045226 | 35.020101 | 0.996104 | 3.290754 | 0.741256 | 11.465913 |
8 | 8.566667 | 0.423333 | 0.391111 | 2.577778 | 0.068444 | 13.277778 | 33.444444 | 0.995212 | 3.267222 | 0.767778 | 12.094444 |
# 元のデータに戻ります。
dframe_wine.head()
fixed acidity | volatile acidity | citric acid | residual sugar | chlorides | free sulfur dioxide | total sulfur dioxide | density | pH | sulphates | alcohol | quality | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 7.4 | 0.70 | 0.00 | 1.9 | 0.076 | 11 | 34 | 0.9978 | 3.51 | 0.56 | 9.4 | 5 |
1 | 7.8 | 0.88 | 0.00 | 2.6 | 0.098 | 25 | 67 | 0.9968 | 3.20 | 0.68 | 9.8 | 5 |
2 | 7.8 | 0.76 | 0.04 | 2.3 | 0.092 | 15 | 54 | 0.9970 | 3.26 | 0.65 | 9.8 | 5 |
3 | 11.2 | 0.28 | 0.56 | 1.9 | 0.075 | 17 | 60 | 0.9980 | 3.16 | 0.58 | 9.8 | 6 |
4 | 7.4 | 0.70 | 0.00 | 1.9 | 0.076 | 11 | 34 | 0.9978 | 3.51 | 0.56 | 9.4 | 5 |
# 新しい列を付け加えます。
dframe_wine['qual/alc ratio'] = dframe_wine['quality']/dframe_wine['alcohol']
dframe_wine.head()
fixed acidity | volatile acidity | citric acid | residual sugar | chlorides | free sulfur dioxide | total sulfur dioxide | density | pH | sulphates | alcohol | quality | qual/alc ratio | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 7.4 | 0.70 | 0.00 | 1.9 | 0.076 | 11 | 34 | 0.9978 | 3.51 | 0.56 | 9.4 | 5 | 0.531915 |
1 | 7.8 | 0.88 | 0.00 | 2.6 | 0.098 | 25 | 67 | 0.9968 | 3.20 | 0.68 | 9.8 | 5 | 0.510204 |
2 | 7.8 | 0.76 | 0.04 | 2.3 | 0.092 | 15 | 54 | 0.9970 | 3.26 | 0.65 | 9.8 | 5 | 0.510204 |
3 | 11.2 | 0.28 | 0.56 | 1.9 | 0.075 | 17 | 60 | 0.9980 | 3.16 | 0.58 | 9.8 | 6 | 0.612245 |
4 | 7.4 | 0.70 | 0.00 | 1.9 | 0.076 | 11 | 34 | 0.9978 | 3.51 | 0.56 | 9.4 | 5 | 0.531915 |
# groupbyの代わりにPivotテーブルを使うこともできます。
dframe_wine.pivot_table(index=['quality'])
alcohol | chlorides | citric acid | density | fixed acidity | free sulfur dioxide | pH | qual/alc ratio | residual sugar | sulphates | total sulfur dioxide | volatile acidity | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
quality | ||||||||||||
3 | 9.955000 | 0.122500 | 0.171000 | 0.997464 | 8.360000 | 11.000000 | 3.398000 | 0.303286 | 2.635000 | 0.570000 | 24.900000 | 0.884500 |
4 | 10.265094 | 0.090679 | 0.174151 | 0.996542 | 7.779245 | 12.264151 | 3.381509 | 0.392724 | 2.694340 | 0.596415 | 36.245283 | 0.693962 |
5 | 9.899706 | 0.092736 | 0.243686 | 0.997104 | 8.167254 | 16.983847 | 3.304949 | 0.507573 | 2.528855 | 0.620969 | 56.513950 | 0.577041 |
6 | 10.629519 | 0.084956 | 0.273824 | 0.996615 | 8.347179 | 15.711599 | 3.318072 | 0.569801 | 2.477194 | 0.675329 | 40.869906 | 0.497484 |
7 | 11.465913 | 0.076588 | 0.375176 | 0.996104 | 8.872362 | 14.045226 | 3.290754 | 0.614855 | 2.720603 | 0.741256 | 35.020101 | 0.403920 |
8 | 12.094444 | 0.068444 | 0.391111 | 0.995212 | 8.566667 | 13.277778 | 3.267222 | 0.668146 | 2.577778 | 0.767778 | 33.444444 | 0.423333 |
%matplotlib inline
dframe_wine.plot(kind='scatter',x='quality',y='alcohol')
<matplotlib.axes._subplots.AxesSubplot at 0x108492dd8>