import numpy as np
import random

def make_data_by_sin_gaussian(x):
    """Return sin(x) with independent Gaussian noise added to every element.

    Args:
        x: Scalar or array-like of any shape holding the sample positions.
            The input itself is never modified.

    Returns:
        A NumPy array with the same shape as ``x`` (0-d for a scalar input)
        holding ``sin(x) + e``, where every ``e`` is drawn with
        ``random.gauss`` from a normal distribution with mean 0 and standard
        deviation 0.5. Seed the standard-library ``random`` module to get
        reproducible data.
    """
    # np.sin allocates a fresh result, so the in-place add below cannot touch
    # x. asarray turns the NumPy scalar returned for scalar input into a 0-d
    # array so that the same code path handles every shape.
    y = np.asarray(np.sin(x))
    # One draw per element (y.size rather than len(y)): this keeps the random
    # stream identical for 1-d input while also supporting scalars and N-d
    # arrays, where len() would fail or give only one value per row.
    noise = np.array([random.gauss(0, 0.5) for _ in range(y.size)])
    y += noise.reshape(y.shape)
    return y


# Number of sample points
n_tr = 10
# n_tr evenly spaced values covering [0, 2*pi], as a 1-d array
x = np.linspace(0.0, 2.0 * np.pi, num=n_tr)


# Noisy sin(x) observations at those positions
y = make_data_by_sin_gaussian(x)

# Draw the samples as blue circles
import matplotlib.pyplot as plt
plt.plot(x, y, "ob")

# Optional: overlay the noise-free sin curve
#n_tr = 100
#x = np.linspace(0., np.pi*2., n_tr)
#plt.plot(x, np.sin(x))

# Output: [<matplotlib.lines.Line2D at 0x7fce849c6190>]

# Stand-alone example: one Gaussian draw (mean 0, std 0.2) per element of y
e = [random.gauss(0, 0.2) for _ in y]


# Number of training points (not needed if it was already set above)
n_tr = 30
x = np.linspace(0.0, 2.0 * np.pi, num=n_tr)

# Generate the noisy training targets
y = make_data_by_sin_gaussian(x)


# The array shape has to be changed before the data
# can be passed to linear_model.

print(x)
print(x[:, np.newaxis])  # one sample per row (i.e. the "transpose" of the 1-d array)


from sklearn import linear_model as lm

# The data must be handed over as column vectors, one sample per row
x_tr = x[:, np.newaxis]
y_tr = y[:, np.newaxis]

# Create the model object and fit it in one step (fit returns the estimator)
rlf = lm.LinearRegression().fit(x_tr, y_tr)

# Show the first-order coefficient (slope)
print(rlf.coef_)
# Show the intercept
print(rlf.intercept_)

# Output:
# [[-0.28510259]]
# [0.8396268]


# Show the fitted straight line together with the data

# A line is drawn from (x0, y0) to (x1, y1): prepare [x0, x1] and let the
# model produce [y0, y1]
x_fit = np.array([0.0, 2.0 * np.pi])
y_fit = rlf.predict(x_fit[:, np.newaxis])

plt.plot(x_tr, y, 'ob')
plt.plot(x_fit, y_fit)

# Output: [<matplotlib.lines.Line2D at 0x7fce848dc760>]


# Build the matrix of explanatory variables. np.vander orders the columns by
# decreasing power: x^m, ..., x^2, x, 1 (the last column is the constant term).
m = 20  # degree of the fitting polynomial
power_matrix_x = np.vander(x, N=m + 1)


# Show the Vandermonde matrix
power_matrix_x


import sklearn.linear_model as lm
import matplotlib.pyplot as plt

# Function fitting: this is the training step (the fit determines the
# coefficients); fit returns the estimator itself
lrp = lm.LinearRegression().fit(power_matrix_x, y)

# Prediction (the solid curve in the plot below)
x_lrp = np.linspace(0.0, 2.0 * np.pi, num=100)  # x values used for evaluation
y_lrp = lrp.predict(np.vander(x_lrp, N=m + 1))  # predict y for x with the trained model

# Draw the training data
plt.plot(x_tr, y, "ob", ms=8)
# Draw the fitted curve
plt.plot(x_lrp, y_lrp)
# Draw the underlying sin(x) for reference
plt.plot(x_lrp, np.sin(x_lrp))

# Output: [<matplotlib.lines.Line2D at 0x7fce843395e0>]


# Approximation with higher-degree polynomials: fit several degrees and
# overlay every fitted curve on the data.
plt.plot(x, y, 'ok', ms=7)  # draw the data
for deg in [3, 4, 10]:
    # Refit the shared model on the design matrix for this degree, then
    # evaluate it on the dense grid x_lrp.
    lrp.fit(np.vander(x, deg + 1), y)
    y_lrp = lrp.predict(np.vander(x_lrp, deg + 1))
    plt.plot(x_lrp, y_lrp, label='degree ' + str(deg))
    # To inspect the model coefficients:
    # print('  '.join(['%.2f' % c for c in lrp.coef_]))

# The legend and the y-limits belong to the whole figure, so they are set
# once after all curves exist instead of on every iteration.
plt.legend(loc=2)
plt.ylim(-1.2, 3.0)


# Go to even higher degrees.
plt.plot(x, y, 'ok', ms=7)  # draw the data
# Degree n_tr - 1 gives n_tr coefficients, enough for the polynomial to pass
# through every one of the n_tr samples.
for deg in [8, n_tr - 1]:
    lrp.fit(np.vander(x, deg + 1), y)
    y_lrp = lrp.predict(np.vander(x_lrp, deg + 1))
    plt.plot(x_lrp, y_lrp, label='degree ' + str(deg))

# The legend and the y-limits belong to the whole figure, so they are set
# once after all curves exist instead of on every iteration.
plt.legend()
plt.ylim(-1.2, 1.2)


# Coefficients of the last fit performed in the loop (degree n_tr - 1)
lrp.coef_

# Output:
# array([ 5.48260586e-03, -1.51855220e-01,  1.76568595e+00, -1.12026688e+01,
#         4.21742812e+01, -9.57789028e+01,  1.27057293e+02, -8.95749940e+01,
#         2.63061958e+01,  0.00000000e+00])


# Display once more the Vandermonde matrix stored in power_matrix_x
# (built earlier with np.vander)
power_matrix_x

# Output (as recorded; a 10 x 4 matrix, i.e. 10 samples and m = 3, not the
# n_tr = 30 and m = 20 set in the code above):
# array([[  0.        ,   0.        ,   0.        ,   1.        ],
#        [  0.34026092,   0.48738787,   0.6981317 ,   1.        ],
#        [  2.72208739,   1.94955149,   1.3962634 ,   1.        ],
#        [  9.18704494,   4.38649084,   2.0943951 ,   1.        ],
#        [ 21.77669912,   7.79820595,   2.7925268 ,   1.        ],
#        [ 42.53261547,  12.18469679,   3.4906585 ,   1.        ],
#        [ 73.49635954,  17.54596338,   4.1887902 ,   1.        ],
#        [116.70949686,  23.88200571,   4.88692191,   1.        ],
#        [174.21359298,  31.19282379,   5.58505361,   1.        ],
#        [248.05021344,  39.4784176 ,   6.28318531,   1.        ]])

# 関数近似

# データの準備

# べき多項式関数近似

# 線形近似

# サンプルデータの作成

# scikit-learnのlinear_modelを利用

# べき多項式近似

# 補足 (過学習について)

# 演習課題2 (Exercise 2)

# 補足： Mathematical Description to calculate the coefficients of the fitting function