# サンプルデータの作成

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Draw 200 random points in the plane: X holds one (x, y) pair per row, shape (200, 2).
X = np.random.randn(200, 2)
# Target: True where x + y > 1, False otherwise. The SVC trained later is asked
# to recover the boundary between the two classes from this data.
# The comparison produces a 1-D boolean array with one entry per point.
y = (X[:, 0] + X[:, 1]) > 1

# Shift every point by uniform noise in [-1.25, 1.25) per coordinate. The labels
# keep the values computed before the shift. Try changing this as an exercise.
noise = 2.5 * (np.random.rand(200, 2) - 0.5)
X += noise

# Show the sample points coloured by their true labels.
fig, ax = plt.subplots(figsize=(5, 5))
cmap = plt.cm.PiYG  # alternative: plt.cm.Blues
# c and edgecolors set the colours; cmap selects the colormap applied to c.
# The boolean labels False/True become 0.5/1.0 on the [vmin, vmax] = [0, 1] scale.
ax.scatter(
    X[:, 0], X[:, 1],
    s=50,
    c=0.5 + 0.5 * y,
    cmap=cmap, vmin=0, vmax=1,
    edgecolors='k', lw=1,
)

<matplotlib.collections.PathCollection at 0x7f3fccbd87c0>


import sklearn.svm as svm
# Train the support vector classifier on the labelled points.
# (svm.LinearSVC() is an alternative linear classifier that can be tried here.)
# fit() returns the estimator itself, so construction and training are chained.
est = svm.SVC(kernel='linear').fit(X, y)


# Build a 500 x 500 evaluation grid covering the square [-3, 3]^2.
grid_axis = np.linspace(-3, 3, 500)
xx, yy = np.meshgrid(grid_axis, grid_axis)

# This function takes a SVM estimator as input.

def plot_decision_function(est, title):
    """Plot the decision boundary of a fitted classifier over the samples.

    The function reads the module-level globals ``xx`` / ``yy`` (evaluation
    grid) and ``X`` / ``y`` (sample points and their labels).

    Parameters
    ----------
    est : fitted estimator
        Must provide ``decision_function``. If it (or its
        ``best_estimator_``, for a search wrapper) exposes
        ``support_vectors_``, those points are circled in red; otherwise
        that overlay is skipped.
    title : str
        Title set on the axes.

    Returns
    -------
    ax : matplotlib Axes
        The axes the figure was drawn on.
    """
    # Evaluate the decision function at every node of the grid.
    Z = est.decision_function(np.c_[xx.ravel(),
                                    yy.ravel()])
    Z = Z.reshape(xx.shape)
    cmap = plt.cm.PiYG  # alternative: plt.cm.Blues

    fig, ax = plt.subplots(1, 1, figsize=(8, 8))
    # Optional: show the decision function itself as a background image.
    # ax.imshow(Z,
    #          extent=(xx.min(), xx.max(),
    #                  yy.min(), yy.max()),
    #          aspect='auto',
    #          origin='lower',
    #          cmap=cmap)

    # The zero level set of the decision function is the class boundary.
    ax.contour(xx, yy, Z, levels=[0],
               linewidths=2,
               colors='k')

    # Sample points, coloured by their true labels.
    ax.scatter(X[:, 0], X[:, 1],
               s=50, c=.5 + .5 * y,
               edgecolors='k',
               lw=1, cmap=cmap,
               vmin=0, vmax=1)
    ax.axhline(0, color='k', ls='--')
    ax.axvline(0, color='k', ls='--')
    ax.axis([-3, 3, -3, 3])
    ax.set_axis_off()
    ax.set_title(title)

    # "support_vectors_" holds the training points that are support vectors.
    # Not every estimator has it (e.g. a search wrapper, or LinearSVC), so
    # look through best_estimator_ when present and skip the overlay when
    # the attribute is missing instead of raising AttributeError.
    fitted = getattr(est, "best_estimator_", est)
    support_vectors = getattr(fitted, "support_vectors_", None)
    if support_vectors is not None:
        ax.scatter(support_vectors[:, 0], support_vectors[:, 1],
                   facecolors='none', edgecolors='r', s=30)

    # Callers assign the result to ``ax``, so hand the axes back.
    return ax


# Draw the boundary found by the linear SVC trained above.
ax = plot_decision_function(est, "Linearly separable, linear SVC")


import sklearn.model_selection as ms

# XOR labels: False only in the first and third quadrants.
y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0)

# Train a linear classifier, grid-searching the regularisation parameter C
# over 10 log-spaced values between 1e-3 and 1e3.
param_grid = {'C': np.logspace(-3., 3., 10)}
est = ms.GridSearchCV(svm.LinearSVC(), param_grid)
est.fit(X, y)

# Report the mean cross-validation score of the whole search procedure.
cv_scores = ms.cross_val_score(est, X, y)
print("Score: {0:.1f}".format(cv_scores.mean()))

# Plot the decision function.
ax = plot_decision_function(est, "XOR, linear SVC")


# XOR labels: True where exactly one of the two coordinates is positive.
y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0)

# Alternative: determine the hyper-parameters C and gamma by grid search and
# keep the SVM with the best parameters:
#est = ms.GridSearchCV(
#    svm.SVC(), {'C': np.logspace(-3., 3., 10),
#                'gamma': np.logspace(-3., 3., 10)})
# Here an RBF-kernel SVC with default hyper-parameters is used instead.
est = svm.SVC(kernel="rbf")
est.fit(X, y)

# Report the mean cross-validation score.
cv_scores = ms.cross_val_score(est, X, y)
print("Score: {0:.3f}".format(cv_scores.mean()))

# With the grid search above, plot est.best_estimator_ instead:
#plot_decision_function(
#    est.best_estimator_, "XOR, non-linear SVC")
plot_decision_function(est, "XOR, non-linear SVC")

Score: 0.890


# データ作成用関数
import random
def make_data_by_sin_gaussian(x):
    """Return sin(x) with Gaussian noise added to each sample.

    One noise value is drawn with ``random.gauss(0, 0.3)`` per entry along
    the first axis of ``sin(x)``, in order, and added to the sine values.
    """
    noisy = np.sin(x)
    # Draw the noise from the stdlib generator, one value per sample.
    # (Alternative noise source: np.random.randn(len(x)) * 0.2)
    jitter = [random.gauss(0, 0.3) for _ in noisy]
    noisy += jitter
    return noisy


import numpy as np
import random
import matplotlib.pyplot as plt
from sklearn import svm

x_max = 9.  # upper limit of the prediction range

# Build the training data: n_tr evenly spaced values from 0 to 2*pi as a 1-D
# array, noisy sine targets, and a column-vector view of x as the feature matrix.
n_tr = 40
x = np.linspace(0., 2. * np.pi, n_tr)
y = make_data_by_sin_gaussian(x)
X = x.reshape(-1, 1)

# Train the support vector regressor (fit() returns the estimator itself).
svr = svm.SVR(kernel='rbf').fit(X, y)

# Test inputs: 1000 points in [0, x_max]. Their predictions (y_test) give the
# regression curve (x_test, y_test).
x_test = np.linspace(0, x_max, 1000)
y_test = svr.predict(x_test.reshape(-1, 1))

# Plot the training data and the regression curve.
plt.scatter(x, y)
plt.plot(x_test, y_test)
# Overdraw the support vectors in red on top of the data points.
sv_idx = svr.support_
plt.scatter(X[sv_idx], y[sv_idx], c="red")
plt.show()
# print(X)


import numpy as np
import matplotlib.pyplot as plt
# Two independent random sources: the seeded global generator supplies the
# sample positions, the RandomState instance supplies the noise.
np.random.seed(8)
rng = np.random.RandomState(0)

# Generate sample data: 100 positions in [-2*pi, 2*pi) and their sinc values.
n_samples = 100
X = 4 * np.pi * np.random.random(n_samples) - 2 * np.pi
y = np.sinc(X)
# Add uniform noise in (-0.125, 0.125].
y += 0.25 * (0.5 - rng.rand(n_samples))

# To use X as a feature matrix, turn it into a column vector first:
# X = X[:, None]

plt.plot(X, y, "bo")

[<matplotlib.lines.Line2D at 0x7f08731bfd00>]


from sklearn.kernel_ridge import KernelRidge

x_max = 9.  # upper limit of the prediction range

# Build the training data: n_tr evenly spaced values from 0 to 2*pi as a 1-D
# array, noisy sine targets, and a column-vector view of x as the feature matrix.
n_tr = 40
x = np.linspace(0., 2. * np.pi, n_tr)
y = make_data_by_sin_gaussian(x)
X = x.reshape(-1, 1)

# The only new part compared with the SVR example: the regressor is swapped
# for kernel ridge regression.
kernel_ridge = KernelRidge(kernel="rbf")

# Train the model.
kernel_ridge.fit(X, y)

# Evaluate the regression curve on 10000 points in [0, x_max].
x_test = np.linspace(0, x_max, 10000)
y_test = kernel_ridge.predict(x_test.reshape(-1, 1))

# Plot the training data and the regression curve.
plt.scatter(x, y)
plt.plot(x_test, y_test)
plt.show()


from sklearn_rvm import EMRVR
x_max = 9.  # upper limit of the prediction range

# Build the training data.
n_tr = 50
x = np.linspace(0., np.pi*2., n_tr)  # 1-D array of n_tr evenly spaced values from 0 to 2*pi
y = make_data_by_sin_gaussian(x)
X = x[:, np.newaxis]

# Train the relevance vector regressor.
rvr = EMRVR(kernel='rbf', gamma="scale")
rvr.fit(X, y)

# Predict on 1000 test points in [0, x_max]; with return_std=True the
# standard deviation of each prediction is returned as well.
x_test = np.linspace(0, x_max, 1000)
y_test, y_std = rvr.predict(x_test[:, np.newaxis], return_std=True)

# Plot the data, the prediction and a band of +/- one standard deviation.
plt.scatter(x, y)
plt.plot(x_test, y_test)
plt.fill_between(x_test, y_test - y_std, y_test + y_std, color="darkorange", alpha=0.2)


# Mark the relevance vectors with red circles.
relevance_vectors_idx = rvr.relevance_
plt.scatter(X[relevance_vectors_idx], y[relevance_vectors_idx], s=80, facecolors="none", edgecolors="r",
            label="relevance vectors")

# Reference curve: the noise-free sin(x).
x_orig = np.linspace(0., x_max, 1000)
plt.plot(x_orig, np.sin(x_orig), color="#a000a0", linestyle="--")
# The label set on the relevance-vector markers is only shown once a legend is drawn.
plt.legend()
plt.show()


%%html
<link rel="stylesheet" type="text/css" href="custom.css">

scikit-learnによるサポートベクトルマシン分類(SVC)とサポートベクトル回帰(SVR)¶

前置き¶

サポートベクトルマシン概説¶

scikit-learnにおけるサポートベクトルマシンの利用例¶

訓練データ (1)¶

サポートベクトル識別 (1) 線形¶

訓練データ(2) 線形な境界を引けない場合¶

非線形変換を含む識別器 (カーネル法の導入)¶

回帰問題に対するサポートベクトルマシンの利用 (SVR: Support Vector Regression)¶

scikit-learn所収のSVRの利用¶

Exercise 5¶

(参考) scikit-learnのカーネルリッジ回帰¶

Relevance Vector Machine¶

偏ったデータの場合¶