Home>

I am trying to implement the least squares method while changing the siple code on the following site.
https://medium.com/micin-developers/decipher-github-lr-sw-40e519a13c0a

Sample code
# Import required libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
#Generation of observation points (generate y with noise for x at equal intervals)
np.random.seed (0)
x = (np.arange (51)/50) [:, np.newaxis]
noise = (np.random.rand (51)/3) [:, np.newaxis]
y = (x * 2) + noise
# ------------------------------------------------- ---
Get an approximate straight line with the solver of # scikit-learn
clf = LinearRegression (fit_intercept = True)
clf.fit (X = x, y = y)
y_hat = clf.predict (x)
# ------------------------------------------------- ---
# Solve the least squares equation to get the coefficients of the approximate linear equation
x_ = np.concatenate ([x, np.ones (np.shape (x))],

 axis = 1)
w = np.dot (np.linalg.inv (np.dot (x_.T, x_)), np.dot (x_.T, y))
y_hat_ = np.dot (x_, w)
# ------------------------------------------------- ---
# Draw the approximate straight line obtained by learning with x, y
cmap = plt.get_cmap ("tab10")
plt.figure (figsize = (12,16), dpi = 100)
plt.subplot (2, 1, 1)
plt.grid (which ='major', color = [0.7, 0.7, 0.7],linestyle ='-')
plt.scatter (x, y, s = 300, alpha = 0.7, color = cmap (0), label ='observation point')
plt.plot (x, y_hat, linewidth = 10, alpha = 0.7, color = cmap (1), label ='approximate curve by scikit-learn')
plt.plot (x, y_hat_, linewidth = 8, alpha = 0.3, color = cmap (2), label ='approximate curve by least squares', linestyle ='-')
plt.legend (font size = 15, loc ='lower right')
plt.ylim ([0, 3.5])
print (weight = [% .3f,% .3f]'% (clf.coef_, clf.intercept_) solved by'scikit-learn)
print ('weight solved by least squares = [% .3f,% .3f]'% (w [0],

 w [1]))
plt.show ()
Modified code

I just replaced the observation point part of the sample code with the experimental value.

# Import required libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
# measured value
x = np.array ([6.26379, 8.57417, 8.66527, 8.75069, 11.6708, 12.3487, 14.5032, 15.7422, 21.7646, 23.0518, 26.5069, 26.4035, 26.321, 23.0045, 19.2654, 17.9425, 14.5669, 13.513, 10.4902, 9.95136, 9.77395])
y = np.array ([3.709910308, 3.300454417, 3.219869361, 2.879991517, 2.250120678, 2.24981186, 1.859931899, 1.839996231, 1.560029151, 1.360016958, 1.210037387, 1.527926405, 1.320005022, 1.340038138, 1.618120234, 1.410033737, 1.83006856, 1.849465)
# ------------------------------------------------- ---
Get an approximate straight line with the solver of # scikit-learn
clf = LinearRegression (fit_intercept = True)
clf.fit (X = x, y = y)
y_hat = clf.predict (x)
# ------------------------------------------------- ---
# Solve the least squares equation to get the coefficients of the approximate linear equation
x_ = np.concatenate ([x, np.ones (np.shape (x))],

 axis = 1)w = np.dot (np.linalg.inv (np.dot (x_.T, x_)), np.dot (x_.T, y))
y_hat_ = np.dot (x_, w)
# ------------------------------------------------- ---
# Draw the approximate straight line obtained by learning with x, y
cmap = plt.get_cmap ("tab10")
plt.figure (figsize = (12,16), dpi = 100)
plt.subplot (2, 1, 1)
plt.grid (which ='major', color = [0.7, 0.7, 0.7],

 linestyle ='-')
plt.scatter (x, y, s = 300, alpha = 0.7, color = cmap (0), label ='measured')
plt.plot (x, y_hat, linewidth = 10, alpha = 0.7, color = cmap (1), label ='approximate curve by scikit-learn')
plt.plot (x, y_hat_, linewidth = 8, alpha = 0.3, color = cmap (2), label ='approximate curve by least squares', linestyle ='-')
plt.legend (font size = 15, loc ='lower right')
plt.ylim ([0, 3.5])
print (weight = [% .3f,% .3f]'% (clf.coef_, clf.intercept_) solved by'scikit-learn)
print ('weight solved by least squares = [% .3f,% .3f]'% (w [0],

 w [1]))
plt.show ()


When I run this code, I get the following error message:

error
Expected 2D array, got 1D array instead:
array = [6.26379 8.57417 8.66527 8.75069 11.6708 12.3487 14.5032 15.7422
 21.7646 23.0518 26.5069 26.4035 26.321 23.0045 19.2654 17.9425
 14.5669 13.513 10.4902 9.95136 9.77395].
Reshape your data either using array.reshape (-1, 1) if your data has a single feature or array.reshape (1, -1) if it contains a single sample.

In this case, how should the code be modified?
I would appreciate it if you could teach me.
Thanks for your cooperation.

  • Answer # 1

    I don't understand the contents, but I guess from the original code, I think I should add the following code.

    :
    x = np.array ([6.26379, 8.57417, 8.66527, 8.75069, 11.6708, 12.3487, 14.5032, 15.7422, 21.7646, 23.0518, 26.5069, 26.4035, 26.321, 23.0045, 19.2654, 17.9425, 14.5669, 13.513, 10.4902, 9.95136, 9.77395])
    y = np.array ([3.709910308, 3.300454417, 3.219869361, 2.879991517, 2.250120678, 2.24981186, 1.859931899, 1.839996231, 1.560029151, 1.360016958, 1.210037387, 1.527926405, 1.320005022, 1.340038138, 1.618120234, 1.410033737, 1.83006856, 1.849465)
    # Add the following
    x = x [:, np.newaxis]
    y = y [:, np.newaxis]