I have three datasets: x, x2, and y.
I am running cross-validation to determine hyperparameters for a regression of y on X.
I want to autoscale x, leave x2 unprocessed, combine the two column-wise into X = [x, x2], and then regress y on X.
When I try the code below, an error occurs at the step that builds X from x and x2.
I suspect the sample counts do not match when x and x2 are extracted randomly. How can I randomly extract the same samples (the same row numbers) from both data frames?
x:
No.  a  b 

1  3.1 3.1  1.2 
2  10.0  2.2 
3  1.1  1.2 
4  3.5  3.2 
x2:
No.  c  d 

1  0  1 
2  0  0 
3  0  1 
4  1  0 
When extracting samples at random, I want the selected sample numbers (rows) to be the same for both x and x2.
def double_cross_validation(gs_cv, x, x2, y, outer_fold_number, do_autoscaling=True, random_state=0):
    """
    Double Cross-Validation (DCV)

    Estimate y-values in DCV. The model is fitted on the column-wise
    concatenation [x, x2]: x is autoscaled (when requested) with the
    training-fold statistics, while x2 is appended without any processing.

    Parameters
    ----------
    gs_cv: object of GridSearchCV (sklearn.model_selection.GridSearchCV)
        for more details, please go to
        https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html
        Only ``fit``, ``estimator`` and ``best_params_`` are used here.
    x: numpy.array or pandas.DataFrame
        m x n matrix of X-variables of training data that are autoscaled,
        m is the number of training samples and
        n is the number of X-variables
    x2: numpy.array or pandas.DataFrame
        m x k matrix of X-variables that are used as they are (not scaled);
        row i must describe the same sample as row i of x
    y: numpy.array or pandas.DataFrame
        m x 1 vector of a Y-variable of training data
    outer_fold_number: int
        Fold number in outer CV (fold number in inner CV is included in gs_cv)
    do_autoscaling: bool
        flag of autoscaling, if True, x and y are autoscaled (x2 never is)
    random_state: int
        random seed for the fold assignment, if None, random seed is not set

    Returns
    -------
    estimated_y: numpy.array
        estimated y-values in DCV

    Raises
    ------
    ValueError
        if x, x2 and y do not contain the same number of samples
    """
    x = np.array(x)
    x2 = np.array(x2)
    y = np.array(y)

    # The same boolean row mask is applied to x, x2 and y below, so all three
    # must have one row per sample; fail early with a clear message otherwise.
    sample_count = x.shape[0]
    if x2.shape[0] != sample_count or len(y) != sample_count:
        raise ValueError(
            'x, x2 and y must have the same number of samples '
            '(got {0}, {1} and {2})'.format(sample_count, x2.shape[0], len(y))
        )

    # how to divide dataset in outer CV: labels 1..outer_fold_number repeated
    # as often as they fit, plus labels 1..mod_number for the leftover samples
    min_number, mod_number = divmod(sample_count, outer_fold_number)
    index = np.tile(np.arange(1, outer_fold_number + 1), min_number)
    if mod_number != 0:
        index = np.r_[index, np.arange(1, mod_number + 1)]
    if random_state is not None:
        np.random.seed(random_state)
    fold_index_in_outer_cv = np.random.permutation(index)
    np.random.seed(0)

    estimated_y = np.zeros(len(y))
    for fold_number_in_outer_cv in range(1, outer_fold_number + 1):
        print(fold_number_in_outer_cv, '/', outer_fold_number)

        # divide training data and test data; one mask keeps the rows of
        # x, x2 and y aligned
        is_test = fold_index_in_outer_cv == fold_number_in_outer_cv
        if not is_test.any():
            # more folds than samples: nothing to predict for this fold
            continue
        is_train = ~is_test
        x_train = x[is_train, :].copy()
        x2_train = x2[is_train, :].copy()
        y_train = y[is_train].copy()
        x_test = x[is_test, :].copy()
        x2_test = x2[is_test, :].copy()

        # shuffle samples (one permutation for x, x2 and y)
        # NOTE(review): sentinel written as -999 on the assumption that its
        # minus sign was lost from the listing -- confirm.
        if random_state != -999:
            np.random.seed(0)
        random_numbers = np.random.permutation(x_train.shape[0])
        x_train = x_train[random_numbers, :]
        x2_train = x2_train[random_numbers, :]
        y_train = y_train[random_numbers]
        np.random.seed(0)

        # autoscaling: only x and y are scaled, always with training statistics
        if do_autoscaling:
            x_mean = x_train.mean(axis=0)
            x_std = x_train.std(axis=0, ddof=1)
            y_mean = y_train.mean()
            y_std = y_train.std(ddof=1)
            scaled_x_train = (x_train - x_mean) / x_std
            scaled_x_test = (x_test - x_mean) / x_std
            autoscaled_y_train = (y_train - y_mean) / y_std
        else:
            scaled_x_train = x_train
            scaled_x_test = x_test
            autoscaled_y_train = y_train.copy()

        # X = [x, x2]: pass the 2-D arrays themselves to concatenate. Wrapping
        # each one in a list makes them 3-D, and axis=1 then needs equal column
        # counts, which is the "dimensions must match exactly" ValueError.
        autoscaled_x_train = np.concatenate([scaled_x_train, x2_train], axis=1)
        autoscaled_x_test = np.concatenate([scaled_x_test, x2_test], axis=1)

        # inner CV
        gs_cv.fit(autoscaled_x_train, autoscaled_y_train)

        # modeling: refit the base estimator with the best hyperparameters
        model = gs_cv.estimator
        for hyperparameter, best_value in gs_cv.best_params_.items():
            setattr(model, hyperparameter, best_value)
        model.fit(autoscaled_x_train, autoscaled_y_train)

        # prediction
        estimated_y_test = np.asarray(model.predict(autoscaled_x_test)).ravel()
        if do_autoscaling:
            # back to the original scale of y
            estimated_y_test = estimated_y_test * y_std + y_mean
        estimated_y[is_test] = estimated_y_test  # store

    return estimated_y
# Settings
inner_fold_number = 10  # "fold_number"-fold cross-validation (CV) for inner CV
outer_fold_number = 54  # "fold_number"-fold CV for outer CV
# Hyperparameter candidates are powers of two. The exponent ranges must start
# at negative values: range(10, 1) and range(20, 11) are empty, which would
# leave epsilon and gamma without any candidate for the grid search.
parameters = {
    "C": [2 ** n for n in range(-5, 11)],  # Candidates of C
    "epsilon": [2 ** n for n in range(-10, 1)],  # Candidates of epsilon
    "gamma": [2 ** n for n in range(-20, 11)],  # Candidates of gamma
}
# DCV
inner_cv = GridSearchCV(svm.SVR(), parameters, scoring='max_error', n_jobs=1, cv=inner_fold_number)
y_pred = double_cross_validation(gs_cv=inner_cv, x=x, x2=x2, y=y, outer_fold_number=outer_fold_number, do_autoscaling=True, random_state=0)
# Generalization performance from DCV using SVR
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
print(r2_score(y, y_pred))
print(mean_absolute_error(y, y_pred))
Error:
ValueError Traceback (most recent call last)
     99 # DCV
    100 inner_cv = GridSearchCV (svm.SVR (), parameters, scoring ='max_error', n_jobs = 1, cv = inner_fold_number)
--> 101 y_pred = double_cross_validation (gs_cv = inner_cv, x = x, x2 = x2, y = y, outer_fold_number = outer_fold_number, do_autoscaling = True, random_state = 0)
    102
    103 # Generalization performance from DCV using SVR
     66 autoscaled_y_train = (y_train - y_train.mean ())/y_train.std (ddof = 1)
     67 autoscaled_x_test_pre = (x_test - x_train.mean (axis = 0))/x_train.std (axis = 0, ddof = 1)
---> 68 autoscaled_x_train = np.concatenate ([[autoscaled_x_test_pre],
[x2_test]],
axis = 1)
     69 else:
     70 autoscaled_x_train = x_train.copy ()
ValueError: all the input array dimensions except for the concatenation axis must match exactly
Thank you.
![image](76b8eeb2cfb84fe71c8d4025542ffe90.jpeg)

Answer # 1
Regarding the postscript: autoscaled_x_train = np.insert(autoscaled_x_train_pre, x2_train)
Looking at the error, np.insert is being called with only two arguments.
It requires three arguments (arr, obj, values), but only two were given,
so Python reports:
TypeError: insert() missing 1 required positional argument: 'values'
Please specify all of the required arguments.