#============================================= # Regression by scikit-learn functions #============================================= infile =D:\tkProg\tkProg.main\tkprog_COE\regression\boston.xlsx outfile=D:\tkProg\tkProg.main\tkprog_COE\regression/boston-predict.xlsx method=linear Fraction of test data=0.3 Seed of random() for split data= nmaxiter=1000 For Ridge/LASSO/Elastic Net: alpha=0.1 l1_ratio=0.5 For Random Forest Regression: max_depth=1000 n_estimators=100 max_features=auto For Multilayer Perceptoron Regression: hidden layer sizes=5,5,5 mlp_solver=lbfgs mlp_activation=relu Plot options: plot_boxplot=0 plot_heatmap=0 plot_pairplot=0 Read [D:\tkProg\tkProg.main\tkprog_COE\regression\boston.xlsx] Data check isnull() MEDV CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO B LSTAT 0 False False False False False False False False False False False False False False 1 False False False False False False False False False False False False False False 2 False False False False False False False False False False False False False False 3 False False False False False False False False False False False False False False 4 False False False False False False False False False False False False False False .. ... ... ... ... ... ... ... ... ... ... ... ... ... ... 501 False False False False False False False False False False False False False False 502 False False False False False False False False False False False False False False 503 False False False False False False False False False False False False False False 504 False False False False False False False False False False False False False False 505 False False False False False False False False False False False False False False [506 rows x 14 columns] isna() MEDV CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO B LSTAT 0 False False False False False False False False False False False False False False 1 False False False False False False False False False False False False False False 2 False False False False False False False False False False False False False False 3 False False False False False False False False False False False False False False 4 False False False False False False False False False False False False False False .. ... ... ... ... ... ... ... ... ... ... ... ... ... ... 501 False False False False False False False False False False False False False False 502 False False False False False False False False False False False False False False 503 False False False False False False False False False False False False False False 504 False False False False False False False False False False False False False False 505 False False False False False False False False False False False False False False [506 rows x 14 columns] ndata=506 ndescriptors=13 all_labels=['MEDV', 'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT'] x_labels =['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT'] o_label =MEDV Split to training and test data Number of training data:354 Number of test data:152 Covariances of standardized values ( 0, 1) ( MEDV, CRIM): 0.1741 ( 0, 2) ( MEDV, ZN): 0.3101 ( 0, 3) ( MEDV, INDUS): 0.3192 ( 0, 4) ( MEDV, CHAS): 0.3314 ( 0, 5) ( MEDV, NOX): 0.2469 ( 0, 6) ( MEDV, RM): 0.2195 ( 0, 7) ( MEDV, AGE): 0.2256 ( 0, 8) ( MEDV, DIS): 0.0612 ( 0, 9) ( MEDV, RAD): 0.1926 ( 0, 10) ( MEDV, TAX): 0.1866 ( 0, 11) ( MEDV, PTRATIO): 0.2190 ( 0, 12) ( MEDV, B): 0.1598 ( 0, 13) ( MEDV, LSTAT): -0.0131 ( 1, 2) ( CRIM, ZN): 0.2586 ( 1, 3) ( CRIM, INDUS): 0.2777 ( 1, 4) ( CRIM, CHAS): 0.2975 ( 1, 5) ( CRIM, NOX): 0.2605 ( 1, 6) ( CRIM, RM): 0.1364 ( 1, 7) ( CRIM, AGE): 0.1927 ( 1, 8) ( CRIM, DIS): 0.1336 ( 1, 9) ( CRIM, RAD): 0.1750 ( 1, 10) ( CRIM, TAX): 0.1896 ( 1, 11) ( CRIM, PTRATIO): 0.1747 ( 1, 12) ( CRIM, B): 0.0715 ( 1, 13) ( CRIM, LSTAT): 0.1107 ++( 2, 3) ( ZN, INDUS): 0.6024 ++( 2, 4) ( ZN, CHAS): 0.6322 ( 2, 5) ( ZN, NOX): 0.4363 ( 2, 6) ( ZN, RM): 0.1318 ( 2, 7) ( ZN, AGE): 0.1402 ( 2, 8) ( ZN, DIS): -0.1777 ( 2, 9) ( ZN, RAD): 0.0536 ( 2, 10) ( ZN, TAX): 0.0238 ( 2, 11) ( ZN, PTRATIO): 0.0898 ( 2, 12) ( ZN, B): 0.0663 ( 2, 13) ( ZN, LSTAT): 0.1048 ++( 3, 4) ( INDUS, CHAS): 0.7328 ++( 3, 5) ( INDUS, NOX): 0.5500 ( 3, 6) ( INDUS, RM): 0.1401 ( 3, 7) ( INDUS, AGE): 0.1096 ( 3, 8) ( INDUS, DIS): -0.2033 ( 3, 9) ( INDUS, RAD): 0.0571 ( 3, 10) ( INDUS, TAX): 0.0046 ( 3, 11) ( INDUS, PTRATIO): 0.0934 ( 3, 12) ( INDUS, B): 0.1058 ( 3, 13) ( INDUS, LSTAT): 0.1952 ++( 4, 5) ( CHAS, NOX): 0.5606 ( 4, 6) ( CHAS, RM): 0.1525 ( 4, 7) ( CHAS, AGE): 0.1630 ( 4, 8) ( CHAS, DIS): -0.1459 ( 4, 9) ( CHAS, RAD): 0.0822 ( 4, 10) ( CHAS, TAX): 0.0413 ( 4, 11) ( CHAS, PTRATIO): 0.1079 ( 4, 12) ( CHAS, B): 0.1083 ( 4, 13) ( CHAS, LSTAT): 0.1784 ( 5, 6) ( NOX, RM): 0.1490 ( 5, 7) ( NOX, AGE): 0.1525 ( 5, 8) ( NOX, DIS): -0.0357 ( 5, 9) ( NOX, RAD): 0.1237 ( 5, 10) ( NOX, TAX): 0.0787 ( 5, 11) ( NOX, PTRATIO): 0.1418 ( 5, 12) ( NOX, B): 0.1053 ( 5, 13) ( NOX, LSTAT): 0.2024 ( 6, 7) ( RM, AGE): 0.3130 ( 6, 8) ( RM, DIS): 0.3220 ( 6, 9) ( RM, RAD): 0.3087 ( 6, 10) ( RM, TAX): 0.2984 ( 6, 11) ( RM, PTRATIO): 0.2920 ( 6, 12) ( RM, B): 0.2559 ( 6, 13) ( RM, LSTAT): -0.0313 ++( 7, 8) ( AGE, DIS): 0.5554 ( 7, 9) ( AGE, RAD): 0.4226 ( 7, 10) ( AGE, TAX): 0.4474 ( 7, 11) ( AGE, PTRATIO): 0.3701 ( 7, 12) ( AGE, B): 0.2647 ( 7, 13) ( AGE, LSTAT): -0.0642 ++( 8, 9) ( DIS, RAD): 0.5507 ++( 8, 10) ( DIS, TAX): 0.6351 ( 8, 11) ( DIS, PTRATIO): 0.4360 ( 8, 12) ( DIS, B): 0.3327 ( 8, 13) ( DIS, LSTAT): -0.0933 ( 9, 10) ( RAD, TAX): 0.4661 ( 9, 11) ( RAD, PTRATIO): 0.3878 ( 9, 12) ( RAD, B): 0.2795 ( 9, 13) ( RAD, LSTAT): -0.0362 (10, 11) ( TAX, PTRATIO): 0.4018 (10, 12) ( TAX, B): 0.2651 (10, 13) ( TAX, LSTAT): -0.0646 (11, 12) ( PTRATIO, B): 0.2494 (11, 13) ( PTRATIO, LSTAT): -0.0215 (12, 13) ( B, LSTAT): -0.0415 Execute regression Standaridization Fit Calculate predicted values Scores: Mean absolute error (MAE): training 3.17 test: 3.53 Mean squared error (MSE) : training 21.9 test: 22.4 Root MSE (RMSE) : training 4.68 test: 4.74 R^2 score : training 0.736 test: 0.741 Parameters: intercept: 22.21864406779661 coefficients CRIM: -0.9932 ZN: 1.107 INDUS: 0.3304 CHAS: 0.8623 NOX: -2.348 RM: 2.52 AGE: 0.1699 DIS: -3.15 RAD: 2.671 TAX: -2.048 PTRATIO: -2.017 B: 0.6818 LSTAT: -3.759 Save predict data to [D:\tkProg\tkProg.main\tkprog_COE\regression/boston-predict.xlsx] plot Plot index - input/prediction Plot input - prediction Press ENTER to terminate