# Import the relevant libraries
import pandas as pd
import numpy as np
data_preprocessed = pd.read_csv('Absenteeism-preprocessed.csv')
data_preprocessed.head()
| Reason_1 | Reason_2 | Reason_3 | Reason_4 | Month Value | Day of the Week | Transportation Expense | Distance to Work | Age | Daily Work Load Average | Body Mass Index | Education | Children | Pet | Absenteeism Time in Hours | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 0 | 1 | 7 | 1 | 289 | 36 | 33 | 239.554 | 30 | 0 | 2 | 1 | 4 |
| 1 | 0 | 0 | 0 | 0 | 7 | 1 | 118 | 13 | 50 | 239.554 | 31 | 0 | 1 | 0 | 0 |
| 2 | 0 | 0 | 0 | 1 | 7 | 2 | 179 | 51 | 38 | 239.554 | 31 | 0 | 0 | 0 | 2 |
| 3 | 1 | 0 | 0 | 0 | 7 | 3 | 279 | 5 | 39 | 239.554 | 24 | 0 | 2 | 0 | 4 |
| 4 | 0 | 0 | 0 | 1 | 7 | 3 | 289 | 36 | 33 | 239.554 | 30 | 0 | 2 | 1 | 2 |
# Base line classification creation
data_preprocessed['Absenteeism Time in Hours'].median()
3.0
targets =np.where(data_preprocessed['Absenteeism Time in Hours']>
data_preprocessed['Absenteeism Time in Hours'].median(), 1, 0)
targets
array([1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0,
1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1,
0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1,
0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,
0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0,
1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1,
0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1,
1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1,
0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0,
0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0,
0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1,
1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0,
1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0,
1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1,
1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1,
1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1,
0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1,
1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1,
1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1,
1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1,
1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0,
1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1,
0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1,
0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0,
1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1,
1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0])
data_preprocessed['Absenteeism Time in Hours'] = targets
data_preprocessed.head()
| Reason_1 | Reason_2 | Reason_3 | Reason_4 | Month Value | Day of the Week | Transportation Expense | Distance to Work | Age | Daily Work Load Average | Body Mass Index | Education | Children | Pet | Absenteeism Time in Hours | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 0 | 1 | 7 | 1 | 289 | 36 | 33 | 239.554 | 30 | 0 | 2 | 1 | 1 |
| 1 | 0 | 0 | 0 | 0 | 7 | 1 | 118 | 13 | 50 | 239.554 | 31 | 0 | 1 | 0 | 0 |
| 2 | 0 | 0 | 0 | 1 | 7 | 2 | 179 | 51 | 38 | 239.554 | 31 | 0 | 0 | 0 | 0 |
| 3 | 1 | 0 | 0 | 0 | 7 | 3 | 279 | 5 | 39 | 239.554 | 24 | 0 | 2 | 0 | 1 |
| 4 | 0 | 0 | 0 | 1 | 7 | 3 | 289 | 36 | 33 | 239.554 | 30 | 0 | 2 | 1 | 0 |
targets.sum()/ targets.shape[0]
0.45571428571428574
data_with_targets = data_preprocessed.drop(['Absenteeism Time in Hours'], axis=1)
data_with_targets is data_preprocessed
False
data_with_targets.head()
| Reason_1 | Reason_2 | Reason_3 | Reason_4 | Month Value | Day of the Week | Transportation Expense | Distance to Work | Age | Daily Work Load Average | Body Mass Index | Education | Children | Pet | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 0 | 1 | 7 | 1 | 289 | 36 | 33 | 239.554 | 30 | 0 | 2 | 1 |
| 1 | 0 | 0 | 0 | 0 | 7 | 1 | 118 | 13 | 50 | 239.554 | 31 | 0 | 1 | 0 |
| 2 | 0 | 0 | 0 | 1 | 7 | 2 | 179 | 51 | 38 | 239.554 | 31 | 0 | 0 | 0 |
| 3 | 1 | 0 | 0 | 0 | 7 | 3 | 279 | 5 | 39 | 239.554 | 24 | 0 | 2 | 0 |
| 4 | 0 | 0 | 0 | 1 | 7 | 3 | 289 | 36 | 33 | 239.554 | 30 | 0 | 2 | 1 |
#Selecting input for regression
data_with_targets.shape
(700, 14)
data_with_targets.iloc[:,0:14]
| Reason_1 | Reason_2 | Reason_3 | Reason_4 | Month Value | Day of the Week | Transportation Expense | Distance to Work | Age | Daily Work Load Average | Body Mass Index | Education | Children | Pet | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 0 | 1 | 7 | 1 | 289 | 36 | 33 | 239.554 | 30 | 0 | 2 | 1 |
| 1 | 0 | 0 | 0 | 0 | 7 | 1 | 118 | 13 | 50 | 239.554 | 31 | 0 | 1 | 0 |
| 2 | 0 | 0 | 0 | 1 | 7 | 2 | 179 | 51 | 38 | 239.554 | 31 | 0 | 0 | 0 |
| 3 | 1 | 0 | 0 | 0 | 7 | 3 | 279 | 5 | 39 | 239.554 | 24 | 0 | 2 | 0 |
| 4 | 0 | 0 | 0 | 1 | 7 | 3 | 289 | 36 | 33 | 239.554 | 30 | 0 | 2 | 1 |
| 5 | 0 | 0 | 0 | 1 | 10 | 2 | 179 | 51 | 38 | 239.554 | 31 | 0 | 0 | 0 |
| 6 | 0 | 0 | 0 | 1 | 7 | 4 | 361 | 52 | 28 | 239.554 | 27 | 0 | 1 | 4 |
| 7 | 0 | 0 | 0 | 1 | 7 | 4 | 260 | 50 | 36 | 239.554 | 23 | 0 | 4 | 0 |
| 8 | 0 | 0 | 1 | 0 | 6 | 6 | 155 | 12 | 34 | 239.554 | 25 | 0 | 2 | 0 |
| 9 | 0 | 0 | 0 | 1 | 7 | 0 | 235 | 11 | 37 | 239.554 | 29 | 1 | 1 | 1 |
| 10 | 1 | 0 | 0 | 0 | 7 | 0 | 260 | 50 | 36 | 239.554 | 23 | 0 | 4 | 0 |
| 11 | 1 | 0 | 0 | 0 | 7 | 1 | 260 | 50 | 36 | 239.554 | 23 | 0 | 4 | 0 |
| 12 | 1 | 0 | 0 | 0 | 7 | 2 | 260 | 50 | 36 | 239.554 | 23 | 0 | 4 | 0 |
| 13 | 1 | 0 | 0 | 0 | 7 | 2 | 179 | 51 | 38 | 239.554 | 31 | 0 | 0 | 0 |
| 14 | 0 | 0 | 0 | 1 | 7 | 2 | 179 | 51 | 38 | 239.554 | 31 | 0 | 0 | 0 |
| 15 | 1 | 0 | 0 | 0 | 7 | 4 | 246 | 25 | 41 | 239.554 | 23 | 0 | 0 | 0 |
| 16 | 0 | 0 | 0 | 1 | 7 | 4 | 179 | 51 | 38 | 239.554 | 31 | 0 | 0 | 0 |
| 17 | 0 | 0 | 1 | 0 | 7 | 0 | 179 | 51 | 38 | 239.554 | 31 | 0 | 0 | 0 |
| 18 | 1 | 0 | 0 | 0 | 7 | 3 | 189 | 29 | 33 | 239.554 | 25 | 0 | 2 | 2 |
| 19 | 0 | 0 | 0 | 1 | 5 | 4 | 248 | 25 | 47 | 205.917 | 32 | 0 | 2 | 1 |
| 20 | 1 | 0 | 0 | 0 | 12 | 1 | 330 | 16 | 28 | 205.917 | 25 | 1 | 0 | 0 |
| 21 | 1 | 0 | 0 | 0 | 3 | 6 | 179 | 51 | 38 | 205.917 | 31 | 0 | 0 | 0 |
| 22 | 1 | 0 | 0 | 0 | 10 | 3 | 361 | 52 | 28 | 205.917 | 27 | 0 | 1 | 4 |
| 23 | 0 | 0 | 0 | 1 | 8 | 4 | 260 | 50 | 36 | 205.917 | 23 | 0 | 4 | 0 |
| 24 | 0 | 0 | 1 | 0 | 8 | 0 | 289 | 36 | 33 | 205.917 | 30 | 0 | 2 | 1 |
| 25 | 0 | 0 | 0 | 1 | 8 | 0 | 361 | 52 | 28 | 205.917 | 27 | 0 | 1 | 4 |
| 26 | 0 | 0 | 0 | 1 | 4 | 2 | 289 | 36 | 33 | 205.917 | 30 | 0 | 2 | 1 |
| 27 | 0 | 0 | 0 | 1 | 12 | 1 | 157 | 27 | 29 | 205.917 | 22 | 0 | 0 | 0 |
| 28 | 0 | 0 | 1 | 0 | 8 | 2 | 289 | 36 | 33 | 205.917 | 30 | 0 | 2 | 1 |
| 29 | 0 | 0 | 0 | 1 | 8 | 4 | 179 | 51 | 38 | 205.917 | 31 | 0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 670 | 0 | 0 | 0 | 1 | 4 | 1 | 155 | 12 | 34 | 246.288 | 25 | 0 | 2 | 0 |
| 671 | 0 | 0 | 1 | 0 | 4 | 3 | 225 | 26 | 28 | 246.288 | 24 | 0 | 1 | 2 |
| 672 | 1 | 0 | 0 | 0 | 4 | 3 | 118 | 13 | 50 | 246.288 | 31 | 0 | 1 | 0 |
| 673 | 0 | 0 | 0 | 1 | 4 | 4 | 179 | 26 | 30 | 246.288 | 19 | 1 | 0 | 0 |
| 674 | 0 | 0 | 0 | 1 | 7 | 3 | 235 | 11 | 37 | 237.656 | 29 | 1 | 1 | 1 |
| 675 | 0 | 0 | 1 | 0 | 9 | 2 | 225 | 15 | 41 | 237.656 | 28 | 1 | 2 | 2 |
| 676 | 0 | 0 | 0 | 1 | 9 | 2 | 235 | 16 | 32 | 237.656 | 25 | 1 | 0 | 0 |
| 677 | 1 | 0 | 0 | 0 | 9 | 2 | 118 | 10 | 37 | 237.656 | 28 | 0 | 0 | 0 |
| 678 | 0 | 0 | 0 | 1 | 9 | 2 | 235 | 20 | 43 | 237.656 | 38 | 0 | 1 | 0 |
| 679 | 1 | 0 | 0 | 0 | 10 | 4 | 179 | 26 | 30 | 237.656 | 19 | 1 | 0 | 0 |
| 680 | 0 | 0 | 0 | 1 | 10 | 4 | 291 | 31 | 40 | 237.656 | 25 | 0 | 1 | 1 |
| 681 | 1 | 0 | 0 | 0 | 10 | 4 | 225 | 15 | 41 | 237.656 | 28 | 1 | 2 | 2 |
| 682 | 0 | 0 | 1 | 0 | 11 | 0 | 300 | 26 | 43 | 237.656 | 25 | 0 | 2 | 1 |
| 683 | 0 | 0 | 0 | 1 | 11 | 0 | 225 | 15 | 41 | 237.656 | 28 | 1 | 2 | 2 |
| 684 | 0 | 0 | 0 | 1 | 11 | 0 | 179 | 26 | 30 | 237.656 | 19 | 1 | 0 | 0 |
| 685 | 0 | 0 | 0 | 1 | 5 | 0 | 118 | 13 | 50 | 237.656 | 31 | 0 | 1 | 0 |
| 686 | 1 | 0 | 0 | 0 | 5 | 1 | 118 | 13 | 50 | 237.656 | 31 | 0 | 1 | 0 |
| 687 | 0 | 0 | 0 | 1 | 5 | 1 | 118 | 10 | 37 | 237.656 | 28 | 0 | 0 | 0 |
| 688 | 0 | 0 | 0 | 0 | 5 | 1 | 118 | 13 | 50 | 237.656 | 31 | 0 | 1 | 0 |
| 689 | 0 | 0 | 0 | 1 | 5 | 2 | 179 | 26 | 30 | 237.656 | 19 | 1 | 0 | 0 |
| 690 | 0 | 0 | 0 | 0 | 5 | 2 | 378 | 49 | 36 | 237.656 | 21 | 0 | 2 | 4 |
| 691 | 0 | 1 | 0 | 0 | 5 | 4 | 179 | 22 | 40 | 237.656 | 22 | 1 | 2 | 0 |
| 692 | 1 | 0 | 0 | 0 | 5 | 0 | 155 | 12 | 34 | 237.656 | 25 | 0 | 2 | 0 |
| 693 | 1 | 0 | 0 | 0 | 5 | 0 | 235 | 16 | 32 | 237.656 | 25 | 1 | 0 | 0 |
| 694 | 0 | 0 | 0 | 1 | 5 | 2 | 291 | 31 | 40 | 237.656 | 25 | 0 | 1 | 1 |
| 695 | 1 | 0 | 0 | 0 | 5 | 2 | 179 | 22 | 40 | 237.656 | 22 | 1 | 2 | 0 |
| 696 | 1 | 0 | 0 | 0 | 5 | 2 | 225 | 26 | 28 | 237.656 | 24 | 0 | 1 | 2 |
| 697 | 1 | 0 | 0 | 0 | 5 | 3 | 330 | 16 | 28 | 237.656 | 25 | 1 | 0 | 0 |
| 698 | 0 | 0 | 0 | 1 | 5 | 3 | 235 | 16 | 32 | 237.656 | 25 | 1 | 0 | 0 |
| 699 | 0 | 0 | 0 | 1 | 5 | 3 | 291 | 31 | 40 | 237.656 | 25 | 0 | 1 | 1 |
700 rows × 14 columns
data_with_targets.iloc[:,: -1]
| Reason_1 | Reason_2 | Reason_3 | Reason_4 | Month Value | Day of the Week | Transportation Expense | Distance to Work | Age | Daily Work Load Average | Body Mass Index | Education | Children | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 0 | 1 | 7 | 1 | 289 | 36 | 33 | 239.554 | 30 | 0 | 2 |
| 1 | 0 | 0 | 0 | 0 | 7 | 1 | 118 | 13 | 50 | 239.554 | 31 | 0 | 1 |
| 2 | 0 | 0 | 0 | 1 | 7 | 2 | 179 | 51 | 38 | 239.554 | 31 | 0 | 0 |
| 3 | 1 | 0 | 0 | 0 | 7 | 3 | 279 | 5 | 39 | 239.554 | 24 | 0 | 2 |
| 4 | 0 | 0 | 0 | 1 | 7 | 3 | 289 | 36 | 33 | 239.554 | 30 | 0 | 2 |
| 5 | 0 | 0 | 0 | 1 | 10 | 2 | 179 | 51 | 38 | 239.554 | 31 | 0 | 0 |
| 6 | 0 | 0 | 0 | 1 | 7 | 4 | 361 | 52 | 28 | 239.554 | 27 | 0 | 1 |
| 7 | 0 | 0 | 0 | 1 | 7 | 4 | 260 | 50 | 36 | 239.554 | 23 | 0 | 4 |
| 8 | 0 | 0 | 1 | 0 | 6 | 6 | 155 | 12 | 34 | 239.554 | 25 | 0 | 2 |
| 9 | 0 | 0 | 0 | 1 | 7 | 0 | 235 | 11 | 37 | 239.554 | 29 | 1 | 1 |
| 10 | 1 | 0 | 0 | 0 | 7 | 0 | 260 | 50 | 36 | 239.554 | 23 | 0 | 4 |
| 11 | 1 | 0 | 0 | 0 | 7 | 1 | 260 | 50 | 36 | 239.554 | 23 | 0 | 4 |
| 12 | 1 | 0 | 0 | 0 | 7 | 2 | 260 | 50 | 36 | 239.554 | 23 | 0 | 4 |
| 13 | 1 | 0 | 0 | 0 | 7 | 2 | 179 | 51 | 38 | 239.554 | 31 | 0 | 0 |
| 14 | 0 | 0 | 0 | 1 | 7 | 2 | 179 | 51 | 38 | 239.554 | 31 | 0 | 0 |
| 15 | 1 | 0 | 0 | 0 | 7 | 4 | 246 | 25 | 41 | 239.554 | 23 | 0 | 0 |
| 16 | 0 | 0 | 0 | 1 | 7 | 4 | 179 | 51 | 38 | 239.554 | 31 | 0 | 0 |
| 17 | 0 | 0 | 1 | 0 | 7 | 0 | 179 | 51 | 38 | 239.554 | 31 | 0 | 0 |
| 18 | 1 | 0 | 0 | 0 | 7 | 3 | 189 | 29 | 33 | 239.554 | 25 | 0 | 2 |
| 19 | 0 | 0 | 0 | 1 | 5 | 4 | 248 | 25 | 47 | 205.917 | 32 | 0 | 2 |
| 20 | 1 | 0 | 0 | 0 | 12 | 1 | 330 | 16 | 28 | 205.917 | 25 | 1 | 0 |
| 21 | 1 | 0 | 0 | 0 | 3 | 6 | 179 | 51 | 38 | 205.917 | 31 | 0 | 0 |
| 22 | 1 | 0 | 0 | 0 | 10 | 3 | 361 | 52 | 28 | 205.917 | 27 | 0 | 1 |
| 23 | 0 | 0 | 0 | 1 | 8 | 4 | 260 | 50 | 36 | 205.917 | 23 | 0 | 4 |
| 24 | 0 | 0 | 1 | 0 | 8 | 0 | 289 | 36 | 33 | 205.917 | 30 | 0 | 2 |
| 25 | 0 | 0 | 0 | 1 | 8 | 0 | 361 | 52 | 28 | 205.917 | 27 | 0 | 1 |
| 26 | 0 | 0 | 0 | 1 | 4 | 2 | 289 | 36 | 33 | 205.917 | 30 | 0 | 2 |
| 27 | 0 | 0 | 0 | 1 | 12 | 1 | 157 | 27 | 29 | 205.917 | 22 | 0 | 0 |
| 28 | 0 | 0 | 1 | 0 | 8 | 2 | 289 | 36 | 33 | 205.917 | 30 | 0 | 2 |
| 29 | 0 | 0 | 0 | 1 | 8 | 4 | 179 | 51 | 38 | 205.917 | 31 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 670 | 0 | 0 | 0 | 1 | 4 | 1 | 155 | 12 | 34 | 246.288 | 25 | 0 | 2 |
| 671 | 0 | 0 | 1 | 0 | 4 | 3 | 225 | 26 | 28 | 246.288 | 24 | 0 | 1 |
| 672 | 1 | 0 | 0 | 0 | 4 | 3 | 118 | 13 | 50 | 246.288 | 31 | 0 | 1 |
| 673 | 0 | 0 | 0 | 1 | 4 | 4 | 179 | 26 | 30 | 246.288 | 19 | 1 | 0 |
| 674 | 0 | 0 | 0 | 1 | 7 | 3 | 235 | 11 | 37 | 237.656 | 29 | 1 | 1 |
| 675 | 0 | 0 | 1 | 0 | 9 | 2 | 225 | 15 | 41 | 237.656 | 28 | 1 | 2 |
| 676 | 0 | 0 | 0 | 1 | 9 | 2 | 235 | 16 | 32 | 237.656 | 25 | 1 | 0 |
| 677 | 1 | 0 | 0 | 0 | 9 | 2 | 118 | 10 | 37 | 237.656 | 28 | 0 | 0 |
| 678 | 0 | 0 | 0 | 1 | 9 | 2 | 235 | 20 | 43 | 237.656 | 38 | 0 | 1 |
| 679 | 1 | 0 | 0 | 0 | 10 | 4 | 179 | 26 | 30 | 237.656 | 19 | 1 | 0 |
| 680 | 0 | 0 | 0 | 1 | 10 | 4 | 291 | 31 | 40 | 237.656 | 25 | 0 | 1 |
| 681 | 1 | 0 | 0 | 0 | 10 | 4 | 225 | 15 | 41 | 237.656 | 28 | 1 | 2 |
| 682 | 0 | 0 | 1 | 0 | 11 | 0 | 300 | 26 | 43 | 237.656 | 25 | 0 | 2 |
| 683 | 0 | 0 | 0 | 1 | 11 | 0 | 225 | 15 | 41 | 237.656 | 28 | 1 | 2 |
| 684 | 0 | 0 | 0 | 1 | 11 | 0 | 179 | 26 | 30 | 237.656 | 19 | 1 | 0 |
| 685 | 0 | 0 | 0 | 1 | 5 | 0 | 118 | 13 | 50 | 237.656 | 31 | 0 | 1 |
| 686 | 1 | 0 | 0 | 0 | 5 | 1 | 118 | 13 | 50 | 237.656 | 31 | 0 | 1 |
| 687 | 0 | 0 | 0 | 1 | 5 | 1 | 118 | 10 | 37 | 237.656 | 28 | 0 | 0 |
| 688 | 0 | 0 | 0 | 0 | 5 | 1 | 118 | 13 | 50 | 237.656 | 31 | 0 | 1 |
| 689 | 0 | 0 | 0 | 1 | 5 | 2 | 179 | 26 | 30 | 237.656 | 19 | 1 | 0 |
| 690 | 0 | 0 | 0 | 0 | 5 | 2 | 378 | 49 | 36 | 237.656 | 21 | 0 | 2 |
| 691 | 0 | 1 | 0 | 0 | 5 | 4 | 179 | 22 | 40 | 237.656 | 22 | 1 | 2 |
| 692 | 1 | 0 | 0 | 0 | 5 | 0 | 155 | 12 | 34 | 237.656 | 25 | 0 | 2 |
| 693 | 1 | 0 | 0 | 0 | 5 | 0 | 235 | 16 | 32 | 237.656 | 25 | 1 | 0 |
| 694 | 0 | 0 | 0 | 1 | 5 | 2 | 291 | 31 | 40 | 237.656 | 25 | 0 | 1 |
| 695 | 1 | 0 | 0 | 0 | 5 | 2 | 179 | 22 | 40 | 237.656 | 22 | 1 | 2 |
| 696 | 1 | 0 | 0 | 0 | 5 | 2 | 225 | 26 | 28 | 237.656 | 24 | 0 | 1 |
| 697 | 1 | 0 | 0 | 0 | 5 | 3 | 330 | 16 | 28 | 237.656 | 25 | 1 | 0 |
| 698 | 0 | 0 | 0 | 1 | 5 | 3 | 235 | 16 | 32 | 237.656 | 25 | 1 | 0 |
| 699 | 0 | 0 | 0 | 1 | 5 | 3 | 291 | 31 | 40 | 237.656 | 25 | 0 | 1 |
700 rows × 13 columns
unscaled_inputs = data_with_targets.iloc[:,: -1]
#from sklearn.preprocessing import StandardScaler
#absenteeism_scaler = StandardScaler()
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import StandardScaler
class CustomScaler(BaseEstimator,TransformerMixin):
def __init__(self,columns,copy=True,with_mean=True,with_std=True):
self.scaler = StandardScaler(copy,with_mean,with_std)
self.columns = columns
self.mean_ = None
self.var_ = None
def fit(self, X, y=None):
self.scaler.fit(X[self.columns], y)
self.mean_ = np.mean(X[self.columns])
self.var_ = np.var(X[self.columns])
return self
def transform(self, X, y=None, copy=None):
init_col_order = X.columns
X_scaled = pd.DataFrame(self.scaler.transform(X[self.columns]), columns=self.columns)
X_not_scaled = X.loc[:,~X.columns.isin(self.columns)]
return pd.concat([X_not_scaled, X_scaled], axis=1)[init_col_order]
unscaled_inputs.columns.values
array(['Reason_1', 'Reason_2', 'Reason_3', 'Reason_4', 'Month Value',
'Day of the Week', 'Transportation Expense', 'Distance to Work',
'Age', 'Daily Work Load Average', 'Body Mass Index', 'Education',
'Children'], dtype=object)
columns_to_omit = ['Reason_1', 'Reason_2', 'Reason_3', 'Reason_4','Education']
columns_to_scale = [x for x in unscaled_inputs.columns.values if x not in columns_to_omit]
absenteeism_scaler = CustomScaler(columns_to_scale)
absenteeism_scaler.fit(unscaled_inputs)
C:\Users\User\Anaconda3\lib\site-packages\sklearn\preprocessing\data.py:645: DataConversionWarning: Data with input dtype int64, float64 were all converted to float64 by StandardScaler. return self.partial_fit(X, y)
CustomScaler(columns=['Month Value', 'Day of the Week', 'Transportation Expense', 'Distance to Work', 'Age', 'Daily Work Load Average', 'Body Mass Index', 'Children'],
copy=None, with_mean=None, with_std=None)
scaled_inputs = absenteeism_scaler.transform(unscaled_inputs)
C:\Users\User\Anaconda3\lib\site-packages\ipykernel_launcher.py:20: DataConversionWarning: Data with input dtype int64, float64 were all converted to float64 by StandardScaler.
scaled_inputs
| Reason_1 | Reason_2 | Reason_3 | Reason_4 | Month Value | Day of the Week | Transportation Expense | Distance to Work | Age | Daily Work Load Average | Body Mass Index | Education | Children | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 0 | 1 | 0.030796 | -0.800950 | 1.005844 | 0.412816 | -0.536062 | -0.806331 | 0.767431 | 0 | 0.880469 |
| 1 | 0 | 0 | 0 | 0 | 0.030796 | -0.800950 | -1.574681 | -1.141882 | 2.130803 | -0.806331 | 1.002633 | 0 | -0.019280 |
| 2 | 0 | 0 | 0 | 1 | 0.030796 | -0.232900 | -0.654143 | 1.426749 | 0.248310 | -0.806331 | 1.002633 | 0 | -0.919030 |
| 3 | 1 | 0 | 0 | 0 | 0.030796 | 0.335149 | 0.854936 | -1.682647 | 0.405184 | -0.806331 | -0.643782 | 0 | 0.880469 |
| 4 | 0 | 0 | 0 | 1 | 0.030796 | 0.335149 | 1.005844 | 0.412816 | -0.536062 | -0.806331 | 0.767431 | 0 | 0.880469 |
| 5 | 0 | 0 | 0 | 1 | 0.929019 | -0.232900 | -0.654143 | 1.426749 | 0.248310 | -0.806331 | 1.002633 | 0 | -0.919030 |
| 6 | 0 | 0 | 0 | 1 | 0.030796 | 0.903199 | 2.092381 | 1.494345 | -1.320435 | -0.806331 | 0.061825 | 0 | -0.019280 |
| 7 | 0 | 0 | 0 | 1 | 0.030796 | 0.903199 | 0.568211 | 1.359154 | -0.065439 | -0.806331 | -0.878984 | 0 | 2.679969 |
| 8 | 0 | 0 | 1 | 0 | -0.268611 | 2.039298 | -1.016322 | -1.209478 | -0.379188 | -0.806331 | -0.408580 | 0 | 0.880469 |
| 9 | 0 | 0 | 0 | 1 | 0.030796 | -1.368999 | 0.190942 | -1.277074 | 0.091435 | -0.806331 | 0.532229 | 1 | -0.019280 |
| 10 | 1 | 0 | 0 | 0 | 0.030796 | -1.368999 | 0.568211 | 1.359154 | -0.065439 | -0.806331 | -0.878984 | 0 | 2.679969 |
| 11 | 1 | 0 | 0 | 0 | 0.030796 | -0.800950 | 0.568211 | 1.359154 | -0.065439 | -0.806331 | -0.878984 | 0 | 2.679969 |
| 12 | 1 | 0 | 0 | 0 | 0.030796 | -0.232900 | 0.568211 | 1.359154 | -0.065439 | -0.806331 | -0.878984 | 0 | 2.679969 |
| 13 | 1 | 0 | 0 | 0 | 0.030796 | -0.232900 | -0.654143 | 1.426749 | 0.248310 | -0.806331 | 1.002633 | 0 | -0.919030 |
| 14 | 0 | 0 | 0 | 1 | 0.030796 | -0.232900 | -0.654143 | 1.426749 | 0.248310 | -0.806331 | 1.002633 | 0 | -0.919030 |
| 15 | 1 | 0 | 0 | 0 | 0.030796 | 0.903199 | 0.356940 | -0.330735 | 0.718933 | -0.806331 | -0.878984 | 0 | -0.919030 |
| 16 | 0 | 0 | 0 | 1 | 0.030796 | 0.903199 | -0.654143 | 1.426749 | 0.248310 | -0.806331 | 1.002633 | 0 | -0.919030 |
| 17 | 0 | 0 | 1 | 0 | 0.030796 | -1.368999 | -0.654143 | 1.426749 | 0.248310 | -0.806331 | 1.002633 | 0 | -0.919030 |
| 18 | 1 | 0 | 0 | 0 | 0.030796 | 0.335149 | -0.503235 | -0.060353 | -0.536062 | -0.806331 | -0.408580 | 0 | 0.880469 |
| 19 | 0 | 0 | 0 | 1 | -0.568019 | 0.903199 | 0.387122 | -0.330735 | 1.660180 | -1.647399 | 1.237836 | 0 | 0.880469 |
| 20 | 1 | 0 | 0 | 0 | 1.527833 | -0.800950 | 1.624567 | -0.939096 | -1.320435 | -1.647399 | -0.408580 | 1 | -0.919030 |
| 21 | 1 | 0 | 0 | 0 | -1.166834 | 2.039298 | -0.654143 | 1.426749 | 0.248310 | -1.647399 | 1.002633 | 0 | -0.919030 |
| 22 | 1 | 0 | 0 | 0 | 0.929019 | 0.335149 | 2.092381 | 1.494345 | -1.320435 | -1.647399 | 0.061825 | 0 | -0.019280 |
| 23 | 0 | 0 | 0 | 1 | 0.330204 | 0.903199 | 0.568211 | 1.359154 | -0.065439 | -1.647399 | -0.878984 | 0 | 2.679969 |
| 24 | 0 | 0 | 1 | 0 | 0.330204 | -1.368999 | 1.005844 | 0.412816 | -0.536062 | -1.647399 | 0.767431 | 0 | 0.880469 |
| 25 | 0 | 0 | 0 | 1 | 0.330204 | -1.368999 | 2.092381 | 1.494345 | -1.320435 | -1.647399 | 0.061825 | 0 | -0.019280 |
| 26 | 0 | 0 | 0 | 1 | -0.867426 | -0.232900 | 1.005844 | 0.412816 | -0.536062 | -1.647399 | 0.767431 | 0 | 0.880469 |
| 27 | 0 | 0 | 0 | 1 | 1.527833 | -0.800950 | -0.986140 | -0.195544 | -1.163560 | -1.647399 | -1.114186 | 0 | -0.919030 |
| 28 | 0 | 0 | 1 | 0 | 0.330204 | -0.232900 | 1.005844 | 0.412816 | -0.536062 | -1.647399 | 0.767431 | 0 | 0.880469 |
| 29 | 0 | 0 | 0 | 1 | 0.330204 | 0.903199 | -0.654143 | 1.426749 | 0.248310 | -1.647399 | 1.002633 | 0 | -0.919030 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 670 | 0 | 0 | 0 | 1 | -0.867426 | -0.800950 | -1.016322 | -1.209478 | -0.379188 | -0.637953 | -0.408580 | 0 | 0.880469 |
| 671 | 0 | 0 | 1 | 0 | -0.867426 | 0.335149 | 0.040034 | -0.263140 | -1.320435 | -0.637953 | -0.643782 | 0 | -0.019280 |
| 672 | 1 | 0 | 0 | 0 | -0.867426 | 0.335149 | -1.574681 | -1.141882 | 2.130803 | -0.637953 | 1.002633 | 0 | -0.019280 |
| 673 | 0 | 0 | 0 | 1 | -0.867426 | 0.903199 | -0.654143 | -0.263140 | -1.006686 | -0.637953 | -1.819793 | 1 | -0.919030 |
| 674 | 0 | 0 | 0 | 1 | 0.030796 | 0.335149 | 0.190942 | -1.277074 | 0.091435 | -0.853789 | 0.532229 | 1 | -0.019280 |
| 675 | 0 | 0 | 1 | 0 | 0.629611 | -0.232900 | 0.040034 | -1.006691 | 0.718933 | -0.853789 | 0.297027 | 1 | 0.880469 |
| 676 | 0 | 0 | 0 | 1 | 0.629611 | -0.232900 | 0.190942 | -0.939096 | -0.692937 | -0.853789 | -0.408580 | 1 | -0.919030 |
| 677 | 1 | 0 | 0 | 0 | 0.629611 | -0.232900 | -1.574681 | -1.344669 | 0.091435 | -0.853789 | 0.297027 | 0 | -0.919030 |
| 678 | 0 | 0 | 0 | 1 | 0.629611 | -0.232900 | 0.190942 | -0.668713 | 1.032682 | -0.853789 | 2.649049 | 0 | -0.019280 |
| 679 | 1 | 0 | 0 | 0 | 0.929019 | 0.903199 | -0.654143 | -0.263140 | -1.006686 | -0.853789 | -1.819793 | 1 | -0.919030 |
| 680 | 0 | 0 | 0 | 1 | 0.929019 | 0.903199 | 1.036026 | 0.074838 | 0.562059 | -0.853789 | -0.408580 | 0 | -0.019280 |
| 681 | 1 | 0 | 0 | 0 | 0.929019 | 0.903199 | 0.040034 | -1.006691 | 0.718933 | -0.853789 | 0.297027 | 1 | 0.880469 |
| 682 | 0 | 0 | 1 | 0 | 1.228426 | -1.368999 | 1.171843 | -0.263140 | 1.032682 | -0.853789 | -0.408580 | 0 | 0.880469 |
| 683 | 0 | 0 | 0 | 1 | 1.228426 | -1.368999 | 0.040034 | -1.006691 | 0.718933 | -0.853789 | 0.297027 | 1 | 0.880469 |
| 684 | 0 | 0 | 0 | 1 | 1.228426 | -1.368999 | -0.654143 | -0.263140 | -1.006686 | -0.853789 | -1.819793 | 1 | -0.919030 |
| 685 | 0 | 0 | 0 | 1 | -0.568019 | -1.368999 | -1.574681 | -1.141882 | 2.130803 | -0.853789 | 1.002633 | 0 | -0.019280 |
| 686 | 1 | 0 | 0 | 0 | -0.568019 | -0.800950 | -1.574681 | -1.141882 | 2.130803 | -0.853789 | 1.002633 | 0 | -0.019280 |
| 687 | 0 | 0 | 0 | 1 | -0.568019 | -0.800950 | -1.574681 | -1.344669 | 0.091435 | -0.853789 | 0.297027 | 0 | -0.919030 |
| 688 | 0 | 0 | 0 | 0 | -0.568019 | -0.800950 | -1.574681 | -1.141882 | 2.130803 | -0.853789 | 1.002633 | 0 | -0.019280 |
| 689 | 0 | 0 | 0 | 1 | -0.568019 | -0.232900 | -0.654143 | -0.263140 | -1.006686 | -0.853789 | -1.819793 | 1 | -0.919030 |
| 690 | 0 | 0 | 0 | 0 | -0.568019 | -0.232900 | 2.348925 | 1.291558 | -0.065439 | -0.853789 | -1.349389 | 0 | 0.880469 |
| 691 | 0 | 1 | 0 | 0 | -0.568019 | 0.903199 | -0.654143 | -0.533522 | 0.562059 | -0.853789 | -1.114186 | 1 | 0.880469 |
| 692 | 1 | 0 | 0 | 0 | -0.568019 | -1.368999 | -1.016322 | -1.209478 | -0.379188 | -0.853789 | -0.408580 | 0 | 0.880469 |
| 693 | 1 | 0 | 0 | 0 | -0.568019 | -1.368999 | 0.190942 | -0.939096 | -0.692937 | -0.853789 | -0.408580 | 1 | -0.919030 |
| 694 | 0 | 0 | 0 | 1 | -0.568019 | -0.232900 | 1.036026 | 0.074838 | 0.562059 | -0.853789 | -0.408580 | 0 | -0.019280 |
| 695 | 1 | 0 | 0 | 0 | -0.568019 | -0.232900 | -0.654143 | -0.533522 | 0.562059 | -0.853789 | -1.114186 | 1 | 0.880469 |
| 696 | 1 | 0 | 0 | 0 | -0.568019 | -0.232900 | 0.040034 | -0.263140 | -1.320435 | -0.853789 | -0.643782 | 0 | -0.019280 |
| 697 | 1 | 0 | 0 | 0 | -0.568019 | 0.335149 | 1.624567 | -0.939096 | -1.320435 | -0.853789 | -0.408580 | 1 | -0.919030 |
| 698 | 0 | 0 | 0 | 1 | -0.568019 | 0.335149 | 0.190942 | -0.939096 | -0.692937 | -0.853789 | -0.408580 | 1 | -0.919030 |
| 699 | 0 | 0 | 0 | 1 | -0.568019 | 0.335149 | 1.036026 | 0.074838 | 0.562059 | -0.853789 | -0.408580 | 0 | -0.019280 |
700 rows × 13 columns
scaled_inputs.shape
(700, 13)
# Split the data into train and test
from sklearn.model_selection import train_test_split
train_test_split(scaled_inputs, targets)
[ Reason_1 Reason_2 Reason_3 Reason_4 Month Value Day of the Week \
270 1 0 0 0 -0.568019 -1.368999
555 1 0 0 0 -0.568019 0.903199
106 0 0 0 1 0.929019 -1.368999
388 0 0 0 1 -1.466241 -0.232900
1 0 0 0 0 0.030796 -0.800950
415 0 0 0 1 0.929019 -0.232900
670 0 0 0 1 -0.867426 -0.800950
611 1 0 0 0 -1.466241 -1.368999
141 0 0 0 1 1.228426 -0.232900
416 0 0 0 1 0.929019 -0.232900
500 0 0 0 1 0.330204 -0.232900
184 0 0 0 1 -0.268611 1.471248
253 0 0 1 0 1.228426 -0.800950
525 1 0 0 0 0.929019 0.903199
577 0 0 1 0 1.527833 1.471248
665 0 0 0 1 -0.867426 0.903199
526 1 0 0 0 0.929019 0.903199
619 0 0 0 1 -0.568019 0.335149
605 0 0 0 1 -1.466241 -0.232900
173 1 0 0 0 -1.166834 -0.800950
151 0 0 1 0 -1.466241 -1.368999
87 1 0 0 0 1.228426 -1.368999
343 0 0 0 1 -0.268611 2.039298
276 0 0 0 0 0.629611 -0.800950
167 1 0 0 0 -1.166834 -0.800950
117 0 0 0 1 -0.268611 -0.232900
366 0 0 0 1 -1.765648 -1.368999
204 1 0 0 0 -0.867426 -0.800950
214 0 0 0 0 -0.568019 -0.232900
53 0 0 0 1 0.629611 -0.800950
.. ... ... ... ... ... ...
321 0 0 0 1 1.228426 -1.368999
257 1 0 0 0 0.929019 1.471248
672 1 0 0 0 -0.867426 0.335149
509 0 0 0 1 -0.268611 1.471248
493 0 0 0 1 0.330204 0.335149
626 0 0 0 1 0.330204 0.903199
693 1 0 0 0 -0.568019 -1.368999
441 0 0 0 1 -0.568019 1.471248
467 0 0 0 1 0.030796 -0.800950
10 1 0 0 0 0.030796 -1.368999
194 1 0 0 0 0.330204 0.335149
681 1 0 0 0 0.929019 0.903199
678 0 0 0 1 0.629611 -0.232900
110 0 0 0 1 1.228426 0.335149
223 0 0 0 1 -0.268611 0.335149
455 1 0 0 0 -0.268611 -0.232900
142 0 0 0 1 1.527833 0.903199
674 0 0 0 1 0.030796 0.335149
314 1 0 0 0 0.929019 0.903199
40 0 0 0 1 -1.765648 0.903199
23 0 0 0 1 0.330204 0.903199
92 1 0 0 0 1.228426 0.903199
682 0 0 1 0 1.228426 -1.368999
22 1 0 0 0 0.929019 0.335149
68 0 0 0 1 -0.268611 -0.232900
20 1 0 0 0 1.527833 -0.800950
538 1 0 0 0 1.228426 0.903199
120 0 0 0 1 0.330204 -1.368999
176 1 0 0 0 -1.166834 -1.368999
466 0 0 0 1 0.030796 -1.368999
Transportation Expense Distance to Work Age \
270 -0.654143 1.426749 0.248310
555 -0.654143 1.426749 0.248310
106 0.040034 -0.263140 -1.320435
388 -0.654143 1.426749 0.248310
1 -1.574681 -1.141882 2.130803
415 2.213108 -0.871500 -0.849811
670 -1.016322 -1.209478 -0.379188
611 0.040034 -0.263140 -1.320435
141 -0.503235 -0.060353 -0.536062
416 0.387122 -0.330735 1.660180
500 -0.654143 -0.263140 -1.006686
184 1.036026 0.074838 0.562059
253 -0.986140 -0.195544 -1.163560
525 2.213108 -0.871500 -0.849811
577 -0.654143 -0.533522 0.562059
665 0.190942 -1.277074 0.091435
526 0.040034 -0.263140 -1.320435
619 0.387122 -0.330735 1.660180
605 -0.654143 1.426749 0.248310
173 -0.654143 1.426749 0.248310
151 1.624567 -0.939096 -1.320435
87 1.036026 0.074838 0.562059
343 -1.574681 -1.141882 2.130803
276 0.130578 0.345220 0.405184
167 -1.016322 -1.209478 -0.379188
117 0.040034 -0.263140 -1.320435
366 -1.574681 -1.141882 2.130803
204 1.005844 0.412816 -0.536062
214 1.624567 -0.939096 -1.320435
53 -1.574681 -1.344669 0.091435
.. ... ... ...
321 1.036026 0.074838 0.562059
257 1.171843 -0.263140 1.032682
672 -1.574681 -1.141882 2.130803
509 0.356940 -0.330735 0.718933
493 1.036026 0.074838 0.562059
626 0.040034 -0.263140 -1.320435
693 0.190942 -0.939096 -0.692937
441 -1.574681 -1.344669 0.091435
467 -1.574681 -1.344669 0.091435
10 0.568211 1.359154 -0.065439
194 0.356940 -0.330735 0.718933
681 0.040034 -1.006691 0.718933
678 0.190942 -0.668713 1.032682
110 -1.574681 -1.344669 0.091435
223 1.036026 0.074838 0.562059
455 -0.654143 1.426749 0.248310
142 0.568211 1.359154 -0.065439
674 0.190942 -1.277074 0.091435
314 -0.654143 -0.263140 -1.006686
40 -0.578689 0.818389 -1.477309
23 0.568211 1.359154 -0.065439
92 0.040034 -0.263140 -1.320435
682 1.171843 -0.263140 1.032682
22 2.092381 1.494345 -1.320435
68 -1.574681 -1.344669 0.091435
20 1.624567 -0.939096 -1.320435
538 0.190942 -0.939096 -0.692937
120 0.040034 -0.263140 -1.320435
176 -0.654143 -0.263140 -1.006686
466 0.568211 1.359154 -0.065439
Daily Work Load Average Body Mass Index Education Children
270 0.560476 1.002633 0 -0.919030
555 0.218718 1.002633 0 -0.919030
106 -0.262439 -0.643782 0 -0.019280
388 -0.499679 1.002633 0 -0.919030
1 -0.806331 1.002633 0 -0.019280
415 -0.809957 -0.408580 0 1.780219
670 -0.637953 -0.408580 0 0.880469
611 -0.188851 -0.643782 0 -0.019280
141 0.769711 -0.408580 0 0.880469
416 -0.809957 1.237836 0 0.880469
500 -0.251187 -1.819793 1 -0.919030
184 1.366488 -0.408580 0 -0.019280
253 -0.154696 -1.114186 0 -0.919030
525 0.326336 -0.408580 0 1.780219
577 1.043433 -1.114186 1 0.880469
665 -0.637953 0.532229 1 -0.019280
526 0.326336 -0.643782 0 -0.019280
619 -1.240355 1.237836 0 0.880469
605 -0.188851 1.002633 0 -0.919030
173 1.786584 1.002633 0 -0.919030
151 0.769711 -0.408580 1 -0.919030
87 0.863727 -0.408580 0 -0.019280
343 -0.879469 1.002633 0 -0.019280
276 0.560476 1.943442 0 0.880469
167 1.786584 -0.408580 0 0.880469
117 0.919937 -0.643782 0 -0.019280
366 1.456728 1.002633 0 -0.019280
204 2.677510 0.767431 0 0.880469
214 2.677510 -0.408580 1 -0.919030
53 -0.758273 0.297027 0 -0.919030
.. ... ... ... ...
321 0.305783 -0.408580 0 -0.019280
257 -0.154696 -0.408580 0 0.880469
672 -0.637953 1.002633 0 -0.019280
509 0.326336 -0.878984 0 -0.919030
493 -0.550213 -0.408580 0 -0.019280
626 -1.240355 -0.643782 0 -0.019280
693 -0.853789 -0.408580 1 -0.919030
441 -0.446195 0.297027 0 -0.919030
467 -1.037971 0.297027 0 -0.919030
10 -0.806331 -0.878984 0 2.679969
194 1.366488 -0.878984 0 -0.919030
681 -0.853789 0.297027 1 0.880469
678 -0.853789 2.649049 0 -0.019280
110 -0.262439 0.297027 0 -0.919030
223 2.644155 -0.408580 0 -0.019280
455 -0.446195 1.002633 0 -0.919030
142 0.769711 -0.878984 0 2.679969
674 -0.853789 0.532229 1 -0.019280
314 -0.169648 -1.819793 1 -0.919030
40 -0.758273 -1.349389 0 -0.919030
23 -1.647399 -0.878984 0 2.679969
92 0.863727 -0.643782 0 -0.019280
682 -0.853789 -0.408580 0 0.880469
22 -1.647399 0.061825 0 -0.019280
68 -0.458497 0.297027 0 -0.919030
20 -1.647399 -0.408580 1 -0.919030
538 -0.082083 -0.408580 1 -0.919030
120 0.919937 -0.643782 0 -0.019280
176 1.786584 -1.819793 1 -0.919030
466 -1.037971 -0.878984 0 2.679969
[525 rows x 13 columns],
Reason_1 Reason_2 Reason_3 Reason_4 Month Value Day of the Week \
636 0 0 0 1 -1.166834 -1.368999
574 1 0 0 0 0.330204 -0.232900
226 0 0 0 1 -0.268611 -1.368999
621 0 0 0 1 -0.268611 2.039298
50 0 0 0 0 0.629611 -1.368999
306 0 0 0 1 0.929019 0.335149
675 0 0 1 0 0.629611 -0.232900
285 0 0 0 0 0.629611 0.335149
426 0 0 0 1 -1.166834 2.039298
292 0 0 0 1 -0.268611 0.903199
399 0 0 0 1 -1.166834 -0.232900
60 0 0 0 1 0.629611 0.903199
518 0 0 0 1 0.929019 -0.800950
694 0 0 0 1 -0.568019 -0.232900
400 0 0 0 0 -1.166834 -0.232900
284 0 0 0 1 0.629611 0.335149
476 0 0 0 1 0.030796 -1.368999
36 0 0 0 1 -0.867426 -0.232900
503 0 0 0 1 0.629611 -0.232900
697 1 0 0 0 -0.568019 0.335149
614 0 0 0 1 -1.466241 -0.800950
635 1 0 0 0 -1.166834 -1.368999
405 0 0 0 0 -1.166834 0.335149
46 0 0 0 1 0.629611 -1.368999
353 1 0 0 0 1.527833 -0.800950
188 0 0 0 1 -0.867426 0.335149
648 1 0 0 0 -1.166834 -0.232900
55 0 0 0 0 0.629611 -0.800950
172 1 0 0 0 -1.166834 -1.368999
581 0 0 0 1 -1.765648 -0.232900
.. ... ... ... ... ... ...
679 1 0 0 0 0.929019 0.903199
34 0 0 0 1 0.330204 -1.368999
558 1 0 0 0 0.330204 1.471248
339 1 0 0 0 -0.568019 0.335149
189 0 0 0 1 -0.867426 0.903199
180 1 0 0 0 -1.166834 0.335149
472 0 0 0 1 0.030796 -0.800950
78 0 0 0 1 0.929019 0.903199
690 0 0 0 0 -0.568019 -0.232900
404 1 0 0 0 -1.166834 -0.232900
661 0 1 0 0 0.929019 0.335149
218 1 0 0 0 1.228426 1.471248
662 0 0 0 1 0.929019 0.335149
453 0 0 0 1 -0.268611 -0.232900
174 0 0 0 1 -1.166834 -0.232900
475 0 0 0 1 0.030796 -1.368999
287 1 0 0 0 0.629611 -1.368999
657 0 0 1 0 -0.867426 -0.232900
643 0 0 0 1 -1.166834 0.335149
185 0 0 0 1 -0.867426 -0.232900
385 0 0 0 1 -1.466241 -0.232900
208 0 0 1 0 -1.166834 1.471248
531 1 0 0 0 0.929019 -0.800950
434 0 0 1 0 -0.568019 -0.232900
638 0 0 0 1 -1.166834 -0.800950
66 0 0 0 1 0.929019 0.903199
495 0 0 0 1 -0.568019 -0.800950
637 1 0 0 0 -1.166834 -1.368999
465 0 0 0 1 0.030796 0.903199
418 0 0 0 1 1.527833 -1.368999
Transportation Expense Distance to Work Age \
636 -0.654143 1.426749 0.248310
574 -0.654143 -0.263140 -1.006686
226 -1.016322 -1.209478 -0.379188
621 0.387122 -0.330735 1.660180
50 0.568211 1.359154 -0.065439
306 -1.574681 -1.141882 2.130803
675 0.040034 -1.006691 0.718933
285 0.190942 -0.668713 1.032682
426 0.387122 -0.330735 1.660180
292 1.005844 0.412816 -0.536062
399 2.092381 1.494345 -1.320435
60 -0.654143 1.426749 0.248310
518 0.040034 -0.263140 -1.320435
694 1.036026 0.074838 0.562059
400 2.213108 -0.871500 -0.849811
284 -1.574681 -1.141882 2.130803
476 0.190942 -0.668713 1.032682
36 1.005844 0.412816 -0.536062
503 0.040034 -0.263140 -1.320435
697 1.624567 -0.939096 -1.320435
614 0.040034 -0.263140 -1.320435
635 0.387122 -0.330735 1.660180
405 0.190942 -1.277074 0.091435
46 -0.654143 1.426749 0.248310
353 0.190942 -1.277074 0.091435
188 -1.016322 -1.209478 -0.379188
648 -1.016322 -1.209478 -0.379188
55 -1.574681 -1.141882 2.130803
172 0.854936 -1.682647 0.405184
581 -0.654143 1.426749 0.248310
.. ... ... ...
679 -0.654143 -0.263140 -1.006686
34 -0.654143 1.426749 0.248310
558 -0.654143 -0.263140 -1.006686
339 0.040034 -0.263140 -1.320435
189 0.040034 -0.263140 -1.320435
180 0.854936 -1.682647 0.405184
472 -0.654143 1.426749 0.248310
78 2.092381 1.494345 -1.320435
690 2.348925 1.291558 -0.065439
404 -1.574681 -1.141882 2.130803
661 -0.654143 -0.533522 0.562059
218 -1.574681 -1.141882 2.130803
662 -1.574681 -1.141882 2.130803
453 0.040034 -0.263140 -1.320435
174 0.040034 -0.263140 -1.320435
475 1.005844 0.412816 -0.536062
287 1.036026 0.074838 0.562059
657 0.387122 -0.330735 1.660180
643 -0.654143 1.426749 0.248310
185 0.040034 -0.263140 -1.320435
385 -0.654143 1.426749 0.248310
208 0.040034 -0.263140 -1.320435
531 0.040034 -0.263140 -1.320435
434 0.085306 -1.074287 3.385799
638 -0.654143 1.426749 0.248310
66 -0.654143 1.426749 0.248310
495 0.356940 -0.330735 0.718933
637 0.040034 -0.263140 -1.320435
465 -1.574681 -1.344669 0.091435
418 -0.654143 1.426749 0.248310
Daily Work Load Average Body Mass Index Education Children
636 -1.240355 1.002633 0 -0.919030
574 1.043433 -1.819793 1 -0.919030
226 2.644155 -0.408580 0 0.880469
621 -1.240355 1.237836 0 0.880469
50 -0.758273 -0.878984 0 2.679969
306 -0.169648 1.002633 0 -0.019280
675 -0.853789 0.297027 1 0.880469
285 0.560476 2.649049 0 -0.019280
426 -0.643304 1.237836 0 0.880469
292 -0.169648 0.767431 0 0.880469
399 -0.685486 0.061825 0 -0.019280
60 -0.758273 1.002633 0 -0.919030
518 0.326336 -0.643782 0 -0.019280
694 -0.853789 -0.408580 0 -0.019280
400 -0.685486 -0.408580 0 1.780219
284 0.560476 1.002633 0 -0.019280
476 -1.037971 2.649049 0 -0.019280
36 -1.647399 0.767431 0 0.880469
503 -0.251187 -0.643782 0 -0.019280
697 -0.853789 -0.408580 1 -0.919030
614 -0.188851 -0.643782 0 -0.019280
635 -1.240355 1.237836 0 0.880469
405 -0.685486 0.532229 1 -0.019280
46 -0.758273 1.002633 0 -0.919030
353 -0.879469 0.532229 1 -0.019280
188 1.366488 -0.408580 0 0.880469
648 -1.240355 -0.408580 0 0.880469
55 -0.758273 1.002633 0 -0.019280
172 1.786584 -0.643782 0 0.880469
581 1.043433 1.002633 0 -0.919030
.. ... ... ... ...
679 -0.853789 -1.819793 1 -0.919030
34 -1.647399 1.002633 0 -0.919030
558 0.218718 -1.819793 1 -0.919030
339 -0.879469 -0.643782 0 -0.019280
189 1.366488 -0.643782 0 -0.019280
180 1.786584 -0.643782 0 0.880469
472 -1.037971 1.002633 0 -0.919030
78 -0.458497 0.061825 0 -0.019280
690 -0.853789 -1.349389 0 0.880469
404 -0.685486 1.002633 0 -0.019280
661 -0.637953 -1.114186 1 0.880469
218 2.677510 1.002633 0 -0.019280
662 -0.637953 1.002633 0 -0.019280
453 -0.446195 -0.643782 0 -0.019280
174 1.786584 -0.643782 0 -0.019280
475 -1.037971 0.767431 0 0.880469
287 0.560476 -0.408580 0 -0.019280
657 -0.637953 1.237836 0 0.880469
643 -1.240355 1.002633 0 -0.919030
185 1.366488 -0.643782 0 -0.019280
385 -0.499679 1.002633 0 -0.919030
208 2.677510 -0.643782 0 -0.019280
531 0.326336 -0.643782 0 -0.019280
434 -0.643304 -1.114186 0 0.880469
638 -1.240355 1.002633 0 -0.919030
66 -0.458497 1.002633 0 -0.919030
495 -0.251187 -0.878984 0 -0.919030
637 -1.240355 -0.643782 0 -0.019280
465 -1.037971 0.297027 0 -0.919030
418 -0.809957 1.002633 0 -0.919030
[175 rows x 13 columns],
array([1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1,
0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1,
1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1,
0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1,
0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1,
1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0,
1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0,
0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0,
1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0,
0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0,
0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1,
1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1,
0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0,
0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1,
1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1,
0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1,
0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0,
1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1,
0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0,
0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0,
1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1,
1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1,
1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1]),
array([0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0,
0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0,
1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1,
0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1,
0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1,
1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1,
0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1])]
X_train, X_test, y_train, y_test=train_test_split(scaled_inputs, targets, train_size = 0.8, random_state = 20)
C:\Users\User\Anaconda3\lib\site-packages\sklearn\model_selection\_split.py:2179: FutureWarning: From version 0.21, test_size will always complement train_size unless both are specified. FutureWarning)
print (X_train.shape, y_train.shape)
(560, 13) (560,)
print (X_test.shape, y_test.shape)
(140, 13) (140,)
#Logistic regression with sklearn
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
reg = LogisticRegression()
reg.fit(X_train, y_train)
C:\Users\User\Anaconda3\lib\site-packages\sklearn\linear_model\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning. FutureWarning)
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
intercept_scaling=1, max_iter=100, multi_class='warn',
n_jobs=None, penalty='l2', random_state=None, solver='warn',
tol=0.0001, verbose=0, warm_start=False)
reg.score(X_train, y_train)
0.775
#Manual Check
model_outputs = reg.predict(X_train)
model_outputs
array([0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0,
0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1,
1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0,
0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0,
0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0,
1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1,
1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1,
1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0,
0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0,
0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0,
1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1,
0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1,
0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0,
0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0,
0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0,
1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0,
0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0,
0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1,
0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0,
0, 1, 0, 1, 1, 1, 0, 0, 1, 0])
y_train
array([0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0,
1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1,
1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0,
0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1,
1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0,
0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1,
0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1,
1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0,
1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0,
0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0,
1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0,
0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1,
1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1,
0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0,
1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0,
0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1,
0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,
1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0,
1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1,
0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0,
0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0,
0, 0, 0, 1, 1, 1, 1, 0, 1, 0])
model_outputs == y_train
array([ True, True, False, True, True, True, True, True, True,
True, False, True, False, False, True, True, True, True,
False, True, False, True, False, False, True, True, True,
False, True, True, True, True, True, True, True, True,
False, False, True, False, True, False, True, True, True,
True, True, True, True, True, False, True, True, True,
True, True, True, True, True, False, True, True, True,
True, True, True, True, False, True, False, True, True,
True, True, True, False, True, True, True, True, True,
False, True, False, True, True, False, False, False, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, False, True, True, True, True,
True, True, True, True, True, False, True, True, True,
True, True, True, True, True, False, True, True, True,
True, False, True, True, True, True, True, False, False,
True, False, True, False, True, True, True, True, False,
False, False, True, True, False, False, False, True, True,
True, False, True, False, True, False, True, False, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, False, True, True, True,
True, False, True, True, True, True, True, True, True,
True, True, True, True, True, False, False, True, False,
False, True, True, True, True, True, True, True, False,
True, False, True, False, True, True, True, True, False,
True, False, False, True, True, True, True, True, False,
False, False, True, False, True, True, True, True, True,
True, True, True, True, True, True, False, True, True,
True, True, True, True, True, True, True, True, True,
True, False, False, True, True, True, True, True, True,
False, True, True, True, True, True, True, False, False,
False, True, True, True, True, False, True, False, True,
True, True, True, True, True, True, False, True, False,
False, True, True, True, True, True, False, True, True,
True, True, False, False, True, False, True, True, True,
True, True, False, True, True, False, True, True, False,
False, False, True, True, True, True, False, True, False,
True, True, True, False, False, True, True, True, False,
True, False, True, True, True, False, True, True, True,
True, True, True, True, True, True, True, True, True,
False, True, True, False, True, False, True, True, True,
True, True, True, True, True, True, True, False, True,
True, True, True, False, True, True, True, False, True,
True, True, True, True, True, True, True, False, True,
True, True, True, True, True, False, True, True, True,
True, True, True, False, True, True, True, True, True,
True, True, True, False, True, False, True, True, True,
True, True, True, False, True, True, False, True, False,
True, True, True, True, True, False, False, True, True,
True, True, True, True, True, True, True, False, True,
False, True, True, True, False, False, True, True, True,
True, False, True, True, True, True, True, True, True,
True, True, False, True, True, False, False, True, True,
False, True, True, True, True, True, True, False, True,
True, True, False, True, True, True, True, True, True,
True, True, True, True, False, True, True, True, True,
True, False, True, True, False, False, True, True, True,
False, True, True, True, True, True, False, True, True,
False, False, False, True, True, False, True, True, True,
False, True, True, True, True, True, True, True, True,
True, True, True, False, True, True, True, True, True,
True, True, False, True, True, True, True, False, True,
True, True])
np.sum(model_outputs == y_train)
434
model_outputs.shape[0]
560
np.sum(model_outputs == y_train) / model_outputs.shape[0]
0.775
#Finding intercept and coefficients
reg.intercept_
array([-1.44858081])
reg.coef_
array([[ 2.63259611, 0.86935993, 2.81289745, 0.65092627, 0.00723232,
-0.07199168, 0.49781562, -0.03874266, -0.12321956, -0.02108164,
0.26813826, -0.23006247, 0.37360592]])
unscaled_inputs.columns.values
array(['Reason_1', 'Reason_2', 'Reason_3', 'Reason_4', 'Month Value',
'Day of the Week', 'Transportation Expense', 'Distance to Work',
'Age', 'Daily Work Load Average', 'Body Mass Index', 'Education',
'Children'], dtype=object)
feature_name = unscaled_inputs.columns.values
summary_table = pd.DataFrame (columns=['Feature name'], data = feature_name)
summary_table['Coefficient'] = np.transpose(reg.coef_)
summary_table
| Feature name | Coefficient | |
|---|---|---|
| 0 | Reason_1 | 2.632596 |
| 1 | Reason_2 | 0.869360 |
| 2 | Reason_3 | 2.812897 |
| 3 | Reason_4 | 0.650926 |
| 4 | Month Value | 0.007232 |
| 5 | Day of the Week | -0.071992 |
| 6 | Transportation Expense | 0.497816 |
| 7 | Distance to Work | -0.038743 |
| 8 | Age | -0.123220 |
| 9 | Daily Work Load Average | -0.021082 |
| 10 | Body Mass Index | 0.268138 |
| 11 | Education | -0.230062 |
| 12 | Children | 0.373606 |
summary_table.index = summary_table.index + 1
summary_table.loc[0] = ['Intercept', reg.intercept_[0]]
summay_table = summary_table.sort_index()
summary_table
| Feature name | Coefficient | |
|---|---|---|
| 1 | Reason_1 | 2.632596 |
| 2 | Reason_2 | 0.869360 |
| 3 | Reason_3 | 2.812897 |
| 4 | Reason_4 | 0.650926 |
| 5 | Month Value | 0.007232 |
| 6 | Day of the Week | -0.071992 |
| 7 | Transportation Expense | 0.497816 |
| 8 | Distance to Work | -0.038743 |
| 9 | Age | -0.123220 |
| 10 | Daily Work Load Average | -0.021082 |
| 11 | Body Mass Index | 0.268138 |
| 12 | Education | -0.230062 |
| 13 | Children | 0.373606 |
| 0 | Intercept | -1.448581 |
summary_table['Odd ratio'] = np.exp(summary_table.Coefficient)
summary_table
| Feature name | Coefficient | Odd ratio | |
|---|---|---|---|
| 1 | Reason_1 | 2.632596 | 13.909835 |
| 2 | Reason_2 | 0.869360 | 2.385384 |
| 3 | Reason_3 | 2.812897 | 16.658114 |
| 4 | Reason_4 | 0.650926 | 1.917316 |
| 5 | Month Value | 0.007232 | 1.007259 |
| 6 | Day of the Week | -0.071992 | 0.930539 |
| 7 | Transportation Expense | 0.497816 | 1.645124 |
| 8 | Distance to Work | -0.038743 | 0.961998 |
| 9 | Age | -0.123220 | 0.884070 |
| 10 | Daily Work Load Average | -0.021082 | 0.979139 |
| 11 | Body Mass Index | 0.268138 | 1.307528 |
| 12 | Education | -0.230062 | 0.794484 |
| 13 | Children | 0.373606 | 1.452964 |
| 0 | Intercept | -1.448581 | 0.234903 |
summary_table.sort_values('Odd ratio', ascending=False)
| Feature name | Coefficient | Odd ratio | |
|---|---|---|---|
| 3 | Reason_3 | 2.812897 | 16.658114 |
| 1 | Reason_1 | 2.632596 | 13.909835 |
| 2 | Reason_2 | 0.869360 | 2.385384 |
| 4 | Reason_4 | 0.650926 | 1.917316 |
| 7 | Transportation Expense | 0.497816 | 1.645124 |
| 13 | Children | 0.373606 | 1.452964 |
| 11 | Body Mass Index | 0.268138 | 1.307528 |
| 5 | Month Value | 0.007232 | 1.007259 |
| 10 | Daily Work Load Average | -0.021082 | 0.979139 |
| 8 | Distance to Work | -0.038743 | 0.961998 |
| 6 | Day of the Week | -0.071992 | 0.930539 |
| 9 | Age | -0.123220 | 0.884070 |
| 12 | Education | -0.230062 | 0.794484 |
| 0 | Intercept | -1.448581 | 0.234903 |
#Test the model
reg.score(X_test, y_test)
0.7357142857142858
predicted_proba = reg.predict_proba(X_test) predicted_proba
predicted_proba.shape
(140, 2)
predicted_proba[:,1]
array([0.22123841, 0.36746562, 0.50004073, 0.2089951 , 0.90917697,
0.69301566, 0.77107214, 0.8859558 , 0.29549433, 0.22275347,
0.60483662, 0.73858908, 0.90619582, 0.34200406, 0.74793341,
0.40182971, 0.5876329 , 0.55788212, 0.68102628, 0.92943054,
0.31492142, 0.21583614, 0.56698637, 0.54895219, 0.79581917,
0.29599005, 0.44955041, 0.11650185, 0.71867643, 0.22283802,
0.39170243, 0.77004683, 0.66038918, 0.60501965, 0.21583614,
0.55420036, 0.30950994, 0.73126219, 0.4762001 , 0.53951185,
0.23294667, 0.43933879, 0.29219299, 0.38449721, 0.83762964,
0.61556479, 0.73604752, 0.23015421, 0.23219654, 0.20045226,
0.49883448, 0.25661573, 0.67030132, 0.22865307, 0.83616221,
0.40858324, 0.93437215, 0.28932031, 0.31364181, 0.30863997,
0.69642044, 0.64430772, 0.28256538, 0.80449431, 0.3206113 ,
0.2233284 , 0.07984234, 0.2970576 , 0.70680172, 0.34453101,
0.30245977, 0.27141705, 0.87740585, 0.4721832 , 0.58892078,
0.22283802, 0.73379547, 0.76353833, 0.69165819, 0.7104776 ,
0.31898675, 0.10771716, 0.27265444, 0.75448359, 0.45855206,
0.11668418, 0.67523916, 0.54083882, 0.24313306, 0.76272556,
0.19101949, 0.12667202, 0.23921305, 0.21523928, 0.20891439,
0.86219705, 0.21096595, 0.74876632, 0.2087725 , 0.21414747,
0.57956994, 0.77720819, 0.66970792, 0.63896825, 0.46203675,
0.46309751, 0.20777203, 0.84696265, 0.70341205, 0.15023138,
0.10739748, 0.90589125, 0.61112564, 0.39813828, 0.54901137,
0.56430009, 0.73877163, 0.85001406, 0.60503429, 0.33763649,
0.21222172, 0.12312327, 0.74375883, 0.49875672, 0.23153799,
0.31677329, 0.21174627, 0.14713078, 0.66145911, 0.32819144,
0.57901301, 0.21977403, 0.21528198, 0.32006295, 0.27893506,
0.55763394, 0.55818079, 0.32170334, 0.22976194, 0.49689852])