# Import the relevant libraries
import pandas as pd
import numpy as np
# Load the already-preprocessed absenteeism dataset (CSV expected in the working directory).
data_preprocessed = pd.read_csv('Absenteeism-preprocessed.csv')
# Quick sanity check of the first 5 rows.
data_preprocessed.head()
Reason_1 | Reason_2 | Reason_3 | Reason_4 | Month Value | Day of the Week | Transportation Expense | Distance to Work | Age | Daily Work Load Average | Body Mass Index | Education | Children | Pet | Absenteeism Time in Hours | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 0 | 0 | 1 | 7 | 1 | 289 | 36 | 33 | 239.554 | 30 | 0 | 2 | 1 | 4 |
1 | 0 | 0 | 0 | 0 | 7 | 1 | 118 | 13 | 50 | 239.554 | 31 | 0 | 1 | 0 | 0 |
2 | 0 | 0 | 0 | 1 | 7 | 2 | 179 | 51 | 38 | 239.554 | 31 | 0 | 0 | 0 | 2 |
3 | 1 | 0 | 0 | 0 | 7 | 3 | 279 | 5 | 39 | 239.554 | 24 | 0 | 2 | 0 | 4 |
4 | 0 | 0 | 0 | 1 | 7 | 3 | 289 | 36 | 33 | 239.554 | 30 | 0 | 2 | 1 | 2 |
# Base line classification creation
# Use the median of 'Absenteeism Time in Hours' as the cut-off: it splits the
# data into two roughly balanced classes by construction.
data_preprocessed['Absenteeism Time in Hours'].median()
3.0
# Binary target: 1 = absent strictly more hours than the median, 0 = otherwise.
targets =np.where(data_preprocessed['Absenteeism Time in Hours']>
data_preprocessed['Absenteeism Time in Hours'].median(), 1, 0)
targets
array([1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 
0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0])
# Overwrite the raw hours column with the binary target in the working DataFrame.
data_preprocessed['Absenteeism Time in Hours'] = targets
data_preprocessed.head()
Reason_1 | Reason_2 | Reason_3 | Reason_4 | Month Value | Day of the Week | Transportation Expense | Distance to Work | Age | Daily Work Load Average | Body Mass Index | Education | Children | Pet | Absenteeism Time in Hours | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 0 | 0 | 1 | 7 | 1 | 289 | 36 | 33 | 239.554 | 30 | 0 | 2 | 1 | 1 |
1 | 0 | 0 | 0 | 0 | 7 | 1 | 118 | 13 | 50 | 239.554 | 31 | 0 | 1 | 0 | 0 |
2 | 0 | 0 | 0 | 1 | 7 | 2 | 179 | 51 | 38 | 239.554 | 31 | 0 | 0 | 0 | 0 |
3 | 1 | 0 | 0 | 0 | 7 | 3 | 279 | 5 | 39 | 239.554 | 24 | 0 | 2 | 0 | 1 |
4 | 0 | 0 | 0 | 1 | 7 | 3 | 289 | 36 | 33 | 239.554 | 30 | 0 | 2 | 1 | 0 |
# Share of 1s among the targets (~46%): the classes are close enough to
# balanced for a baseline logistic regression.
targets.sum()/ targets.shape[0]
0.45571428571428574
# Inputs only: drop the target column. DataFrame.drop returns a new object.
data_with_targets = data_preprocessed.drop(['Absenteeism Time in Hours'], axis=1)
# Confirm drop() produced a copy, not the same object as data_preprocessed.
data_with_targets is data_preprocessed
False
data_with_targets.head()
Reason_1 | Reason_2 | Reason_3 | Reason_4 | Month Value | Day of the Week | Transportation Expense | Distance to Work | Age | Daily Work Load Average | Body Mass Index | Education | Children | Pet | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 0 | 0 | 1 | 7 | 1 | 289 | 36 | 33 | 239.554 | 30 | 0 | 2 | 1 |
1 | 0 | 0 | 0 | 0 | 7 | 1 | 118 | 13 | 50 | 239.554 | 31 | 0 | 1 | 0 |
2 | 0 | 0 | 0 | 1 | 7 | 2 | 179 | 51 | 38 | 239.554 | 31 | 0 | 0 | 0 |
3 | 1 | 0 | 0 | 0 | 7 | 3 | 279 | 5 | 39 | 239.554 | 24 | 0 | 2 | 0 |
4 | 0 | 0 | 0 | 1 | 7 | 3 | 289 | 36 | 33 | 239.554 | 30 | 0 | 2 | 1 |
#Selecting input for regression
data_with_targets.shape
(700, 14)
# All 14 columns selected explicitly by position — equivalent to the full frame.
data_with_targets.iloc[:,0:14]
Reason_1 | Reason_2 | Reason_3 | Reason_4 | Month Value | Day of the Week | Transportation Expense | Distance to Work | Age | Daily Work Load Average | Body Mass Index | Education | Children | Pet | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 0 | 0 | 1 | 7 | 1 | 289 | 36 | 33 | 239.554 | 30 | 0 | 2 | 1 |
1 | 0 | 0 | 0 | 0 | 7 | 1 | 118 | 13 | 50 | 239.554 | 31 | 0 | 1 | 0 |
2 | 0 | 0 | 0 | 1 | 7 | 2 | 179 | 51 | 38 | 239.554 | 31 | 0 | 0 | 0 |
3 | 1 | 0 | 0 | 0 | 7 | 3 | 279 | 5 | 39 | 239.554 | 24 | 0 | 2 | 0 |
4 | 0 | 0 | 0 | 1 | 7 | 3 | 289 | 36 | 33 | 239.554 | 30 | 0 | 2 | 1 |
5 | 0 | 0 | 0 | 1 | 10 | 2 | 179 | 51 | 38 | 239.554 | 31 | 0 | 0 | 0 |
6 | 0 | 0 | 0 | 1 | 7 | 4 | 361 | 52 | 28 | 239.554 | 27 | 0 | 1 | 4 |
7 | 0 | 0 | 0 | 1 | 7 | 4 | 260 | 50 | 36 | 239.554 | 23 | 0 | 4 | 0 |
8 | 0 | 0 | 1 | 0 | 6 | 6 | 155 | 12 | 34 | 239.554 | 25 | 0 | 2 | 0 |
9 | 0 | 0 | 0 | 1 | 7 | 0 | 235 | 11 | 37 | 239.554 | 29 | 1 | 1 | 1 |
10 | 1 | 0 | 0 | 0 | 7 | 0 | 260 | 50 | 36 | 239.554 | 23 | 0 | 4 | 0 |
11 | 1 | 0 | 0 | 0 | 7 | 1 | 260 | 50 | 36 | 239.554 | 23 | 0 | 4 | 0 |
12 | 1 | 0 | 0 | 0 | 7 | 2 | 260 | 50 | 36 | 239.554 | 23 | 0 | 4 | 0 |
13 | 1 | 0 | 0 | 0 | 7 | 2 | 179 | 51 | 38 | 239.554 | 31 | 0 | 0 | 0 |
14 | 0 | 0 | 0 | 1 | 7 | 2 | 179 | 51 | 38 | 239.554 | 31 | 0 | 0 | 0 |
15 | 1 | 0 | 0 | 0 | 7 | 4 | 246 | 25 | 41 | 239.554 | 23 | 0 | 0 | 0 |
16 | 0 | 0 | 0 | 1 | 7 | 4 | 179 | 51 | 38 | 239.554 | 31 | 0 | 0 | 0 |
17 | 0 | 0 | 1 | 0 | 7 | 0 | 179 | 51 | 38 | 239.554 | 31 | 0 | 0 | 0 |
18 | 1 | 0 | 0 | 0 | 7 | 3 | 189 | 29 | 33 | 239.554 | 25 | 0 | 2 | 2 |
19 | 0 | 0 | 0 | 1 | 5 | 4 | 248 | 25 | 47 | 205.917 | 32 | 0 | 2 | 1 |
20 | 1 | 0 | 0 | 0 | 12 | 1 | 330 | 16 | 28 | 205.917 | 25 | 1 | 0 | 0 |
21 | 1 | 0 | 0 | 0 | 3 | 6 | 179 | 51 | 38 | 205.917 | 31 | 0 | 0 | 0 |
22 | 1 | 0 | 0 | 0 | 10 | 3 | 361 | 52 | 28 | 205.917 | 27 | 0 | 1 | 4 |
23 | 0 | 0 | 0 | 1 | 8 | 4 | 260 | 50 | 36 | 205.917 | 23 | 0 | 4 | 0 |
24 | 0 | 0 | 1 | 0 | 8 | 0 | 289 | 36 | 33 | 205.917 | 30 | 0 | 2 | 1 |
25 | 0 | 0 | 0 | 1 | 8 | 0 | 361 | 52 | 28 | 205.917 | 27 | 0 | 1 | 4 |
26 | 0 | 0 | 0 | 1 | 4 | 2 | 289 | 36 | 33 | 205.917 | 30 | 0 | 2 | 1 |
27 | 0 | 0 | 0 | 1 | 12 | 1 | 157 | 27 | 29 | 205.917 | 22 | 0 | 0 | 0 |
28 | 0 | 0 | 1 | 0 | 8 | 2 | 289 | 36 | 33 | 205.917 | 30 | 0 | 2 | 1 |
29 | 0 | 0 | 0 | 1 | 8 | 4 | 179 | 51 | 38 | 205.917 | 31 | 0 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
670 | 0 | 0 | 0 | 1 | 4 | 1 | 155 | 12 | 34 | 246.288 | 25 | 0 | 2 | 0 |
671 | 0 | 0 | 1 | 0 | 4 | 3 | 225 | 26 | 28 | 246.288 | 24 | 0 | 1 | 2 |
672 | 1 | 0 | 0 | 0 | 4 | 3 | 118 | 13 | 50 | 246.288 | 31 | 0 | 1 | 0 |
673 | 0 | 0 | 0 | 1 | 4 | 4 | 179 | 26 | 30 | 246.288 | 19 | 1 | 0 | 0 |
674 | 0 | 0 | 0 | 1 | 7 | 3 | 235 | 11 | 37 | 237.656 | 29 | 1 | 1 | 1 |
675 | 0 | 0 | 1 | 0 | 9 | 2 | 225 | 15 | 41 | 237.656 | 28 | 1 | 2 | 2 |
676 | 0 | 0 | 0 | 1 | 9 | 2 | 235 | 16 | 32 | 237.656 | 25 | 1 | 0 | 0 |
677 | 1 | 0 | 0 | 0 | 9 | 2 | 118 | 10 | 37 | 237.656 | 28 | 0 | 0 | 0 |
678 | 0 | 0 | 0 | 1 | 9 | 2 | 235 | 20 | 43 | 237.656 | 38 | 0 | 1 | 0 |
679 | 1 | 0 | 0 | 0 | 10 | 4 | 179 | 26 | 30 | 237.656 | 19 | 1 | 0 | 0 |
680 | 0 | 0 | 0 | 1 | 10 | 4 | 291 | 31 | 40 | 237.656 | 25 | 0 | 1 | 1 |
681 | 1 | 0 | 0 | 0 | 10 | 4 | 225 | 15 | 41 | 237.656 | 28 | 1 | 2 | 2 |
682 | 0 | 0 | 1 | 0 | 11 | 0 | 300 | 26 | 43 | 237.656 | 25 | 0 | 2 | 1 |
683 | 0 | 0 | 0 | 1 | 11 | 0 | 225 | 15 | 41 | 237.656 | 28 | 1 | 2 | 2 |
684 | 0 | 0 | 0 | 1 | 11 | 0 | 179 | 26 | 30 | 237.656 | 19 | 1 | 0 | 0 |
685 | 0 | 0 | 0 | 1 | 5 | 0 | 118 | 13 | 50 | 237.656 | 31 | 0 | 1 | 0 |
686 | 1 | 0 | 0 | 0 | 5 | 1 | 118 | 13 | 50 | 237.656 | 31 | 0 | 1 | 0 |
687 | 0 | 0 | 0 | 1 | 5 | 1 | 118 | 10 | 37 | 237.656 | 28 | 0 | 0 | 0 |
688 | 0 | 0 | 0 | 0 | 5 | 1 | 118 | 13 | 50 | 237.656 | 31 | 0 | 1 | 0 |
689 | 0 | 0 | 0 | 1 | 5 | 2 | 179 | 26 | 30 | 237.656 | 19 | 1 | 0 | 0 |
690 | 0 | 0 | 0 | 0 | 5 | 2 | 378 | 49 | 36 | 237.656 | 21 | 0 | 2 | 4 |
691 | 0 | 1 | 0 | 0 | 5 | 4 | 179 | 22 | 40 | 237.656 | 22 | 1 | 2 | 0 |
692 | 1 | 0 | 0 | 0 | 5 | 0 | 155 | 12 | 34 | 237.656 | 25 | 0 | 2 | 0 |
693 | 1 | 0 | 0 | 0 | 5 | 0 | 235 | 16 | 32 | 237.656 | 25 | 1 | 0 | 0 |
694 | 0 | 0 | 0 | 1 | 5 | 2 | 291 | 31 | 40 | 237.656 | 25 | 0 | 1 | 1 |
695 | 1 | 0 | 0 | 0 | 5 | 2 | 179 | 22 | 40 | 237.656 | 22 | 1 | 2 | 0 |
696 | 1 | 0 | 0 | 0 | 5 | 2 | 225 | 26 | 28 | 237.656 | 24 | 0 | 1 | 2 |
697 | 1 | 0 | 0 | 0 | 5 | 3 | 330 | 16 | 28 | 237.656 | 25 | 1 | 0 | 0 |
698 | 0 | 0 | 0 | 1 | 5 | 3 | 235 | 16 | 32 | 237.656 | 25 | 1 | 0 | 0 |
699 | 0 | 0 | 0 | 1 | 5 | 3 | 291 | 31 | 40 | 237.656 | 25 | 0 | 1 | 1 |
700 rows × 14 columns
# Same selection via a slice that excludes the last column ('Pet' here).
data_with_targets.iloc[:,: -1]
Reason_1 | Reason_2 | Reason_3 | Reason_4 | Month Value | Day of the Week | Transportation Expense | Distance to Work | Age | Daily Work Load Average | Body Mass Index | Education | Children | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 0 | 0 | 1 | 7 | 1 | 289 | 36 | 33 | 239.554 | 30 | 0 | 2 |
1 | 0 | 0 | 0 | 0 | 7 | 1 | 118 | 13 | 50 | 239.554 | 31 | 0 | 1 |
2 | 0 | 0 | 0 | 1 | 7 | 2 | 179 | 51 | 38 | 239.554 | 31 | 0 | 0 |
3 | 1 | 0 | 0 | 0 | 7 | 3 | 279 | 5 | 39 | 239.554 | 24 | 0 | 2 |
4 | 0 | 0 | 0 | 1 | 7 | 3 | 289 | 36 | 33 | 239.554 | 30 | 0 | 2 |
5 | 0 | 0 | 0 | 1 | 10 | 2 | 179 | 51 | 38 | 239.554 | 31 | 0 | 0 |
6 | 0 | 0 | 0 | 1 | 7 | 4 | 361 | 52 | 28 | 239.554 | 27 | 0 | 1 |
7 | 0 | 0 | 0 | 1 | 7 | 4 | 260 | 50 | 36 | 239.554 | 23 | 0 | 4 |
8 | 0 | 0 | 1 | 0 | 6 | 6 | 155 | 12 | 34 | 239.554 | 25 | 0 | 2 |
9 | 0 | 0 | 0 | 1 | 7 | 0 | 235 | 11 | 37 | 239.554 | 29 | 1 | 1 |
10 | 1 | 0 | 0 | 0 | 7 | 0 | 260 | 50 | 36 | 239.554 | 23 | 0 | 4 |
11 | 1 | 0 | 0 | 0 | 7 | 1 | 260 | 50 | 36 | 239.554 | 23 | 0 | 4 |
12 | 1 | 0 | 0 | 0 | 7 | 2 | 260 | 50 | 36 | 239.554 | 23 | 0 | 4 |
13 | 1 | 0 | 0 | 0 | 7 | 2 | 179 | 51 | 38 | 239.554 | 31 | 0 | 0 |
14 | 0 | 0 | 0 | 1 | 7 | 2 | 179 | 51 | 38 | 239.554 | 31 | 0 | 0 |
15 | 1 | 0 | 0 | 0 | 7 | 4 | 246 | 25 | 41 | 239.554 | 23 | 0 | 0 |
16 | 0 | 0 | 0 | 1 | 7 | 4 | 179 | 51 | 38 | 239.554 | 31 | 0 | 0 |
17 | 0 | 0 | 1 | 0 | 7 | 0 | 179 | 51 | 38 | 239.554 | 31 | 0 | 0 |
18 | 1 | 0 | 0 | 0 | 7 | 3 | 189 | 29 | 33 | 239.554 | 25 | 0 | 2 |
19 | 0 | 0 | 0 | 1 | 5 | 4 | 248 | 25 | 47 | 205.917 | 32 | 0 | 2 |
20 | 1 | 0 | 0 | 0 | 12 | 1 | 330 | 16 | 28 | 205.917 | 25 | 1 | 0 |
21 | 1 | 0 | 0 | 0 | 3 | 6 | 179 | 51 | 38 | 205.917 | 31 | 0 | 0 |
22 | 1 | 0 | 0 | 0 | 10 | 3 | 361 | 52 | 28 | 205.917 | 27 | 0 | 1 |
23 | 0 | 0 | 0 | 1 | 8 | 4 | 260 | 50 | 36 | 205.917 | 23 | 0 | 4 |
24 | 0 | 0 | 1 | 0 | 8 | 0 | 289 | 36 | 33 | 205.917 | 30 | 0 | 2 |
25 | 0 | 0 | 0 | 1 | 8 | 0 | 361 | 52 | 28 | 205.917 | 27 | 0 | 1 |
26 | 0 | 0 | 0 | 1 | 4 | 2 | 289 | 36 | 33 | 205.917 | 30 | 0 | 2 |
27 | 0 | 0 | 0 | 1 | 12 | 1 | 157 | 27 | 29 | 205.917 | 22 | 0 | 0 |
28 | 0 | 0 | 1 | 0 | 8 | 2 | 289 | 36 | 33 | 205.917 | 30 | 0 | 2 |
29 | 0 | 0 | 0 | 1 | 8 | 4 | 179 | 51 | 38 | 205.917 | 31 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
670 | 0 | 0 | 0 | 1 | 4 | 1 | 155 | 12 | 34 | 246.288 | 25 | 0 | 2 |
671 | 0 | 0 | 1 | 0 | 4 | 3 | 225 | 26 | 28 | 246.288 | 24 | 0 | 1 |
672 | 1 | 0 | 0 | 0 | 4 | 3 | 118 | 13 | 50 | 246.288 | 31 | 0 | 1 |
673 | 0 | 0 | 0 | 1 | 4 | 4 | 179 | 26 | 30 | 246.288 | 19 | 1 | 0 |
674 | 0 | 0 | 0 | 1 | 7 | 3 | 235 | 11 | 37 | 237.656 | 29 | 1 | 1 |
675 | 0 | 0 | 1 | 0 | 9 | 2 | 225 | 15 | 41 | 237.656 | 28 | 1 | 2 |
676 | 0 | 0 | 0 | 1 | 9 | 2 | 235 | 16 | 32 | 237.656 | 25 | 1 | 0 |
677 | 1 | 0 | 0 | 0 | 9 | 2 | 118 | 10 | 37 | 237.656 | 28 | 0 | 0 |
678 | 0 | 0 | 0 | 1 | 9 | 2 | 235 | 20 | 43 | 237.656 | 38 | 0 | 1 |
679 | 1 | 0 | 0 | 0 | 10 | 4 | 179 | 26 | 30 | 237.656 | 19 | 1 | 0 |
680 | 0 | 0 | 0 | 1 | 10 | 4 | 291 | 31 | 40 | 237.656 | 25 | 0 | 1 |
681 | 1 | 0 | 0 | 0 | 10 | 4 | 225 | 15 | 41 | 237.656 | 28 | 1 | 2 |
682 | 0 | 0 | 1 | 0 | 11 | 0 | 300 | 26 | 43 | 237.656 | 25 | 0 | 2 |
683 | 0 | 0 | 0 | 1 | 11 | 0 | 225 | 15 | 41 | 237.656 | 28 | 1 | 2 |
684 | 0 | 0 | 0 | 1 | 11 | 0 | 179 | 26 | 30 | 237.656 | 19 | 1 | 0 |
685 | 0 | 0 | 0 | 1 | 5 | 0 | 118 | 13 | 50 | 237.656 | 31 | 0 | 1 |
686 | 1 | 0 | 0 | 0 | 5 | 1 | 118 | 13 | 50 | 237.656 | 31 | 0 | 1 |
687 | 0 | 0 | 0 | 1 | 5 | 1 | 118 | 10 | 37 | 237.656 | 28 | 0 | 0 |
688 | 0 | 0 | 0 | 0 | 5 | 1 | 118 | 13 | 50 | 237.656 | 31 | 0 | 1 |
689 | 0 | 0 | 0 | 1 | 5 | 2 | 179 | 26 | 30 | 237.656 | 19 | 1 | 0 |
690 | 0 | 0 | 0 | 0 | 5 | 2 | 378 | 49 | 36 | 237.656 | 21 | 0 | 2 |
691 | 0 | 1 | 0 | 0 | 5 | 4 | 179 | 22 | 40 | 237.656 | 22 | 1 | 2 |
692 | 1 | 0 | 0 | 0 | 5 | 0 | 155 | 12 | 34 | 237.656 | 25 | 0 | 2 |
693 | 1 | 0 | 0 | 0 | 5 | 0 | 235 | 16 | 32 | 237.656 | 25 | 1 | 0 |
694 | 0 | 0 | 0 | 1 | 5 | 2 | 291 | 31 | 40 | 237.656 | 25 | 0 | 1 |
695 | 1 | 0 | 0 | 0 | 5 | 2 | 179 | 22 | 40 | 237.656 | 22 | 1 | 2 |
696 | 1 | 0 | 0 | 0 | 5 | 2 | 225 | 26 | 28 | 237.656 | 24 | 0 | 1 |
697 | 1 | 0 | 0 | 0 | 5 | 3 | 330 | 16 | 28 | 237.656 | 25 | 1 | 0 |
698 | 0 | 0 | 0 | 1 | 5 | 3 | 235 | 16 | 32 | 237.656 | 25 | 1 | 0 |
699 | 0 | 0 | 0 | 1 | 5 | 3 | 291 | 31 | 40 | 237.656 | 25 | 0 | 1 |
700 rows × 13 columns
# Keep everything except the last column ('Pet') as the unscaled model inputs.
unscaled_inputs = data_with_targets.iloc[:,: -1]
# NOTE: plain StandardScaler below was superseded by CustomScaler; kept for reference.
#from sklearn.preprocessing import StandardScaler
#absenteeism_scaler = StandardScaler()
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import StandardScaler
class CustomScaler(BaseEstimator, TransformerMixin):
    """Standard-scale only a chosen subset of columns, leaving the rest untouched.

    Wraps sklearn's StandardScaler so that dummy/binary columns (passed via
    `columns_to_omit` logic outside this class) keep their 0/1 values while
    numerical columns are standardized. Column order of the input is preserved.
    """

    def __init__(self, columns, copy=True, with_mean=True, with_std=True):
        # sklearn convention: store every constructor argument under its own
        # name, unchanged — required for get_params/set_params and cloning.
        self.columns = columns
        self.copy = copy
        self.with_mean = with_mean
        self.with_std = with_std
        # BUG FIX: these StandardScaler parameters are keyword-only in modern
        # scikit-learn; the original positional call
        # StandardScaler(copy, with_mean, with_std) raises a TypeError there
        # (and the repr at fit time showed copy=None/with_mean=None, i.e. the
        # values were never wired through correctly).
        self.scaler = StandardScaler(copy=copy, with_mean=with_mean, with_std=with_std)
        self.mean_ = None
        self.var_ = None

    def fit(self, X, y=None):
        """Learn per-column mean/variance on the selected columns only."""
        self.scaler.fit(X[self.columns], y)
        self.mean_ = np.mean(X[self.columns])
        self.var_ = np.var(X[self.columns])
        return self

    def transform(self, X, y=None, copy=None):
        """Return X with `self.columns` scaled; other columns and order kept."""
        init_col_order = X.columns
        # BUG FIX: propagate X.index into the scaled frame. Without it,
        # pd.concat aligns on a fresh 0..n-1 RangeIndex and silently scrambles
        # (or NaN-fills) rows whenever X has a non-default index, e.g. after a
        # train/test split.
        X_scaled = pd.DataFrame(self.scaler.transform(X[self.columns]),
                                columns=self.columns, index=X.index)
        X_not_scaled = X.loc[:, ~X.columns.isin(self.columns)]
        # Reindex the concatenated result back to the original column order.
        return pd.concat([X_not_scaled, X_scaled], axis=1)[init_col_order]
unscaled_inputs.columns.values
array(['Reason_1', 'Reason_2', 'Reason_3', 'Reason_4', 'Month Value', 'Day of the Week', 'Transportation Expense', 'Distance to Work', 'Age', 'Daily Work Load Average', 'Body Mass Index', 'Education', 'Children'], dtype=object)
# Dummy/binary columns must not be standardized or they lose interpretability.
columns_to_omit = ['Reason_1', 'Reason_2', 'Reason_3', 'Reason_4','Education']
# Scale every remaining (numerical) column.
columns_to_scale = [x for x in unscaled_inputs.columns.values if x not in columns_to_omit]
absenteeism_scaler = CustomScaler(columns_to_scale)
# Learn the means/variances of the columns to scale from the full input set.
absenteeism_scaler.fit(unscaled_inputs)
C:\Users\User\Anaconda3\lib\site-packages\sklearn\preprocessing\data.py:645: DataConversionWarning: Data with input dtype int64, float64 were all converted to float64 by StandardScaler. return self.partial_fit(X, y)
CustomScaler(columns=['Month Value', 'Day of the Week', 'Transportation Expense', 'Distance to Work', 'Age', 'Daily Work Load Average', 'Body Mass Index', 'Children'], copy=None, with_mean=None, with_std=None)
# Apply the fitted scaler: numerical columns standardized, dummies untouched.
scaled_inputs = absenteeism_scaler.transform(unscaled_inputs)
C:\Users\User\Anaconda3\lib\site-packages\ipykernel_launcher.py:20: DataConversionWarning: Data with input dtype int64, float64 were all converted to float64 by StandardScaler.
scaled_inputs
Reason_1 | Reason_2 | Reason_3 | Reason_4 | Month Value | Day of the Week | Transportation Expense | Distance to Work | Age | Daily Work Load Average | Body Mass Index | Education | Children | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 0 | 0 | 1 | 0.030796 | -0.800950 | 1.005844 | 0.412816 | -0.536062 | -0.806331 | 0.767431 | 0 | 0.880469 |
1 | 0 | 0 | 0 | 0 | 0.030796 | -0.800950 | -1.574681 | -1.141882 | 2.130803 | -0.806331 | 1.002633 | 0 | -0.019280 |
2 | 0 | 0 | 0 | 1 | 0.030796 | -0.232900 | -0.654143 | 1.426749 | 0.248310 | -0.806331 | 1.002633 | 0 | -0.919030 |
3 | 1 | 0 | 0 | 0 | 0.030796 | 0.335149 | 0.854936 | -1.682647 | 0.405184 | -0.806331 | -0.643782 | 0 | 0.880469 |
4 | 0 | 0 | 0 | 1 | 0.030796 | 0.335149 | 1.005844 | 0.412816 | -0.536062 | -0.806331 | 0.767431 | 0 | 0.880469 |
5 | 0 | 0 | 0 | 1 | 0.929019 | -0.232900 | -0.654143 | 1.426749 | 0.248310 | -0.806331 | 1.002633 | 0 | -0.919030 |
6 | 0 | 0 | 0 | 1 | 0.030796 | 0.903199 | 2.092381 | 1.494345 | -1.320435 | -0.806331 | 0.061825 | 0 | -0.019280 |
7 | 0 | 0 | 0 | 1 | 0.030796 | 0.903199 | 0.568211 | 1.359154 | -0.065439 | -0.806331 | -0.878984 | 0 | 2.679969 |
8 | 0 | 0 | 1 | 0 | -0.268611 | 2.039298 | -1.016322 | -1.209478 | -0.379188 | -0.806331 | -0.408580 | 0 | 0.880469 |
9 | 0 | 0 | 0 | 1 | 0.030796 | -1.368999 | 0.190942 | -1.277074 | 0.091435 | -0.806331 | 0.532229 | 1 | -0.019280 |
10 | 1 | 0 | 0 | 0 | 0.030796 | -1.368999 | 0.568211 | 1.359154 | -0.065439 | -0.806331 | -0.878984 | 0 | 2.679969 |
11 | 1 | 0 | 0 | 0 | 0.030796 | -0.800950 | 0.568211 | 1.359154 | -0.065439 | -0.806331 | -0.878984 | 0 | 2.679969 |
12 | 1 | 0 | 0 | 0 | 0.030796 | -0.232900 | 0.568211 | 1.359154 | -0.065439 | -0.806331 | -0.878984 | 0 | 2.679969 |
13 | 1 | 0 | 0 | 0 | 0.030796 | -0.232900 | -0.654143 | 1.426749 | 0.248310 | -0.806331 | 1.002633 | 0 | -0.919030 |
14 | 0 | 0 | 0 | 1 | 0.030796 | -0.232900 | -0.654143 | 1.426749 | 0.248310 | -0.806331 | 1.002633 | 0 | -0.919030 |
15 | 1 | 0 | 0 | 0 | 0.030796 | 0.903199 | 0.356940 | -0.330735 | 0.718933 | -0.806331 | -0.878984 | 0 | -0.919030 |
16 | 0 | 0 | 0 | 1 | 0.030796 | 0.903199 | -0.654143 | 1.426749 | 0.248310 | -0.806331 | 1.002633 | 0 | -0.919030 |
17 | 0 | 0 | 1 | 0 | 0.030796 | -1.368999 | -0.654143 | 1.426749 | 0.248310 | -0.806331 | 1.002633 | 0 | -0.919030 |
18 | 1 | 0 | 0 | 0 | 0.030796 | 0.335149 | -0.503235 | -0.060353 | -0.536062 | -0.806331 | -0.408580 | 0 | 0.880469 |
19 | 0 | 0 | 0 | 1 | -0.568019 | 0.903199 | 0.387122 | -0.330735 | 1.660180 | -1.647399 | 1.237836 | 0 | 0.880469 |
20 | 1 | 0 | 0 | 0 | 1.527833 | -0.800950 | 1.624567 | -0.939096 | -1.320435 | -1.647399 | -0.408580 | 1 | -0.919030 |
21 | 1 | 0 | 0 | 0 | -1.166834 | 2.039298 | -0.654143 | 1.426749 | 0.248310 | -1.647399 | 1.002633 | 0 | -0.919030 |
22 | 1 | 0 | 0 | 0 | 0.929019 | 0.335149 | 2.092381 | 1.494345 | -1.320435 | -1.647399 | 0.061825 | 0 | -0.019280 |
23 | 0 | 0 | 0 | 1 | 0.330204 | 0.903199 | 0.568211 | 1.359154 | -0.065439 | -1.647399 | -0.878984 | 0 | 2.679969 |
24 | 0 | 0 | 1 | 0 | 0.330204 | -1.368999 | 1.005844 | 0.412816 | -0.536062 | -1.647399 | 0.767431 | 0 | 0.880469 |
25 | 0 | 0 | 0 | 1 | 0.330204 | -1.368999 | 2.092381 | 1.494345 | -1.320435 | -1.647399 | 0.061825 | 0 | -0.019280 |
26 | 0 | 0 | 0 | 1 | -0.867426 | -0.232900 | 1.005844 | 0.412816 | -0.536062 | -1.647399 | 0.767431 | 0 | 0.880469 |
27 | 0 | 0 | 0 | 1 | 1.527833 | -0.800950 | -0.986140 | -0.195544 | -1.163560 | -1.647399 | -1.114186 | 0 | -0.919030 |
28 | 0 | 0 | 1 | 0 | 0.330204 | -0.232900 | 1.005844 | 0.412816 | -0.536062 | -1.647399 | 0.767431 | 0 | 0.880469 |
29 | 0 | 0 | 0 | 1 | 0.330204 | 0.903199 | -0.654143 | 1.426749 | 0.248310 | -1.647399 | 1.002633 | 0 | -0.919030 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
670 | 0 | 0 | 0 | 1 | -0.867426 | -0.800950 | -1.016322 | -1.209478 | -0.379188 | -0.637953 | -0.408580 | 0 | 0.880469 |
671 | 0 | 0 | 1 | 0 | -0.867426 | 0.335149 | 0.040034 | -0.263140 | -1.320435 | -0.637953 | -0.643782 | 0 | -0.019280 |
672 | 1 | 0 | 0 | 0 | -0.867426 | 0.335149 | -1.574681 | -1.141882 | 2.130803 | -0.637953 | 1.002633 | 0 | -0.019280 |
673 | 0 | 0 | 0 | 1 | -0.867426 | 0.903199 | -0.654143 | -0.263140 | -1.006686 | -0.637953 | -1.819793 | 1 | -0.919030 |
674 | 0 | 0 | 0 | 1 | 0.030796 | 0.335149 | 0.190942 | -1.277074 | 0.091435 | -0.853789 | 0.532229 | 1 | -0.019280 |
675 | 0 | 0 | 1 | 0 | 0.629611 | -0.232900 | 0.040034 | -1.006691 | 0.718933 | -0.853789 | 0.297027 | 1 | 0.880469 |
676 | 0 | 0 | 0 | 1 | 0.629611 | -0.232900 | 0.190942 | -0.939096 | -0.692937 | -0.853789 | -0.408580 | 1 | -0.919030 |
677 | 1 | 0 | 0 | 0 | 0.629611 | -0.232900 | -1.574681 | -1.344669 | 0.091435 | -0.853789 | 0.297027 | 0 | -0.919030 |
678 | 0 | 0 | 0 | 1 | 0.629611 | -0.232900 | 0.190942 | -0.668713 | 1.032682 | -0.853789 | 2.649049 | 0 | -0.019280 |
679 | 1 | 0 | 0 | 0 | 0.929019 | 0.903199 | -0.654143 | -0.263140 | -1.006686 | -0.853789 | -1.819793 | 1 | -0.919030 |
680 | 0 | 0 | 0 | 1 | 0.929019 | 0.903199 | 1.036026 | 0.074838 | 0.562059 | -0.853789 | -0.408580 | 0 | -0.019280 |
681 | 1 | 0 | 0 | 0 | 0.929019 | 0.903199 | 0.040034 | -1.006691 | 0.718933 | -0.853789 | 0.297027 | 1 | 0.880469 |
682 | 0 | 0 | 1 | 0 | 1.228426 | -1.368999 | 1.171843 | -0.263140 | 1.032682 | -0.853789 | -0.408580 | 0 | 0.880469 |
683 | 0 | 0 | 0 | 1 | 1.228426 | -1.368999 | 0.040034 | -1.006691 | 0.718933 | -0.853789 | 0.297027 | 1 | 0.880469 |
684 | 0 | 0 | 0 | 1 | 1.228426 | -1.368999 | -0.654143 | -0.263140 | -1.006686 | -0.853789 | -1.819793 | 1 | -0.919030 |
685 | 0 | 0 | 0 | 1 | -0.568019 | -1.368999 | -1.574681 | -1.141882 | 2.130803 | -0.853789 | 1.002633 | 0 | -0.019280 |
686 | 1 | 0 | 0 | 0 | -0.568019 | -0.800950 | -1.574681 | -1.141882 | 2.130803 | -0.853789 | 1.002633 | 0 | -0.019280 |
687 | 0 | 0 | 0 | 1 | -0.568019 | -0.800950 | -1.574681 | -1.344669 | 0.091435 | -0.853789 | 0.297027 | 0 | -0.919030 |
688 | 0 | 0 | 0 | 0 | -0.568019 | -0.800950 | -1.574681 | -1.141882 | 2.130803 | -0.853789 | 1.002633 | 0 | -0.019280 |
689 | 0 | 0 | 0 | 1 | -0.568019 | -0.232900 | -0.654143 | -0.263140 | -1.006686 | -0.853789 | -1.819793 | 1 | -0.919030 |
690 | 0 | 0 | 0 | 0 | -0.568019 | -0.232900 | 2.348925 | 1.291558 | -0.065439 | -0.853789 | -1.349389 | 0 | 0.880469 |
691 | 0 | 1 | 0 | 0 | -0.568019 | 0.903199 | -0.654143 | -0.533522 | 0.562059 | -0.853789 | -1.114186 | 1 | 0.880469 |
692 | 1 | 0 | 0 | 0 | -0.568019 | -1.368999 | -1.016322 | -1.209478 | -0.379188 | -0.853789 | -0.408580 | 0 | 0.880469 |
693 | 1 | 0 | 0 | 0 | -0.568019 | -1.368999 | 0.190942 | -0.939096 | -0.692937 | -0.853789 | -0.408580 | 1 | -0.919030 |
694 | 0 | 0 | 0 | 1 | -0.568019 | -0.232900 | 1.036026 | 0.074838 | 0.562059 | -0.853789 | -0.408580 | 0 | -0.019280 |
695 | 1 | 0 | 0 | 0 | -0.568019 | -0.232900 | -0.654143 | -0.533522 | 0.562059 | -0.853789 | -1.114186 | 1 | 0.880469 |
696 | 1 | 0 | 0 | 0 | -0.568019 | -0.232900 | 0.040034 | -0.263140 | -1.320435 | -0.853789 | -0.643782 | 0 | -0.019280 |
697 | 1 | 0 | 0 | 0 | -0.568019 | 0.335149 | 1.624567 | -0.939096 | -1.320435 | -0.853789 | -0.408580 | 1 | -0.919030 |
698 | 0 | 0 | 0 | 1 | -0.568019 | 0.335149 | 0.190942 | -0.939096 | -0.692937 | -0.853789 | -0.408580 | 1 | -0.919030 |
699 | 0 | 0 | 0 | 1 | -0.568019 | 0.335149 | 1.036026 | 0.074838 | 0.562059 | -0.853789 | -0.408580 | 0 | -0.019280 |
700 rows × 13 columns
scaled_inputs.shape
(700, 13)
# Split the data into train and test
from sklearn.model_selection import train_test_split
# NOTE(review): no random_state or train_size is passed, so this shuffles
# differently on every run (default 75/25 split) — pass random_state (and an
# explicit train_size) for a reproducible split.
train_test_split(scaled_inputs, targets)
[ Reason_1 Reason_2 Reason_3 Reason_4 Month Value Day of the Week \ 270 1 0 0 0 -0.568019 -1.368999 555 1 0 0 0 -0.568019 0.903199 106 0 0 0 1 0.929019 -1.368999 388 0 0 0 1 -1.466241 -0.232900 1 0 0 0 0 0.030796 -0.800950 415 0 0 0 1 0.929019 -0.232900 670 0 0 0 1 -0.867426 -0.800950 611 1 0 0 0 -1.466241 -1.368999 141 0 0 0 1 1.228426 -0.232900 416 0 0 0 1 0.929019 -0.232900 500 0 0 0 1 0.330204 -0.232900 184 0 0 0 1 -0.268611 1.471248 253 0 0 1 0 1.228426 -0.800950 525 1 0 0 0 0.929019 0.903199 577 0 0 1 0 1.527833 1.471248 665 0 0 0 1 -0.867426 0.903199 526 1 0 0 0 0.929019 0.903199 619 0 0 0 1 -0.568019 0.335149 605 0 0 0 1 -1.466241 -0.232900 173 1 0 0 0 -1.166834 -0.800950 151 0 0 1 0 -1.466241 -1.368999 87 1 0 0 0 1.228426 -1.368999 343 0 0 0 1 -0.268611 2.039298 276 0 0 0 0 0.629611 -0.800950 167 1 0 0 0 -1.166834 -0.800950 117 0 0 0 1 -0.268611 -0.232900 366 0 0 0 1 -1.765648 -1.368999 204 1 0 0 0 -0.867426 -0.800950 214 0 0 0 0 -0.568019 -0.232900 53 0 0 0 1 0.629611 -0.800950 .. ... ... ... ... ... ... 
321 0 0 0 1 1.228426 -1.368999 257 1 0 0 0 0.929019 1.471248 672 1 0 0 0 -0.867426 0.335149 509 0 0 0 1 -0.268611 1.471248 493 0 0 0 1 0.330204 0.335149 626 0 0 0 1 0.330204 0.903199 693 1 0 0 0 -0.568019 -1.368999 441 0 0 0 1 -0.568019 1.471248 467 0 0 0 1 0.030796 -0.800950 10 1 0 0 0 0.030796 -1.368999 194 1 0 0 0 0.330204 0.335149 681 1 0 0 0 0.929019 0.903199 678 0 0 0 1 0.629611 -0.232900 110 0 0 0 1 1.228426 0.335149 223 0 0 0 1 -0.268611 0.335149 455 1 0 0 0 -0.268611 -0.232900 142 0 0 0 1 1.527833 0.903199 674 0 0 0 1 0.030796 0.335149 314 1 0 0 0 0.929019 0.903199 40 0 0 0 1 -1.765648 0.903199 23 0 0 0 1 0.330204 0.903199 92 1 0 0 0 1.228426 0.903199 682 0 0 1 0 1.228426 -1.368999 22 1 0 0 0 0.929019 0.335149 68 0 0 0 1 -0.268611 -0.232900 20 1 0 0 0 1.527833 -0.800950 538 1 0 0 0 1.228426 0.903199 120 0 0 0 1 0.330204 -1.368999 176 1 0 0 0 -1.166834 -1.368999 466 0 0 0 1 0.030796 -1.368999 Transportation Expense Distance to Work Age \ 270 -0.654143 1.426749 0.248310 555 -0.654143 1.426749 0.248310 106 0.040034 -0.263140 -1.320435 388 -0.654143 1.426749 0.248310 1 -1.574681 -1.141882 2.130803 415 2.213108 -0.871500 -0.849811 670 -1.016322 -1.209478 -0.379188 611 0.040034 -0.263140 -1.320435 141 -0.503235 -0.060353 -0.536062 416 0.387122 -0.330735 1.660180 500 -0.654143 -0.263140 -1.006686 184 1.036026 0.074838 0.562059 253 -0.986140 -0.195544 -1.163560 525 2.213108 -0.871500 -0.849811 577 -0.654143 -0.533522 0.562059 665 0.190942 -1.277074 0.091435 526 0.040034 -0.263140 -1.320435 619 0.387122 -0.330735 1.660180 605 -0.654143 1.426749 0.248310 173 -0.654143 1.426749 0.248310 151 1.624567 -0.939096 -1.320435 87 1.036026 0.074838 0.562059 343 -1.574681 -1.141882 2.130803 276 0.130578 0.345220 0.405184 167 -1.016322 -1.209478 -0.379188 117 0.040034 -0.263140 -1.320435 366 -1.574681 -1.141882 2.130803 204 1.005844 0.412816 -0.536062 214 1.624567 -0.939096 -1.320435 53 -1.574681 -1.344669 0.091435 .. ... ... ... 
321 1.036026 0.074838 0.562059 257 1.171843 -0.263140 1.032682 672 -1.574681 -1.141882 2.130803 509 0.356940 -0.330735 0.718933 493 1.036026 0.074838 0.562059 626 0.040034 -0.263140 -1.320435 693 0.190942 -0.939096 -0.692937 441 -1.574681 -1.344669 0.091435 467 -1.574681 -1.344669 0.091435 10 0.568211 1.359154 -0.065439 194 0.356940 -0.330735 0.718933 681 0.040034 -1.006691 0.718933 678 0.190942 -0.668713 1.032682 110 -1.574681 -1.344669 0.091435 223 1.036026 0.074838 0.562059 455 -0.654143 1.426749 0.248310 142 0.568211 1.359154 -0.065439 674 0.190942 -1.277074 0.091435 314 -0.654143 -0.263140 -1.006686 40 -0.578689 0.818389 -1.477309 23 0.568211 1.359154 -0.065439 92 0.040034 -0.263140 -1.320435 682 1.171843 -0.263140 1.032682 22 2.092381 1.494345 -1.320435 68 -1.574681 -1.344669 0.091435 20 1.624567 -0.939096 -1.320435 538 0.190942 -0.939096 -0.692937 120 0.040034 -0.263140 -1.320435 176 -0.654143 -0.263140 -1.006686 466 0.568211 1.359154 -0.065439 Daily Work Load Average Body Mass Index Education Children 270 0.560476 1.002633 0 -0.919030 555 0.218718 1.002633 0 -0.919030 106 -0.262439 -0.643782 0 -0.019280 388 -0.499679 1.002633 0 -0.919030 1 -0.806331 1.002633 0 -0.019280 415 -0.809957 -0.408580 0 1.780219 670 -0.637953 -0.408580 0 0.880469 611 -0.188851 -0.643782 0 -0.019280 141 0.769711 -0.408580 0 0.880469 416 -0.809957 1.237836 0 0.880469 500 -0.251187 -1.819793 1 -0.919030 184 1.366488 -0.408580 0 -0.019280 253 -0.154696 -1.114186 0 -0.919030 525 0.326336 -0.408580 0 1.780219 577 1.043433 -1.114186 1 0.880469 665 -0.637953 0.532229 1 -0.019280 526 0.326336 -0.643782 0 -0.019280 619 -1.240355 1.237836 0 0.880469 605 -0.188851 1.002633 0 -0.919030 173 1.786584 1.002633 0 -0.919030 151 0.769711 -0.408580 1 -0.919030 87 0.863727 -0.408580 0 -0.019280 343 -0.879469 1.002633 0 -0.019280 276 0.560476 1.943442 0 0.880469 167 1.786584 -0.408580 0 0.880469 117 0.919937 -0.643782 0 -0.019280 366 1.456728 1.002633 0 -0.019280 204 2.677510 0.767431 0 0.880469 214 
2.677510 -0.408580 1 -0.919030 53 -0.758273 0.297027 0 -0.919030 .. ... ... ... ... 321 0.305783 -0.408580 0 -0.019280 257 -0.154696 -0.408580 0 0.880469 672 -0.637953 1.002633 0 -0.019280 509 0.326336 -0.878984 0 -0.919030 493 -0.550213 -0.408580 0 -0.019280 626 -1.240355 -0.643782 0 -0.019280 693 -0.853789 -0.408580 1 -0.919030 441 -0.446195 0.297027 0 -0.919030 467 -1.037971 0.297027 0 -0.919030 10 -0.806331 -0.878984 0 2.679969 194 1.366488 -0.878984 0 -0.919030 681 -0.853789 0.297027 1 0.880469 678 -0.853789 2.649049 0 -0.019280 110 -0.262439 0.297027 0 -0.919030 223 2.644155 -0.408580 0 -0.019280 455 -0.446195 1.002633 0 -0.919030 142 0.769711 -0.878984 0 2.679969 674 -0.853789 0.532229 1 -0.019280 314 -0.169648 -1.819793 1 -0.919030 40 -0.758273 -1.349389 0 -0.919030 23 -1.647399 -0.878984 0 2.679969 92 0.863727 -0.643782 0 -0.019280 682 -0.853789 -0.408580 0 0.880469 22 -1.647399 0.061825 0 -0.019280 68 -0.458497 0.297027 0 -0.919030 20 -1.647399 -0.408580 1 -0.919030 538 -0.082083 -0.408580 1 -0.919030 120 0.919937 -0.643782 0 -0.019280 176 1.786584 -1.819793 1 -0.919030 466 -1.037971 -0.878984 0 2.679969 [525 rows x 13 columns], Reason_1 Reason_2 Reason_3 Reason_4 Month Value Day of the Week \ 636 0 0 0 1 -1.166834 -1.368999 574 1 0 0 0 0.330204 -0.232900 226 0 0 0 1 -0.268611 -1.368999 621 0 0 0 1 -0.268611 2.039298 50 0 0 0 0 0.629611 -1.368999 306 0 0 0 1 0.929019 0.335149 675 0 0 1 0 0.629611 -0.232900 285 0 0 0 0 0.629611 0.335149 426 0 0 0 1 -1.166834 2.039298 292 0 0 0 1 -0.268611 0.903199 399 0 0 0 1 -1.166834 -0.232900 60 0 0 0 1 0.629611 0.903199 518 0 0 0 1 0.929019 -0.800950 694 0 0 0 1 -0.568019 -0.232900 400 0 0 0 0 -1.166834 -0.232900 284 0 0 0 1 0.629611 0.335149 476 0 0 0 1 0.030796 -1.368999 36 0 0 0 1 -0.867426 -0.232900 503 0 0 0 1 0.629611 -0.232900 697 1 0 0 0 -0.568019 0.335149 614 0 0 0 1 -1.466241 -0.800950 635 1 0 0 0 -1.166834 -1.368999 405 0 0 0 0 -1.166834 0.335149 46 0 0 0 1 0.629611 -1.368999 353 1 0 0 0 1.527833 -0.800950 
188 0 0 0 1 -0.867426 0.335149 648 1 0 0 0 -1.166834 -0.232900 55 0 0 0 0 0.629611 -0.800950 172 1 0 0 0 -1.166834 -1.368999 581 0 0 0 1 -1.765648 -0.232900 .. ... ... ... ... ... ... 679 1 0 0 0 0.929019 0.903199 34 0 0 0 1 0.330204 -1.368999 558 1 0 0 0 0.330204 1.471248 339 1 0 0 0 -0.568019 0.335149 189 0 0 0 1 -0.867426 0.903199 180 1 0 0 0 -1.166834 0.335149 472 0 0 0 1 0.030796 -0.800950 78 0 0 0 1 0.929019 0.903199 690 0 0 0 0 -0.568019 -0.232900 404 1 0 0 0 -1.166834 -0.232900 661 0 1 0 0 0.929019 0.335149 218 1 0 0 0 1.228426 1.471248 662 0 0 0 1 0.929019 0.335149 453 0 0 0 1 -0.268611 -0.232900 174 0 0 0 1 -1.166834 -0.232900 475 0 0 0 1 0.030796 -1.368999 287 1 0 0 0 0.629611 -1.368999 657 0 0 1 0 -0.867426 -0.232900 643 0 0 0 1 -1.166834 0.335149 185 0 0 0 1 -0.867426 -0.232900 385 0 0 0 1 -1.466241 -0.232900 208 0 0 1 0 -1.166834 1.471248 531 1 0 0 0 0.929019 -0.800950 434 0 0 1 0 -0.568019 -0.232900 638 0 0 0 1 -1.166834 -0.800950 66 0 0 0 1 0.929019 0.903199 495 0 0 0 1 -0.568019 -0.800950 637 1 0 0 0 -1.166834 -1.368999 465 0 0 0 1 0.030796 0.903199 418 0 0 0 1 1.527833 -1.368999 Transportation Expense Distance to Work Age \ 636 -0.654143 1.426749 0.248310 574 -0.654143 -0.263140 -1.006686 226 -1.016322 -1.209478 -0.379188 621 0.387122 -0.330735 1.660180 50 0.568211 1.359154 -0.065439 306 -1.574681 -1.141882 2.130803 675 0.040034 -1.006691 0.718933 285 0.190942 -0.668713 1.032682 426 0.387122 -0.330735 1.660180 292 1.005844 0.412816 -0.536062 399 2.092381 1.494345 -1.320435 60 -0.654143 1.426749 0.248310 518 0.040034 -0.263140 -1.320435 694 1.036026 0.074838 0.562059 400 2.213108 -0.871500 -0.849811 284 -1.574681 -1.141882 2.130803 476 0.190942 -0.668713 1.032682 36 1.005844 0.412816 -0.536062 503 0.040034 -0.263140 -1.320435 697 1.624567 -0.939096 -1.320435 614 0.040034 -0.263140 -1.320435 635 0.387122 -0.330735 1.660180 405 0.190942 -1.277074 0.091435 46 -0.654143 1.426749 0.248310 353 0.190942 -1.277074 0.091435 188 -1.016322 -1.209478 -0.379188 
648 -1.016322 -1.209478 -0.379188 55 -1.574681 -1.141882 2.130803 172 0.854936 -1.682647 0.405184 581 -0.654143 1.426749 0.248310 .. ... ... ... 679 -0.654143 -0.263140 -1.006686 34 -0.654143 1.426749 0.248310 558 -0.654143 -0.263140 -1.006686 339 0.040034 -0.263140 -1.320435 189 0.040034 -0.263140 -1.320435 180 0.854936 -1.682647 0.405184 472 -0.654143 1.426749 0.248310 78 2.092381 1.494345 -1.320435 690 2.348925 1.291558 -0.065439 404 -1.574681 -1.141882 2.130803 661 -0.654143 -0.533522 0.562059 218 -1.574681 -1.141882 2.130803 662 -1.574681 -1.141882 2.130803 453 0.040034 -0.263140 -1.320435 174 0.040034 -0.263140 -1.320435 475 1.005844 0.412816 -0.536062 287 1.036026 0.074838 0.562059 657 0.387122 -0.330735 1.660180 643 -0.654143 1.426749 0.248310 185 0.040034 -0.263140 -1.320435 385 -0.654143 1.426749 0.248310 208 0.040034 -0.263140 -1.320435 531 0.040034 -0.263140 -1.320435 434 0.085306 -1.074287 3.385799 638 -0.654143 1.426749 0.248310 66 -0.654143 1.426749 0.248310 495 0.356940 -0.330735 0.718933 637 0.040034 -0.263140 -1.320435 465 -1.574681 -1.344669 0.091435 418 -0.654143 1.426749 0.248310 Daily Work Load Average Body Mass Index Education Children 636 -1.240355 1.002633 0 -0.919030 574 1.043433 -1.819793 1 -0.919030 226 2.644155 -0.408580 0 0.880469 621 -1.240355 1.237836 0 0.880469 50 -0.758273 -0.878984 0 2.679969 306 -0.169648 1.002633 0 -0.019280 675 -0.853789 0.297027 1 0.880469 285 0.560476 2.649049 0 -0.019280 426 -0.643304 1.237836 0 0.880469 292 -0.169648 0.767431 0 0.880469 399 -0.685486 0.061825 0 -0.019280 60 -0.758273 1.002633 0 -0.919030 518 0.326336 -0.643782 0 -0.019280 694 -0.853789 -0.408580 0 -0.019280 400 -0.685486 -0.408580 0 1.780219 284 0.560476 1.002633 0 -0.019280 476 -1.037971 2.649049 0 -0.019280 36 -1.647399 0.767431 0 0.880469 503 -0.251187 -0.643782 0 -0.019280 697 -0.853789 -0.408580 1 -0.919030 614 -0.188851 -0.643782 0 -0.019280 635 -1.240355 1.237836 0 0.880469 405 -0.685486 0.532229 1 -0.019280 46 -0.758273 1.002633 0 
-0.919030 353 -0.879469 0.532229 1 -0.019280 188 1.366488 -0.408580 0 0.880469 648 -1.240355 -0.408580 0 0.880469 55 -0.758273 1.002633 0 -0.019280 172 1.786584 -0.643782 0 0.880469 581 1.043433 1.002633 0 -0.919030 .. ... ... ... ... 679 -0.853789 -1.819793 1 -0.919030 34 -1.647399 1.002633 0 -0.919030 558 0.218718 -1.819793 1 -0.919030 339 -0.879469 -0.643782 0 -0.019280 189 1.366488 -0.643782 0 -0.019280 180 1.786584 -0.643782 0 0.880469 472 -1.037971 1.002633 0 -0.919030 78 -0.458497 0.061825 0 -0.019280 690 -0.853789 -1.349389 0 0.880469 404 -0.685486 1.002633 0 -0.019280 661 -0.637953 -1.114186 1 0.880469 218 2.677510 1.002633 0 -0.019280 662 -0.637953 1.002633 0 -0.019280 453 -0.446195 -0.643782 0 -0.019280 174 1.786584 -0.643782 0 -0.019280 475 -1.037971 0.767431 0 0.880469 287 0.560476 -0.408580 0 -0.019280 657 -0.637953 1.237836 0 0.880469 643 -1.240355 1.002633 0 -0.919030 185 1.366488 -0.643782 0 -0.019280 385 -0.499679 1.002633 0 -0.919030 208 2.677510 -0.643782 0 -0.019280 531 0.326336 -0.643782 0 -0.019280 434 -0.643304 -1.114186 0 0.880469 638 -1.240355 1.002633 0 -0.919030 66 -0.458497 1.002633 0 -0.919030 495 -0.251187 -0.878984 0 -0.919030 637 -1.240355 -0.643782 0 -0.019280 465 -1.037971 0.297027 0 -0.919030 418 -0.809957 1.002633 0 -0.919030 [175 rows x 13 columns], array([1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 
0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1]), array([0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1])]
# 80/20 split; a fixed random_state makes the shuffle reproducible.
# test_size is stated explicitly (it must complement train_size) so the
# call no longer relies on the deprecated implicit-complement behavior
# that triggers sklearn's FutureWarning.
X_train, X_test, y_train, y_test = train_test_split(scaled_inputs, targets,
                                                    train_size = 0.8,
                                                    test_size = 0.2,
                                                    random_state = 20)
C:\Users\User\Anaconda3\lib\site-packages\sklearn\model_selection\_split.py:2179: FutureWarning: From version 0.21, test_size will always complement train_size unless both are specified. FutureWarning)
print (X_train.shape, y_train.shape)  # expect 560 rows: 80% of 700
(560, 13) (560,)
print (X_test.shape, y_test.shape)  # expect 140 rows: the remaining 20%
(140, 13) (140,)
#Logistic regression with sklearn
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
# 'liblinear' was the default solver at the time this was run; naming it
# explicitly keeps the fitted coefficients identical while silencing the
# FutureWarning about the default changing to 'lbfgs' in sklearn 0.22.
reg = LogisticRegression(solver='liblinear')
reg.fit(X_train, y_train)
C:\Users\User\Anaconda3\lib\site-packages\sklearn\linear_model\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning. FutureWarning)
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True, intercept_scaling=1, max_iter=100, multi_class='warn', n_jobs=None, penalty='l2', random_state=None, solver='warn', tol=0.0001, verbose=0, warm_start=False)
reg.score(X_train, y_train)  # mean training accuracy (re-derived manually below)
0.775
#Manual Check
# Recompute the training accuracy by hand: predict on the training set,
# then compare element-wise against the true targets in the cells below.
model_outputs = reg.predict(X_train)
model_outputs
array([0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0])
y_train  # the true training labels, shown for side-by-side comparison
array([0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0])
model_outputs == y_train  # element-wise mask: True where the prediction is correct
array([ True, True, False, True, True, True, True, True, True, True, False, True, False, False, True, True, True, True, False, True, False, True, False, False, True, True, True, False, True, True, True, True, True, True, True, True, False, False, True, False, True, False, True, True, True, True, True, True, True, True, False, True, True, True, True, True, True, True, True, False, True, True, True, True, True, True, True, False, True, False, True, True, True, True, True, False, True, True, True, True, True, False, True, False, True, True, False, False, False, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, False, True, True, True, True, True, True, True, True, True, False, True, True, True, True, True, True, True, True, False, True, True, True, True, False, True, True, True, True, True, False, False, True, False, True, False, True, True, True, True, False, False, False, True, True, False, False, False, True, True, True, False, True, False, True, False, True, False, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, False, True, True, True, True, False, True, True, True, True, True, True, True, True, True, True, True, True, False, False, True, False, False, True, True, True, True, True, True, True, False, True, False, True, False, True, True, True, True, False, True, False, False, True, True, True, True, True, False, False, False, True, False, True, True, True, True, True, True, True, True, True, True, True, False, True, True, True, True, True, True, True, True, True, True, True, True, False, False, True, True, True, True, True, True, False, True, True, True, True, True, True, False, False, False, True, True, True, True, False, True, False, True, True, True, True, True, True, True, False, True, False, False, True, True, True, True, True, False, True, True, True, True, False, False, True, False, True, True, True, True, True, False, True, 
True, False, True, True, False, False, False, True, True, True, True, False, True, False, True, True, True, False, False, True, True, True, False, True, False, True, True, True, False, True, True, True, True, True, True, True, True, True, True, True, True, False, True, True, False, True, False, True, True, True, True, True, True, True, True, True, True, False, True, True, True, True, False, True, True, True, False, True, True, True, True, True, True, True, True, False, True, True, True, True, True, True, False, True, True, True, True, True, True, False, True, True, True, True, True, True, True, True, False, True, False, True, True, True, True, True, True, False, True, True, False, True, False, True, True, True, True, True, False, False, True, True, True, True, True, True, True, True, True, False, True, False, True, True, True, False, False, True, True, True, True, False, True, True, True, True, True, True, True, True, True, False, True, True, False, False, True, True, False, True, True, True, True, True, True, False, True, True, True, False, True, True, True, True, True, True, True, True, True, True, False, True, True, True, True, True, False, True, True, False, False, True, True, True, False, True, True, True, True, True, False, True, True, False, False, False, True, True, False, True, True, True, False, True, True, True, True, True, True, True, True, True, True, True, False, True, True, True, True, True, True, True, False, True, True, True, True, False, True, True, True])
np.sum(model_outputs == y_train)  # number of correct predictions (True counts as 1)
434
model_outputs.shape[0]  # total number of training observations
560
np.sum(model_outputs == y_train) / model_outputs.shape[0]  # accuracy; matches reg.score (0.775)
0.775
#Finding intercept and coefficients
reg.intercept_  # the model's bias term (1-element array)
array([-1.44858081])
reg.coef_  # 2-D (1 x n_features): one weight per standardized input column
array([[ 2.63259611, 0.86935993, 2.81289745, 0.65092627, 0.00723232, -0.07199168, 0.49781562, -0.03874266, -0.12321956, -0.02108164, 0.26813826, -0.23006247, 0.37360592]])
unscaled_inputs.columns.values  # feature names, used below to label the coefficients
array(['Reason_1', 'Reason_2', 'Reason_3', 'Reason_4', 'Month Value', 'Day of the Week', 'Transportation Expense', 'Distance to Work', 'Age', 'Daily Work Load Average', 'Body Mass Index', 'Education', 'Children'], dtype=object)
feature_name = unscaled_inputs.columns.values
# Build a summary table pairing each feature name with its learned coefficient.
summary_table = pd.DataFrame (columns=['Feature name'], data = feature_name)
# reg.coef_ is shaped (1, n_features); transpose it into a single column.
summary_table['Coefficient'] = np.transpose(reg.coef_)
summary_table
Feature name | Coefficient | |
---|---|---|
0 | Reason_1 | 2.632596 |
1 | Reason_2 | 0.869360 |
2 | Reason_3 | 2.812897 |
3 | Reason_4 | 0.650926 |
4 | Month Value | 0.007232 |
5 | Day of the Week | -0.071992 |
6 | Transportation Expense | 0.497816 |
7 | Distance to Work | -0.038743 |
8 | Age | -0.123220 |
9 | Daily Work Load Average | -0.021082 |
10 | Body Mass Index | 0.268138 |
11 | Education | -0.230062 |
12 | Children | 0.373606 |
# Shift all feature rows down by one so index 0 is free for the intercept row.
summary_table.index = summary_table.index + 1
summary_table.loc[0] = ['Intercept', reg.intercept_[0]]
# Bug fix: the sorted frame was previously assigned to a misspelled name
# ('summay_table'), so the re-ordering was silently discarded and the
# intercept row stayed at the bottom of the display. Assign it back so
# the table is actually sorted with the intercept first.
summary_table = summary_table.sort_index()
summary_table
Feature name | Coefficient | |
---|---|---|
1 | Reason_1 | 2.632596 |
2 | Reason_2 | 0.869360 |
3 | Reason_3 | 2.812897 |
4 | Reason_4 | 0.650926 |
5 | Month Value | 0.007232 |
6 | Day of the Week | -0.071992 |
7 | Transportation Expense | 0.497816 |
8 | Distance to Work | -0.038743 |
9 | Age | -0.123220 |
10 | Daily Work Load Average | -0.021082 |
11 | Body Mass Index | 0.268138 |
12 | Education | -0.230062 |
13 | Children | 0.373606 |
0 | Intercept | -1.448581 |
# Exponentiate each coefficient to obtain its odds ratio: a one-unit increase
# in the (standardized) feature multiplies the odds of the positive class by
# this factor; values near 1 indicate little effect.
summary_table['Odd ratio'] = np.exp(summary_table.Coefficient)
summary_table
Feature name | Coefficient | Odd ratio | |
---|---|---|---|
1 | Reason_1 | 2.632596 | 13.909835 |
2 | Reason_2 | 0.869360 | 2.385384 |
3 | Reason_3 | 2.812897 | 16.658114 |
4 | Reason_4 | 0.650926 | 1.917316 |
5 | Month Value | 0.007232 | 1.007259 |
6 | Day of the Week | -0.071992 | 0.930539 |
7 | Transportation Expense | 0.497816 | 1.645124 |
8 | Distance to Work | -0.038743 | 0.961998 |
9 | Age | -0.123220 | 0.884070 |
10 | Daily Work Load Average | -0.021082 | 0.979139 |
11 | Body Mass Index | 0.268138 | 1.307528 |
12 | Education | -0.230062 | 0.794484 |
13 | Children | 0.373606 | 1.452964 |
0 | Intercept | -1.448581 | 0.234903 |
# Rank the features by odds ratio, most influential (largest ratio) first.
summary_table.sort_values('Odd ratio', ascending=False)
Feature name | Coefficient | Odd ratio | |
---|---|---|---|
3 | Reason_3 | 2.812897 | 16.658114 |
1 | Reason_1 | 2.632596 | 13.909835 |
2 | Reason_2 | 0.869360 | 2.385384 |
4 | Reason_4 | 0.650926 | 1.917316 |
7 | Transportation Expense | 0.497816 | 1.645124 |
13 | Children | 0.373606 | 1.452964 |
11 | Body Mass Index | 0.268138 | 1.307528 |
5 | Month Value | 0.007232 | 1.007259 |
10 | Daily Work Load Average | -0.021082 | 0.979139 |
8 | Distance to Work | -0.038743 | 0.961998 |
6 | Day of the Week | -0.071992 | 0.930539 |
9 | Age | -0.123220 | 0.884070 |
12 | Education | -0.230062 | 0.794484 |
0 | Intercept | -1.448581 | 0.234903 |
#Test the model
reg.score(X_test, y_test)  # accuracy on the held-out 20% test set
0.7357142857142858
# Probability estimates for each test observation: column 0 is P(class 0),
# column 1 is P(class 1). Fix: the assignment and the display expression
# were fused onto a single line (invalid Python, presumably a
# notebook-export artifact); they are split into two statements here.
predicted_proba = reg.predict_proba(X_test)
predicted_proba
predicted_proba.shape  # (n_test_samples, n_classes) = (140, 2)
(140, 2)
predicted_proba[:,1]  # probability of class 1 (excessive absenteeism) for each test row
array([0.22123841, 0.36746562, 0.50004073, 0.2089951 , 0.90917697, 0.69301566, 0.77107214, 0.8859558 , 0.29549433, 0.22275347, 0.60483662, 0.73858908, 0.90619582, 0.34200406, 0.74793341, 0.40182971, 0.5876329 , 0.55788212, 0.68102628, 0.92943054, 0.31492142, 0.21583614, 0.56698637, 0.54895219, 0.79581917, 0.29599005, 0.44955041, 0.11650185, 0.71867643, 0.22283802, 0.39170243, 0.77004683, 0.66038918, 0.60501965, 0.21583614, 0.55420036, 0.30950994, 0.73126219, 0.4762001 , 0.53951185, 0.23294667, 0.43933879, 0.29219299, 0.38449721, 0.83762964, 0.61556479, 0.73604752, 0.23015421, 0.23219654, 0.20045226, 0.49883448, 0.25661573, 0.67030132, 0.22865307, 0.83616221, 0.40858324, 0.93437215, 0.28932031, 0.31364181, 0.30863997, 0.69642044, 0.64430772, 0.28256538, 0.80449431, 0.3206113 , 0.2233284 , 0.07984234, 0.2970576 , 0.70680172, 0.34453101, 0.30245977, 0.27141705, 0.87740585, 0.4721832 , 0.58892078, 0.22283802, 0.73379547, 0.76353833, 0.69165819, 0.7104776 , 0.31898675, 0.10771716, 0.27265444, 0.75448359, 0.45855206, 0.11668418, 0.67523916, 0.54083882, 0.24313306, 0.76272556, 0.19101949, 0.12667202, 0.23921305, 0.21523928, 0.20891439, 0.86219705, 0.21096595, 0.74876632, 0.2087725 , 0.21414747, 0.57956994, 0.77720819, 0.66970792, 0.63896825, 0.46203675, 0.46309751, 0.20777203, 0.84696265, 0.70341205, 0.15023138, 0.10739748, 0.90589125, 0.61112564, 0.39813828, 0.54901137, 0.56430009, 0.73877163, 0.85001406, 0.60503429, 0.33763649, 0.21222172, 0.12312327, 0.74375883, 0.49875672, 0.23153799, 0.31677329, 0.21174627, 0.14713078, 0.66145911, 0.32819144, 0.57901301, 0.21977403, 0.21528198, 0.32006295, 0.27893506, 0.55763394, 0.55818079, 0.32170334, 0.22976194, 0.49689852])