Business Problem¶

Churn is one of the largest problems faced by most businesses. It costs between 5 times and 25 times as much to find a new customer than to retain an existing one. Therefore,anticipating when a customer churn is an important business objective. For the purposes of this report,

Telco dataset from IBM was utilised (Telecom Churn Dataset -https://www.ibm.com/communities/analytics/watson-analytics-blog/guide-to-sample-datasets/)
Report Objective: Build a model to predict whether a customer will churn

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
import numpy as np
from sklearn.svm import SVC          
import plotly.graph_objs as go
import plotly.offline as py
py.init_notebook_mode(connected=True)

%matplotlib inline

1. Data Overview¶

#Data Collection
df = pd.read_csv (r'C:\Users\User\Documents\Data science\Telco-Customer-Churn.csv') 
# df = pd.read_csv('Telco-Customer-Churn.csv') 
df.head()

#Data Overview
print("Rows:",df.shape[0])
print ("Columns:",df.shape[1])
print ("\nFeatures:\n",df.columns.tolist())
print ("\nMissingvalues:",df.isnull().sum().values.sum())
print ("\nUnique values:\n",df.nunique())
print (df['Churn'].value_counts())

Rows: 7043
Columns: 21

Features:
 ['customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure', 'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod', 'MonthlyCharges', 'TotalCharges', 'Churn']

Missingvalues: 0

Unique values:
 customerID          7043
gender                 2
SeniorCitizen          2
Partner                2
Dependents             2
tenure                73
PhoneService           2
MultipleLines          3
InternetService        3
OnlineSecurity         3
OnlineBackup           3
DeviceProtection       3
TechSupport            3
StreamingTV            3
StreamingMovies        3
Contract               3
PaperlessBilling       2
PaymentMethod          4
MonthlyCharges      1585
TotalCharges        6531
Churn                  2
dtype: int64
No     5174
Yes    1869
Name: Churn, dtype: int64

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 21 columns):
customerID          7043 non-null object
gender              7043 non-null object
SeniorCitizen       7043 non-null int64
Partner             7043 non-null object
Dependents          7043 non-null object
tenure              7043 non-null int64
PhoneService        7043 non-null object
MultipleLines       7043 non-null object
InternetService     7043 non-null object
OnlineSecurity      7043 non-null object
OnlineBackup        7043 non-null object
DeviceProtection    7043 non-null object
TechSupport         7043 non-null object
StreamingTV         7043 non-null object
StreamingMovies     7043 non-null object
Contract            7043 non-null object
PaperlessBilling    7043 non-null object
PaymentMethod       7043 non-null object
MonthlyCharges      7043 non-null float64
TotalCharges        7043 non-null object
Churn               7043 non-null object
dtypes: float64(1), int64(2), object(18)
memory usage: 1.1+ MB

df.TotalCharges = pd.to_numeric(df.TotalCharges, errors="coerce")

2. Overview:¶

plt.style.use(['seaborn-dark','seaborn-talk'])

fig, ax = plt.subplots(1,2,figsize=(16,6))

df['Churn'].value_counts().plot.pie(explode=[0,0.08], ax=ax[0], autopct='%1.2f%%', shadow=True, 
                                    fontsize=14, startangle=30, colors=["#008000", "#a6814c"])
ax[0].set_title('Total Churn Percentage')

sns.countplot('Churn', data=df, ax=ax[1], palette=["#008000", "#a6814c"])
ax[1].set_title('Total Number of Churn Customers')
ax[1].set_ylabel(' ')

plt.show()

The data shows that 26.54% of the company's customers have decided to terminate the telco's service and left.

2.1 Exploratory Data Analysis¶

Id_col     = ['customerID']
target_col = ["Churn"]
cat_cols   = df.nunique()[df.nunique() < 6].keys().tolist()
cat_cols   = [x for x in cat_cols if x not in target_col]
num_cols   = [x for x in df.columns if x not in cat_cols + target_col + Id_col]

#Separating churn and non churn customers
churn     = df[df["Churn"] == "Yes"]
not_churn = df[df["Churn"] == "No"]

def plot_pie(column) :
    
    trace1 = go.Pie(values  = churn[column].value_counts().values.tolist(),
                    labels  = churn[column].value_counts().keys().tolist(),
                    hoverinfo = "label+percent+name",
                    domain  = dict(x = [0,.48]),
                    name    = "Churn Customers",
                    marker  = dict(line = dict(width = 2,
                                               color = "rgb(33, 75, 99)")
                                  ),
                    hole    = .6
                   )
    trace2 = go.Pie(values  = not_churn[column].value_counts().values.tolist(),
                    labels  = not_churn[column].value_counts().keys().tolist(),
                    hoverinfo = "label+percent+name",
                    marker  = dict(line = dict(width = 2,
                                               color = "rgb(33, 75, 99)")
                                  ),
                    domain  = dict(x = [.52,1]),
                    hole    = .6,
                    name    = "Non Churn Customers" 
                   )


    layout = go.Layout(dict(title = column + " distribution in customer attrition ",
                            plot_bgcolor  = "rgb(243,243,243)",
                            paper_bgcolor = "rgb(243,243,243)",
                            annotations = [dict(text = "churn customers",
                                                font = dict(size = 13),
                                                showarrow = False,
                                                x = .15, y = .5),
                                           dict(text = "Non Churn Customers",
                                                font = dict(size = 13),
                                                showarrow = False,
                                                x = .88,y = .5
                                               )
                                          ]
                           )
                      )
    data = [trace1,trace2]
    fig  = go.Figure(data = data,layout = layout)
    py.iplot(fig)
#function  for histogram for customer attrition types
def histogram(column) :
    trace1 = go.Histogram(x  = churn[column],
                          histnorm= "percent",
                          name = "Churn Customers",
                          marker = dict(line = dict(width = .5,
                                                    color = "black"
                                                    )
                                        ),
                         opacity = .9 
                         ) 

    trace2 = go.Histogram(x  = not_churn[column],
                          histnorm = "percent",
                          name = "Non Churn customers",
                          marker = dict(line = dict(width = .5,
                                              color = "black"
                                             )
                                 ),
                          opacity = .9
                         )
    
    data = [trace1,trace2]
    layout = go.Layout(dict(title =column + " distribution in customer attrition ",
                            plot_bgcolor  = "rgb(243,243,243)",
                            paper_bgcolor = "rgb(243,243,243)",
                            xaxis = dict(gridcolor = 'rgb(255, 255, 255)',
                                             title = column,
                                             zerolinewidth=1,
                                             ticklen=5,
                                             gridwidth=2
                                            ),
                            yaxis = dict(gridcolor = 'rgb(255, 255, 255)',
                                             title = "percent",
                                             zerolinewidth=1,
                                             ticklen=5,
                                             gridwidth=2
                                            ),
                           )
                      )
    fig  = go.Figure(data=data,layout=layout)
    
    py.iplot(fig)
    
#function  for scatter plot matrix  for numerical columns in data
def scatter_matrix(df)  :
    
    df  = df.sort_values(by = "Churn" ,ascending = True)
    classes = df["Churn"].unique().tolist()
    classes
    
    class_code  = {classes[k] : k for k in range(2)}
    class_code

    color_vals = [class_code[cl] for cl in df["Churn"]]
    color_vals

    pl_colorscale = "Viridis"

    pl_colorscale

    text = [df.loc[k,"Churn"] for k in range(len(df))]
    text

    trace = go.Splom(dimensions = [dict(label  = "tenure",
                                       values = df["tenure"]),
                                  dict(label  = 'MonthlyCharges',
                                       values = df['MonthlyCharges']),
                                  dict(label  = 'TotalCharges',
                                       values = df['TotalCharges'])],
                     text = text,
                     marker = dict(color = color_vals,
                                   colorscale = pl_colorscale,
                                   size = 3,
                                   showscale = False,
                                   line = dict(width = .1,
                                               color='rgb(230,230,230)'
                                              )
                                  )
                    )
    axis = dict(showline  = True,
                zeroline  = False,
                gridcolor = "#fff",
                ticklen   = 4
               )
    
    layout = go.Layout(dict(title  = 
                            "Scatter plot matrix for Numerical columns for customer attrition",
                            autosize = False,
                            height = 800,
                            width  = 800,
                            dragmode = "select",
                            hovermode = "closest",
                            plot_bgcolor  = 'rgba(240,240,240, 0.95)',
                            xaxis1 = dict(axis),
                            yaxis1 = dict(axis),
                            xaxis2 = dict(axis),
                            yaxis2 = dict(axis),
                            xaxis3 = dict(axis),
                            yaxis3 = dict(axis),
                           )
                      )
    data   = [trace]
    fig = go.Figure(data = data,layout = layout )
    py.iplot(fig)

#all categorical columns plot pie
for i in cat_cols :
    plot_pie(i)
    
#for all categorical columns plot pie
for i in cat_cols: 
    plot_pie(i)

#for all categorical columns plot histogram    
for i in num_cols :
    histogram(i)

#scatter plot matrix
scatter_matrix(df)

Preliminary Finding¶

Churned Customer Profile¶

For better understanding we wanted to compare these important variables from costumers who churned and costumers who did not churned. Customer that churned are those who

Are majority pre-paid customers, as 88.6% of customer holds month-to-month renewal contracts
Have opted for paperless Billing (74.9%)
uses electronic checks (57.3%) and mailed checks (16.5%) as payment mode, which suggest that these customers tend to have cash-flow problem.
owns multiple lines
preferred fiber optic internet services,streaming TV (43.6%) and streaming movie (43.8%) services

2.2 Tenure vs Churn Relationship Analysis¶

import seaborn as sns sns.boxplot (x='Churn', y='tenure', data=df) sns.set_palette("cubehelix",3) plt.show()

Boxplot finding indicates that customers who has stayed with the telco over 10 years and more are unlike to churn versus a new customers

# 1.2. Monthly Charges vs Churn Relationship Analysis 
sns.boxplot (x='Churn',
            y='MonthlyCharges',
            data=df)
sns.set_palette("cubehelix",3)
plt.show()

Boxplot finding also indicates that there is a positive correlation between Monthly Charges and customer churn .

# 1.3. Total Charges vs Churn Relationship Analysis 
sns.boxplot(x='Churn',
            y='TotalCharges',
            data=df)
sns.set_palette("cubehelix",3)
plt.show()

# 1.3. Contract vs Churn Relationship Analysis
sns.set(style="darkgrid")
sns.set_palette("cubehelix",3)
fig, ax = plt.subplots(figsize=(20,10))
ax = sns.countplot(x="Contract", hue="Churn", data=df)

# 1.4. TechSupport vs Churn Relationship Analysis 
sns.set(style="darkgrid")
sns.set_palette("cubehelix",3)
fig, ax = plt.subplots(figsize=(20,10))
ax = sns.countplot(x="TechSupport", hue="Churn", data=df)

#Preprocessing for Churn Modeling 
df['gender'].replace(['Male','Female'],[0,1],inplace=True)
df['Partner'].replace(['Yes','No'],[1,0],inplace=True)
df['Dependents'].replace(['Yes','No'],[1,0],inplace=True)
df['PhoneService'].replace(['Yes','No'],[1,0],inplace=True)
df['MultipleLines'].replace(['No phone service','No', 'Yes'],[0,0,1],inplace=True)
df['InternetService'].replace(['No','DSL','Fiber optic'],[0,1,2],inplace=True)
df['OnlineSecurity'].replace(['No','Yes','No internet service'],[0,1,0],inplace=True)
df['OnlineBackup'].replace(['No','Yes','No internet service'],[0,1,0],inplace=True)
df['DeviceProtection'].replace(['No','Yes','No internet service'],[0,1,0],inplace=True)
df['TechSupport'].replace(['No','Yes','No internet service'],[0,1,0],inplace=True)
df['StreamingTV'].replace(['No','Yes','No internet service'],[0,1,0],inplace=True)
df['StreamingMovies'].replace(['No','Yes','No internet service'],[0,1,0],inplace=True)
df['Contract'].replace(['Month-to-month', 'One year', 'Two year'],[0,1,2],inplace=True)
df['PaperlessBilling'].replace(['Yes','No'],[1,0],inplace=True)
df['PaymentMethod'].replace(['Electronic check', 'Mailed check', 'Bank transfer (automatic)','Credit card (automatic)'],[0,1,2,3],inplace=True)
df['Churn'].replace(['Yes','No'],[1,0],inplace=True)
 
df.pop('customerID')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 20 columns):
gender              7043 non-null int64
SeniorCitizen       7043 non-null int64
Partner             7043 non-null int64
Dependents          7043 non-null int64
tenure              7043 non-null int64
PhoneService        7043 non-null int64
MultipleLines       7043 non-null int64
InternetService     7043 non-null int64
OnlineSecurity      7043 non-null int64
OnlineBackup        7043 non-null int64
DeviceProtection    7043 non-null int64
TechSupport         7043 non-null int64
StreamingTV         7043 non-null int64
StreamingMovies     7043 non-null int64
Contract            7043 non-null int64
PaperlessBilling    7043 non-null int64
PaymentMethod       7043 non-null int64
MonthlyCharges      7043 non-null float64
TotalCharges        7032 non-null float64
Churn               7043 non-null int64
dtypes: float64(2), int64(18)
memory usage: 1.1 MB

#Future Scaling
sns.set(font_scale=1)
plot = sns.heatmap(df.corr(), cmap='cubehelix', linewidth=2, square = True)

To decide which features of the data to include in the predictive churn model, we’ll examine the correlation between churn and each customer feature. To avoid unstable estimates of coeffiecients and making it difficult to interprete in our models, we will drop the ‘TotalCharges’ variable, as it is highly correlated to both ‘Tenure’ and ‘MonthlyCharges’.

df.pop('TotalCharges')

0         29.85
1       1889.50
2        108.15
3       1840.75
4        151.65
5        820.50
6       1949.40
7        301.90
8       3046.05
9       3487.95
10       587.45
11       326.80
12      5681.10
13      5036.30
14      2686.05
15      7895.15
16      1022.95
17      7382.25
18       528.35
19      1862.90
20        39.65
21       202.25
22        20.15
23      3505.10
24      2970.30
25      1530.60
26      4749.15
27        30.20
28      6369.45
29      1093.10
         ...   
7013    3756.40
7014    3645.75
7015    2874.45
7016      49.95
7017    1020.75
7018      70.65
7019     826.00
7020     239.00
7021     727.80
7022    7544.30
7023    6479.40
7024    3626.35
7025    1679.40
7026     403.35
7027     931.55
7028    4326.25
7029     263.05
7030      39.25
7031    3316.10
7032      75.75
7033    2625.25
7034    6886.25
7035    1495.10
7036     743.30
7037    1419.40
7038    1990.50
7039    7362.90
7040     346.45
7041     306.60
7042    6844.50
Name: TotalCharges, Length: 7043, dtype: float64

#Future Scaling 
categorical_features = [
"gender",
"SeniorCitizen",
"Partner",
"Dependents",
"PhoneService"
"MultipleLines",
"InternetService",
"OnlineSecurity",
"OnlineBackup",
"DeviceProtection",
"TechSupport",
"StreamingTV",
"StreamingMovies",
"Contract",
"PaperlessBilling",
"PaymentMethod",
]
numerical_features = ["tenure", "MonthlyCharges",]
target = "Churn"

df[numerical_features].describe()

#Feature Engeering
### Examine correlations
df.corr()

3. Predictive Model¶

#Making Predictions: Churn Prediction
from sklearn.model_selection import train_test_split 
train, test = train_test_split(df, test_size = 0.25)

train_y = train['Churn']
test_y = test['Churn']

train_x = train
train_x.pop('Churn')
test_x = test
test_x.pop('Churn')

2081    0
6285    0
53      1
773     1
3992    0
5742    0
2548    0
4244    0
1688    0
4026    0
123     0
2175    1
4674    0
3219    0
316     0
1032    1
2209    0
4839    1
2155    1
3080    0
586     0
5100    1
395     0
2890    1
3883    0
6944    0
3465    0
6149    0
3412    0
3969    0
       ..
4363    0
6102    0
866     0
4821    0
732     0
6618    0
3482    0
1653    0
385     1
636     0
437     0
1934    1
1171    1
6755    0
2471    1
4438    0
4278    0
2941    0
5343    0
5923    0
3162    0
6168    1
4180    1
745     0
1554    0
4499    1
2396    0
1852    1
684     0
6443    0
Name: Churn, Length: 1761, dtype: int64

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report
 
logisticRegr = LogisticRegression()
logisticRegr.fit(X=train_x, y=train_y)
 
test_y_pred = logisticRegr.predict(test_x)
confusion_matrix = confusion_matrix(test_y, test_y_pred)
print('Intercept: ' + str(logisticRegr.intercept_))
print('Regression: ' + str(logisticRegr.coef_))
print('Accuracy of logistic regression classifier on test set: {:.2f}'.format(logisticRegr.score(test_x, test_y)))
print(classification_report(test_y, test_y_pred))
 
confusion_matrix_df = pd.DataFrame(confusion_matrix, ('No churn', 'Churn'), ('No churn', 'Churn'))
heatmap = sns.heatmap(confusion_matrix_df, annot=True, annot_kws={"size": 20}, fmt="d")
heatmap.yaxis.set_ticklabels(heatmap.yaxis.get_ticklabels(), rotation=0, ha='right', fontsize = 14)
heatmap.xaxis.set_ticklabels(heatmap.xaxis.get_ticklabels(), rotation=45, ha='right', fontsize = 14)
plt.ylabel('True label', fontsize = 14)
plt.xlabel('Predicted label', fontsize = 14)

C:\Users\User\Anaconda3\lib\site-packages\sklearn\linear_model\logistic.py:433: FutureWarning:

Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.

Intercept: [-0.76710156]
Regression: [[-0.00605541  0.17216462  0.0654986  -0.08875017 -0.03343347 -0.44114542
   0.40652823  1.11830627 -0.37079382 -0.10505384  0.04858744 -0.29610886
   0.32421128  0.37301858 -0.65864974  0.38870659 -0.10840444 -0.00980091]]
Accuracy of logistic regression classifier on test set: 0.81
              precision    recall  f1-score   support

           0       0.85      0.91      0.88      1284
           1       0.69      0.56      0.62       477

   micro avg       0.81      0.81      0.81      1761
   macro avg       0.77      0.73      0.75      1761
weighted avg       0.81      0.81      0.81      1761

Text(0.5, 35.25000000000001, 'Predicted label')

3.3 Computing Accuracy¶

Classification accuracy from the logistic regression classifie is 81% however the precision and recall for predictions in the positive class (churn) are relatively low, which suggests our data set may be imbalanced.

df['Churn'].value_counts()

0    5174
1    1869
Name: Churn, dtype: int64

from sklearn.utils import resample
 
df_majority = df[df['Churn']==0]
df_minority = df[df['Churn']==1]
 
df_minority_upsampled = resample(df_minority,
replace=True,
n_samples=5174, #same number of samples as majority classe
random_state=1) #set the seed for random resampling
# Combine resampled results
df_upsampled = pd.concat([df_majority, df_minority_upsampled])
 
df_upsampled['Churn'].value_counts()

1    5174
0    5174
Name: Churn, dtype: int64

3.4 Rerun the model¶

train, test = train_test_split(df_upsampled, test_size = 0.3)
 
train_y_upsampled = train['Churn']
test_y_upsampled = test['Churn']
 
train_x_upsampled = train
train_x_upsampled.pop('Churn')
test_x_upsampled = test
test_x_upsampled.pop('Churn')
 
logisticRegr_balanced = LogisticRegression()
logisticRegr_balanced.fit(X=train_x_upsampled, y=train_y_upsampled)
 
test_y_pred_balanced = logisticRegr_balanced.predict(test_x_upsampled)
print('Accuracy of logistic regression classifier on test set: {:.2f}'.format(logisticRegr_balanced.score(test_x_upsampled, test_y_upsampled)))
print(classification_report(test_y_upsampled, test_y_pred_balanced))

Accuracy of logistic regression classifier on test set: 0.76
              precision    recall  f1-score   support

           0       0.79      0.72      0.75      1615
           1       0.72      0.79      0.76      1490

   micro avg       0.76      0.76      0.76      3105
   macro avg       0.76      0.76      0.76      3105
weighted avg       0.76      0.76      0.76      3105

C:\Users\User\Anaconda3\lib\site-packages\sklearn\linear_model\logistic.py:433: FutureWarning:

Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.

The overall accuracy of the model has decreased, but the precision and recall scores for predicting a churn have improved.

!jupyter nbconvert your_notebook_name.ipynb --to html

	tenure	MonthlyCharges
count	7043.000000	7043.000000
mean	32.371149	64.761692
std	24.559481	30.090047
min	0.000000	18.250000
25%	9.000000	35.500000
50%	29.000000	70.350000
75%	55.000000	89.850000
max	72.000000	118.750000

	gender	SeniorCitizen	Partner	Dependents	tenure	PhoneService	MultipleLines	InternetService	OnlineSecurity	OnlineBackup	DeviceProtection	TechSupport	StreamingTV	StreamingMovies	Contract	PaperlessBilling	PaymentMethod	MonthlyCharges	Churn
gender	1.000000	0.001874	0.001808	-0.010517	-0.005106	0.006488	0.008414	0.010380	0.017021	0.013773	0.002105	0.009212	0.008393	0.010487	-0.000126	0.011754	0.005209	0.014569	0.008612
SeniorCitizen	0.001874	1.000000	0.016479	-0.211185	0.016567	0.008576	0.142948	0.259390	-0.038653	0.066572	0.059428	-0.060625	0.105378	0.120176	-0.142554	0.156530	-0.093704	0.220173	0.150889
Partner	0.001808	0.016479	1.000000	0.452676	0.379697	0.017706	0.142057	-0.000132	0.143106	0.141498	0.153786	0.119999	0.124666	0.117412	0.294806	-0.014877	0.133115	0.096848	-0.150448
Dependents	-0.010517	-0.211185	0.452676	1.000000	0.159712	-0.001762	-0.024526	-0.179631	0.080972	0.023671	0.013963	0.063268	-0.016558	-0.039741	0.243187	-0.111377	0.123844	-0.113890	-0.164221
tenure	-0.005106	0.016567	0.379697	0.159712	1.000000	0.008448	0.331941	0.033230	0.327203	0.360277	0.360653	0.324221	0.279756	0.286111	0.671607	0.006152	0.340305	0.247900	-0.352229
PhoneService	0.006488	0.008576	0.017706	-0.001762	0.008448	1.000000	0.279690	0.093720	-0.092893	-0.052312	-0.071227	-0.096340	-0.022574	-0.032959	0.002247	0.016505	-0.004070	0.247398	0.011942
MultipleLines	0.008414	0.142948	0.142057	-0.024526	0.331941	0.279690	1.000000	0.344684	0.098108	0.202237	0.201137	0.100571	0.257152	0.258751	0.107114	0.163530	0.035851	0.490434	0.040102
InternetService	0.010380	0.259390	-0.000132	-0.179631	0.033230	0.093720	0.344684	1.000000	0.156799	0.307420	0.313603	0.164833	0.429707	0.427264	-0.290189	0.378108	-0.178137	0.905491	0.316846
OnlineSecurity	0.017021	-0.038653	0.143106	0.080972	0.327203	-0.092893	0.098108	0.156799	1.000000	0.283832	0.275438	0.354931	0.176207	0.187398	0.245530	-0.003636	0.163367	0.296594	-0.171226
OnlineBackup	0.013773	0.066572	0.141498	0.023671	0.360277	-0.052312	0.202237	0.307420	0.283832	1.000000	0.303546	0.294233	0.282106	0.274501	0.155085	0.126735	0.096550	0.441780	-0.082255
DeviceProtection	0.002105	0.059428	0.153786	0.013963	0.360653	-0.071227	0.201137	0.313603	0.275438	0.303546	1.000000	0.333313	0.390874	0.402111	0.219310	0.103797	0.111241	0.482692	-0.066160
TechSupport	0.009212	-0.060625	0.119999	0.063268	0.324221	-0.096340	0.100571	0.164833	0.354931	0.294233	0.333313	1.000000	0.278070	0.279358	0.293691	0.037880	0.167701	0.338304	-0.164674
StreamingTV	0.008393	0.105378	0.124666	-0.016558	0.279756	-0.022574	0.257152	0.429707	0.176207	0.282106	0.390874	0.278070	1.000000	0.533094	0.103944	0.223841	-0.013826	0.629603	0.063228
StreamingMovies	0.010487	0.120176	0.117412	-0.039741	0.286111	-0.032959	0.258751	0.427264	0.187398	0.274501	0.402111	0.279358	0.533094	1.000000	0.107520	0.211716	-0.004390	0.627429	0.061382
Contract	-0.000126	-0.142554	0.294806	0.243187	0.671607	0.002247	0.107114	-0.290189	0.245530	0.155085	0.219310	0.293691	0.103944	0.107520	1.000000	-0.176733	0.358913	-0.074195	-0.396713
PaperlessBilling	0.011754	0.156530	-0.014877	-0.111377	0.006152	0.016505	0.163530	0.378108	-0.003636	0.126735	0.103797	0.037880	0.223841	0.211716	-0.176733	1.000000	-0.101480	0.352150	0.191825
PaymentMethod	0.005209	-0.093704	0.133115	0.123844	0.340305	-0.004070	0.035851	-0.178137	0.163367	0.096550	0.111241	0.167701	-0.013826	-0.004390	0.358913	-0.101480	1.000000	-0.074353	-0.262818
MonthlyCharges	0.014569	0.220173	0.096848	-0.113890	0.247900	0.247398	0.490434	0.905491	0.296594	0.441780	0.482692	0.338304	0.629603	0.627429	-0.074195	0.352150	-0.074353	1.000000	0.193356
Churn	0.008612	0.150889	-0.150448	-0.164221	-0.352229	0.011942	0.040102	0.316846	-0.171226	-0.082255	-0.066160	-0.164674	0.063228	0.061382	-0.396713	0.191825	-0.262818	0.193356	1.000000

	customerID	gender	Partner	Dependents	tenure	PhoneService	MultipleLines	InternetService	OnlineSecurity	...	DeviceProtection	TechSupport	StreamingTV	StreamingMovies	Contract	PaperlessBilling	PaymentMethod	MonthlyCharges	TotalCharges	Churn
0	7590-VHVEG	Female	Yes	No	1	No	No phone service	DSL	No	...	No	No	No	No	Month-to-month	Yes	Electronic check	29.85	29.85	No
1	5575-GNVDE	Male	No	No	34	Yes	No	DSL	Yes	...	Yes	No	No	No	One year	No	Mailed check	56.95	1889.5	No
2	3668-QPYBK	Male	No	No	2	Yes	No	DSL	Yes	...	No	No	No	No	Month-to-month	Yes	Mailed check	53.85	108.15	Yes
3	7795-CFOCW	Male	No	No	45	No	No phone service	DSL	Yes	...	Yes	Yes	No	No	One year	No	Bank transfer (automatic)	42.30	1840.75	No
4	9237-HQITU	Female	No	No	2	Yes	No	Fiber optic	No	...	No	No	No	No	Month-to-month	Yes	Electronic check	70.70	151.65	Yes