Create CleaningUpData.py
This commit is contained in:
parent
755518dbad
commit
7f500e8250
36
maths-prog/MachineLearningDemystified/CleaningUpData.py
Normal file
36
maths-prog/MachineLearningDemystified/CleaningUpData.py
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
import pandas as pd
import numpy as np

# Folder that holds insurance.csv. NOTE: hard-coded absolute path (no
# trailing slash), so file names appended to it need a leading '/'.
directory = '/home/nuno/Documents/Jobs/IDInsight'

## Load the dataframe
insuranceDataFrame = pd.read_csv(directory + '/insurance.csv')
|
||||||
|
|
||||||
|
## Some functions for cleaning up, inspired by R's ifelse function
|
||||||
|
|
||||||
|
def ifelse1(x, listOfChecks, yesLabel, noLabel):
    """Return yesLabel if x is contained in listOfChecks, otherwise noLabel.

    Two-way analogue of R's ifelse: membership is tested with the ``in``
    operator, so listOfChecks may be any container that supports it
    (list, tuple, set, numpy array, ...).
    """
    return yesLabel if x in listOfChecks else noLabel
|
||||||
|
|
||||||
|
def ifelse2(x, listOfChecks, listOfLabels):
    """Map x to the label at the position of its first match in listOfChecks.

    Multi-way analogue of R's ifelse: x is compared with ``==`` against each
    entry of listOfChecks in order, and the entry of listOfLabels at the same
    index is returned for the first match.

    Returns None when x equals none of the entries.
    """
    for position, candidate in enumerate(listOfChecks):
        if x == candidate:
            # Labels are looked up by index, so a label list shorter than
            # the check list still raises IndexError on a late match.
            return listOfLabels[position]
    return None
|
||||||
|
|
||||||
|
## Encode the categorical columns as integers

# 'male' -> 1, anything else -> 0
insuranceDataFrame['sex_numeric'] = insuranceDataFrame['sex'].apply(
    lambda x: ifelse1(x, np.array(['male']), 1, 0)
)

# 'yes' -> 1, anything else -> 0
insuranceDataFrame['smoker_numeric'] = insuranceDataFrame['smoker'].apply(
    lambda x: ifelse1(x, np.array(['yes']), 1, 0)
)

# Distinct region values are computed once, outside the lambda, instead of
# once per row. One integer code is generated per distinct region, so the
# encoding no longer assumes that there are exactly four regions.
regionNames = np.unique(insuranceDataFrame['region'])
regionCodes = np.arange(len(regionNames))
insuranceDataFrame['region_numeric'] = insuranceDataFrame['region'].apply(
    lambda x: ifelse2(x, regionNames, regionCodes)
)

# The original text columns are replaced by their numeric encodings.
insuranceDataFrame = insuranceDataFrame.drop(["sex", "smoker", "region"], axis=1)

## Save the cleaned dataframe

# `directory` has no trailing slash, so the '/' separator is required here;
# without it the file is written next to the folder, not inside it.
insuranceDataFrame.to_csv(directory + '/insurance_clean_continuous.csv', index=False)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user