# Entropy and information gain for a small categorical data set.
#
# The script builds an example data frame S, computes the Shannon entropy (in
# bits) of the output column, and then, for every input column, the entropy of
# the output within each input value and the resulting information gain.
#
# Results are published as dynamically named variables in the current
# environment:
#   numOutputVarValues.<output>            number of distinct output values
#   p.<output>.<k>                         proportion of the k-th output value
#   <output>Entropy                        entropy of the output column
#   p.<input>.<value>.<output>.<outValue>  conditional proportion
#   Entropy.<input>.<value>.<output>       entropy of output where input == value
#   informationGain.<input>                information gain of the input column
#
# NOTE: an earlier version started with rm(list = ls()). That call was removed
# because wiping the caller's workspace is a destructive side effect that this
# computation does not need.

# Create example data frame:
Sunny <- "Sunny"
Overcast <- "Overcast"
Rain <- "Rain"
Outlook <- c(Sunny, Sunny, Overcast, Rain, Rain, Rain, Overcast,
             Sunny, Sunny, Rain, Sunny, Overcast, Overcast, Rain)

Hot <- "Hot"
Mild <- "Mild"
Cool <- "Cool"
Temperature <- c(Hot, Hot, Hot, Mild, Cool, Cool, Cool,
                 Mild, Cool, Mild, Mild, Mild, Hot, Mild)

High <- "High"
Normal <- "Normal"
Humidity <- c(High, High, High, High, Normal, Normal, Normal,
              High, Normal, Normal, Normal, High, Normal, High)

Weak <- "Weak"
Strong <- "Strong"
Wind <- c(Weak, Strong, Weak, Weak, Weak, Strong, Strong,
          Weak, Weak, Weak, Strong, Strong, Weak, Strong)

Yes <- "Yes"
No <- "No"
PlayTennis <- c(No, No, Yes, Yes, Yes, No, Yes,
                No, Yes, Yes, Yes, Yes, Yes, No)

# The outer parentheses auto-print S when the script is run at top level.
(S <- as.data.frame(cbind(Outlook, Temperature, Humidity, Wind, PlayTennis)))

# Contribution of a single probability p to an entropy sum, in bits.
# Zero probabilities contribute 0 (the usual 0 * log2(0) = 0 convention);
# evaluating the product directly would give 0 * -Inf = NaN and poison every
# value derived from it. Undefined probabilities (NA/NaN) also contribute 0.
entropyTerm <- function(p) {
  if (isTRUE(p > 0)) {
    -p * log2(p)
  } else {
    0
  }
}

numObs <- dim(S)[1]

# ---- Entropy of the output variable(s) ----
outputVarColNumbers <- c(dim(S)[2]) # c(5)
numOutputVar <- length(outputVarColNumbers)

for (outputVarCount in seq_len(numOutputVar)) {
  outputVarName <- names(S)[outputVarColNumbers[outputVarCount]]
  outputVarValues <- S[[outputVarColNumbers[outputVarCount]]]
  # table() orders the distinct values; p.<output>.<k> follows that order.
  outputVarLevels <- names(table(outputVarValues))

  assign(paste0("numOutputVarValues.", outputVarName), length(outputVarLevels))

  outputEntropy <- 0
  for (valCount in seq_along(outputVarLevels)) {
    pVal <- sum(outputVarValues == outputVarLevels[valCount]) / numObs
    assign(paste0("p.", outputVarName, ".", valCount), pVal)
    outputEntropy <- outputEntropy + entropyTerm(pVal)
  }
  assign(paste0(outputVarName, "Entropy"), outputEntropy)
}

# ---- Conditional entropies and information gain per input variable ----
inputVarColNumbers <- seq_len(dim(S)[2] - 1) # c(1, 2, 3, 4)
outputVarColNumbers <- c(dim(S)[2]) # c(5)
numInputVar <- length(inputVarColNumbers)
numOutputVar <- length(outputVarColNumbers)

# Information gain is measured against the first output column, both for the
# starting entropy and for the conditional entropies subtracted from it.
gainTargetName <- names(S)[outputVarColNumbers[1]]

for (inputVarCount in seq_len(numInputVar)) { ## ["Outlook","Temperature","Humidity","Wind"]
  # Read names and values from S itself rather than from same-named globals.
  inputVarName <- names(S)[inputVarColNumbers[inputVarCount]]
  inputVarValues <- S[[inputVarColNumbers[inputVarCount]]]
  inputVarLevels <- names(table(inputVarValues))
  numInputVarVal <- length(inputVarLevels)

  # Start from the output entropy and subtract each weighted subset entropy.
  gainSoFar <- get(paste0(gainTargetName, "Entropy"))

  for (inputVarValCount in seq_len(numInputVarVal)) { ## ["Sunny","Overcast","Rain"]
    inputVarValName <- inputVarLevels[inputVarValCount]
    inSubset <- inputVarValues == inputVarValName
    subsetSize <- sum(inSubset)

    for (outputVarCount in seq_len(numOutputVar)) { ## ["PlayTennis"]
      outputVarName <- names(S)[outputVarColNumbers[outputVarCount]]
      outputVarValues <- S[[outputVarColNumbers[outputVarCount]]]
      outputVarLevels <- names(table(outputVarValues))
      numOutputVarVal <- length(outputVarLevels)

      subsetEntropy <- 0
      for (outputVarValCount in seq_len(numOutputVarVal)) { ## ["Yes", "No"]
        outputVarValName <- outputVarLevels[outputVarValCount]
        # Proportion of the subset (input == value) having this output value.
        pCond <- sum(inSubset & outputVarValues == outputVarValName) / subsetSize
        assign(
          paste0("p.", inputVarName, ".", inputVarValName, ".",
                 outputVarName, ".", outputVarValName),
          pCond
        )
        subsetEntropy <- subsetEntropy + entropyTerm(pCond)
      }
      assign(
        paste0("Entropy.", inputVarName, ".", inputVarValName, ".", outputVarName),
        subsetEntropy
      )
    }

    # Weight the subset entropy by the share of observations in the subset.
    G <- (subsetSize / numObs) *
      get(paste0("Entropy.", inputVarName, ".", inputVarValName, ".", gainTargetName))
    gainSoFar <- gainSoFar - G
  }

  assign(paste0("informationGain.", inputVarName), gainSoFar)
}