文档介绍:数据挖掘算法实验报告
实验题目
基于决策树的分类算法,属性选择采用ID3算法,并使用如下数据建立分类决策树。
算法基本思想的描述
ID3选择具有最高信息增益的属性作为分裂属性,基于这一原则,我们首先可以算出初始集合in-credit_rating<<endl;
return 0;
}
// Returns one term of a base-2 entropy sum: (a/b) * log2(b/a),
// which equals -p*log2(p) for the proportion p = a/b.
//   a : count of items in one class
//   b : total count the class is measured against
// A zero count contributes nothing, so 0 is returned without touching b.
double calculate(double a,double b)
{
    if (a == 0)
        return 0;

    const double proportion = a / b;
    const double inverse = b / a;
    // log2 is obtained through change of base from log10.
    return proportion * log10(inverse) / log10(2);
}
// Computes the entropy of the class label over the first SIZE records.
//   data    : records to scan; only the buys_computer field is read
//   entropy : output, receives the sum of the two calculate() terms
// A record counts as positive when buys_computer is exactly "yes";
// every other value is counted as negative.
void origin_entropy(Data data[],double &entropy)
{
    double positives = 0;
    double negatives = 0;

    int row = 0;
    while (row < SIZE)
    {
        const bool bought = (strcmp(data[row].buys_computer, "yes") == 0);
        if (bought)
            ++positives;
        else
            ++negatives;
        ++row;
    }

    entropy = calculate(positives, SIZE) + calculate(negatives, SIZE);
}
// Computes the weighted entropy of the class label after partitioning the
// first SIZE records by their age field.
//   data    : records to scan; the age and buys_computer fields are read
//   entropy : output, receives the sum over the three age groups of
//             (group size / SIZE) * (entropy of buys_computer in the group)
void age_entropy(Data data[],double &entropy)
{
    // tally[g] = { "yes" count, non-"yes" count, group total } for group g:
    //   0 -> age "<=30", 1 -> age "31...40", 2 -> every other age value.
    double tally[3][3] = {{0}};

    for (int row = 0; row < SIZE; ++row)
    {
        int group = 2;
        if (strcmp(data[row].age, "<=30") == 0)
            group = 0;
        else if (strcmp(data[row].age, "31...40") == 0)
            group = 1;

        if (strcmp(data[row].buys_computer, "yes") == 0)
            tally[group][0]++;
        else
            tally[group][1]++;
        tally[group][2]++;
    }

    // Accumulate the groups in fixed order 0, 1, 2.
    double weighted = 0;
    for (int g = 0; g < 3; ++g)
    {
        const double *t = tally[g];
        // An empty group yields 0: calculate() returns 0 for a zero count.
        weighted += t[2] / SIZE * (calculate(t[0], t[2]) + calculate(t[1], t[2]));
    }
    entropy = weighted;
}
void income_entropy(Data data[],double &entropy)
{
double high[3]={0};
double medium[3]={0};
double low[3]={0};
for (int i=0;i<SIZE;i+