C++统计各个单词出现的次数

从文档（txt）加载一段英文文本，统计各个单词出现的次数，然后按照单词出现频率在cmd中「降序」输出单词及其出现的次数。

例如输入：
One little, two little, three little rabbits.

输出：
little 3
one 1
two 1
…….

不用去标点，不用改小写，这些我已经提前做好了

其实我看过08年的那个版本但现在我这个版本用不了。。

// 算法如下，如果你想从文本读取，算法原理一样，只需将结果赋值给string变量：

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>
using namespace std;
#define  MAXLONG 10000
// Comparator for sort(): true when lhs carries a strictly higher count than
// rhs. Only the count (pair.first) is compared so that std::stable_sort keeps
// entries with equal counts in their incoming order.
static bool CountIsGreater(const std::pair<int, std::string>& lhs,
                           const std::pair<int, std::string>& rhs) {
    return lhs.first > rhs.first;
}

// Reorders two parallel arrays so that counts appear in descending order.
//
// numW    - words; numW[i] belongs to wordI[i]
// wordI   - occurrence count of each word
// wordNum - number of leading entries of both arrays to sort
//
// Entries with equal counts keep their relative order (stable), which is the
// same result an adjacent-swap bubble sort produces, but in O(n log n)
// comparisons instead of O(n^2). A wordNum below 2 leaves the arrays untouched.
void sort(std::string numW[], int wordI[], int wordNum) {
    if (wordNum < 2) {
        return;  // nothing to reorder (also covers zero and negative sizes)
    }

    // Zip the parallel arrays so each word travels together with its count.
    std::vector<std::pair<int, std::string> > rows;
    rows.reserve(static_cast<std::size_t>(wordNum));
    for (int i = 0; i < wordNum; ++i) {
        rows.push_back(std::make_pair(wordI[i], numW[i]));
    }

    std::stable_sort(rows.begin(), rows.end(), CountIsGreater);

    // Unzip the sorted rows back into the caller's arrays.
    for (int i = 0; i < wordNum; ++i) {
        wordI[i] = rows[static_cast<std::size_t>(i)].first;
        numW[i] = rows[static_cast<std::size_t>(i)].second;
    }
}
// Reports whether targetW is already present among the first currentL
// entries of word.
//
// word     - array of known words (only read, never modified)
// targetW  - word to look for; taken by const reference so no copy is made
//            on each lookup
// currentL - number of leading entries of word to examine
//
// Returns true on an exact match, false otherwise (including when currentL
// is zero or negative).
bool CompareW(const std::string word[], const std::string& targetW, int currentL) {
    if (currentL <= 0) {
        return false;  // empty range; also keeps the pointer arithmetic below valid
    }
    const std::string* const last = word + currentL;
    return std::find(word, last, targetW) != last;
}
// Comparator for the frequency table: true when lhs occurred strictly more
// often than rhs. Used with std::stable_sort so ties keep first-seen order.
static bool HasHigherCount(const std::pair<std::string, int>& lhs,
                           const std::pair<std::string, int>& rhs) {
    return lhs.second > rhs.second;
}

// Records one occurrence of word in tally. slotOf maps each known word to its
// position in tally so a lookup does not scan the whole table. Empty words
// (produced by consecutive, leading, or trailing delimiters) are ignored.
static void TallyWord(const std::string& word,
                      std::vector<std::pair<std::string, int> >& tally,
                      std::map<std::string, std::size_t>& slotOf) {
    if (word.empty()) {
        return;
    }
    const std::map<std::string, std::size_t>::iterator found = slotOf.find(word);
    if (found == slotOf.end()) {
        slotOf[word] = tally.size();               // first sighting: new slot
        tally.push_back(std::make_pair(word, 1));
    } else {
        ++tally[found->second].second;             // seen before: bump its count
    }
}

// Counts how often each word occurs in the first lenw characters of wsl and
// prints the words with their counts, most frequent first.
//
// wsl  - text to analyse; words are separated by ' ' or ','
// lenw - number of leading characters of wsl to process; a value <= 0
//        processes nothing
//
// Output (to std::cout): the header line "New Word\tCount", then one line
// "<word>\t\t<count>" per distinct word in descending count order. Words with
// equal counts appear in the order they were first seen.
//
// Throws std::out_of_range (from std::string::at) if lenw exceeds
// wsl.length(); nothing is printed in that case.
void CalToWord(const std::string& wsl, int lenw) {
    std::vector<std::pair<std::string, int> > tally;  // (word, count), first-seen order
    std::map<std::string, std::size_t> slotOf;        // word -> index into tally
    std::string current;                              // word being assembled

    // Single pass: grow the current word until a delimiter ends it.
    for (int k = 0; k < lenw; ++k) {
        const char c = wsl.at(static_cast<std::size_t>(k));
        if (c != ' ' && c != ',') {
            current += c;
        } else {
            TallyWord(current, tally, slotOf);
            current.clear();
        }
    }
    // The text usually ends without a delimiter; flush the final word.
    TallyWord(current, tally, slotOf);

    std::stable_sort(tally.begin(), tally.end(), HasHigherCount);

    // Print the result table.
    std::cout << "New Word\tCount" << '\n';
    for (std::size_t i = 0; i < tally.size(); ++i) {
        std::cout << tally[i].first << "\t\t" << tally[i].second << '\n';
    }
}
int main(){
	
	string wordsL="祈祷 马航 MH370,one little,two little,three little";
	 
	CalToWord(wordsL,wordsL.length());
	
	return -1;
}

运行结果截图如下：

每出现一个单词先判断是否已有，有的话为他的计数器加1，没有的话新开辟一个计数器，为他加1.

用CString 的find函数就可以了，
// Counts the non-overlapping occurrences of "title" in `file`.
// NOTE(review): `file` is declared outside this fragment; the code below
// assumes it is a std::string (find(needle, pos) returning npos on a miss,
// and length()) - confirm against the real declaration.
int count,length;
count = 0;   // number of occurrences found so far
length = 0;  // offset at which the next search starts
const std::string needle = "title";
std::string::size_type hit;
// find() takes the needle first and the start offset second; it returns
// npos when there is no further match, which ends the loop.
while ((hit = file.find(needle, length)) != std::string::npos)
{
    count++;
    // Resume after this match; restarting at the match itself would find the
    // same occurrence again and never terminate.
    length = static_cast<int>(hit + needle.length());
}

知识宝库

C++统计各个单词出现的次数

相关内容

最新更新