/*
Source page: User:Antigng-bot/wikitemplate
Wikipedia, the free encyclopedia
*/
#include <string.h>
#include "mem.h"
#include "wikitemplate.h"
#include "network.h"
#include "convert.h"
/*
Feed one character to the four character-level analyzers whose state is kept in *p.

ch      - the character just read
p       - analyzer state (tag, comment, link and reference state machines)
nowiki  - set to 1..7 when the opening tag of nowiki, pre, math, source,
          includeonly, code or syntaxhighlight is recognised; cleared on the
          character that follows the matching closing tag
comment - set to 1 once "<!" plus at least two '-' and one further character
          have been seen; cleared on the character that follows a '-' run
          ended by '>'
link    - nesting depth of "[[" ... "]]" seen outside comments and nowiki text
ref     - incremented for an opening tag whose name starts with "ref",
          decremented (never below zero) for the corresponding closing tag

Tag names are lowercased, spaces inside a tag are dropped, and names are
compared by prefix only.
The analyzers run in the fixed order tag, comment, link, reference, so a later
one already sees the flags updated by an earlier one for the same character.
*/
void checkparsestate(const char ch,struct _parsestate *p,int *nowiki,int *comment,int *link,int *ref)
{
	/* tags with opaque content; (index+1) is the value stored in *nowiki */
	static const char *const opaque[]={"nowiki","pre","math","source","includeonly","code","syntaxhighlight"};
	const int opaque_count=(int)(sizeof(opaque)/sizeof(opaque[0]));
	/* character classes shared by several states below */
	const int is_upper=(ch>='A')&&(ch<='Z');
	const int is_letter=is_upper||((ch>='a')&&(ch<='z'));
	const int lowered=is_upper?ch+'a'-'A':ch;
	/* characters that abort the collection of a tag name */
	const int breaks_tag=(ch=='/'||ch=='\n'||ch=='{'||ch=='}'||ch=='['||ch==']');

	/*
	tag analyzer
	*/
	switch(p->tagstate)
	{
	case 0:
		/* idle: a '<' outside a comment may start a tag */
		if((!*comment)&&(ch=='<'))
		{
			p->tagpos=0;
			p->tagstate=1;
		}
		break;
	case 1:
		/* right after '<': a letter starts the tag name */
		if(is_letter)
		{
			p->tag[0]=lowered;
			p->tagpos=1;
			p->tagstate=2;
		}
		else if(ch!='<') p->tagstate=0;
		break;
	case 2:
		/* collecting the name of an opening tag */
		if(ch=='>')
		{
			int i;
			p->tag[p->tagpos]=0;
			p->tagstate=0;
			for(i=0;i<opaque_count;i++)
			{
				if(!strncmp(p->tag,opaque[i],strlen(opaque[i])))
				{
					*nowiki=i+1;
					p->tagstate=3;
					break;
				}
			}
		}
		else if(breaks_tag||p->tagpos>30) p->tagstate=0;
		else if(ch=='<')
		{
			p->tagpos=0;
			p->tagstate=1;
		}
		else if(ch!=' ')
		{
			p->tag[p->tagpos]=lowered;
			p->tagpos++;
		}
		break;
	case 3:
		/* inside opaque content: look for the '<' of a closing tag */
		if(ch=='<') p->tagstate=4;
		break;
	case 4:
		/* after '<' inside opaque content: expect '/' */
		if(ch=='/')
		{
			p->tagpos=0;
			p->tagstate=5;
		}
		else if(ch!='<') p->tagstate=3;
		break;
	case 5:
		/* after "</": a letter starts the closing tag name */
		if(is_letter)
		{
			p->tag[0]=lowered;
			p->tagpos=1;
			p->tagstate=6;
		}
		else p->tagstate=(ch=='<')?4:3;
		break;
	case 6:
		/* collecting the name of a closing tag */
		if(ch=='>')
		{
			p->tag[p->tagpos]=0;
			/* only the tag that opened the opaque region may close it */
			if((*nowiki>=1)&&(*nowiki<=opaque_count))
			{
				const char *expected=opaque[*nowiki-1];
				p->tagstate=strncmp(p->tag,expected,strlen(expected))?3:7;
			}
		}
		else if(breaks_tag||p->tagpos>30||ch=='<') p->tagstate=3;
		else if(ch!=' ')
		{
			p->tag[p->tagpos]=lowered;
			p->tagpos++;
		}
		break;
	case 7:
		/* first character after the closing tag: opaque region is over */
		*nowiki=0;
		if(ch=='<')
		{
			p->tagpos=0;
			p->tagstate=1;
		}
		else p->tagstate=0;
		break;
	}

	/*
	comment analyzer
	*/
	switch(p->commentstate)
	{
	case 0:
		if((!*nowiki)&&(ch=='<')) p->commentstate=1;
		break;
	case 1:
		/* after '<': '!' may start a comment */
		if(ch=='!')
		{
			p->dashcount=0;
			p->commentstate=2;
		}
		else if(ch!='<') p->commentstate=0;
		break;
	case 2:
		/* after "<!": count dashes until something else arrives */
		if(ch=='-') p->dashcount++;
		else if(ch=='>') p->commentstate=0;
		else if(p->dashcount>1)
		{
			*comment=1;
			p->commentstate=3;
		}
		else if(ch=='<') p->commentstate=1;
		else p->commentstate=0;
		break;
	case 3:
		/* inside the comment body */
		if(ch=='-') p->commentstate=4;
		break;
	case 4:
		/* inside the comment, last character was '-' */
		if(ch=='>') p->commentstate=5;
		else if(ch!='-') p->commentstate=3;
		break;
	case 5:
		/* first character after the comment terminator */
		*comment=0;
		p->commentstate=(ch=='<')?1:0;
		break;
	}

	/*
	link analyzer
	*/
	switch(p->linkstate)
	{
	case 0:
		if((ch=='[')&&(!*comment)&&(!*nowiki)) p->linkstate=1;
		break;
	case 1:
		/* after a single '[' */
		if(ch=='[')
		{
			*link=1;
			p->linkstate=2;
		}
		else p->linkstate=0;
		break;
	case 2:
		/* inside a link: brackets only count outside comments and nowiki text */
		if((!*comment)&&(!*nowiki))
		{
			if(ch=='[') p->linkstate=3;
			else if(ch==']') p->linkstate=4;
		}
		break;
	case 3:
		/* inside a link, last character was '[' */
		if(ch=='[') (*link)++;
		p->linkstate=2;
		break;
	case 4:
		/* inside a link, last character was ']' */
		if(ch==']')
		{
			(*link)--;
			p->linkstate=(*link)?2:0;
		}
		else p->linkstate=2;
		break;
	}

	/*
	reference analyzer
	*/
	switch(p->refstate)
	{
	case 0:
		if((ch=='<')&&(!*nowiki)&&(!*comment))
		{
			p->refpos=0;
			p->refstate=1;
		}
		break;
	case 1:
		/* after '<': a letter starts an opening tag, '/' a closing tag */
		if(is_letter)
		{
			p->reftag[0]=lowered;
			p->refpos=1;
			p->refstate=2;
		}
		else if(ch=='/') p->refstate=3;
		else if(ch!='<') p->refstate=0;
		break;
	case 2:
	case 3:
		/* collecting a tag name: state 2 is an opening tag, state 3 a closing tag */
		if(ch=='>')
		{
			p->reftag[p->refpos]=0;
			if(!strncmp(p->reftag,"ref",3))
			{
				if(p->refstate==2) (*ref)+=1;
				else if(*ref>0) (*ref)-=1;
			}
			p->refstate=0;
		}
		else if(breaks_tag||p->refpos>1022) p->refstate=0;
		else if(ch=='<')
		{
			p->refpos=0;
			p->refstate=1;
		}
		else if(ch!=' ')
		{
			p->reftag[p->refpos]=lowered;
			p->refpos++;
		}
		break;
	}
}
/*
Settle analyzer states that are normally resolved by the next character.

Tag state 7 and comment state 5 are only left when another character arrives;
this function applies the same reset (state back to 0, flag cleared) without
one. link and ref are accepted but not used here.
*/
void parsestatefinal(struct _parsestate *p,int *nowiki,int *comment,int *link,int *ref)
{
	const int tag_just_closed=(p->tagstate==7);
	const int comment_just_closed=(p->commentstate==5);

	if(tag_just_closed)
	{
		*nowiki=0;
		p->tagstate=0;
	}
	if(comment_just_closed)
	{
		*comment=0;
		p->commentstate=0;
	}
}
/*
Parse one template body, character by character, into *temp.

Characters are pulled from txt with xmlpulltext() for as long as it returns
XML_TEXT_CONTINUE; each one is first passed to checkparsestate() so that the
nowiki/comment/link/ref flags are current, then handled by the local state
machine:
	0 reading the template name
	1 template name followed by one '}'
	2 reading a parameter name
	3 parameter name followed by one '{'
	4 parameter name followed by one '}'
	5 reading a parameter value
	6 parameter value followed by one '}'
	7 parameter value followed by one '{'
A "{{" met in state 3 or 7 is parsed by a recursive call with depth+1 and
linked into the current value list as an element of type 1.

temp    - template head to fill (name, parameter list, rot/overflow flags,
          totalbytes counter)
nowiki, comment, link, ref, pstate
        - shared analyzer flags/state, updated through checkparsestate()
txt     - character source
depth   - current nesting depth; more than 40 sets temp->overflow and fails

Returns TEMPLATE_OK when the closing "}}" is reached with *ref equal to its
value on entry, TEMPLATE_ROTTEN otherwise (including when the text ends
first; temp->rot is not set in that last case).

NOTE(review): presumably the caller has already consumed the opening "{{" -
confirm against the call site.
*/
int tempprocess(struct _templatehead *temp,int *nowiki,int *comment,int *link,int *ref,struct _parsestate *pstate,HTTP txt,const int depth)
{
	/* size in bytes of every text buffer allocated for a name or value piece */
	enum { TP_VALUE_BUF_BYTES=8192 };
	char ch=0;
	int state=0;
	int namepos=0;
	int valuepos=0;
	struct _template *cur_content=0;
	struct _value *cur_value=0;
	/* the template may only close at the <ref> depth it was opened at */
	int baseref=*ref;
	if(depth>40)
	{
		temp->overflow=1;
		return TEMPLATE_ROTTEN;
	}
	while(xmlpulltext(txt,&ch)==XML_TEXT_CONTINUE)
	{
		checkparsestate(ch,pstate,nowiki,comment,link,ref);
		switch(state)
		{
		case 0:
			/*
			reading template name
			NOTE(review): tempname is declared elsewhere; the writes below
			reach index 4098, so it is assumed to hold at least 4099 bytes -
			confirm
			*/
			if(((ch=='{'||ch=='['||ch==']')&&(!*comment))||(namepos>4096)||(pstate->tagstate>=2))
			{
				temp->tempname[namepos]=ch;
				temp->totalbytes++;
				namepos++;
				temp->tempname[namepos]=0;
				temp->rot=1;
				return TEMPLATE_ROTTEN;
			}
			else if(ch=='}')
			{
				state=1;
			}
			else if(ch=='|')
			{
				/* a parameter list without a template name is invalid */
				if(!namepos) 
				{
					temp->rot=1;
					return TEMPLATE_ROTTEN;
				}
				cur_content=temp->content=(struct _template *)s_calloc(sizeof(struct _template),1);
				cur_value=cur_content->name=(struct _value *)s_calloc(sizeof(struct _value),1);
				cur_value->type=0;
				cur_value->elem.ch=(char *)s_calloc(sizeof(char)*TP_VALUE_BUF_BYTES,1);
				namepos=0;
				state=2;
			}
			else 
			{
				temp->tempname[namepos]=ch;
				temp->totalbytes++;
				namepos++;
				temp->tempname[namepos]=0;
			}
			break;
		case 1:
			/*
			template name followed by '}': only a second '}' is acceptable
			*/
			if(ch=='}')
			{
				if(*ref==baseref) return TEMPLATE_OK;
				else
				{
					temp->rot=1;
					return TEMPLATE_ROTTEN;
				}
			}
			else
			{
				temp->tempname[namepos]=ch;
				temp->totalbytes++;
				namepos++;
				temp->tempname[namepos]=0;
				temp->rot=1;
				return TEMPLATE_ROTTEN;
			}
			break;
		case 2:
			/*
			reading parameter name
			*/
			if(namepos>4096)
			{
				cur_value->elem.ch[namepos]=ch;
				temp->totalbytes++;
				namepos++;
				cur_value->elem.ch[namepos]=0;
				temp->rot=1;
				return TEMPLATE_ROTTEN;
			}
			else if(ch=='{'&&(!*nowiki)&&(!*comment))
			{
				state=3;
			}
			else if((ch=='}')&&(!*nowiki)&&(!*comment)&&((*ref)==baseref))
			{
				state=4;
			}
			else if(ch=='|'&&(!*nowiki)&&(!*comment)&&(!*link)&&((*ref)==baseref))
			{
				cur_content->next=(struct _template *)s_calloc(sizeof(struct _template),1);
				cur_content=cur_content->next;
				cur_value=cur_content->name=(struct _value *)s_calloc(sizeof(struct _value),1);
				cur_value->type=0;
				cur_value->elem.ch=(char *)s_calloc(sizeof(char)*TP_VALUE_BUF_BYTES,1);
				namepos=0;
			}
			else if(ch=='='&&(!*nowiki)&&(!*comment)&&((*ref)==baseref)&&(pstate->refstate!=2)&&(pstate->tagstate!=2))
			{
				/* '=' inside a tag being collected belongs to an attribute, not to the parameter */
				cur_value=cur_content->value=(struct _value *)s_calloc(sizeof(struct _value),1);
				cur_value->type=0;
				cur_value->elem.ch=(char *)s_calloc(TP_VALUE_BUF_BYTES*sizeof(char),1);
				state=5;
				valuepos=0;
			}
			else
			{
				cur_value->elem.ch[namepos]=ch;
				namepos++;
				temp->totalbytes++;
				cur_value->elem.ch[namepos]=0;
			}
			break;
		case 3:
			/*
			parameter name followed by '{'
			*/
			if(ch=='{')
			{
				struct _templatehead *subtemp=(struct _templatehead *)s_calloc(sizeof(struct _templatehead),1);
				cur_content->nameflag++;
				cur_value->next=(struct _value *)s_calloc(sizeof(struct _value),1);
				cur_value=cur_value->next;
				cur_value->type=1;
				cur_value->elem.temp=subtemp;
				if(tempprocess(subtemp,nowiki,comment,link,ref,pstate,txt,depth+1)==TEMPLATE_ROTTEN)
				{
					temp->overflow=subtemp->overflow;
					temp->rot=1;
					return TEMPLATE_ROTTEN;
				}
				else
				{
					/* text after the nested template continues in a fresh buffer */
					cur_value->next=(struct _value *)s_calloc(sizeof(struct _value),1);
					cur_value=cur_value->next;
					cur_value->type=0;
					cur_value->elem.ch=(char *)s_calloc(sizeof(char)*TP_VALUE_BUF_BYTES,1);
					namepos=0;
					state=2;
				}
			}
			else
			{
				/*
				states 3 and 4 can alternate on "{}{}..." without ever going
				back through the length check of state 2, so make sure the
				two bytes plus terminator written below still fit
				*/
				if(namepos>TP_VALUE_BUF_BYTES-3)
				{
					temp->rot=1;
					return TEMPLATE_ROTTEN;
				}
				/* the pending '{' was literal text after all */
				cur_value->elem.ch[namepos]='{';
				cur_value->elem.ch[namepos+1]=0;
				temp->totalbytes++;
				namepos++;
				if(ch=='=')
				{
					cur_value=cur_content->value=(struct _value *)s_calloc(sizeof(struct _value),1);
					cur_value->type=0;
					cur_value->elem.ch=(char *)s_calloc(TP_VALUE_BUF_BYTES*sizeof(char),1);
					state=5;
					valuepos=0;
				}
				else if(ch=='|')
				{
					temp->rot=1;
					return TEMPLATE_ROTTEN;
				}
				else if(ch=='}')
				{
					state=4;
				}
				else
				{
					cur_value->elem.ch[namepos]=ch;
					namepos++;
					temp->totalbytes++;
					cur_value->elem.ch[namepos]=0;
					state=2;
				}

			}
			break;
		case 4:
			/*
			parameter name followed by '}'
			*/
			if(ch=='}')
			{
				if((*ref)==baseref) return TEMPLATE_OK;
				else
				{
					temp->rot=1;
					return TEMPLATE_ROTTEN;
				}
			}
			else
			{
				/*
				same capacity check as in state 3: this branch is reachable
				repeatedly without passing through state 2
				*/
				if(namepos>TP_VALUE_BUF_BYTES-3)
				{
					temp->rot=1;
					return TEMPLATE_ROTTEN;
				}
				/* the pending '}' was literal text after all */
				cur_value->elem.ch[namepos]='}';
				cur_value->elem.ch[namepos+1]=0;
				temp->totalbytes++;
				namepos++;
				if(ch=='{')
				{
					state=3;
				}
				else if(ch=='=')
				{
					cur_value=cur_content->value=(struct _value *)s_calloc(sizeof(struct _value),1);
					cur_value->type=0;
					cur_value->elem.ch=(char *)s_calloc(TP_VALUE_BUF_BYTES*sizeof(char),1);
					state=5;
					valuepos=0;
				}
				else if(ch=='|')
				{
					cur_content->next=(struct _template *)s_calloc(sizeof(struct _template),1);
					cur_content=cur_content->next;
					cur_value=cur_content->name=(struct _value *)s_calloc(sizeof(struct _value),1);
					cur_value->type=0;
					cur_value->elem.ch=(char *)s_calloc(sizeof(char)*TP_VALUE_BUF_BYTES,1);
					namepos=0;
					state=2;
				}
				else
				{
					cur_value->elem.ch[namepos]=ch;
					namepos++;
					temp->totalbytes++;
					cur_value->elem.ch[namepos]=0;
					state=2;
				}
			}
			break;
		case 5:
			/*
			reading parameter value
			*/
			if(valuepos>4096)
			{
				cur_value->elem.ch[valuepos]=ch;
				valuepos++;
				cur_value->elem.ch[valuepos]=0;
				temp->totalbytes++;
				temp->rot=1;
				return TEMPLATE_ROTTEN;
			}
			else if((ch=='|')&&(!*nowiki)&&(!*comment)&&(!*link)&&((*ref)==baseref))
			{
				cur_content->next=(struct _template *)s_calloc(sizeof(struct _template),1);
				cur_content=cur_content->next;
				cur_value=cur_content->name=(struct _value *)s_calloc(sizeof(struct _value),1);
				cur_value->type=0;
				cur_value->elem.ch=(char *)s_calloc(sizeof(char)*TP_VALUE_BUF_BYTES,1);
				namepos=0;
				state=2;
			}
			else if((ch=='}')&&(!*nowiki)&&(!*comment)&&((*ref)==baseref))
			{
				state=6;
			}
			else if((ch=='{')&&(!*nowiki)&&(!*comment))
			{
				state=7;
			}
			else
			{
				cur_value->elem.ch[valuepos]=ch;
				valuepos++;
				cur_value->elem.ch[valuepos]=0;
				temp->totalbytes++;
			}
			break;
		case 6:
			/*
			parameter value followed by '}'
			*/
			if(ch=='}')
			{
				if((*ref)==baseref) return TEMPLATE_OK;
				else
				{
					temp->rot=1;
					return TEMPLATE_ROTTEN;
				}
			}
			else
			{
				/*
				states 6 and 7 can alternate on "}{}{..." without ever going
				back through the length check of state 5, so make sure the
				two bytes plus terminator written below still fit
				*/
				if(valuepos>TP_VALUE_BUF_BYTES-3)
				{
					temp->rot=1;
					return TEMPLATE_ROTTEN;
				}
				/* the pending '}' was literal text after all */
				cur_value->elem.ch[valuepos]='}';
				cur_value->elem.ch[valuepos+1]=0;
				temp->totalbytes++;
				valuepos++;
				if(ch=='{') state=7;
				else if(ch=='|')
				{
					cur_content->next=(struct _template *)s_calloc(sizeof(struct _template),1);
					cur_content=cur_content->next;
					cur_value=cur_content->name=(struct _value *)s_calloc(sizeof(struct _value),1);
					cur_value->type=0;
					cur_value->elem.ch=(char *)s_calloc(sizeof(char)*TP_VALUE_BUF_BYTES,1);
					namepos=0;
					state=2;
				}
				else
				{
					cur_value->elem.ch[valuepos]=ch;
					cur_value->elem.ch[valuepos+1]=0;
					temp->totalbytes++;
					valuepos++;
					state=5;
				}				
			}
			break;
		case 7:
			/*
			parameter value followed by '{'
			*/
			if(ch!='{')
			{
				/*
				same capacity check as in state 6: this branch is reachable
				repeatedly without passing through state 5
				*/
				if(valuepos>TP_VALUE_BUF_BYTES-3)
				{
					temp->rot=1;
					return TEMPLATE_ROTTEN;
				}
				/* the pending '{' was literal text after all */
				cur_value->elem.ch[valuepos]='{';
				cur_value->elem.ch[valuepos+1]=0;
				temp->totalbytes++;
				valuepos++;
				if(ch=='}') state=6;
				else if(ch=='|')
				{
					temp->rot=1;
					return TEMPLATE_ROTTEN;
				}
				else
				{
					cur_value->elem.ch[valuepos]=ch;
					cur_value->elem.ch[valuepos+1]=0;
					temp->totalbytes++;
					valuepos++;
					state=5;
				}	
			}
			else
			{
				struct _templatehead *subtemp=(struct _templatehead *)s_calloc(sizeof(struct _templatehead),1);
				cur_content->valueflag++;
				cur_value->next=(struct _value *)s_calloc(sizeof(struct _value),1);
				cur_value=cur_value->next;
				cur_value->type=1;
				cur_value->elem.temp=subtemp;
				if(tempprocess(subtemp,nowiki,comment,link,ref,pstate,txt,depth+1)==TEMPLATE_ROTTEN)
				{
					temp->rot=1;
					temp->overflow=subtemp->overflow;
					return TEMPLATE_ROTTEN;
				}
				else
				{
					/* text after the nested template continues in a fresh buffer */
					cur_value->next=(struct _value *)s_calloc(sizeof(struct _value),1);
					cur_value=cur_value->next;
					cur_value->type=0;
					cur_value->elem.ch=(char *)s_calloc(sizeof(char)*TP_VALUE_BUF_BYTES,1);
					valuepos=0;
					state=5;
				}
			}
			break;
		}
	}	
	/* the text ended before the template was closed */
	return TEMPLATE_ROTTEN;
}