#!/usr/bin/perl
# catnav bot by WikiPedia:User:下一次登录
# Disclaimer: No warranty granted, use at your own risk!
# call requirements
use Getopt::Std;
use LWP::Simple;
use LWP::UserAgent;
use HTTP::Request;
use HTTP::Response;
use HTTP::Cookies;
#subroutine
#parameters
# Bot account and target wiki. These are package globals (not lexicals)
# because the rest of the script refers to them by their bare names.
our $username="xcnbot"; #input your username here, only English names are tested.
our $password="******"; #input your password here
our $WIKI_PATH="zh.wikipedia.org";
our $WIKI_PAGE;
### Login to wiki
# Set up connection data
my $browser=LWP::UserAgent->new();
# Extra request headers sent with every request made through $browser.
my @ns_headers = (
    'User-Agent' => 'Xcnbot 1.0 by 下一次登录', #Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7) Gecko/20041107 Firefox/1.0',
    'Accept' => 'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*',
    'Accept-Charset' => 'iso-8859-1,*,utf-8',
    'Accept-Language' => 'en-US',
);
# Hold cookies (in-memory jar) so the login session carries over to later requests
$browser->cookie_jar( {} );
{# Login
    # Make login request
    # NOTE(review): the credentials travel over plain HTTP here.
    my $login_url="http://".$WIKI_PATH."/w/index.php?title=Special:Userlogin&action=submitlogin";
    $response=$browser->post($login_url, @ns_headers,
        Content=>[wpName=>$username,wpPassword=>$password,wpRemember=>"1",wpLoginAttempt=>"Log in"]);
    # After logging in, we should be redirected to another page.
    # If we aren't, something is wrong.
    if($response->code!=302) { #cannot login
        print "We weren't able to login.\n\n";
        # Report what the server actually answered so the failure can be diagnosed.
        print STDERR "Login request returned: ", $response->status_line, "\n";
        exit 1;
    }
}
print "Logged in \n";
# Trivial variables
# Scratch package globals; kept under their original names for the rest of
# the script.
our $content;
our $content1;
our $content2;
our @cnTree;
our $editToken;
our $editTime;
# Set parameters
our $DEPTH=5; #predefined DEPTH
our $cChange=0; #Counter of CHANGEs
my $vName; #Variable: current cat NAME
my $vNameU="%E9%A0%81%E9%9D%A2%E5%88%86%E9%A1%9E"; #vName: Unicode, starting from “页面分类”
my @aTree; #Array: current cat-TREE
my $vDepth=0; #Variable: current cat-tree DEPTH
my @aChild; #Array: current cat's CHILDREN
my @aChildU; #aChildren: Unicode
# Connect to root cat
$WIKI_PAGE=$vNameU;
$URL="http://".$WIKI_PATH."/wiki/Category:".$WIKI_PAGE;
$response=$browser->get($URL, @ns_headers);
# Stop here on a failed fetch: continuing would derive a garbage root name
# that later gets written into Catnav templates on the wiki.
die "Cannot fetch root category $URL: ".$response->status_line."\n"
    unless $response->is_success;
$content=$response->as_string;
{ # extract vName
    # The displayed category name sits between this heading prefix and </h1>.
    my $heading_open="<h1 class=\"firstHeading\">Category:";
    my $name_start=index($content, $heading_open);
    die "Cannot find the category heading in $URL\n" if $name_start<0;
    $name_start+=length($heading_open);
    my $name_end=index($content, "</h1>", $name_start);
    die "Cannot find the end of the category heading in $URL\n" if $name_end<0;
    $vName=substr($content, $name_start, $name_end-$name_start);
} # extract vName
print "got vName\n";
# Change @aTree
$aTree[$vDepth]=$vName;
$vDepth+=1;
{ #debug: start debug1.txt afresh with the root URL
    if(open(my $debug_fh, '>', 'debug1.txt')) {
        print {$debug_fh} $URL;
        print {$debug_fh} "\n\n before digui\n\n";
        close($debug_fh);
    }
    else {
        warn "cannot open debug1.txt: $!\n";
    }
} #debug
{ #log: start cnlog.txt afresh
    if(open(my $log_fh, '>', 'cnlog.txt')) {
        print {$log_fh} "program start\n";
        close($log_fh);
    }
    else {
        warn "cannot open cnlog.txt: $!\n";
    }
} #log
# Walk the category tree starting from the root category.
gotocat($vName, $vNameU, $vDepth, @aTree);
# Write @text to the log file $file, opened with $mode ('>' to overwrite,
# '>>' to append). Logging is best-effort: if the file cannot be opened a
# warning is issued and the crawl carries on.
sub _catnav_log
{
    my ($mode, $file, @text) = @_;
    my $fh;
    unless (open($fh, $mode, $file)) {
        warn "cannot open $file: $!\n";
        return;
    }
    print {$fh} @text;
    close($fh);
    return;
}

# Render the first $depth entries of @tree as "a>b>c>" for the log files.
sub _catnav_format_tree
{
    my ($depth, @tree) = @_;
    my $trail = "";
    for my $k (0 .. $depth - 1) {
        $trail .= $tree[$k] . ">";
    }
    return $trail;
}

# Scan the text of a category page for sub-category links and append what
# is found to the caller's arrays: the link text to @$names and the href
# fragment after "/wiki/Category:" to @$encoded. Prints "c " to STDOUT for
# every link marker encountered.
sub _catnav_collect_children
{
    my ($html, $names, $encoded) = @_;
    my $marker = "<a class=\"CategoryTreeLabel CategoryTreeLabelNs14 CategoryTreeLabelCategory\" href=\"/wiki/Category:";
    my $terminator = "</a></div>";
    my $pos = 0;
    while ((my $start = index($html, $marker, $pos)) >= 0) {
        print "c ";
        # Skip exactly the marker; a hard-coded offset silently goes stale
        # whenever the marker text is edited.
        $start += length($marker);
        my $end = index($html, $terminator, $start);
        last if $end < 0;    # truncated page: no complete link left
        my $link = substr($html, $start, $end - $start);
        $pos = $end + length($terminator);
        # $link is expected to look like: ENCODED-NAME">DISPLAY-NAME
        my $split = index($link, "\">");
        next if $split < 0;
        push @$encoded, substr($link, 0, $split);
        push @$names, substr($link, $split + 2);
    }
    return;
}

# Inspect every {{Catnav|...}} template in $wikitext.
# Returns a two-element list:
#   1. 1 if some template lists exactly the first $depth entries of @tree,
#      otherwise 0;
#   2. 1 if any {{Catnav|...}} template is present at all, otherwise 0.
sub _catnav_scan
{
    my ($wikitext, $depth, @tree) = @_;
    my $opener = "{{Catnav|";
    my $has_same_tree = 0;
    my $has_catnav = 0;
    my $pos = 0;
    while ((my $start = index($wikitext, $opener, $pos)) >= 0) {
        $has_catnav = 1;
        $start += length($opener);
        my $end = index($wikitext, "}}", $start);
        last if $end < 0;    # unterminated template
        my $inner = substr($wikitext, $start, $end - $start);
        $pos = $end + 2;
        # Limit -1 keeps trailing empty fields; an empty template body still
        # counts as one (empty) entry.
        my @path = split(/\|/, $inner, -1);
        @path = ("") unless @path;
        next unless scalar(@path) == $depth;
        my $same = 1;
        for my $k (0 .. $depth - 1) {
            if ($path[$k] ne $tree[$k]) {
                $same = 0;
                last;
            }
        }
        $has_same_tree = 1 if $same;
    }
    return ($has_same_tree, $has_catnav);
}

# Turn the HTML-escaped text taken from the edit form back into plain text.
# Each entity is decoded in a single pass and "&amp;" is handled last, so
# text such as "&amp;lt;" becomes "&lt;" and is not decoded a second time.
sub _catnav_unescape_html
{
    my ($text) = @_;
    $text =~ s/&quot;/"/g;
    $text =~ s/&lt;/</g;
    $text =~ s/&gt;/>/g;
    $text =~ s/&amp;/&/g;
    return $text;
}

# Recursively walk a category tree and make sure each sub-category page
# carries a {{Catnav|...}} template describing the path that led to it.
#
# Parameters:
#   $vName  - name of the current category as shown on the page
#   $vNameU - the same name in the URL-encoded form used in links
#   $vDepth - number of valid entries in @aTree
#   @aTree  - path of category names from the root to the current category
#
# For the current category this routine lists and sorts the children,
# prepends a Catnav template to every child page lacking one for this path,
# and then calls itself on every child. Progress goes to STDOUT and to
# cnlog.txt; diguilog.txt holds the arguments of the most recent call.
#
# Uses $browser, @ns_headers, $WIKI_PATH and $DEPTH from the enclosing file.
# NOTE(review): the "next 200" paging code was already disabled, so only the
# first listing page of a category is read.
# NOTE(review): nothing here detects a category that contains one of its
# own ancestors; such a cycle would presumably recurse without end.
sub gotocat
{ # recursive routine
    my ($vName, $vNameU, $vDepth, @aTree) = @_;

    # debug log, overwritten on every call
    _catnav_log('>', 'diguilog.txt',
        "Name=", $vName, " UName=", $vNameU,
        " vDepth=", $vDepth,
        " aTree=", _catnav_format_tree($vDepth, @aTree), "\n");

    if ($vDepth > $DEPTH) { # depth exceeds
        # Keep the path at $DEPTH entries: the root stays, the second entry
        # becomes "..." and the remaining entries shift up by one.
        $aTree[1] = "...";
        for my $k (2 .. $DEPTH - 1) {
            $aTree[$k] = $aTree[$k + 1];
        }
        $vDepth = $DEPTH;
        _catnav_log('>>', 'diguilog.txt', "Depth exceeds\n");
    }
    print "in digui. vDepth=";
    print $vDepth;
    print "\n";

    # list and sort all children
    my @aChild;  # display names of the children
    my @aChildU; # URL-encoded names, index-aligned with @aChild
    print "list ";
    my $list_url = "http://" . $WIKI_PATH . "/wiki/Category:" . $vNameU;
    my $list_response = $browser->get($list_url, @ns_headers);
    unless ($list_response->is_success) {
        print STDERR "Cannot fetch $list_url: ", $list_response->status_line, "\n";
    }
    _catnav_collect_children($list_response->as_string, \@aChild, \@aChildU);

    print "sort ";
    # Sort both arrays together by display name.
    my @order = sort { $aChild[$a] cmp $aChild[$b] } 0 .. $#aChild;
    @aChild = @aChild[@order];
    @aChildU = @aChildU[@order];
    my $nChild = scalar(@aChild);
    print "entered current page. nChild=";
    print $nChild;
    print "\n";

    { #log
        my $names = "";
        for my $child (@aChild) {
            $names .= $child . " ";
        }
        _catnav_log('>>', 'cnlog.txt',
            "Digui: Cat=", $vName,
            ", Tree=", _catnav_format_tree($vDepth, @aTree), "\n",
            " ", $nChild, " children: ", $names, "\n");
    } #log

    for my $i (0 .. $nChild - 1) { # check and add catnav to all children
        # get edit content
        my $edit_url = "http://" . $WIKI_PATH . "/w/index.php?title=Category:" . $aChildU[$i] . "&action=edit";
        my $page = $browser->get($edit_url, @ns_headers)->as_string;

        # Get EditToken; '+' is accepted as well because MediaWiki edit
        # tokens end in "+\".
        my ($editToken) = ( $page =~ m/value\=\"([0-9a-f+\\]*)\" name\=\"wpEditToken\"/ );
        my ($editTime) = ( $page =~ m/value\=\"([0-9a-f]*)\" name\=\"wpEdittime\"/ );

        # Locate the page text between the end of the <textarea ...> opening
        # tag and </textarea>.
        my $ta_marker = "<textarea tabindex='1' accesskey=\",\" name=\"wpTextbox1\" id=\"wpTextbox1\" rows='25'";
        my $ta_start = index($page, $ta_marker);
        my $text_start = -1;
        my $text_end = -1;
        if ($ta_start >= 0) {
            my $tag_close = index($page, ">", $ta_start + length($ta_marker));
            if ($tag_close >= 0) {
                $text_start = $tag_close + 1;
                $text_end = index($page, "</textarea>", $text_start);
            }
        }

        # Without a complete edit form there is nothing trustworthy to save;
        # posting anyway could replace the page with garbage.
        unless (defined($editToken) && length($editToken) && $text_end >= 0) {
            print "cannot read edit form, child skipped\n";
            _catnav_log('>>', 'cnlog.txt',
                " Child:", $aChild[$i], " skipped (no edit form).\n");
            next;
        }
        my $text = substr($page, $text_start, $text_end - $text_start);

        # check if there is Catnav
        my ($bFound, $cnFound) = _catnav_scan($text, $vDepth, @aTree);
        print "entered child page. bFound=";
        print $bFound;
        print "\n";
        next if $bFound;

        # add new tree
        my $treecontent = "{{Catnav";
        for my $k (0 .. $vDepth - 1) {
            $treecontent .= "|" . $aTree[$k];
        }
        $treecontent .= "}}\n";
        # A blank line separates the first Catnav on a page from the text.
        $treecontent .= "\n" unless $cnFound;

        # The form text and the scraped names are both HTML-escaped, so
        # decode them together just before saving.
        $text = _catnav_unescape_html($treecontent . $text);

        #Update
        my $save = $browser->post($edit_url, @ns_headers,
            Content_Type => 'form-data',
            Content => [
                wpTextbox1 => $text,
                wpSummary => "[[User:xcnbot|xcnbot]] testing",
                wpSave => "Save page",
                wpSection => "",
                wpEdittime => $editTime,
                wpEditToken => $editToken,
                wpMinoredit => "1",
            ]);
        if ($save->is_error) {
            print "Change failed: ", $save->status_line, "\n";
            _catnav_log('>>', 'cnlog.txt',
                " Child:", $aChild[$i], " change FAILED.\n");
            next;
        }
        print "Change made\n";
        _catnav_log('>>', 'cnlog.txt',
            " Child:", $aChild[$i], " change made.\n");
    } # check and add catnav to all children

    # call sub-gotocat on every child
    for my $ch (0 .. $nChild - 1) {
        _catnav_log('>>', 'cnlog.txt', "Go into: Child=", $aChild[$ch], "\n");
        # Change @aTree: the child becomes the last path entry for the call
        $aTree[$vDepth] = $aChild[$ch];
        gotocat($aChild[$ch], $aChildU[$ch], $vDepth + 1, @aTree);
        _catnav_log('>>', 'cnlog.txt', "Jump outto: Parent=", $vName, "\n");
    }
    return;
} # recursive routine