User:Ricordisamoa/ACimport.js
Jump to navigation
Jump to search
Note: After publishing, you may have to bypass your browser's cache to see the changes.
- Firefox / Safari: Hold Shift while clicking Reload, or press either Ctrl-F5 or Ctrl-R (⌘-R on a Mac)
- Google Chrome: Press Ctrl-Shift-R (⌘-Shift-R on a Mac)
- Edge: Hold Ctrl while clicking Refresh, or press Ctrl-F5.
/* <nowiki>
*
* ACimport.js by [[User:Ricordisamoa]]
*
* automatically imports Authority Control identifiers (plus IMDb IDs and taxon names) from the de, en, es, fr, it, ja, nl and ru Wikipedias to Wikidata
*
* ---> optimized to run on a bot <---
* has served [[User:SamoaBot]]
*
*/
(function(){
// Claims waiting to be written: doPage pushes entries, startTask walks them.
// Each entry has itemId, propName, propValue, lang and (optionally) claimId.
var queue = [];

// Template parameter name -> numeric Wikidata property ID.
// The ID is used with a "p" prefix, e.g. VIAF -> "p214".
var properties = {
	"VIAF": 214,
	"GND": 227,
	"LCCN": 244,
	"BNF": 268,
	"SUDOC": 269,
	"NDL": 349,
	"imdb_id": 345,
	"imdb": 345,
	"taxon name": 225
};

// Language code -> numeric Wikidata item ID of that Wikipedia edition;
// setProp uses it as the value of the "p143" reference snak.
var references = {
	"de": 48183,
	"en": 328,
	"fr": 8447,
	"it": 11920,
	"ja": 177837,
	"nl": 10000,
	"ru": 206855,
	"es": 8449
};
/**
 * Normalizes an IMDb identifier to "tt" followed by zero-padded digits.
 *
 * The first run of digits in the input decides how many zeros are needed to
 * reach seven digits. A leading "tt" is dropped before the prefix is re-added.
 * Identifiers that already have seven or more digits are only (re)prefixed.
 *
 * @param {string} IMDb Raw identifier, e.g. "123", "0123456" or "tt0123456".
 * @return {string} Normalized identifier, e.g. "tt0000123".
 * @throws {TypeError} If the input contains no digit at all.
 */
var formatIMDb=function(IMDb){
	var raw=String(IMDb);
	var digits=raw.match(/[0-9]+/);
	if(digits===null){
		throw new TypeError("formatIMDb: no digits found in \""+raw+"\"");
	}
	// Clamp at zero: a negative count would make Array() throw a RangeError
	// for identifiers longer than the historical seven digits.
	var missing=Math.max(0,7-digits[0].length);
	// Array(n+1).join("0") yields exactly n zeros.
	return "tt"+Array(missing+1).join("0")+raw.replace(/^tt/,"");
};
/**
 * Normalizes a Library of Congress Control Number to its compact form
 * (prefix + year + six-digit serial, without separators).
 *
 * Two input shapes are handled:
 *  - segmented, e.g. "n/79/123", "n/2001/1234" or "n79-123": the serial part
 *    is left-padded with zeros to six digits, whatever the year length;
 *  - anything else: slashes are removed and, when the first digit run is
 *    shorter than eight digits, zeros are inserted after its first two digits
 *    (two-digit year assumed) to bring it to eight.
 *
 * @param {string} LCCN Raw value as found in the template parameter.
 * @return {string} Normalized value; input without digits is returned with
 *     slashes removed and otherwise unchanged.
 */
var formatLCCN=function(LCCN){
	// count <= 0 yields the empty string.
	var zeros=function(count){
		return count>0?Array(count+1).join("0"):"";
	};
	var raw=String(LCCN);
	// prefix, optional separator, 2- or 4-digit year, separator, serial.
	var segmented=raw.match(/^([a-z]*)[\/\-]?([0-9]{2}|[0-9]{4})[\/\-]([0-9]{1,6})$/i);
	if(segmented!==null){
		return segmented[1]+segmented[2]+zeros(6-segmented[3].length)+segmented[3];
	}
	var compact=raw.replace(/\//g,"");
	var run=compact.match(/[0-9]+/);
	if(run===null) return compact;
	var numbers=run[0].length;
	if(numbers<8){
		// A replacer function avoids "$1"+"000" being read as a "$10" reference.
		compact=compact.replace(/[0-9]{2}/,function(firstTwo){
			return firstTwo+zeros(8-numbers);
		});
	}
	return compact;
};
/**
 * Writes one imported value to Wikidata and, for newly created claims,
 * attaches a "p143" reference pointing at the source Wikipedia.
 *
 * Nothing is sent when propName is not a key of `properties`, when an "imdb"
 * value is not purely numeric, or when the value cannot be normalized by
 * formatLCCN/formatIMDb. An existing claim is updated in place
 * (wbsetclaimvalue) only for lang "en", property "imdb" and a non-null
 * claimId; every other call creates a new claim (wbcreateclaim).
 *
 * @param {string} itemId Wikidata entity ID.
 * @param {string} propName Key of `properties`.
 * @param {string} propValue Raw value taken from the wiki page.
 * @param {string} lang Language code of the source wiki (key of `references`).
 * @param {?string} claimId ID of an existing claim to overwrite, or null.
 */
var setProp=function(itemId,propName,propValue,lang,claimId){
	if(Object.keys(properties).indexOf(propName)===-1) return;
	if(propName==="imdb"&&/^[0-9]+$/.test(propValue)===false) return;
	try{
		if(propName==="LCCN") propValue=formatLCCN(propValue);
		else if(propName.indexOf("imdb")!==-1) propValue=formatIMDb(propValue);
	}
	catch(e){
		// A value that cannot be normalized must not escape into the caller's
		// timer callback; skip it instead.
		console.warn("Error: cannot format "+propName+" value \""+propValue+"\" for "+itemId);
		return;
	}
	var api=mw.util.wikiScript("api");
	// One flag drives both the API action and whether a reference is added,
	// so a freshly created claim always receives its reference.
	var isUpdate=lang==="en"&&propName==="imdb"&&claimId!=null;
	// Column padding for the log line; clamped so long IDs cannot make Array() throw.
	var spaces=Array(Math.max(0,12-itemId.length)).join(" ");
	var logSuccess=function(){
		console.log(itemId+spaces+"| "+propName+" | "+propValue);
	};
	// Logs an API-level error and reports whether there was one.
	var reportApiError=function(data){
		if(data&&data.error&&data.error.info){
			console.warn("Error: "+data.error.info);
			return true;
		}
		return false;
	};
	var reportFailure=function(){
		console.warn("Error");
	};
	$.post(
		api,
		$.extend(
			{
				format:"json",
				entity:itemId,
				snaktype:"value",
				value:"\""+propValue+"\"",
				summary:"Bot: importing "+propName+" from "+lang+".wiki",
				bot:true,
				assert:"bot",
				token:mw.user.tokens.get("editToken")
			},
			isUpdate?{
				action:"wbsetclaimvalue",
				claim:claimId
			}:{
				action:"wbcreateclaim",
				property:"p"+properties[propName]
			}
		)
	)
	.done(function(data){
		if(reportApiError(data)) return;
		if(isUpdate){
			// The existing claim keeps whatever references it already has.
			logSuccess();
			return;
		}
		if(!data||!data.claim||!data.claim.id){
			console.warn("Error: no claim ID returned for "+itemId);
			return;
		}
		var snaks={
			"p143":[{
				snaktype:"value",
				property:"p143",
				datavalue:{
					type:"wikibase-entityid",
					value:{
						"entity-type":"item",
						"numeric-id":references[lang]
					}
				}
			}]
		};
		$.post(
			api,
			{
				format:"json",
				action:"wbsetreference",
				entity:itemId,
				statement:data.claim.id,
				snaks:JSON.stringify(snaks),
				summary:"importing reference from "+lang+".wiki",
				bot:true,
				assert:"bot",
				token:mw.user.tokens.get("editToken")
			}
		)
		.done(function(data){
			if(!reportApiError(data)) logSuccess();
		})
		.fail(reportFailure);
	})
	.fail(reportFailure);
};
/**
 * Works through `queue`, one entry every ten seconds, then requests the next
 * batch of pages through ACimport.startTask.
 *
 * With an empty queue the next batch is requested immediately. In both cases
 * the same templateNum is handed on, so the run stays on the template it was
 * started with.
 *
 * @param {string} lang Language code of the wiki being processed.
 * @param {number=} templateNum Index of the template, passed on unchanged to
 *     ACimport.startTask.
 */
var startTask=function(lang,templateNum){
	console.log(queue.length+" item"+(queue.length!=1?"s":"")+" to be processed.");
	if(queue.length===0){
		ACimport.startTask(lang,500,undefined,templateNum);
		return;
	}
	var ii=0;
	var interval=setInterval(function(){
		if(ii>=queue.length){
			console.log(" ---> Task completed; starting a new one.");
			clearInterval(interval);
			ACimport.startTask(lang,500,undefined,templateNum);
			return;
		}
		var entry=queue[ii];
		// Advance before the call: if setProp throws, the entry is skipped
		// rather than retried on every tick.
		ii+=1;
		try{
			setProp(entry.itemId,entry.propName,entry.propValue,entry.lang,typeof entry.claimId!="undefined"?entry.claimId:null);
		}
		catch(e){
			console.warn("Error: "+e);
		}
	},10000);
};
/**
 * Compares one wiki page with its Wikidata item and queues the missing values.
 *
 * The item is looked up by sitelink (lang+"wiki", pageTitle). For lang "en"
 * only the IMDb title ID ({{IMDb title}}) and the taxon name (Taxobox
 * "binomial") are considered; for every other language each key of
 * `properties` is searched as a template parameter and queued when Wikidata
 * has no statement for it. When `start` is true, startTask is called after
 * the page has been handled, even if the page was skipped or the request failed.
 *
 * @param {string} lang Language code of the source wiki.
 * @param {string} pageTitle Title of the page on that wiki.
 * @param {string} content Wikitext of the page.
 * @param {boolean} start Whether to call startTask after this page.
 * @param {number=} templateNum Passed on unchanged to startTask.
 */
var doPage=function(lang,pageTitle,content,start,templateNum){
	// Value of the first statement for propId, or undefined when there is no
	// statement or its main snak carries no datavalue.
	var firstValue=function(claims,propId){
		var statements=claims&&claims[propId];
		var snak=statements&&statements[0]&&statements[0].mainsnak;
		return snak&&snak.datavalue?snak.datavalue.value:undefined;
	};
	// Inspects the API response and pushes the missing values onto `queue`.
	// Every early exit returns from this helper only, never from the callback.
	var collect=function(data){
		var wdProperties={};
		if(!data||!data.entities) return;
		var ids=Object.keys(data.entities);
		if(ids.length!=1) return;
		var entityId=ids[0];
		if(entityId==-1){
			console.warn("Entity not found: "+pageTitle+"@"+lang+".wiki");
			return;
		}
		var entity=data.entities[entityId];
		var claims=entity.claims;
		var imdbOnWd=firstValue(claims,"p345");
		if(
			lang==="en"&&(
				!claims||
				!claims["p345"]||
				// A statement without a value is left alone.
				(typeof imdbOnWd!="undefined"&&formatIMDb(imdbOnWd)!=imdbOnWd)
			)
		){
			var imdbTemplates=content.match(/\{\{[Ii]MDb title(\}\}|\|)/g);
			if(imdbTemplates!=null&&imdbTemplates.length>1){
				console.warn("More than 1 occurrence of {{IMDb title}} found on "+pageTitle+"@"+lang+".wiki");
				return;
			}
			var imdbMatch=content.match(/\{\{[Ii](MD|md)b title\|([Ii]d=)?([0-9]+)(\}\}|\|)/);
			if(imdbMatch!=null){
				queue.push({
					itemId:entityId,
					propName:"imdb",
					propValue:imdbMatch[3],
					lang:lang,
					// Set when a badly formatted statement exists and must be overwritten.
					claimId:claims&&typeof claims["p345"]!="undefined"?claims["p345"][0].id:undefined
				});
			}
		}
		if(
			lang==="en"&&(
				!claims||
				!claims["p225"]
			)
		){
			var taxoboxes=content.match(/\{\{Taxobox[\s\|\n\r]/g);
			if(taxoboxes!=null&&taxoboxes.length>1){
				console.warn("More than 1 occurrence of {{Taxobox}} found on "+pageTitle+"@"+lang+".wiki");
				return;
			}
			var binomial=content.match(/\| *binomial *= *''([a-z ]+)''/i);
			if(binomial!=null&&binomial.length===2){
				queue.push({itemId:entityId,propName:"taxon name",propValue:binomial[1],lang:lang});
			}
		}
		else if(claims){
			$.each(properties,function(key,property){
				if(claims["p"+property]){
					var wdVal=firstValue(claims,"p"+property);
					console.log("wd "+key+": "+wdVal);
					// Recorded even without a value so the property counts as present.
					wdProperties[key]=wdVal;
				}
			});
		}
		if(lang!="en") $.each(properties,function(key,property){
			var wpMatch=content.match(new RegExp("\\| *"+key+" *= *([0-9a-zA-Z\\/\\-]+)[^0-9]"));
			if(wpMatch==null) return;
			var wpVal=wpMatch[1];
			console.log("wp "+key+": "+wpVal);
			if(Object.keys(wdProperties).indexOf(key)===-1){
				queue.push({itemId:entityId,propName:key,propValue:wpVal,lang:lang});
				console.log(" --> "+key+" not present: added to queue");
				return;
			}
			console.log(" --> "+key+" already present on Wikidata");
			var known=wdProperties[key];
			if(typeof known=="undefined"||known==wpVal) return;
			// Values that differ only in formatting are not conflicts.
			if(key==="LCCN"&&formatLCCN(known)===formatLCCN(wpVal)) return;
			if(key==="imdb"&&formatIMDb(known)===formatIMDb(wpVal)) return;
			console.warn("Conflict for "+key+"!");
		});
	};
	// Hands over to the queue processor when this page is the one marked `start`.
	var launch=function(){
		if(start){
			console.log(queue);
			startTask(lang,templateNum);
		}
	};
	$.getJSON(
		mw.util.wikiScript("api"),
		{
			action:"wbgetentities",
			sites:lang+"wiki",
			titles:pageTitle,
			format:"json",
			props:"claims"
		},
		function(data){
			console.groupCollapsed(pageTitle);
			console.log(content);
			console.groupEnd();
			try{
				collect(data);
			}
			catch(e){
				// One malformed page must not prevent the batch from starting.
				console.warn("Error: "+e);
			}
			launch();
		}
	)
	.fail(function(){
		console.warn("Error");
		launch();
	});
};
// Public entry points, exposed on window so they can be called from the console.
window.ACimport={};
/**
 * Imports the data of a single page: fetches its wikitext from
 * <lang>.wikipedia.org and hands it to doPage with `start` set, so the
 * resulting queue is processed right away.
 *
 * @param {string} lang Language code of the wiki.
 * @param {string} pageTitle Title of the page to import from.
 */
window.ACimport.fromPage=function(lang,pageTitle){
	$.get(
		"//"+lang+".wikipedia.org/w/api.php",
		{
			action:"query",
			format:"json",
			titles:pageTitle,
			prop:"revisions",
			rvprop:"content"
		},
		function(data){
			var pages=data&&data.query&&data.query.pages;
			var page=pages?pages[Object.keys(pages)[0]]:undefined;
			// A missing or invalid title comes back without any revision.
			if(!page||!page.revisions||!page.revisions[0]){
				console.warn("Page not found: "+pageTitle+"@"+lang+".wiki");
				return;
			}
			doPage(lang,pageTitle,page.revisions[0]["*"],true);
		},
		"jsonp"
	);
};
/**
 * Fetches the next batch of main-namespace pages that embed the template
 * configured for `lang` and runs doPage on each of them; the last page of the
 * batch is marked so that the queue gets processed once it has been handled.
 *
 * The continuation key returned by the API is kept in jStorage, one key per
 * language and template, and is used when no `eicontinue` is given. When the
 * API returns no continuation the stored key is removed and the next
 * automatic restart for that language is suppressed, so the run ends after
 * the final batch.
 *
 * @param {string} lang Language code (key of the template table below).
 * @param {number} taskLength Maximum number of pages per batch.
 * @param {string=} eicontinue Page ID to continue from; defaults to the
 *     stored one, if any.
 * @param {number=} templateNum Index into the template list of `lang`;
 *     defaults to 0.
 */
window.ACimport.startTask=function(lang,taskLength,eicontinue,templateNum){
	queue=[];
	// Set by the callback below when a language's list has been read to the end.
	var exhausted=window.ACimport.exhausted||(window.ACimport.exhausted={});
	if(exhausted[lang]&&typeof eicontinue=="undefined"){
		delete exhausted[lang];
		console.log(" ---> No more pages to process on "+lang+".wiki; stopping.");
		return;
	}
	if(typeof templateNum=="undefined") templateNum=0;
	var templates={
		de:"Normdaten",
		en:["Infobox film","Taxobox","Authority control"],
		fr:"Autorité",
		it:"Controllo di autorità",
		ja:"Normdaten",
		nl:"Infobox film",
		ru:"Фильм",
		es:"Ficha de película"
	};
	var template=templates[lang];
	if(typeof template!="string"&&typeof template!="undefined") template=template[templateNum];
	if(typeof template=="undefined"){
		console.warn("Error: no template configured for "+lang+".wiki (template #"+templateNum+")");
		return;
	}
	// The template has to be resolved before the key is built; otherwise all
	// templates of a language would share one continuation key.
	var storageName="ACimport-eicontinue-"+lang+"-"+template;
	var temp=$.jStorage.get(storageName);
	if(typeof eicontinue=="undefined"&&temp!=null) eicontinue=temp;
	$.get(
		"//"+lang+".wikipedia.org/w/api.php",
		$.extend(
			{
				action:"query",
				format:"json",
				rawcontinue:"",
				generator:"embeddedin",
				geinamespace:0,
				geilimit:taskLength,
				geititle:"Template:"+template,
				geifilterredir:"nonredirects",
				prop:"revisions",
				rvprop:"content"
			},
			typeof eicontinue!="undefined"?{geicontinue:"10|"+template+"|"+eicontinue}:{}
		),
		function(data){
			var cont=data&&data["query-continue"]&&data["query-continue"].embeddedin;
			var next=cont?cont.geicontinue:undefined;
			var pages=$.grep(
				$.map((data&&data.query&&data.query.pages)||{},function(e){
					return e;
				}),
				function(e){
					return typeof e.revisions!="undefined";
				}
			);
			if(typeof next=="string"){
				// Only the third "|"-separated field is stored; the first two
				// are rebuilt when the request is made.
				$.jStorage.set(storageName,next.split("|")[2]);
				console.log("Continue-key: "+next);
			}
			else{
				// Final batch: forget the position. The restart is suppressed
				// only when this batch will actually trigger one.
				$.jStorage.deleteKey(storageName);
				if(pages.length>0) exhausted[lang]=true;
				console.log("Continue-key: none (last batch)");
			}
			console.log(pages.length);
			$.each(pages,function(index,page){
				doPage(lang,page.title,page.revisions[0]["*"],index===pages.length-1,templateNum);
			});
		},
		"jsonp"
	);
};
})();