User:Ricordisamoa/ACimport.js

From Wikidata
Jump to navigation Jump to search

Note: After publishing, you may have to bypass your browser's cache to see the changes.

  • Firefox / Safari: Hold Shift while clicking Reload, or press either Ctrl-F5 or Ctrl-R (⌘-R on a Mac)
  • Google Chrome: Press Ctrl-Shift-R (⌘-Shift-R on a Mac)
  • Edge: Hold Ctrl while clicking Refresh, or press Ctrl-F5.
/* <nowiki>
 *
 * ACimport.js by [[User:Ricordisamoa]]
 *
 * automatically imports Authority Control data (plus IMDb IDs and taxon names)
 * from de, en, es, fr, it, ja, nl and ru Wikipedia to Wikidata
 *
 * ---> optimized to run on a bot <---
 *   has served [[User:SamoaBot]]
 *
*/
(function(){
	// Pending imports. doPage() pushes {itemId, propName, propValue, lang[, claimId]}
	// entries; startTask() hands them to setProp() one by one.
	var queue = [];

	// Template parameter name -> numeric Wikidata property id.
	// The key order matters: it is the order in which $.each() walks the table.
	var properties = {
		"VIAF": 214,
		"GND": 227,
		"LCCN": 244,
		"BNF": 268,
		"SUDOC": 269,
		"NDL": 349,
		"imdb_id": 345,
		"imdb": 345,
		"taxon name": 225
	};

	// Language code -> numeric Wikidata item id of that Wikipedia edition
	// (used as the target of the reference added after a claim is created).
	var references = {
		"de": 48183,
		"en": 328,
		"fr": 8447,
		"it": 11920,
		"ja": 177837,
		"nl": 10000,
		"ru": 206855,
		"es": 8449
	};
	/**
	 * Normalises an IMDb title id to "tt" followed by at least 7 digits.
	 *
	 * A leading "tt" is stripped, the first run of digits is left-padded with
	 * zeros up to 7 digits, and "tt" is put back in front. Digit runs that are
	 * already 7 digits or longer are not padded.
	 *
	 * @param {string} IMDb raw id, with or without the "tt" prefix
	 * @returns {string} normalised id
	 * @throws {TypeError} when the input contains no digit at all
	 */
	var formatIMDb=function(IMDb){
		var digits=IMDb.match(/[0-9]+/);
		if(digits===null) throw new TypeError("formatIMDb: no digits in \""+IMDb+"\"");
		// Clamp at zero: Array() throws a RangeError for a negative length,
		// which used to happen for digit runs of 9 or more characters.
		var missing=Math.max(0,7-digits[0].length);
		return "tt"+Array(missing+1).join("0")+IMDb.replace(/^tt/,"");
	};
	/**
	 * Normalises an LCCN by removing slashes and zero-padding the serial part.
	 *
	 * When the input is written as prefix/year/serial (letters-only prefix,
	 * 2- or 4-digit year, serial of up to 6 digits) the serial is left-padded
	 * to 6 digits, so both "n/79/22889" -> "n79022889" and
	 * "n/2001/12345" -> "n2001012345" come out right.
	 * NOTE(review): assumes the slash notation is prefix/year/serial, as the
	 * original padding logic already did for 2-digit years; confirm against
	 * the source templates.
	 *
	 * Any other input goes through the legacy rule: strip the slashes and, if
	 * the first digit run has fewer than 8 digits, insert zeros after its
	 * first two digits until it has 8.
	 *
	 * @param {string} LCCN raw value, with or without slashes
	 * @returns {string} normalised value
	 * @throws {TypeError} when the input contains no digit at all
	 */
	var formatLCCN=function(LCCN){
		var parts=LCCN.split("/");
		if(
			parts.length===3&&
			/^[a-zA-Z]*$/.test(parts[0])&&
			/^([0-9]{2}|[0-9]{4})$/.test(parts[1])&&
			/^[0-9]{1,6}$/.test(parts[2])
		){
			// The year/serial boundary is still known here; once the slashes
			// are gone a 4-digit year cannot be told apart from year+serial.
			return parts[0]+parts[1]+Array(7-parts[2].length).join("0")+parts[2];
		}
		var flat=parts.join("");
		var digits=flat.match(/[0-9]+/);
		if(digits===null) throw new TypeError("formatLCCN: no digits in \""+LCCN+"\"");
		var numbers=digits[0].length;
		if(numbers<8) flat=flat.replace(/([0-9]{2})/,"$1"+Array(9-numbers).join("0"));
		return flat;
	};
	/**
	 * Writes one value to a Wikidata item through the API.
	 *
	 * For an English-Wikipedia "imdb" value with a known claim id the existing
	 * claim is overwritten (wbsetclaimvalue). In every other case a new claim
	 * is created (wbcreateclaim) and, once its id is known, a p143 reference
	 * pointing at the item of the source Wikipedia is attached (wbsetreference).
	 * Progress and errors are reported on the console only.
	 *
	 * @param {string} itemId    id of the item to edit
	 * @param {string} propName  key of the `properties` table; unknown keys are ignored
	 * @param {string} propValue raw value (LCCN / IMDb values are normalised here)
	 * @param {string} lang      language code of the source Wikipedia
	 * @param {?string} claimId  id of an existing claim to overwrite, or null/undefined
	 */
	var setProp=function(itemId,propName,propValue,lang,claimId){
		if(Object.keys(properties).indexOf(propName)===-1) return;
		if(propName==="imdb"&&/^[0-9]+$/.test(propValue)===false) return;
		propValue=(propName==="LCCN"?formatLCCN(propValue):(propName.indexOf("imdb")!=-1?formatIMDb(propValue):propValue));
		// Decided once so that the request and its result handler always agree
		// on whether a claim was overwritten or created.
		var isUpdate=lang==="en"&&propName==="imdb"&&claimId!=null;
		// NOTE(review): "csrfToken" is believed to be the current key and
		// "editToken" its older name; the fallback keeps both working. Confirm
		// against the MediaWiki version this runs on.
		var getToken=function(){
			return mw.user.tokens.get("csrfToken")||mw.user.tokens.get("editToken");
		};
		var logSuccess=function(){
			// Clamp the padding: Array() throws for a negative length, which
			// would happen for item ids longer than 12 characters.
			var spaces=Array(Math.max(0,11-itemId.length)+1).join(" ");
			console.log(itemId+spaces+"|  "+propName+"  |  "+propValue);
		};
		$.post(
			mw.util.wikiScript("api"),
			$.extend(
				{
					format:"json",
					entity:itemId,
					snaktype:"value",
					// JSON-encode instead of wrapping in quotes by hand, so
					// quotes or backslashes in the value cannot break the payload.
					value:JSON.stringify(propValue),
					summary:"Bot: importing "+propName+" from "+lang+".wiki",
					bot:true,
					assert:"bot",
					token:getToken()
				},
				isUpdate?{
					action:"wbsetclaimvalue",
					claim:claimId
				}:{
					action:"wbcreateclaim",
					property:"p"+properties[propName]
				}
			)
		)
		.done(function(data){
			if(data.error&&data.error.info){
				console.warn("Error: "+data.error.info);
				return;
			}
			if(isUpdate){
				logSuccess();
				return;
			}
			if(!data.claim||!data.claim.id){
				console.warn("Error: no claim id returned for "+itemId);
				return;
			}
			if(typeof references[lang]=="undefined"){
				console.warn("Error: no reference item known for "+lang+".wiki");
				return;
			}
			var snaks={
				"p143":[{
					snaktype:"value",
					property:"p143",
					datavalue:{
						type:"wikibase-entityid",
						value:{
							"entity-type":"item",
							"numeric-id":references[lang]
						}
					}
				}]
			};
			$.post(
				mw.util.wikiScript("api"),
				{
					format:"json",
					action:"wbsetreference",
					entity:itemId,
					statement:data.claim.id,
					snaks:JSON.stringify(snaks),
					summary:"importing reference from "+lang+".wiki",
					bot:true,
					assert:"bot",
					token:getToken()
				}
			)
			.done(function(data){
				if(data.error&&data.error.info) console.warn("Error: "+data.error.info);
				else logSuccess();
			})
			.fail(function(){
				console.warn("Error");
			});
		})
		.fail(function(){
			console.warn("Error");
		});
	};
	/**
	 * Works through the current queue, one entry every 10 seconds, and then
	 * requests the next batch of pages via ACimport.startTask().
	 *
	 * With an empty queue the next batch is requested immediately.
	 *
	 * @param {string} lang        language code of the Wikipedia being processed
	 * @param {number} templateNum index of the template in use; forwarded to the
	 *                             next batch so the run stays on the same template
	 */
	var startTask=function(lang,templateNum){
		console.log(queue.length+" item"+(queue.length!=1?"s":"")+" to be processed.");
		if(queue.length===0){
			ACimport.startTask(lang,500,undefined,templateNum);
			return;
		}
		var ii=0;
		var interval=setInterval(function(){
			if(ii<queue.length){
				var job=queue[ii];
				// Advance before doing the work: if setProp() throws, the job
				// is skipped instead of being retried on every tick forever.
				ii+=1;
				try{
					setProp(job.itemId,job.propName,job.propValue,job.lang,typeof job.claimId!="undefined"?job.claimId:null);
				}
				catch(e){
					console.warn("Error: "+e);
				}
			}
			else{
				console.log("  --->  Task completed; starting a new one.");
				clearInterval(interval);
				// Forward templateNum, as the empty-queue branch above does;
				// omitting it would restart on template 0.
				ACimport.startTask(lang,500,undefined,templateNum);
			}
		},10000);
	};
	/**
	 * Fetches the claims of the Wikidata item linked to a Wikipedia page and
	 * queues every value found in the page's wikitext that the item lacks.
	 *
	 * - en: IMDb title id (from {{IMDb title}}) and taxon name (from the
	 *   "binomial" parameter); an existing but non-normalised p345 value is
	 *   queued for overwrite.
	 * - other languages: every key of `properties` found as "| key = value".
	 *
	 * @param {string}  lang        language code of the source Wikipedia
	 * @param {string}  pageTitle   title of the page on that Wikipedia
	 * @param {string}  content     wikitext of the page
	 * @param {boolean} start       when truthy, startTask() is called after this page
	 * @param {number}  templateNum forwarded unchanged to startTask()
	 */
	var doPage=function(lang,pageTitle,content,start,templateNum){
		// Inspects one wbgetentities response and fills the queue. Kept apart
		// from the AJAX callback so that none of its early returns can skip
		// the startTask() hand-off, which would stall the whole batch chain.
		var collect=function(data){
			var wdProperties={};
			if(!data||!data.entities||Object.keys(data.entities).length!=1) return;
			var itemId=Object.keys(data.entities)[0];
			if(itemId==-1){
				console.warn("Entity not found: "+pageTitle+"@"+lang+".wiki");
				return;
			}
			var entity=data.entities[itemId];
			var occurrences,wpVal;
			if(
				lang==="en"&&(
					!entity.claims||
					!entity.claims["p345"]||
					formatIMDb(entity.claims["p345"][0].mainsnak.datavalue.value)!=entity.claims["p345"][0].mainsnak.datavalue.value
				)
			){
				occurrences=content.match(/\{\{[Ii]MDb title(\}\}|\|)/g);
				if(occurrences!=null&&occurrences.length>1){
					console.warn("More than 1 occurrence of {{IMDb title}} found on "+pageTitle+"@"+lang+".wiki");
					return;
				}
				wpVal=content.match(/\{\{[Ii](MD|md)b title\|([Ii]d=)?([0-9]+)(\}\}|\|)/);
				if(wpVal!=null) queue.push({itemId:itemId,propName:"imdb",propValue:wpVal[3],lang:lang,claimId:entity.claims&&typeof entity.claims["p345"]!="undefined"?entity.claims["p345"][0].id:undefined});
			}
			if(
				lang==="en"&&(
					!entity.claims||
					!entity.claims["p225"]
				)
			){
				occurrences=content.match(/\{\{Taxobox[\s\|\n\r]/g);
				if(occurrences!=null&&occurrences.length>1){
					console.warn("More than 1 occurrence of {{Taxobox}} found on "+pageTitle+"@"+lang+".wiki");
					return;
				}
				wpVal=content.match(/\| *binomial *= *''([a-z ]+)''/i);
				if(wpVal!=null&&wpVal.length===2) queue.push({itemId:itemId,propName:"taxon name",propValue:wpVal[1],lang:lang});
			}
			else if(entity.claims){
				// Remember what Wikidata already has, for the comparison below.
				$.each(properties,function(key,property){
					if(entity.claims["p"+property]){
						var wdVal=entity.claims["p"+property][0].mainsnak.datavalue.value;
						console.log("wd   "+key+": "+wdVal);
						wdProperties[key]=wdVal;
					}
				});
			}
			if(lang!="en") $.each(properties,function(key,property){
				var found=content.match(new RegExp("\\| *"+key+" *= *([0-9a-zA-Z\\/\\-]+)[^0-9]"));
				if(found!=null){
					found=found[1];
					console.log("wp   "+key+": "+found);
					if(Object.keys(wdProperties).indexOf(key)!=-1){
						console.log(" -->   "+key+" already present on Wikidata");
						if(wdProperties[key]!=found){
							// Values that differ only in formatting are not conflicts.
							if(key==="LCCN"&&formatLCCN(wdProperties[key])===formatLCCN(found)) return;
							if(key==="imdb"&&formatIMDb(wdProperties[key])===formatIMDb(found)) return;
							else console.warn("Conflict for "+key+"!");
						}
					}
					else{
						queue.push({itemId:itemId,propName:key,propValue:found,lang:lang});
						console.log(" -->   "+key+" not present: added to queue");
					}
				}
			});
		};
		$.getJSON(
			mw.util.wikiScript("api"),
			{
				action:"wbgetentities",
				sites:lang+"wiki",
				titles:pageTitle,
				format:"json",
				props:"claims"
			},
			function(data){
				console.groupCollapsed(pageTitle);
				console.log(content);
				console.groupEnd();
				try{
					collect(data);
				}
				catch(e){
					// One malformed entity must not stop the batch.
					console.warn("Error while processing "+pageTitle+"@"+lang+".wiki: "+e);
				}
				if(start){
					console.log(queue);
					startTask(lang,templateNum);
				}
			}
		);
	};
	// Public entry points, exposed on the global object.
	window.ACimport={};
	/**
	 * Imports the data of a single Wikipedia page: fetches the wikitext of its
	 * latest revision and passes it to doPage(), which then starts the task.
	 *
	 * @param {string} lang      language code of the Wikipedia to read from
	 * @param {string} pageTitle title of the page on that Wikipedia
	 */
	window.ACimport.fromPage=function(lang,pageTitle){
		$.get(
			"//"+lang+".wikipedia.org/w/api.php",
			{
				action:"query",
				format:"json",
				titles:pageTitle,
				prop:"revisions",
				rvprop:"content"
			},
			function(data){
				var pages=data&&data.query&&data.query.pages;
				var page=pages&&pages[Object.keys(pages)[0]];
				// A response without a page or without revisions (e.g. for a
				// missing title) would otherwise throw inside this callback.
				if(!page||!page.revisions||!page.revisions.length){
					console.warn("Page not found: "+pageTitle+"@"+lang+".wiki");
					return;
				}
				doPage(lang,pageTitle,page.revisions[0]["*"],true);
			},
			"jsonp"
		);
	};
	/**
	 * Fetches one batch of articles that embed the language's template and runs
	 * doPage() on each; the last page of the batch triggers queue processing.
	 *
	 * The continuation key is stored with jStorage per language and template,
	 * so a later call without `eicontinue` resumes where the previous one ended.
	 *
	 * @param {string} lang          language code of the Wikipedia to scan
	 * @param {number} taskLength    number of pages to request (geilimit)
	 * @param {string} [eicontinue]  continuation key; defaults to the stored one
	 * @param {number} [templateNum] index into the template list when a language
	 *                               has several templates (default 0)
	 */
	window.ACimport.startTask=function(lang,taskLength,eicontinue,templateNum){
		queue=[];
		if(typeof templateNum=="undefined") templateNum=0;
		var templates={
			de:"Normdaten",
			en:["Infobox film","Taxobox","Authority control"],
			fr:"Autorité",
			it:"Controllo di autorità",
			ja:"Normdaten",
			nl:"Infobox film",
			ru:"Фильм",
			es:"Ficha de película"
		};
		var template=templates[lang];
		if(typeof template!="string"&&typeof template!="undefined") template=template[templateNum];
		if(typeof template=="undefined"){
			console.warn("No template known for "+lang+".wiki (templateNum "+templateNum+")");
			return;
		}
		// Must be built after `template` is resolved: built earlier, the key
		// always ended in "-undefined" and was shared by all templates of a
		// language. Keys stored under that old name are no longer read.
		var storageName="ACimport-eicontinue-"+lang+"-"+template;
		var temp=$.jStorage.get(storageName);
		if(typeof eicontinue=="undefined"&&temp!=null) eicontinue=temp;
		$.get(
			"//"+lang+".wikipedia.org/w/api.php",
			$.extend(
				{
					action:"query",
					format:"json",
					rawcontinue:"",
					generator:"embeddedin",
					geinamespace:0,
					geilimit:taskLength,
					geititle:"Template:"+template,
					geifilterredir:"nonredirects",
					prop:"revisions",
					rvprop:"content"
				},
				typeof eicontinue!="undefined"?{geicontinue:"10|"+template+"|"+eicontinue}:{}
			),
			function(data){
				var cont=data&&data["query-continue"]&&data["query-continue"].embeddedin;
				var continueKey=cont&&cont.geicontinue;
				if(continueKey){
					$.jStorage.set(storageName,continueKey.split("|")[2]);
					console.log("Continue-key: "+continueKey);
				}
				else{
					// The final batch carries no continuation; forget the stored
					// key so that the next run starts over from the beginning.
					$.jStorage.deleteKey(storageName);
					console.log("No continue-key: last batch for Template:"+template);
				}
				var pageMap=data&&data.query&&data.query.pages?data.query.pages:{};
				var pages=$.grep(
					$.map(pageMap,function(e){
						return e;
					}),
					function(e){
						return typeof e.revisions!="undefined";
					}
				);
				console.log(pages.length);
				$.each(pages,function(index,page){
					doPage(lang,page.title,page.revisions[0]["*"],index===pages.length-1,templateNum);
				});
			},
			"jsonp"
		);
	};
})();