Gebruiker:Edoderoobot/find red links.py

import pywikibot
import sys
from urllib.parse import urlparse

# Site code of the wiki whose page is scanned (handed to pywikibot.Site).
srcSite = 'en'
# Site code of the wiki that is checked for a sitelink (looked up as <code>wiki).
dstSite = 'nl'
# Title of the page to scan; an empty string means none was given.
pagename = ''
# Wanted Wikidata types: each dict maps a property id to the required item id.
type2find = [{'P31': 'Q5'}]
# Value assigned to the skip flag for links whose page does not exist.
skipredlinks = True
# Namespace ids a linked page must be in to be reported.
namespace = [0]

def process_one_link(site,link):
  """Check one linked page and print it when it is missing on the target wiki.

  A link is reported with ``Missing: ...`` unless it is skipped.  It is
  skipped when:

  * the link does not exist and ``skipredlinks`` is true;
  * the (redirect-resolved) page is outside the ``namespace`` list;
  * the first claim of a property listed in ``type2find`` has no item
    value or a different item than the wanted one;
  * the Wikidata item already has a ``<dstSite>wiki`` sitelink.

  Existing pages without a Wikidata item additionally get a
  ``wikidata-missing: ...`` line (only when not skipped for their namespace).

  Args:
    site: pywikibot site the link was found on.
    link: pywikibot page object of the link target.

  Returns:
    Always 1, so callers can count processed links by summing.
  """
  skip=False
  if link.exists():
    linkedpage=pywikibot.Page(site,link.title())
    # Follow redirects until a non-redirect page is reached.
    while linkedpage.isRedirectPage():
      linkedpage=linkedpage.getRedirectTarget()

    # Remember the namespace verdict separately so that the type/sitelink
    # check below cannot switch reporting back on for an unwanted namespace.
    wrong_namespace=linkedpage.namespace().id not in namespace
    skip=wrong_namespace
    if linkedpage.exists():
      if 'wikibase_item' in linkedpage.properties():
        wd=linkedpage.data_item()
        wd.get(get_redirect=True)
        for findtype in type2find:
          for onetype in findtype:
            if onetype in wd.claims:
              # Only the first claim of the property is examined.  Its target
              # is None for "unknown value"/"no value" claims, which can
              # never match the wanted item.
              target=wd.claims[onetype][0].getTarget()
              if target is None or target.title()!=findtype[onetype]:
                skip=True
              else:
                # Right type: report only when the destination wiki has no
                # sitelink yet and the namespace is acceptable.
                skip=wrong_namespace or (dstSite+'wiki' in wd.sitelinks)
      elif not skip:
        print('wikidata-missing: %s' %(link.title()))
  else:
    # Red link on the source wiki: reported only when skipredlinks is false.
    skip=skipredlinks

  if not skip:
    print('Missing: %s' % link)
  return 1

def process_one_page(srcSite, pagename):
  """Run process_one_link on every page linked from one source page.

  Args:
    srcSite: site code passed to pywikibot.Site.
    pagename: title of the page whose outgoing links are walked.

  Returns:
    The sum of the values returned by process_one_link for each link.
  """
  source_site=pywikibot.Site(srcSite)
  source_page=pywikibot.Page(source_site, pagename)
  return sum(
    process_one_link(source_site,target)
    for target in source_page.linkedPages()
  )

def _parse_type2find(param):
  """Turn a type2find argument value into the list-of-dicts filter format.

  Accepts ``human`` (meaning P31=Q5) or a single ``<property>:<item>`` pair
  such as ``P31:Q16521``.  Returns None when the value is not understood.
  """
  if param.lower()=='human':
    return [{'P31':'Q5'}]
  prop,_,item=param.partition(':')
  prop=prop.strip().upper()
  item=item.strip().upper()
  if prop[:1]=='P' and prop[1:].isdigit() and item[:1]=='Q' and item[1:].isdigit():
    return [{prop:item}]
  return None


def process_parameters(arg):
  """Read ``key=value`` command-line arguments into the module globals.

  Recognised arguments:

  * ``page=<title>`` or ``page=d:<Q-id>`` -> ``pagename``
  * ``wiki=<code>`` -> ``srcSite``
  * ``check=<code>`` -> ``dstSite``
  * ``type2find=human`` or ``type2find=<P-id>:<Q-id>`` -> ``type2find``

  Anything else (including the script name in ``sys.argv[0]``) is ignored.
  A ``d:`` page is translated to its title on the source wiki only after all
  arguments have been read, so the order of ``page=`` and ``wiki=`` does not
  matter.

  Args:
    arg: list of argument strings, normally ``sys.argv``.
  """
  global pagename
  global srcSite
  global dstSite
  global type2find
  global repo

  wikidata_id=None
  for x in arg:
    if x.startswith('page='):
      pagename=x[5:]
      # Remember a Wikidata reference; it is resolved after the loop.
      wikidata_id=pagename[2:] if pagename[0:2].lower()=='d:' else None
    elif x.startswith('wiki='):
      srcSite=x[5:]
    elif x.startswith('check='):
      dstSite=x[6:]
    elif x.startswith('type2find='):
      parsed=_parse_type2find(x[10:])
      if parsed is None:
        # Keep the current filter rather than storing an unusable value.
        print('Unknown type2find value ignored: %s' % x[10:])
      else:
        type2find=parsed

  if wikidata_id is not None:
    wd=pywikibot.ItemPage(repo,wikidata_id)
    wd.get(get_redirect=True)
    # NOTE(review): assumes the sitelinks mapping yields a plain title
    # string - confirm against the installed pywikibot version.
    pagename=wd.sitelinks[srcSite+'wiki']


# Wikidata repository handle; process_parameters reads it to resolve
# page=d:<Q-id> arguments.
repo=pywikibot.Site('wikidata','wikidata').data_repository()
process_parameters(sys.argv)
if (pagename==''):
  # No page given: show usage examples and do not start a scan.
  print('python3 getredlinks.py page="US Open 2018" wiki=en check=nl type2find=human\npython3 getredlinks.py page=d:Q46977971')
else:
  print('Page.............: %s\nwiki.............: %s\nFind red links on: %s' % (pagename,srcSite,dstSite))
  # Scan only when a page title is available; an empty title is not a
  # usable page to walk.
  linksdone=process_one_page(srcSite,pagename)
  print('Klaar: %d links bekeken' % linksdone)