From 5e1978ea088862e7b852516eb07cd159aecb7b69 Mon Sep 17 00:00:00 2001 From: GERZAC1002 Date: Mon, 11 Apr 2022 08:55:32 +0200 Subject: [PATCH 1/3] Merged Add support to backup pages using API:Query instead of Special:Export https://github.com/WikiTeam/wikiteam/pull/280 into recent version of dumpgenerator.py Adds additional parameter: --apiexport which uses a query request instead of submit on api.php which works without Special:Export which is disabled on some sites. --- dumpgenerator.py | 252 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 243 insertions(+), 9 deletions(-) diff --git a/dumpgenerator.py b/dumpgenerator.py index bd27ff17..a2d5fabc 100755 --- a/dumpgenerator.py +++ b/dumpgenerator.py @@ -23,10 +23,21 @@ from kitchen.text.converters import getwriter, to_unicode except ImportError: print "Please install the kitchen module." + +try: + import xml.etree.cElementTree as ET +except ImportError: + import xml.etree.ElementTree as ET + +import xml.dom.minidom as MD + import cookielib import cPickle import datetime import sys +import io +import traceback + try: import argparse except ImportError: @@ -63,7 +74,7 @@ UTF8Writer = getwriter('utf8') sys.stdout = UTF8Writer(sys.stdout) -__VERSION__ = '0.4.0-alpha' # major, minor, micro: semver.org +__VERSION__ = '0.5.0-alpha' # major, minor, micro: semver.org class PageMissingError(Exception): def __init__(self, title, xml): @@ -164,7 +175,7 @@ def getNamespacesScraper(config={}, session=None): namespacenames = {0: ''} # main is 0, no prefix if namespaces: r = session.post( - url=config['index'], params={'title': 'Special:Allpages'}, timeout=30) + url=config['index'], params={'title': 'Special:Allpages'}, timeout=120) raw = r.text delay(config=config, session=session) @@ -206,7 +217,7 @@ def getNamespacesAPI(config={}, session=None): 'meta': 'siteinfo', 'siprop': 'namespaces', 'format': 'json'}, - timeout=30 + timeout=120 ) result = getJSON(r) delay(config=config, session=session) @@ -281,7 +292,7 @@ def getPageTitlesScraper(config={}, session=None): print ' Retrieving titles in the namespace', namespace url = '%s?title=Special:Allpages&namespace=%s' % ( config['index'], namespace) - r = session.get(url=url, timeout=30) + r = session.get(url=url, timeout=120) raw = r.text raw = cleanHTML(raw) @@ -455,7 +466,7 @@ def getXMLHeader(config={}, session=None): else: try: - xml = "".join([x for x in getXMLPage(config=config, title=randomtitle, verbose=False, session=session)]) + xml = "".join([x for x in getXMLPage_(config=config, title=randomtitle, verbose=False, session=session)]) except PageMissingError as pme: # The does not exist. Not a problem, if we get the . 
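For reference, the two request shapes behind --apiexport can be tried by hand. The standalone sketch below is not part of the patch: the api.php URL and page title are placeholders, and the parameters simply mirror the ones built by getXMLPageWithApi further down.

# Rough sketch of the api.php requests issued in --apiexport mode.
# The endpoint and title are placeholders, not taken from the patch.
import requests

API = 'https://example.org/w/api.php'
session = requests.Session()

# --curonly case: ask the API to emit export-style XML directly.
r = session.get(API, params={
    'action': 'query', 'titles': 'Main Page',
    'export': 1, 'exportnowrap': 1, 'format': 'xml'})
print(r.text[:200])

# Full-history case: page through prop=revisions and rebuild the export XML
# client-side, which is what reconstructRevisions() below takes care of.
r = session.get(API, params={
    'action': 'query', 'titles': 'Main Page', 'prop': 'revisions',
    'rvprop': 'ids|timestamp|user|userid|comment|content|size|flags',
    'rvlimit': 10, 'format': 'json'})
print(list(r.json()['query']['pages'].keys()))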
xml = pme.xml @@ -477,7 +488,7 @@ def getXMLHeader(config={}, session=None): ) config['export'] = json.loads(r.text)['query']['namespaces']['-1']['*'] \ + ':Export' - xml = "".join([x for x in getXMLPage(config=config, title=randomtitle, verbose=False, session=session)]) + xml = "".join([x for x in getXMLPage_(config=config, title=randomtitle, verbose=False, session=session)]) except PageMissingError as pme: xml = pme.xml except ExportAbortedError: @@ -500,7 +511,7 @@ def getXMLHeader(config={}, session=None): def getXMLFileDesc(config={}, title='', session=None): """ Get XML for image description page """ config['curonly'] = 1 # tricky to get only the most recent desc - return("".join([x for x in getXMLPage( config=config, title=title, verbose=False, session=session)])) + return("".join([x for x in getXMLPage_( config=config, title=title, verbose=False, session=session)])) def getUserAgent(): @@ -521,7 +532,216 @@ def logerror(config={}, text=''): output = u'%s: %s\n' % ( datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), text) outfile.write(output.encode('utf-8')) +def reconstructRevisions(root=None): + #print ET.tostring(rev) + page = ET.Element('stub') + edits = 0 + for rev in root.find('query').find('pages').find('page').find('revisions').findall('rev'): + try: + rev_ = ET.SubElement(page,'revision') + ET.SubElement(rev_,'id').text = rev.attrib['revid'] + ET.SubElement(rev_,'timestamp').text = rev.attrib['timestamp'] + contributor = ET.SubElement(rev_,'contributor') + if not rev.attrib.has_key('userhidden'): + ET.SubElement(contributor,'username').text = rev.attrib['user'] + ET.SubElement(contributor,'id').text = rev.attrib['userid'] + else: + contributor.set('deleted','deleted') + comment = ET.SubElement(rev_,'comment') + if not rev.attrib.has_key('commenthidden'): + comment.text = rev.attrib['comment'] + else: + comment.set('deleted','deleted') + + # some revision does not return model and format, so just use hard-code + ET.SubElement(rev_,'model').text = 'wikitext' + ET.SubElement(rev_,'format').text = 'text/x-wiki' + text = ET.SubElement(rev_,'text') + if not rev.attrib.has_key('texthidden'): + text.attrib['xml:space'] = "preserve" + text.attrib['bytes'] = rev.attrib['size'] + text.text = rev.text + else: + text.set('deleted','deleted') + # delete sha1 here :) + #sha1 = ET.SubElement(rev_,'sha1') + #if not rev.attrib.has_key('sha1missing'): + #sha1.text = rev.attrib['sha1'] + if rev.attrib.has_key('minor'): + ET.SubElement(rev_,'minor') + edits += 1 + except Exception as e: + #logerror(config=config, text='Error reconstructing revision, xml:%s' % (ET.tostring(rev))) + print ET.tostring(rev) + traceback.print_exc() + page = None + edits = 0 + raise e + return page,edits + +def getXMLPageCoreWithApi(headers={}, params={}, config={}, session=None): + """ """ + # just send the API request + # if it fails, it will reduce params['rvlimit'] + xml = '' + c = 0 + maxseconds = 100 # max seconds to wait in a single sleeping + maxretries = config['retries'] # x retries and skip + increment = 20 # increment every retry + while not re.search(r'' if not config['curonly'] else r'', xml) or re.search(r'', xml): + if c > 0 and c < maxretries: + wait = increment * c < maxseconds and increment * \ + c or maxseconds # incremental until maxseconds + print ' In attempt %d, XML for "%s" is wrong. 
Waiting %d seconds and reloading...'%(c, params['titles' if config['apiexport'] else 'pages'], wait) + time.sleep(wait) + # reducing server load requesting smallest chunks (if curonly then + # rvlimit = 1 from mother function) + if params['rvlimit'] > 1: + params['rvlimit'] = params['rvlimit'] / 2 # half + if c >= maxretries: + print ' We have retried %d times' % (c) + print ' MediaWiki error for "%s", network error or whatever...' % (params['titles' if config['apiexport'] else 'pages']) + # If it's not already what we tried: our last chance, preserve only the last revision... + # config['curonly'] means that the whole dump is configured to save only the last, + # params['curonly'] should mean that we've already tried this + # fallback, because it's set by the following if and passed to + # getXMLPageCore + # TODO: save only the last version when failed + print ' Saving in the errors log, and skipping...' + logerror( + config=config, + text=u'Error while retrieving the last revision of "%s". Skipping.' % + (params['titles' if config['apiexport'] else 'pages']).decode('utf-8')) + #raise ExportAbortedError(config['index']) + return '' # empty xml + + # FIXME HANDLE HTTP Errors HERE + try: + r = session.get(url=config['api'], params=params, headers=headers) + handleStatusCode(r) + xml = fixBOM(r) + #print xml + except requests.exceptions.ConnectionError as e: + print ' Connection error: %s'%(str(e[0])) + xml = '' + c += 1 + return xml +def getXMLPageWithApi(config={}, title='', verbose=True, session=None): + """ Get the full history (or current only) of a page using API:Query + if params['curonly'] is set, then using export&exportwrap to export + """ + + title_ = title + title_ = re.sub(' ', '_', title_) + # do not convert & into %26, title_ = re.sub('&', '%26', title_) + # action=query&rvlimit=50&format=xml&prop=revisions&titles=TITLE_HERE + # &rvprop=timestamp%7Cuser%7Ccomment%7Ccontent%7Cids%7Cuserid%7Csha1%7Csize + #print 'current:%s' % (title_) + if not config['curonly']: + params = {'titles': title_, 'action': 'query','format':'xml', + 'prop':'revisions', + 'rvprop' : 'timestamp|user|comment|content|ids|userid|sha1|size|flags', + 'rvcontinue' : None, + 'rvlimit' : 10 # TODO: set this by commandline + } + else: + params = {'titles': title_, 'action': 'query','format':'xml','export':1,'exportnowrap':1} + #print 'params:%s' % (params) + if not config['curonly']: + firstpartok = False + lastcontinue = None + numberofedits = 0 + ret = '' + while True: + # in case the last request is not right, saving last time's progress + if not firstpartok: + try: + lastcontinue = params['rvcontinue'] + except: + lastcontinue = None + + xml = getXMLPageCoreWithApi(params=params, config=config, session=session) + if xml == "": + #just return so that we can continue, and getXMLPageCoreWithApi will log the error + return + try: + root = ET.fromstring(xml.encode('utf-8')) + except: + continue + try: + retpage = root.find('query').find('pages').find('page') + except: + continue + if retpage.attrib.has_key('missing') or retpage.attrib.has_key('invalid'): + print 'Page not found' + raise PageMissingError(params['titles'], xml) + if not firstpartok: + try: + # build the firstpart by ourselves to improve the memory usage + ret = ' \n' + ret += ' %s\n' %(retpage.attrib['title']) + ret += ' %s\n' % (retpage.attrib['ns']) + ret += ' %s\n' % (retpage.attrib['pageid']) + except: + firstpartok = False + continue + else: + firstpartok = True + yield ret + try: + ret = '' + edits = 0 + if config['curonly'] or 
root.find('continue') == None: + # transform the revision + rev_,edits = reconstructRevisions(root=root) + xmldom = MD.parseString(''+ET.tostring(rev_)+'') + # convert it into text in case it throws MemoryError + # delete the first three line and last two line,which is for setting the indent + ret += ''.join(xmldom.toprettyxml(indent=' ').splitlines(True)[3:-2]) + yield ret + numberofedits += edits + break + else: + rev_,edits = reconstructRevisions(root=root) + xmldom = MD.parseString('' + ET.tostring(rev_) + '') + ret += ''.join(xmldom.toprettyxml(indent=' ').splitlines(True)[3:-2]) + params['rvcontinue'] = root.find('continue').attrib['rvcontinue'] + numberofedits += edits + yield ret + except: + traceback.print_exc() + params['rvcontinue'] = lastcontinue + ret = '' + yield ' \n' + else: + xml = getXMLPageCoreWithApi(params=params, config=config, session=session) + if xml == "": + raise ExportAbortedError(config['index']) + if not "" in xml: + raise PageMissingError(params['titles'], xml) + else: + # strip these sha1s sums which keep showing up in the export and + # which are invalid for the XML schema (they only apply to + # revisions) + xml = re.sub(r'\n\s*\w+\s*\n', r'\n', xml) + xml = re.sub(r'\n\s*\s*\n', r'\n', xml) + + yield xml.split("")[0] + + # just for looking good :) + r_timestamp = r'([^<]+)' + + numberofedits = 0 + numberofedits += len(re.findall(r_timestamp, xml)) + + yield "\n" + + if verbose: + if (numberofedits == 1): + print ' %s, 1 edit' % (title.strip()) + else: + print ' %s, %d edits' % (title.strip(), numberofedits) def getXMLPageCore(headers={}, params={}, config={}, session=None): """ """ @@ -694,7 +914,13 @@ def getXMLPage(config={}, title='', verbose=True, session=None): print ' %s, 1 edit' % (title.strip()) else: print ' %s, %d edits' % (title.strip(), numberofedits) - +def getXMLPage_(config={}, title='', verbose=True, session=None): + #print config + if config['apiexport']: + return getXMLPageWithApi(config=config, title=title, verbose=verbose, session=session) + else: + return getXMLPage(config=config, title=title, verbose=verbose, session=session) + return '' def makeXmlPageFromRaw(xml): """ Discard the metadata around a element in string""" @@ -775,7 +1001,7 @@ def generateXMLDump(config={}, titles=[], start=None, session=None): if c % 10 == 0: print 'Downloaded %d pages' % (c) try: - for xml in getXMLPage(config=config, title=title, session=session): + for xml in getXMLPage_(config=config, title=title, session=session): xml = cleanXML(xml=xml) xmlfile.write(xml.encode('utf-8')) except PageMissingError: @@ -1680,6 +1906,7 @@ def getParameters(params=[]): action='store_true', help='resumes previous incomplete dump (requires --path)') parser.add_argument('--force', action='store_true', help='') + parser.add_argument('--ignore-api-check', action='store_true', help='') parser.add_argument( '--user', help='Username if authentication is required.') parser.add_argument( @@ -1723,6 +1950,10 @@ def getParameters(params=[]): '--exnamespaces', metavar="1,2,3", help='comma-separated value of namespaces to exclude') + groupDownload.add_argument( + '--apiexport', + action='store_true', + help="Using API instead of Special:Export to export pages") # Meta info params groupMeta = parser.add_argument_group( @@ -1824,6 +2055,8 @@ def getParameters(params=[]): index2 = check[1] api = checkedapi print 'API is OK: ' + checkedapi + elif args.ignore_api_check: + print 'Error in API. Ignoring.' else: if index and not args.wiki: print 'API not available. 
Trying with index.php only.' @@ -1921,6 +2154,7 @@ def getParameters(params=[]): 'cookies': args.cookies or '', 'delay': args.delay, 'retries': int(args.retries), + 'apiexport': args.apiexport } other = { From 120f54d1132589b9b65c0f3a44631b19a1960433 Mon Sep 17 00:00:00 2001 From: GERZAC1002 Date: Mon, 11 Apr 2022 23:22:44 +0200 Subject: [PATCH 2/3] Improvements over previous commit: -renamed --apiexport to '--apiquery' and gave it a more useful help text -renamed function 'getXMLPage_' to 'selectXMLQuerMode' to make it distinct enough from 'getXMLPage' -added in checks so that '--apiquery' can only be used together with '--curonly' -dialed back some timeouts from 120 to 60 which is still more than the previous 30 Improvements that could still be implemented: -Checking the availability of Special:Export before trying to download the list of titles and images wasting resources --- dumpgenerator.py | 63 +++++++++++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 27 deletions(-) diff --git a/dumpgenerator.py b/dumpgenerator.py index a2d5fabc..c22a30e9 100755 --- a/dumpgenerator.py +++ b/dumpgenerator.py @@ -74,7 +74,7 @@ UTF8Writer = getwriter('utf8') sys.stdout = UTF8Writer(sys.stdout) -__VERSION__ = '0.5.0-alpha' # major, minor, micro: semver.org +__VERSION__ = '0.4.1-alpha' # major, minor, micro: semver.org class PageMissingError(Exception): def __init__(self, title, xml): @@ -175,7 +175,7 @@ def getNamespacesScraper(config={}, session=None): namespacenames = {0: ''} # main is 0, no prefix if namespaces: r = session.post( - url=config['index'], params={'title': 'Special:Allpages'}, timeout=120) + url=config['index'], params={'title': 'Special:Allpages'}, timeout=60) raw = r.text delay(config=config, session=session) @@ -217,7 +217,7 @@ def getNamespacesAPI(config={}, session=None): 'meta': 'siteinfo', 'siprop': 'namespaces', 'format': 'json'}, - timeout=120 + timeout=60 ) result = getJSON(r) delay(config=config, session=session) @@ -292,7 +292,7 @@ def getPageTitlesScraper(config={}, session=None): print ' Retrieving titles in the namespace', namespace url = '%s?title=Special:Allpages&namespace=%s' % ( config['index'], namespace) - r = session.get(url=url, timeout=120) + r = session.get(url=url, timeout=60) raw = r.text raw = cleanHTML(raw) @@ -466,7 +466,7 @@ def getXMLHeader(config={}, session=None): else: try: - xml = "".join([x for x in getXMLPage_(config=config, title=randomtitle, verbose=False, session=session)]) + xml = "".join([x for x in selectXMLQueryMode(config=config, title=randomtitle, verbose=False, session=session)]) except PageMissingError as pme: # The does not exist. Not a problem, if we get the . 
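The core of the full-history path kept from the first patch is reconstructRevisions(), which maps the attributes of each <rev> element returned by prop=revisions onto the export schema. A stripped-down, standalone version of that mapping is sketched below; the sample <rev> input is invented for illustration and only the contributor branch of the hidden/deleted handling is shown.

# Standalone sketch of the attribute-to-element mapping performed by
# reconstructRevisions(); the sample <rev> element is invented for illustration.
import xml.etree.ElementTree as ET

rev = ET.fromstring(
    '<rev revid="42" timestamp="2022-04-11T08:55:32Z" user="Example" '
    'userid="7" comment="sample edit" size="11">Hello world</rev>')

revision = ET.Element('revision')
ET.SubElement(revision, 'id').text = rev.attrib['revid']
ET.SubElement(revision, 'timestamp').text = rev.attrib['timestamp']
contributor = ET.SubElement(revision, 'contributor')
if 'userhidden' not in rev.attrib:
    ET.SubElement(contributor, 'username').text = rev.attrib['user']
    ET.SubElement(contributor, 'id').text = rev.attrib['userid']
else:
    # suppressed contributors are marked the same way Special:Export marks them
    contributor.set('deleted', 'deleted')
# the API does not always return model/format, so the patch hard-codes them
ET.SubElement(revision, 'model').text = 'wikitext'
ET.SubElement(revision, 'format').text = 'text/x-wiki'
text = ET.SubElement(revision, 'text')
text.set('xml:space', 'preserve')
text.set('bytes', rev.attrib['size'])
text.text = rev.text

print(ET.tostring(revision))

getXMLPageWithApi then pretty-prints the rebuilt tree with xml.dom.minidom and strips the wrapper lines so the indentation is comparable to what Special:Export would have produced.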
xml = pme.xml @@ -484,11 +484,11 @@ def getXMLHeader(config={}, session=None): 'meta': 'siteinfo', 'siprop': 'namespaces', 'format': 'json'}, - timeout=120 + timeout=60 ) config['export'] = json.loads(r.text)['query']['namespaces']['-1']['*'] \ + ':Export' - xml = "".join([x for x in getXMLPage_(config=config, title=randomtitle, verbose=False, session=session)]) + xml = "".join([x for x in selectXMLQueryMode(config=config, title=randomtitle, verbose=False, session=session)]) except PageMissingError as pme: xml = pme.xml except ExportAbortedError: @@ -511,7 +511,7 @@ def getXMLHeader(config={}, session=None): def getXMLFileDesc(config={}, title='', session=None): """ Get XML for image description page """ config['curonly'] = 1 # tricky to get only the most recent desc - return("".join([x for x in getXMLPage_( config=config, title=title, verbose=False, session=session)])) + return("".join([x for x in selectXMLQueryMode( config=config, title=title, verbose=False, session=session)])) def getUserAgent(): @@ -593,7 +593,7 @@ def getXMLPageCoreWithApi(headers={}, params={}, config={}, session=None): if c > 0 and c < maxretries: wait = increment * c < maxseconds and increment * \ c or maxseconds # incremental until maxseconds - print ' In attempt %d, XML for "%s" is wrong. Waiting %d seconds and reloading...'%(c, params['titles' if config['apiexport'] else 'pages'], wait) + print ' In attempt %d, XML for "%s" is wrong. Waiting %d seconds and reloading...'%(c, params['titles' if config['apiquery'] else 'pages'], wait) time.sleep(wait) # reducing server load requesting smallest chunks (if curonly then # rvlimit = 1 from mother function) @@ -601,7 +601,7 @@ def getXMLPageCoreWithApi(headers={}, params={}, config={}, session=None): params['rvlimit'] = params['rvlimit'] / 2 # half if c >= maxretries: print ' We have retried %d times' % (c) - print ' MediaWiki error for "%s", network error or whatever...' % (params['titles' if config['apiexport'] else 'pages']) + print ' MediaWiki error for "%s", network error or whatever...' % (params['titles' if config['apiquery'] else 'pages']) # If it's not already what we tried: our last chance, preserve only the last revision... # config['curonly'] means that the whole dump is configured to save only the last, # params['curonly'] should mean that we've already tried this @@ -612,7 +612,7 @@ def getXMLPageCoreWithApi(headers={}, params={}, config={}, session=None): logerror( config=config, text=u'Error while retrieving the last revision of "%s". Skipping.' 
% - (params['titles' if config['apiexport'] else 'pages']).decode('utf-8')) + (params['titles' if config['apiquery'] else 'pages']).decode('utf-8')) #raise ExportAbortedError(config['index']) return '' # empty xml @@ -660,7 +660,7 @@ def getXMLPageWithApi(config={}, title='', verbose=True, session=None): lastcontinue = params['rvcontinue'] except: lastcontinue = None - + xml = getXMLPageCoreWithApi(params=params, config=config, session=session) if xml == "": #just return so that we can continue, and getXMLPageCoreWithApi will log the error @@ -807,7 +807,7 @@ def getXMLPageCore(headers={}, params={}, config={}, session=None): xml = '' except requests.exceptions.ReadTimeout as e: print ' Read timeout: %s'%(str(e[0])) - xml = '' + xml = '' c += 1 return xml @@ -914,11 +914,12 @@ def getXMLPage(config={}, title='', verbose=True, session=None): print ' %s, 1 edit' % (title.strip()) else: print ' %s, %d edits' % (title.strip(), numberofedits) -def getXMLPage_(config={}, title='', verbose=True, session=None): - #print config - if config['apiexport']: +def selectXMLQueryMode(config={}, title='', verbose=True, session=None): + if config['apiquery']: + #Using api.php?Query instead of relying on Special:Export return getXMLPageWithApi(config=config, title=title, verbose=verbose, session=session) else: + #Using the traditional method(default) return getXMLPage(config=config, title=title, verbose=verbose, session=session) return '' @@ -1001,7 +1002,7 @@ def generateXMLDump(config={}, titles=[], start=None, session=None): if c % 10 == 0: print 'Downloaded %d pages' % (c) try: - for xml in getXMLPage_(config=config, title=title, session=session): + for xml in selectXMLQueryMode(config=config, title=title, session=session): xml = cleanXML(xml=xml) xmlfile.write(xml.encode('utf-8')) except PageMissingError: @@ -1122,7 +1123,7 @@ def getXMLRevisions(config={}, session=None, allpages=False, start=None): # repeated header is confusing and would not even be valid xml = exportrequest['query']['export']['*'] yield makeXmlPageFromRaw(xml) - + if 'continue' in arvrequest: # Get the new ones arvparams['arvcontinue'] = arvrequest['continue']['arvcontinue'] @@ -1144,7 +1145,7 @@ def getXMLRevisions(config={}, session=None, allpages=False, start=None): else: # End of continuation. We are done with this namespace. break - + except (KeyError, mwclient.errors.InvalidResponse) as e: print(e) # TODO: check whether the KeyError was really for a missing arv API @@ -1376,7 +1377,7 @@ def reverse_readline(filename, buf_size=8192, truncate=False): if segment is not None: # if the previous chunk starts right from the beginning of line # do not concat the segment to the last line of new chunk - # instead, yield the segment first + # instead, yield the segment first if buffer[-1] is not '\n': lines[-1] += segment else: @@ -1938,6 +1939,10 @@ def getParameters(params=[]): help="generates a full history XML dump (--xml --curonly for current revisions only)") groupDownload.add_argument('--curonly', action='store_true', help='store only the current version of pages') + groupDownload.add_argument( + '--apiquery', + action='store_true', + help="EXPERIMENTAL: Using api.php?query instead of Special:Export to export pages, only use with --curonly") groupDownload.add_argument('--xmlrevisions', action='store_true', help='download all revisions from an API generator. 
MediaWiki 1.27+ only.') groupDownload.add_argument( @@ -1950,10 +1955,6 @@ def getParameters(params=[]): '--exnamespaces', metavar="1,2,3", help='comma-separated value of namespaces to exclude') - groupDownload.add_argument( - '--apiexport', - action='store_true', - help="Using API instead of Special:Export to export pages") # Meta info params groupMeta = parser.add_argument_group( @@ -1991,6 +1992,14 @@ def getParameters(params=[]): print getWikiEngine(url=args.wiki) sys.exit() + if (args.apiquery and not args.curonly) or (args.xmlrevisions and args.apiquery): + if (args.xmlrevisions): + print('ERROR: --apiquery conflicts with --xmlrevisions and requires --curonly') + sys.exit() + elif (args.xml): + print('ERROR: --apiquery conflicts requires --curonly') + sys.exit() + # Create session cj = cookielib.MozillaCookieJar() if args.cookies: @@ -2154,7 +2163,7 @@ def getParameters(params=[]): 'cookies': args.cookies or '', 'delay': args.delay, 'retries': int(args.retries), - 'apiexport': args.apiexport + 'apiquery': args.apiquery, } other = { @@ -2615,7 +2624,7 @@ def getWikiEngine(url=''): session.headers.update({'User-Agent': getUserAgent()}) r = session.post(url=url, timeout=30) if r.status_code == 405 or r.text == '': - r = session.get(url=url, timeout=120) + r = session.get(url=url, timeout=60) result = r.text wikiengine = 'Unknown' @@ -2698,7 +2707,7 @@ def mwGetAPIAndIndex(url=''): index = '' session = requests.Session() session.headers.update({'User-Agent': getUserAgent()}) - r = session.post(url=url, timeout=120) + r = session.post(url=url, timeout=60) result = r.text # API From 3b022354c77d1424d4258efab11c0827ee09e8f4 Mon Sep 17 00:00:00 2001 From: GERZAC1002 Date: Tue, 12 Apr 2022 00:37:39 +0200 Subject: [PATCH 3/3] Adjusted dumpgenerator.py to allow '--apiquery' to go along with '--images' and '--xmlrevisions' too, accidentally disabled that in previous commit --- dumpgenerator.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/dumpgenerator.py b/dumpgenerator.py index c22a30e9..1ba83a52 100755 --- a/dumpgenerator.py +++ b/dumpgenerator.py @@ -1942,7 +1942,7 @@ def getParameters(params=[]): groupDownload.add_argument( '--apiquery', action='store_true', - help="EXPERIMENTAL: Using api.php?query instead of Special:Export to export pages, only use with --curonly") + help="EXPERIMENTAL: Using api.php?query instead of Special:Export to export pages, works with: --curonly,--xmlrevisions,--images") groupDownload.add_argument('--xmlrevisions', action='store_true', help='download all revisions from an API generator. MediaWiki 1.27+ only.') groupDownload.add_argument( @@ -1992,13 +1992,9 @@ def getParameters(params=[]): print getWikiEngine(url=args.wiki) sys.exit() - if (args.apiquery and not args.curonly) or (args.xmlrevisions and args.apiquery): - if (args.xmlrevisions): - print('ERROR: --apiquery conflicts with --xmlrevisions and requires --curonly') - sys.exit() - elif (args.xml): - print('ERROR: --apiquery conflicts requires --curonly') - sys.exit() + if (args.apiquery and not args.curonly) and (args.apiquery and not args.xmlrevisions) and (args.apiquery and not args.images): + print('ERROR: --apiquery requires either --curonly or --images or --xmlrevisions') + sys.exit() # Create session cj = cookielib.MozillaCookieJar()
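The combined condition added in this third patch is equivalent to a single flattened test, which may read more clearly. A minimal, self-contained sketch follows; the parsed flags are simulated with an example invocation rather than taken from the patch's real parser.

# (A and not X) and (A and not Y) and (A and not Z)  is equivalent to  A and not (X or Y or Z)
import argparse

parser = argparse.ArgumentParser()
for flag in ('--apiquery', '--curonly', '--xmlrevisions', '--images'):
    parser.add_argument(flag, action='store_true')
args = parser.parse_args(['--apiquery'])  # example: --apiquery without a companion flag

if args.apiquery and not (args.curonly or args.xmlrevisions or args.images):
    print('ERROR: --apiquery requires either --curonly or --images or --xmlrevisions')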