[phantomjs] Add function to execute JS without a DOM

Authored by: MinePlayersPE, pukkandan
This commit is contained in:
pukkandan 2022-08-18 21:34:47 +05:30
parent 580ce00782
commit 587021cd9f
No known key found for this signature in database
GPG key ID: 7EEE9E1E817D0A39

View file

@ -1,3 +1,4 @@
import collections
import contextlib
import json
import os
@ -9,8 +10,10 @@
ExtractorError,
Popen,
check_executable,
format_field,
get_exe_version,
is_outdated_version,
shell_quote,
)
@ -49,7 +52,7 @@ class PhantomJSwrapper:
This class is experimental.
"""
_TEMPLATE = r'''
_BASE_JS = R'''
phantom.onError = function(msg, trace) {{
var msgStack = ['PHANTOM ERROR: ' + msg];
if(trace && trace.length) {{
@ -62,6 +65,9 @@ class PhantomJSwrapper:
console.error(msgStack.join('\n'));
phantom.exit(1);
}};
'''
_TEMPLATE = R'''
var page = require('webpage').create();
var fs = require('fs');
var read = {{ mode: 'r', charset: 'utf-8' }};
@ -116,14 +122,18 @@ def __init__(self, extractor, required_version=None, timeout=10000):
'Your copy of PhantomJS is outdated, update it to version '
'%s or newer if you encounter any errors.' % required_version)
self.options = {
'timeout': timeout,
}
for name in self._TMP_FILE_NAMES:
tmp = tempfile.NamedTemporaryFile(delete=False)
tmp.close()
self._TMP_FILES[name] = tmp
self.options = collections.ChainMap({
'timeout': timeout,
}, {
x: self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"')
for x in self._TMP_FILE_NAMES
})
def __del__(self):
for name in self._TMP_FILE_NAMES:
with contextlib.suppress(OSError, KeyError):
@ -194,31 +204,35 @@ def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on w
self._save_cookies(url)
replaces = self.options
replaces['url'] = url
user_agent = headers.get('User-Agent') or self.extractor.get_param('http_headers')['User-Agent']
replaces['ua'] = user_agent.replace('"', '\\"')
replaces['jscode'] = jscode
jscode = self._TEMPLATE.format_map(self.options.new_child({
'url': url,
'ua': user_agent.replace('"', '\\"'),
'jscode': jscode,
}))
for x in self._TMP_FILE_NAMES:
replaces[x] = self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"')
stdout = self.execute(jscode, video_id, note2)
with open(self._TMP_FILES['script'].name, 'wb') as f:
f.write(self._TEMPLATE.format(**replaces).encode('utf-8'))
if video_id is None:
self.extractor.to_screen(f'{note2}')
else:
self.extractor.to_screen(f'{video_id}: {note2}')
stdout, stderr, returncode = Popen.run(
[self.exe, '--ssl-protocol=any', self._TMP_FILES['script'].name],
text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if returncode:
raise ExtractorError(f'Executing JS failed:\n{stderr}')
with open(self._TMP_FILES['html'].name, 'rb') as f:
html = f.read().decode('utf-8')
self._load_cookies()
return html, stdout
def execute(self, jscode, video_id=None, note='Executing JS'):
    """Run a JS snippet with PhantomJS and return what it printed to stdout.

    The snippet is prefixed with ``_BASE_JS`` and written to the temporary
    'script' file, which is then passed to the PhantomJS executable.

    @param jscode    JavaScript source to run. ``phantom.exit();`` is
                     appended when the snippet does not already contain it.
    @param video_id  Passed through ``format_field`` to build the prefix of
                     the status line (presumably omitted when None - confirm
                     against ``format_field``).
    @param note      Status message shown via the extractor's ``to_screen``.
    @returns         The captured stdout of the PhantomJS process.
    @raises ExtractorError  If the process exits with a non-zero return code;
                            the message carries the stripped stderr.
    """
    # Append an explicit exit call unless the snippet already has one
    has_exit_call = 'phantom.exit();' in jscode
    script = jscode if has_exit_call else jscode + ';\nphantom.exit();'
    script = self._BASE_JS + script

    script_path = self._TMP_FILES['script'].name
    with open(script_path, 'w', encoding='utf-8') as script_file:
        script_file.write(script)

    prefix = format_field(video_id, None, "%s: ")
    self.extractor.to_screen(f'{prefix}{note}')

    argv = [self.exe, '--ssl-protocol=any', script_path]
    self.extractor.write_debug(f'PhantomJS command line: {shell_quote(argv)}')
    out, err, exit_code = Popen.run(
        argv, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    if not exit_code:
        return out
    raise ExtractorError(f'Executing JS failed:\n{err.strip()}')