I am trying to download a movie titled "Shimen," which was showcased at the 60th Golden Horse Awards. I discovered a streaming link:
https://www.fofoyy.com/dianying/96937
I was unable to locate the video source within the page's source code, but I located two M3U8 files in the network requests when I examined the page using F12. By merging these, I obtained the ultimate URL I need to access:
https://v8.longshengtea.com/yyv8/202310/06/2yJDc3LMsW1/video/2000k_0X1080_64k_25/hls/index.m3u8
Using the requests library to send GET requests, I received files whose names end in .jpeg. I read their bytes and wrote them out to files ending in .ts, and some of the resulting videos are playable.
I have two strategies:
- Employ a for loop to request each URL within the M3U8 file.
The problems: firstly, it's excessively slow; secondly, some requests succeed while others fail.
- Implement
aiohttp
to request asynchronously with coroutines.
This is much faster, but every single request fails. Could someone experienced help me figure out why? I would be extremely grateful!
mycode:
import asyncio
import os
import re
from urllib.parse import urljoin

import aiofiles
import aiohttp
async def get_urls_from_m3u8(m3u8_url):
    """Fetch an M3U8 playlist and return the list of media-segment URLs.

    Parses the playlist properly instead of grepping for absolute
    ``...jpeg`` URLs: HLS playlists commonly list segments as *relative*
    paths, which the old regex missed entirely. Relative entries are
    resolved against the playlist's own URL with ``urljoin``.

    Args:
        m3u8_url: URL of the index.m3u8 playlist.

    Returns:
        A list of absolute segment URLs; an empty list when the playlist
        could not be fetched.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(m3u8_url) as response:
            if response.status != 200:
                print(f"Failed to fetch M3U8 file, status code: {response.status}")
                return []
            content = await response.text()
    # In an M3U8 file, every non-blank line that does not start with '#'
    # is a segment URI; '#EXT...' lines are tags/metadata.
    urls = []
    for line in content.splitlines():
        line = line.strip()
        if line and not line.startswith('#'):
            # urljoin leaves absolute URLs untouched and resolves
            # relative paths against the playlist URL.
            urls.append(urljoin(m3u8_url, line))
    return urls
async def download_image(url, directory, max_retries=3, headers=None):
    """Download one media segment (disguised as .jpeg) and save it as .ts.

    Fixes over the original:
    - The filename match could be ``None`` (basename without digits),
      crashing with ``AttributeError``; now falls back to the basename.
    - The saved file now actually gets a ``.ts`` extension.
    - Retries wait with exponential backoff instead of hammering the
      server immediately — bursts of instant retries are a likely cause
      of mass failures when many coroutines run at once.
    - ``timeout`` uses ``aiohttp.ClientTimeout`` (an integer ``timeout=``
      is deprecated in aiohttp).

    Args:
        url: Segment URL to download.
        directory: Destination directory for the .ts file.
        max_retries: Extra attempts after the first failure.
        headers: Optional HTTP headers (e.g. ``User-Agent``/``Referer``);
            many HLS hosts reject header-less requests — TODO confirm
            what this server requires.

    Returns:
        True when the file was written successfully, False otherwise.
    """
    timeout = aiohttp.ClientTimeout(total=30)
    for attempt in range(max_retries + 1):
        try:
            async with aiohttp.ClientSession(headers=headers) as session:
                async with session.get(url, timeout=timeout) as response:
                    if response.status != 200:
                        print(f"Failed to download {url}, status code: {response.status}")
                        return False
                    basename = url.split("/")[-1]
                    match = re.search(r'\d+', basename)
                    # Prefer the numeric segment index; fall back to the
                    # basename (sans fake .jpeg suffix) when no digits exist.
                    stem = match.group() if match else os.path.splitext(basename)[0]
                    ts_filepath = os.path.join(directory, stem + ".ts")
                    async with aiofiles.open(ts_filepath, 'wb') as ts_file:
                        # Stream in chunks so large segments never sit
                        # fully in memory.
                        while True:
                            chunk = await response.content.read(64 * 1024)
                            if not chunk:
                                break
                            await ts_file.write(chunk)
                    print(f"Successfully saved {stem}.ts")
                    return True
        except Exception as e:
            if attempt < max_retries:
                print(f"Attempt {attempt + 1} failed. Retrying...")
                # Exponential backoff: 1s, 2s, 4s, ... gives the server
                # room to recover / stop rate-limiting us.
                await asyncio.sleep(2 ** attempt)
            else:
                print(f"Error downloading {url}: {e}")
                return False
async def main():
    """Fetch the playlist, then download every segment concurrently.

    Segments are written into ``downloaded_files_ts`` as ``.ts`` files.
    """
    directory = "downloaded_files_ts"
    # exist_ok avoids the check-then-create race of exists()+makedirs().
    os.makedirs(directory, exist_ok=True)
    m3u8_url = 'https://v8.longshengtea.com/yyv8/202310/06/2yJDc3LMsW1/video/2000k_0X1080_64k_25/hls/index.m3u8'
    urls = await get_urls_from_m3u8(m3u8_url)
    tasks = [download_image(url, directory) for url in urls]
    # gather runs every download coroutine concurrently on one event loop.
    await asyncio.gather(*tasks)
# Script entry point: start the asyncio event loop and run the pipeline.
if __name__ == '__main__':
    asyncio.run(main())