@kekeones 既然都 async 了,推荐直接 aiomysql + aiohttp,给出一个 aiohttp 的 pipeline 示例:
import asyncio
import aiohttp
from scrapy.utils.defer import deferred_from_coro
class DemoPipeline:
    """Scrapy item pipeline demonstrating async setup/teardown and async item
    processing with aiohttp.

    Requires the asyncio Twisted reactor, i.e. the setting
    TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor".
    """

    def __init__(self) -> None:
        # Synchronous parameter initialization goes here.
        pass

    def open_spider(self, spider):
        # Non-async preparation (e.g. default settings, logging config) can be
        # done here; the async part is delegated to _open_spider and wrapped
        # into a Deferred so Scrapy can wait on it.
        return deferred_from_coro(self._open_spider(spider))

    async def _open_spider(self, spider):
        # Async setup, e.g. creating connection pools / async connections.
        await asyncio.sleep(0.1)

    async def process_item(self, item, spider):
        """Process one item; async storage libraries can be used here."""
        ...
        # Choose whether to POST the payload as JSON data or as form data:
        # post_data = json.dumps('{"content": "test"}')
        post_data = {"content": "test"}
        async with aiohttp.ClientSession() as session:
            async with session.post(
                "http://httpbin.org/post", data=post_data
            ) as additional_response:
                # Read the response body.
                additional_data = await additional_response.text()
                print("additional_data:", additional_data)
        return item

    async def _close_spider(self):
        # Async teardown, e.g. closing async connections / connection pools.
        await asyncio.sleep(0.1)

    def close_spider(self, spider):
        # Mirror of open_spider: wrap the async teardown in a Deferred.
        return deferred_from_coro(self._close_spider())
注意:
使用以上代码时,需要在 settings.py 中或者 custom_settings 中配置
"TWISTED_REACTOR": "twisted.internet.asyncioreactor.AsyncioSelectorReactor"