>我有一个 Celery 网页抓取任务,我想把所有创建出来的子对象返回给父对象。如何把这些子对象的 ID 传回给父任务?
下面是我想要实现的内容的粗略示例:
@task
def scrape_page(url):
    """Create a Page, fetch ``url`` and queue a ``scrape_child`` task per child.

    Rough sketch of the intent: every child found on the page is scraped by
    its own task, and the outcome is attached to the newly created page.
    """
    page = Page.objects.create(
        name='new_page'
    )
    response = requests.get(url)
    children = parse_children(response)
    for child in children:
        # NOTE: delay() only queues the task and hands back an AsyncResult,
        # not the Child object that scrape_child eventually creates.
        result = scrape_child.delay(child.url)
        page.children.add(result)
@task
def scrape_child(url):
    """Fetch ``url``, parse the response and store it as a new Child.

    The mapping returned by ``parse_child`` is passed as keyword arguments
    to ``Child.objects.create``; the created Child is returned.
    """
    response = requests.get(url)
    parsed = parse_child(response)
    child = Child.objects.create(**parsed)
    return child
您不应该在一个任务内部等待另一个通过 delay 派发的任务。您可以用 .apply()
同步调用该任务,但这样它就不会作为单独的任务执行。编辑:我找到了相关文档,请参阅"避免启动同步子任务"。
也许您应该使用组(group)。一种做法是异步派发一组 scrape_child
任务,并把父页面的 page_id 传给每个子任务:
from celery import group
@task
def scrape_page(url):
    """Create a Page, fetch ``url`` and fan out one ``scrape_child`` per child.

    The child tasks are dispatched together as a Celery group and each one
    receives the id of the new page, so this task never waits on them.

    Returns True once the group has been dispatched.
    """
    page = Page.objects.create(
        name='new_page'
    )
    response = requests.get(url)
    children = parse_children(response)
    children_group = group([
        scrape_child.s(page.id, child.url)  # using the signature
        for child in children
    ])
    # Dispatch the whole group asynchronously; its result is not awaited here.
    children_group.delay()
    return True
@task
def scrape_child(page_id, url):
    """Fetch ``url``, parse it and create a Child linked to page ``page_id``.

    The mapping returned by ``parse_child`` supplies the remaining field
    values for the new Child; the created Child is returned.
    """
    response = requests.get(url)
    parsed = parse_child(response)
    # Set page_id as part of create(): assigning it afterwards without calling
    # save() would only change the in-memory object, never the stored row.
    # Assumes Child has a foreign key whose column attribute is page_id --
    # TODO confirm against the model.
    child = Child.objects.create(page_id=page_id, **parsed)
    return child