

import scrapy
class FcsItem(scrapy.Item):
    """Scraped record holding a title and a link (lives in items.py)."""

    # Value stored under the 'title' key by the spider.
    title = scrapy.Field()
    # Value stored under the 'link' key by the spider.
    link = scrapy.Field()
#test.py file below, different file from above
import scrapy
from fcs.items import FcsItem
class FCScrape(scrapy.Spider):
    """Spider that collects title/link pairs from the start page.

    Every anchor found inside a ``div`` with class ``category-story`` is
    turned into one ``FcsItem`` whose ``title`` and ``link`` are plain
    strings.
    """

    name = "FC"
    allowed_domains = ["finalcall.com"]
    start_urls = ["http://www.finalcall.com/artman/publish/Columns_4/index.shtml"]

    def parse(self, response):
        """Yield one ``FcsItem`` per anchor in the category-story blocks.

        Args:
            response: The downloaded page; queried through
                ``response.selector.xpath``.

        Yields:
            FcsItem: An item with ``title`` set to the anchor's first text
            node and ``link`` set to its ``href`` attribute.
        """
        anchors = response.selector.xpath('//div[@class="category-story"]//a')
        for anchor in anchors:
            # Read text and href from the SAME anchor so they cannot drift
            # apart, which zipping two independent result lists allows when
            # an anchor has no text node (e.g. an image link).
            title = anchor.xpath('text( )').extract_first()
            link = anchor.xpath('@href').extract_first()
            if title is None or link is None:
                continue
            # A fresh item per pair: reusing a single item would overwrite
            # the previous values, and nothing is emitted unless we yield.
            item = FcsItem()
            item['title'] = title
            item['link'] = link
            yield item


由于这个页面的结构以及你选择元素的方式,你抓取到的只有一对数据(以元组的形式)。当你执行 zip(divs_title.xpath('.//a/text( )'), divs_title.xpath('.//a/@href')) 时,得到的是一个只含一项的 a 标签文本列表和一个只含一项的 href 内容列表;把这两个列表 zip 在一起,结果也就只有一项。


def parse(self, response):
    """Build a ``{title: link}`` dictionary from the category-story blocks.

    Args:
        response: Page object queried through ``response.selector.xpath``.

    Returns:
        dict: Maps the first anchor text of each
        ``div[@class="category-story"]`` to that block's first ``href``.
        Blocks missing either value are skipped; a repeated title keeps
        the link of the last block that used it.
    """
    the_dict = {}
    for article in response.selector.xpath('//div[@class="category-story"]'):
        # extract() returns a list, which is unhashable and cannot be used
        # as a dict key; extract_first() gives the first match as a string
        # (or None when nothing matched).
        title = article.xpath('.//a/text( )').extract_first()
        link = article.xpath('.//a/@href').extract_first()
        if title is None or link is None:
            continue
        the_dict[title] = link
    # Hand the mapping back to the caller; without this it is discarded.
    return the_dict


def parse(self, response):
    """Yield one ``FcsItem`` for every category-story block on the page.

    Each item's ``title`` and ``link`` hold the full ``extract()`` result
    lists for the anchor text nodes and ``href`` attributes found inside
    that block.
    """
    story_blocks = response.selector.xpath('//div[@class="category-story"]')
    for story in story_blocks:
        # Populate both fields through the Item keyword constructor.
        yield FcsItem(
            title=story.xpath('.//a/text( )').extract(),
            link=story.xpath('.//a/@href').extract(),
        )
