Python - Scrapy spider doesn't scrape information
I am learning how to use Scrapy, and I have come across a problem: my spider doesn't scrape any information from the website I chose. Here is my spider's code:
from scrapy.spider import Spider
from scrapy.selector import Selector
from reddit.items import reddititem
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor


class redditspider(Spider):
    """Scrape author, score and comment text from one reddit thread.

    The spider requests the single URL in ``start_urls`` and hands the
    response to :meth:`parse`, which yields one ``reddititem`` per node
    matched by the thread's site-table XPath.
    """

    name = "reddit"

    # allowed_domains takes bare domain names, not URLs: an entry with a
    # scheme ("http://...") never matches a request's host.
    allowed_domains = ["reddit.com"]

    start_urls = [
        "http://www.reddit.com/r/funny/comments/3arta6/awkward_moment_seal/",
    ]

    # No ``rules`` attribute: rules are only processed by CrawlSpider, and
    # the previous rule named a ``parse_item`` callback that this class never
    # defined. A plain Spider sends every start URL response to ``parse``.

    def parse(self, response):
        """Yield one ``reddititem`` for each matched site-table node.

        :param response: the downloaded page for a URL in ``start_urls``.
        :returns: a generator of ``reddititem`` objects whose fields hold
            the lists returned by ``extract()`` (empty when nothing matches).
        """
        sel = Selector(response)

        # NOTE(review): XPath id comparison is case-sensitive. If the page
        # spells these ids with capitals (e.g. "siteTable_..."), the query
        # matches nothing and no items are yielded -- confirm against the
        # live markup.
        for site in sel.xpath('//*[@id="sitetable_t3_3arta6"]'):
            item = reddititem()
            # Only <a> elements that are direct children of ``site``.
            item['author'] = site.xpath('a/text()').extract()
            # The leading "." keeps the search inside ``site``; a bare "//"
            # would restart from the document root on every iteration.
            item['score_unvoted'] = site.xpath(
                './/span[contains(@class, "score_unvoted")]/text()'
            ).extract()
            item['usertext'] = site.xpath(
                './/*[@id="form-t1_csfkjb86q9"]/text()'
            ).extract()
            yield item
Here is items.py:
from scrapy.item import Item, Field
import sys

# Import-time side effect kept from the original: make sure the Python 2.7
# install directory is on the module search path, adding it at most once.
if "c:\\python27" not in sys.path:
    sys.path.append("c:\\python27")


class reddititem(Item):
    """Container for the data scraped from a reddit comment thread.

    Each field is filled by the spider with the list that the selector's
    ``extract()`` call returns.
    """

    # define the fields for your item here like:
    # name = scrapy.Field()
    author = Field()         # text of the matched author link(s)
    score_unvoted = Field()  # text of spans whose class contains "score_unvoted"
    usertext = Field()       # text found under the comment form element
This is what happens when I run `scrapy crawl reddit`
in the terminal.
Any help would be great.
Comments
Post a Comment