Module event_processor.scrapers.chihacknight_simple_spider
Expand source code
# -*- coding: utf-8 -*-
from event_processor.base.custom_spiders import ScraperSpider
class ChiHackNightSpider(ScraperSpider):
    """Spider that reads the events table on chihacknight.org.

    It requests the ``events/`` page and extracts title, URL, date,
    address and description data from the page's ``table tr`` rows.
    """

    name = 'chihacknight'
    allowed_domains = ['chihacknight.org']
    # NOTE(review): presumably consulted by the project's runner to skip
    # this spider -- confirm where ``enabled`` is read.
    enabled = False

    def __init__(self, name=None, **kwargs):
        """Initialize the base spider with this site's name, URL and date format.

        Args:
            name: Accepted but not used by this method.
            **kwargs: Forwarded unchanged to the base-class initializer.
        """
        # NOTE(review): ``self`` is passed explicitly on top of the implicit
        # binding done by ``super()``. The base initializer is not visible
        # from here, so the call is kept exactly as it was -- confirm that
        # the base class expects this extra argument.
        super().__init__(
            self,
            'Chi Hack Night',
            'https://chihacknight.org/',
            date_format='%b %d, %Y',
            **kwargs
        )

    def start_requests(self):
        """Yield the single request for the ``events/`` page."""
        yield self.get_request('events/', {})

    def parse(self, response):
        """Extract event data from the rows of the events table.

        Args:
            response: Response for the events page; must provide ``css()``.

        Returns:
            A dict with the keys ``title``, ``url``, ``event_time``,
            ``address`` and ``description``.
        """
        venue = '222 Merchandise Mart Plaza, Chicago, IL 60654'
        # Every field is read from the same rows, so run the query once.
        rows = response.css('table tr')

        def extract(selector, *extra):
            # Thin wrapper so each field only has to name its CSS selector.
            return self.empty_check_extract(rows, self.css_func, selector, *extra)

        return {
            'title': extract('td:nth-child(3) span::text'),
            'url': extract('td:nth-child(3) a::attr(href)'),
            'event_time': self.create_time_data(
                # NOTE(review): 'Jan 01, 2012' is presumably the fallback
                # used when a date is missing -- confirm against
                # ``empty_check_extract``.
                date=extract('td:nth-child(1) p::text', 'Jan 01, 2012')
            ),
            # The venue is fixed: emit the same address once for every
            # value extracted from the rows' cell text.
            'address': [venue for _ in extract('td::text')],
            'description': extract('td:nth-child(4)::text'),
        }
Classes
class ChiHackNightSpider (name=None, **kwargs)
-
Base spider for reading websites that only need a single page load.
Expand source code
class ChiHackNightSpider(ScraperSpider):
    """Spider that reads the events table on chihacknight.org.

    It requests the ``events/`` page and extracts title, URL, date,
    address and description data from the page's ``table tr`` rows.
    """

    name = 'chihacknight'
    allowed_domains = ['chihacknight.org']
    # NOTE(review): presumably consulted by the project's runner to skip
    # this spider -- confirm where ``enabled`` is read.
    enabled = False

    def __init__(self, name=None, **kwargs):
        """Initialize the base spider with this site's name, URL and date format.

        Args:
            name: Accepted but not used by this method.
            **kwargs: Forwarded unchanged to the base-class initializer.
        """
        # NOTE(review): ``self`` is passed explicitly on top of the implicit
        # binding done by ``super()``. The base initializer is not visible
        # from here, so the call is kept exactly as it was -- confirm that
        # the base class expects this extra argument.
        super().__init__(
            self,
            'Chi Hack Night',
            'https://chihacknight.org/',
            date_format='%b %d, %Y',
            **kwargs
        )

    def start_requests(self):
        """Yield the single request for the ``events/`` page."""
        yield self.get_request('events/', {})

    def parse(self, response):
        """Extract event data from the rows of the events table.

        Args:
            response: Response for the events page; must provide ``css()``.

        Returns:
            A dict with the keys ``title``, ``url``, ``event_time``,
            ``address`` and ``description``.
        """
        venue = '222 Merchandise Mart Plaza, Chicago, IL 60654'
        # Every field is read from the same rows, so run the query once.
        rows = response.css('table tr')

        def extract(selector, *extra):
            # Thin wrapper so each field only has to name its CSS selector.
            return self.empty_check_extract(rows, self.css_func, selector, *extra)

        return {
            'title': extract('td:nth-child(3) span::text'),
            'url': extract('td:nth-child(3) a::attr(href)'),
            'event_time': self.create_time_data(
                # NOTE(review): 'Jan 01, 2012' is presumably the fallback
                # used when a date is missing -- confirm against
                # ``empty_check_extract``.
                date=extract('td:nth-child(1) p::text', 'Jan 01, 2012')
            ),
            # The venue is fixed: emit the same address once for every
            # value extracted from the rows' cell text.
            'address': [venue for _ in extract('td::text')],
            'description': extract('td:nth-child(4)::text'),
        }
Ancestors
- ScraperSpider
- scrapy.spiders.Spider
- scrapy.utils.trackref.object_ref
- SpiderBase
- AggregatorBase
Class variables
var allowed_domains
-
Built-in mutable sequence.
If no argument is given, the constructor creates a new empty list. The argument must be an iterable if specified.
Methods
def parse(self, response)
-
Expand source code
def parse(self, response):
    """Extract event data from the rows of the events table.

    Args:
        response: Response for the events page; must provide ``css()``.

    Returns:
        A dict with the keys ``title``, ``url``, ``event_time``,
        ``address`` and ``description``.
    """
    venue = '222 Merchandise Mart Plaza, Chicago, IL 60654'
    # Every field is read from the same rows, so run the query once.
    rows = response.css('table tr')

    def extract(selector, *extra):
        # Thin wrapper so each field only has to name its CSS selector.
        return self.empty_check_extract(rows, self.css_func, selector, *extra)

    return {
        'title': extract('td:nth-child(3) span::text'),
        'url': extract('td:nth-child(3) a::attr(href)'),
        'event_time': self.create_time_data(
            # NOTE(review): 'Jan 01, 2012' is presumably the fallback used
            # when a date is missing -- confirm against
            # ``empty_check_extract``.
            date=extract('td:nth-child(1) p::text', 'Jan 01, 2012')
        ),
        # The venue is fixed: emit the same address once for every value
        # extracted from the rows' cell text.
        'address': [venue for _ in extract('td::text')],
        'description': extract('td:nth-child(4)::text'),
    }
def start_requests(self)
-
Expand source code
def start_requests(self):
    """Produce the one request this spider makes: the ``events/`` page."""
    events_request = self.get_request('events/', {})
    yield events_request
Inherited members