Module event_processor.scrapers.greatlakes_spider
Expand source code
# -*- coding: utf-8 -*-
from event_processor.base.custom_spiders import ScraperSpider
class GreatLakesSpider(ScraperSpider):
    """
    Scrapes the events listing on greatlakes.org.

    This spider is unused but is being kept for now for test purposes.
    greatlakes.org has an iCal feed: https://greatlakes.org/events/?ical=1&tribe_display=list
    Parsing that instead of the website would fix many of the imperfections of this scraper.
    """
    name = 'greatlakesObsolete'
    allowed_domains = ['greatlakes.org']
    enabled = False  # disabled; retained for test purposes only

    def __init__(self, name=None, **kwargs):
        # BUG FIX: the original had no comma between the two string literals
        # ('Alliance for the Great Lakes' 'https://greatlakes.org/'), so Python
        # implicitly concatenated them into a single first argument and the
        # base URL was never passed as its own parameter.
        super().__init__('Alliance for the Great Lakes', 'https://greatlakes.org/',
                         date_format='%B %d',
                         request_date_format='%Y-%m-%d', **kwargs)

    def start_requests(self):
        # Single entry point: the site's events listing page.
        yield self.get_request('events/', {})

    def parse(self, response):
        """Extract event fields from the events listing page.

        The schedule text has the form "<date> @ <time range>", so it is split
        on '@' into separate 'date' and 'time_range' parts.
        NOTE(review): s.split('@')[1] raises IndexError if an entry has no '@'
        (e.g. an all-day event) — TODO confirm the upstream markup.
        """
        return {
            'title': response.css('.tribe-event-url::text').extract(),
            # NOTE(review): this selects the anchor *text*, not its href;
            # presumably '::attr(href)' was intended for 'url' — verify.
            'url': response.css('.tribe-event-url::text').extract(),
            'event_time': self.extract_multiple(
                {'date': lambda s: s.split('@')[0],
                 'time_range': lambda s: s.split('@')[1]},
                response.css('.tribe-event-schedule-details').extract()),
            'address': response.css('.tribe-address').extract(),
            'description': response.css('.tribe-events-list-event-description').extract()
        }
Classes
class GreatLakesSpider (name=None, **kwargs)
-
This spider is unused but is being kept for now for test purposes
greatlakes.org has an iCal feed: https://greatlakes.org/events/?ical=1&tribe_display=list Parsing that instead of the website would fix many of the imperfections of this scraper.
Expand source code
class GreatLakesSpider(ScraperSpider): """ This spider is unused but is being kept for now for test purposes greatlakes.org has an iCal feed: https://greatlakes.org/events/?ical=1&tribe_display=list Parsing that instead of the website would fix many of the imperfections of this scraper. """ name = 'greatlakesObsolete' allowed_domains = ['greatlakes.org'] enabled = False def __init__(self, name=None, **kwargs): ScraperSpider.__init__(self, 'Alliance for the Great Lakes', 'https://greatlakes.org/', date_format='%B %d', request_date_format='%Y-%m-%d', **kwargs) def start_requests(self): yield self.get_request('events/', {}) def parse(self, response): return { 'title': response.css('.tribe-event-url::text').extract(), 'url': response.css('.tribe-event-url::text').extract(), 'event_time': self.extract_multiple( {'date': lambda s: s.split('@')[0], 'time_range': lambda s: s.split('@')[1]}, response.css('.tribe-event-schedule-details').extract()), 'address': response.css('.tribe-address').extract(), 'description': response.css('.tribe-events-list-event-description').extract() }
Ancestors
- ScraperSpider
- scrapy.spiders.Spider
- scrapy.utils.trackref.object_ref
- SpiderBase
- AggregatorBase
Class variables
var allowed_domains
-
Built-in mutable sequence.
If no argument is given, the constructor creates a new empty list. The argument must be an iterable if specified.
Methods
def parse(self, response)
-
Expand source code
def parse(self, response): return { 'title': response.css('.tribe-event-url::text').extract(), 'url': response.css('.tribe-event-url::text').extract(), 'event_time': self.extract_multiple( {'date': lambda s: s.split('@')[0], 'time_range': lambda s: s.split('@')[1]}, response.css('.tribe-event-schedule-details').extract()), 'address': response.css('.tribe-address').extract(), 'description': response.css('.tribe-events-list-event-description').extract() }
def start_requests(self)
-
Expand source code
def start_requests(self): yield self.get_request('events/', {})
Inherited members