Module event_processor.scrapers.formyblock
Expand source code
from event_processor.base.custom_spiders import ScraperNoTransposeSpider
class ForMyBlockSpider(ScraperNoTransposeSpider):
    """Spider for the events listing at https://www.formyblock.org/events/.

    Yields one plain dict per ``article`` element found under the
    ``.eventlist--upcoming`` container of the listing page.
    """

    name = 'formyblock'
    allowed_domains = ['www.formyblock.org']
    start_urls = ["https://www.formyblock.org/events/"]

    def __init__(self, name=None, **kwargs):
        """Configure the base spider for this site.

        ``name`` is accepted but not used in this method; any extra keyword
        arguments are forwarded unchanged to the base class.
        """
        # NOTE(review): `self` is passed explicitly here on top of the
        # binding super() already performs. Presumably the base class
        # forwards its positional arguments to another initializer --
        # confirm against ScraperNoTransposeSpider before changing this.
        super().__init__(
            self,
            'My Block, My Hood, My City',
            base_url='https://www.formyblock.org/',
            date_format='%Y-%m-%d',
            **kwargs,
        )

    def parse(self, response):
        """Yield one event dict for every upcoming event on the page.

        Each dict carries the keys ``url``, ``title``, ``event_time`` (with
        ``date``, ``start_time`` and ``end_time``), ``description``,
        ``address``, ``price`` and ``organization``.
        """
        upcoming = response.css(".eventlist--upcoming")
        for article in upcoming.css("article"):
            href = article.css(".eventlist-title-link::attr(href)").get()
            title = article.css(".eventlist-title-link::text").get()

            schedule = {
                "date": article.css(".event-date::attr(datetime)").get(),
                "start_time": article.css(".event-time-12hr-start::text").get(),
                "end_time": article.css(".event-time-12hr-end::text").get(),
            }

            description = article.css(".eventlist-description").get()

            # Prefer the first address line; when there is none, fall back
            # to the stripped text of the address container (or "").
            address_lines = article.css(".eventlist-meta-address-line::text")
            fallback_address = article.css(
                ".eventlist-meta-address::text"
            ).get(default="").strip()
            address = address_lines.get(default=fallback_address)

            yield {
                "url": response.urljoin(href),
                "title": title,
                "event_time": schedule,
                "description": description,
                "address": address,
                "price": 0.0,
                # They seem to only be publishing their own events.
                "organization": "My Block, My Hood, My City",
            }
Classes
class ForMyBlockSpider (name=None, **kwargs)
-
Base class for scrapy spiders. All spiders must inherit from this class.
Expand source code
class ForMyBlockSpider(ScraperNoTransposeSpider):
    """Spider for the events listing at https://www.formyblock.org/events/.

    Yields one plain dict per ``article`` element found under the
    ``.eventlist--upcoming`` container of the listing page.
    """

    name = 'formyblock'
    allowed_domains = ['www.formyblock.org']
    start_urls = ["https://www.formyblock.org/events/"]

    def __init__(self, name=None, **kwargs):
        """Configure the base spider for this site.

        ``name`` is accepted but not used in this method; any extra keyword
        arguments are forwarded unchanged to the base class.
        """
        # NOTE(review): `self` is passed explicitly here on top of the
        # binding super() already performs. Presumably the base class
        # forwards its positional arguments to another initializer --
        # confirm against ScraperNoTransposeSpider before changing this.
        super().__init__(
            self,
            'My Block, My Hood, My City',
            base_url='https://www.formyblock.org/',
            date_format='%Y-%m-%d',
            **kwargs,
        )

    def parse(self, response):
        """Yield one event dict for every upcoming event on the page.

        Each dict carries the keys ``url``, ``title``, ``event_time`` (with
        ``date``, ``start_time`` and ``end_time``), ``description``,
        ``address``, ``price`` and ``organization``.
        """
        upcoming = response.css(".eventlist--upcoming")
        for article in upcoming.css("article"):
            href = article.css(".eventlist-title-link::attr(href)").get()
            title = article.css(".eventlist-title-link::text").get()

            schedule = {
                "date": article.css(".event-date::attr(datetime)").get(),
                "start_time": article.css(".event-time-12hr-start::text").get(),
                "end_time": article.css(".event-time-12hr-end::text").get(),
            }

            description = article.css(".eventlist-description").get()

            # Prefer the first address line; when there is none, fall back
            # to the stripped text of the address container (or "").
            address_lines = article.css(".eventlist-meta-address-line::text")
            fallback_address = article.css(
                ".eventlist-meta-address::text"
            ).get(default="").strip()
            address = address_lines.get(default=fallback_address)

            yield {
                "url": response.urljoin(href),
                "title": title,
                "event_time": schedule,
                "description": description,
                "address": address,
                "price": 0.0,
                # They seem to only be publishing their own events.
                "organization": "My Block, My Hood, My City",
            }
Ancestors
- ScraperNoTransposeSpider
- scrapy.spiders.Spider
- scrapy.utils.trackref.object_ref
- SpiderBase
- AggregatorBase
Class variables
var allowed_domains
-
Built-in mutable sequence.
If no argument is given, the constructor creates a new empty list. The argument must be an iterable if specified.
var start_urls
-
Built-in mutable sequence.
If no argument is given, the constructor creates a new empty list. The argument must be an iterable if specified.
Methods
def parse(self, response)
-
Expand source code
def parse(self, response):
    """Yield one event dict for every upcoming event on the page.

    Each dict carries the keys ``url``, ``title``, ``event_time`` (with
    ``date``, ``start_time`` and ``end_time``), ``description``,
    ``address``, ``price`` and ``organization``.
    """
    upcoming = response.css(".eventlist--upcoming")
    for article in upcoming.css("article"):
        href = article.css(".eventlist-title-link::attr(href)").get()
        title = article.css(".eventlist-title-link::text").get()

        schedule = {
            "date": article.css(".event-date::attr(datetime)").get(),
            "start_time": article.css(".event-time-12hr-start::text").get(),
            "end_time": article.css(".event-time-12hr-end::text").get(),
        }

        description = article.css(".eventlist-description").get()

        # Prefer the first address line; when there is none, fall back
        # to the stripped text of the address container (or "").
        address_lines = article.css(".eventlist-meta-address-line::text")
        fallback_address = article.css(
            ".eventlist-meta-address::text"
        ).get(default="").strip()
        address = address_lines.get(default=fallback_address)

        yield {
            "url": response.urljoin(href),
            "title": title,
            "event_time": schedule,
            "description": description,
            "address": address,
            "price": 0.0,
            # They seem to only be publishing their own events.
            "organization": "My Block, My Hood, My City",
        }
Inherited members