Module event_processor.apis.library_events
Expand source code
import time
from scrapy import Item
from scrapy.loader import ItemLoader
from event_processor.util.data_utils import DataUtils
from event_processor.base.custom_spiders import ApiSpider
import scrapy
class LibraryEvents(ApiSpider):
    """Api Spider for events on the Chicago Public Library's website.

    Events are fetched page by page from the events search endpoint, matched
    to a branch or non-branch location, and yielded as plain dictionaries.
    """

    # This is the max amount of rows that the API can return at one time
    MAX_ROWS = 50
    name = 'library'

    def __init__(self, name=None, **kwargs):
        """Set the organization name, base URL and date format for the spider.

        ``name`` is accepted but not used here; the remaining keyword
        arguments are forwarded to the parent initializer.
        """
        # NOTE(review): ``self`` is passed explicitly although ``super()``
        # already binds it -- presumably the base initializer expects this
        # calling convention; confirm against ApiSpider before changing.
        super().__init__(self, 'Chicago Public Library', 'https://chipublib.bibliocommons.com/', date_format='%Y-%m-%d', **kwargs)

    def parse(self, response):
        """Return the event generator; ``response`` itself is not inspected."""
        return self.get_events()

    def get_next_events_json(self, start):
        """Request one page of events beginning at row offset ``start``.

        The page size is ``MAX_ROWS`` and the search window is built from
        ``self.start_date`` and ``self.end_date`` (set outside this class).
        """
        request_params = {
            'client_scope': 'events',
            'query': f'start={start}&rows={self.MAX_ROWS}',
            'facet_fields': 'branch_location_id',
            'local_start': f'{self.start_date} TO {self.end_date}',
            'include_near_location': 'false'
        }
        return self.get_response_json(endpoint='events/events/search', request_params=request_params, property_to_return='events')

    def get_events_json(self):
        """Collect every page of events into a single list.

        Pages are requested until one comes back empty; ``self.wait()`` is
        called before each request.
        """
        start = 0
        events_json = []
        more_data = True
        while more_data:
            # Sleep to avoid overloading the server
            self.wait()
            next_events_json = self.get_next_events_json(start)
            num_results = len(next_events_json)
            events_json.extend(next_events_json)
            # Keep querying until no more data is returned
            more_data = num_results > 0
            start += num_results
        return events_json

    def get_locations_json(self, location_category):
        """Fetch the location entries for ``location_category``.

        ``location_category`` is 'locations' for branch locations and
        'places' for non-branch locations. It is used both as the endpoint
        suffix and as the property extracted from the response.
        """
        request_params = {
            'client_scope': 'events',
            'limit': '0'
        }
        return self.get_response_json(endpoint='events/' + location_category, request_params=request_params, property_to_return=location_category)

    def get_branch_locations_json(self):
        """Fetch the branch location entries."""
        return self.get_locations_json('locations')

    def get_nonbranch_locations_json(self):
        """Fetch the non-branch location entries."""
        return self.get_locations_json('places')

    def get_locations_list(self, get_locations_func):
        """Map each location id to its address.

        ``get_locations_func`` is called with no arguments and must return an
        iterable of mappings that have 'id' and 'address' keys.
        """
        return {location['id']: location['address'] for location in get_locations_func()}

    def get_branch_locations_list(self):
        """Return the id -> address mapping for branch locations."""
        return self.get_locations_list(self.get_branch_locations_json)

    def get_nonbranch_locations_list(self):
        """Return the id -> address mapping for non-branch locations."""
        return self.get_locations_list(self.get_nonbranch_locations_json)

    def get_address_string(self, location):
        """Format an address mapping as 'number street city, state zip'.

        Fields whose value is ``None`` are rendered as empty strings. The
        substitution is done per field, before formatting, so genuine data
        that merely contains the text "None" (e.g. "Nonesuch Rd") is kept.

        Raises:
            KeyError: if any of the five address keys is absent.
        """
        number, street, city, state, zip_code = (
            '' if location[key] is None else location[key]
            for key in ('number', 'street', 'city', 'state', 'zip')
        )
        return f'{number} {street} {city}, {state} {zip_code}'

    def get_events(self):
        """Yield one dictionary per event that is neither cancelled nor full.

        Each yielded dictionary has the keys 'title', 'description',
        'address', 'event_time', 'url', 'price' and 'category'.
        """
        events_json = self.get_events_json()
        branch_locations = self.get_branch_locations_list()
        nonbranch_locations = self.get_nonbranch_locations_list()
        for event in events_json:
            details = event['definition']
            # Don't show cancelled or full events. Filter first so that a
            # hidden event with an unknown location cannot abort the run.
            if details['is_cancelled'] or event['is_full']:
                continue
            branch_location_id = details['branch_location_id']
            # Determine if branch or non-branch event
            if branch_location_id is None:
                location = nonbranch_locations[details['non_branch_location_id']]
            else:
                location = branch_locations[branch_location_id]
            try:
                date, start_time = details['start'].split('T')
                end_time = details['end'].split('T')[1]
            except ValueError:
                # Assume event is all day if no time is supplied
                date = details['start']
                start_time = end_time = 'All Day'
            yield {
                'title': details['title'],
                'description': details['description'],
                'address': self.get_address_string(location),
                'event_time': {
                    'date': date,
                    'start_time': start_time,
                    'end_time': end_time
                },
                'url': f'{self.base_url}events/search/index/event/{event["id"]}',
                'price': 0.0,
                'category': 'LIBRARY'
            }
Classes
class LibraryEvents (name=None, **kwargs)
-
API spider for events on the Chicago Public Library's website
Expand source code
class LibraryEvents(ApiSpider):
    """Api Spider for events on the Chicago Public Library's website."""

    # This is the max amount of rows that the API can return at one time
    MAX_ROWS = 50
    name = 'library'

    def __init__(self, name=None, **kwargs):
        """Initialize the spider; ``name`` is accepted but not used here."""
        # NOTE(review): ``self`` is passed explicitly on top of ``super()``
        # binding -- presumably required by the base class; confirm.
        super().__init__(self, 'Chicago Public Library', 'https://chipublib.bibliocommons.com/', date_format='%Y-%m-%d', **kwargs)

    def parse(self, response):
        """Return the event generator; ``response`` is not inspected."""
        return self.get_events()

    def get_next_events_json(self, start):
        """Request up to ``MAX_ROWS`` events starting at row ``start``."""
        request_params = {
            'client_scope': 'events',
            'query': f'start={start}&rows={self.MAX_ROWS}',
            'facet_fields': 'branch_location_id',
            'local_start': f'{self.start_date} TO {self.end_date}',
            'include_near_location': 'false'
        }
        return self.get_response_json(endpoint='events/events/search', request_params=request_params, property_to_return='events')

    def get_events_json(self):
        """Return all events, requesting pages until an empty one arrives."""
        start = 0
        events_json = []
        more_data = True
        while more_data:
            # Sleep to avoid overloading the server
            self.wait()
            next_events_json = self.get_next_events_json(start)
            num_results = len(next_events_json)
            events_json.extend(next_events_json)
            # Keep querying until no more data is returned
            more_data = num_results > 0
            start += num_results
        return events_json

    def get_locations_json(self, location_category):
        """Fetch location entries for the given category."""
        # location_category = 'locations' for branch locations and 'places' for non-branch locations
        request_params = {
            'client_scope': 'events',
            'limit': '0'
        }
        return self.get_response_json(endpoint='events/' + location_category, request_params=request_params, property_to_return=location_category)

    def get_branch_locations_json(self):
        """Fetch the branch location entries."""
        return self.get_locations_json('locations')

    def get_nonbranch_locations_json(self):
        """Fetch the non-branch location entries."""
        return self.get_locations_json('places')

    def get_locations_list(self, get_locations_func):
        """Build an id -> address mapping from the entries the callable returns."""
        return {location['id']: location['address'] for location in get_locations_func()}

    def get_branch_locations_list(self):
        """Return the id -> address mapping for branch locations."""
        return self.get_locations_list(self.get_branch_locations_json)

    def get_nonbranch_locations_list(self):
        """Return the id -> address mapping for non-branch locations."""
        return self.get_locations_list(self.get_nonbranch_locations_json)

    def get_address_string(self, location):
        """Format an address mapping as 'number street city, state zip'.

        ``None`` field values become empty strings. This is done per field
        so that real text containing "None" is not altered.
        """
        parts = {
            key: '' if location[key] is None else location[key]
            for key in ('number', 'street', 'city', 'state', 'zip')
        }
        return f"{parts['number']} {parts['street']} {parts['city']}, {parts['state']} {parts['zip']}"

    def get_events(self):
        """Yield a dictionary for every event that is not cancelled or full."""
        events_json = self.get_events_json()
        branch_locations = self.get_branch_locations_list()
        nonbranch_locations = self.get_nonbranch_locations_list()
        for event in events_json:
            details = event['definition']
            # Don't show cancelled or full events; skip them before any
            # lookup that could fail on data that is never shown anyway.
            if details['is_cancelled'] or event['is_full']:
                continue
            branch_location_id = details['branch_location_id']
            # Determine if branch or non-branch event
            if branch_location_id is None:
                non_branch_location_id = details['non_branch_location_id']
                location = nonbranch_locations[non_branch_location_id]
            else:
                location = branch_locations[branch_location_id]
            try:
                date, start_time = details['start'].split('T')
                end_time = details['end'].split('T')[1]
            except ValueError:
                # Assume event is all day if no time is supplied
                date = details['start']
                start_time = end_time = 'All Day'
            yield {
                'title': details['title'],
                'description': details['description'],
                'address': self.get_address_string(location),
                'event_time': {
                    'date': date,
                    'start_time': start_time,
                    'end_time': end_time
                },
                'url': f'{self.base_url}events/search/index/event/{event["id"]}',
                'price': 0.0,
                'category': 'LIBRARY'
            }
Ancestors
- ApiSpider
- scrapy.spiders.Spider
- scrapy.utils.trackref.object_ref
- ApiBase
- AggregatorBase
Class variables
var MAX_ROWS
-
int([x]) -> integer int(x, base=10) -> integer
Convert a number or string to an integer, or return 0 if no arguments are given. If x is a number, return x.__int__(). For floating point numbers, this truncates towards zero.
If x is not a number or if base is given, then x must be a string, bytes, or bytearray instance representing an integer literal in the given base. The literal can be preceded by '+' or '-' and be surrounded by whitespace. The base defaults to 10. Valid bases are 0 and 2-36. Base 0 means to interpret the base from the string as an integer literal.
>>> int('0b100', base=0) 4
Methods
def get_address_string(self, location)
-
Expand source code
def get_address_string(self, location):
    """Format an address mapping as 'number street city, state zip'.

    Any field whose value is ``None`` is rendered as an empty string. The
    substitution happens per field, before formatting, so data that merely
    contains the text "None" (for example a street called "Nonesuch Rd")
    is left intact.

    Raises:
        KeyError: if any of the five address keys is absent.
    """
    number, street, city, state, zip_code = (
        '' if location[key] is None else location[key]
        for key in ('number', 'street', 'city', 'state', 'zip')
    )
    return f'{number} {street} {city}, {state} {zip_code}'
def get_branch_locations_json(self)
-
Expand source code
def get_branch_locations_json(self):
    """Fetch the branch location entries.

    Delegates to ``get_locations_json`` with the 'locations' category.
    """
    branch_category = 'locations'
    return self.get_locations_json(branch_category)
def get_branch_locations_list(self)
-
Expand source code
def get_branch_locations_list(self):
    """Return the id -> address mapping for branch locations.

    The branch fetcher is handed to ``get_locations_list`` uncalled; that
    method decides when to invoke it.
    """
    fetch_branches = self.get_branch_locations_json
    return self.get_locations_list(fetch_branches)
def get_events(self)
-
Expand source code
def get_events(self):
    """Yield one dictionary per event that is neither cancelled nor full.

    Each yielded dictionary has the keys 'title', 'description', 'address',
    'event_time' (with 'date', 'start_time', 'end_time'), 'url', 'price'
    and 'category'. When the start value has no 'T' separator the event is
    reported as 'All Day'.
    """
    events_json = self.get_events_json()
    branch_locations = self.get_branch_locations_list()
    nonbranch_locations = self.get_nonbranch_locations_list()
    for event in events_json:
        details = event['definition']
        # Don't show cancelled or full events. Filter first so that a hidden
        # event with an unknown location id cannot abort the whole run.
        if details['is_cancelled'] or event['is_full']:
            continue
        branch_location_id = details['branch_location_id']
        # Determine if branch or non-branch event
        if branch_location_id is None:
            location = nonbranch_locations[details['non_branch_location_id']]
        else:
            location = branch_locations[branch_location_id]
        try:
            date, start_time = details['start'].split('T')
            end_time = details['end'].split('T')[1]
        except ValueError:
            # Assume event is all day if no time is supplied
            date = details['start']
            start_time = end_time = 'All Day'
        yield {
            'title': details['title'],
            'description': details['description'],
            'address': self.get_address_string(location),
            'event_time': {
                'date': date,
                'start_time': start_time,
                'end_time': end_time
            },
            'url': f'{self.base_url}events/search/index/event/{event["id"]}',
            'price': 0.0,
            'category': 'LIBRARY'
        }
def get_events_json(self)
-
Expand source code
def get_events_json(self):
    """Collect every page of events into one list.

    Pages are requested at increasing row offsets until a page with no
    entries is returned; ``self.wait()`` is called before each request.
    """
    collected = []
    offset = 0
    while True:
        # Sleep to avoid overloading the server
        self.wait()
        page = self.get_next_events_json(offset)
        page_size = len(page)
        collected.extend(page)
        # Keep querying until no more data is returned
        if page_size == 0:
            break
        offset += page_size
    return collected
def get_locations_json(self, location_category)
-
Expand source code
def get_locations_json(self, location_category):
    """Fetch the location entries for ``location_category``.

    ``location_category`` is 'locations' for branch locations and 'places'
    for non-branch locations. It is used both as the endpoint suffix and as
    the property extracted from the response.
    """
    query = {'client_scope': 'events', 'limit': '0'}
    target = 'events/' + location_category
    return self.get_response_json(
        endpoint=target,
        request_params=query,
        property_to_return=location_category,
    )
def get_locations_list(self, get_locations_func)
-
Expand source code
def get_locations_list(self, get_locations_func):
    """Build an id -> address mapping from the fetched location entries.

    ``get_locations_func`` is called once with no arguments; each entry it
    returns must provide 'id' and 'address' keys. A later entry with the
    same id replaces an earlier one.
    """
    addresses_by_id = {}
    for entry in get_locations_func():
        location_id = entry['id']
        addresses_by_id[location_id] = entry['address']
    return addresses_by_id
def get_next_events_json(self, start)
-
Expand source code
def get_next_events_json(self, start):
    """Request one page of events beginning at row offset ``start``.

    The page size comes from ``self.MAX_ROWS`` and the search window from
    ``self.start_date`` / ``self.end_date``.
    """
    paging = f'start={start}&rows={self.MAX_ROWS}'
    date_window = f'{self.start_date} TO {self.end_date}'
    search_params = dict(
        client_scope='events',
        query=paging,
        facet_fields='branch_location_id',
        local_start=date_window,
        include_near_location='false',
    )
    return self.get_response_json(
        endpoint='events/events/search',
        request_params=search_params,
        property_to_return='events',
    )
def get_nonbranch_locations_json(self)
-
Expand source code
def get_nonbranch_locations_json(self):
    """Fetch the non-branch location entries.

    Delegates to ``get_locations_json`` with the 'places' category.
    """
    nonbranch_category = 'places'
    return self.get_locations_json(nonbranch_category)
def get_nonbranch_locations_list(self)
-
Expand source code
def get_nonbranch_locations_list(self):
    """Return the id -> address mapping for non-branch locations.

    The non-branch fetcher is handed to ``get_locations_list`` uncalled;
    that method decides when to invoke it.
    """
    fetch_places = self.get_nonbranch_locations_json
    return self.get_locations_list(fetch_places)
def parse(self, response)
-
Expand source code
def parse(self, response):
    """Return whatever ``get_events`` produces.

    ``response`` is accepted to match the callback signature but is not
    inspected.
    """
    events = self.get_events()
    return events
Inherited members