diff --git a/.github/workflows/core_tests.yml b/.github/workflows/core_tests.yml index 0c11dd5df..1afe9270a 100644 --- a/.github/workflows/core_tests.yml +++ b/.github/workflows/core_tests.yml @@ -206,7 +206,6 @@ jobs: matrix: region: - prototype_mtc - - prototype_arc - placeholder_psrc - prototype_marin - prototype_mtc_extended diff --git a/activitysim/abm/models/atwork_subtour_frequency.py b/activitysim/abm/models/atwork_subtour_frequency.py index f9cae3821..348355443 100644 --- a/activitysim/abm/models/atwork_subtour_frequency.py +++ b/activitysim/abm/models/atwork_subtour_frequency.py @@ -117,6 +117,7 @@ def atwork_subtour_frequency( trace_label=trace_label, trace_choice_name="atwork_subtour_frequency", estimator=estimator, + compute_settings=model_settings.compute_settings, ) # convert indexes to alternative names diff --git a/activitysim/abm/models/atwork_subtour_scheduling.py b/activitysim/abm/models/atwork_subtour_scheduling.py index 1eec282f2..fff94ef30 100644 --- a/activitysim/abm/models/atwork_subtour_scheduling.py +++ b/activitysim/abm/models/atwork_subtour_scheduling.py @@ -56,7 +56,6 @@ def atwork_subtour_scheduling( estimator = estimation.manager.begin_estimation(state, "atwork_subtour_scheduling") model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) - sharrow_skip = model_settings.sharrow_skip coefficients_df = state.filesystem.read_model_coefficients(model_settings) model_spec = simulate.eval_coefficients( state, model_spec, coefficients_df, estimator @@ -96,7 +95,7 @@ def atwork_subtour_scheduling( estimator=estimator, chunk_size=state.settings.chunk_size, trace_label=trace_label, - sharrow_skip=sharrow_skip, + compute_settings=model_settings.compute_settings, ) if estimator: diff --git a/activitysim/abm/models/auto_ownership.py b/activitysim/abm/models/auto_ownership.py index c99344566..1f6f84648 100644 --- a/activitysim/abm/models/auto_ownership.py +++ b/activitysim/abm/models/auto_ownership.py @@ -9,20 +9,21 @@ from 
activitysim.core import ( config, - expressions, estimation, + expressions, simulate, tracing, workflow, ) from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable from activitysim.core.configuration.logit import LogitComponentSettings + from .util import annotate logger = logging.getLogger(__name__) -class AutoOwnershipSettings(LogitComponentSettings): +class AutoOwnershipSettings(LogitComponentSettings, extra="forbid"): """ Settings for the `auto_ownership` component. """ @@ -36,6 +37,8 @@ def auto_ownership_simulate( state: workflow.State, households: pd.DataFrame, households_merged: pd.DataFrame, + # FIXME: persons_merged not used but included, see #853 + persons_merged: pd.DataFrame, model_settings: AutoOwnershipSettings | None = None, model_settings_file_name: str = "auto_ownership.yaml", trace_label: str = "auto_ownership_simulate", @@ -75,6 +78,7 @@ def auto_ownership_simulate( locals_d.update(constants) expressions.assign_columns( + state, df=choosers, model_settings=preprocessor_settings, locals_dict=locals_d, @@ -99,6 +103,7 @@ def auto_ownership_simulate( trace_choice_name="auto_ownership", log_alt_losers=log_alt_losers, estimator=estimator, + compute_settings=model_settings.compute_settings, ) if estimator: @@ -117,7 +122,7 @@ def auto_ownership_simulate( ) if model_settings.annotate_households: - annotate.annotate_households(model_settings, trace_label) + annotate.annotate_households(state, model_settings, trace_label) if trace_hh_id: state.tracing.trace_df(households, label="auto_ownership", warn_if_empty=True) diff --git a/activitysim/abm/models/cdap.py b/activitysim/abm/models/cdap.py index 7eb082e4d..6776c06c7 100644 --- a/activitysim/abm/models/cdap.py +++ b/activitysim/abm/models/cdap.py @@ -17,7 +17,11 @@ tracing, workflow, ) -from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable +from activitysim.core.configuration.base import ( + ComputeSettings, + PreprocessorSettings, + 
PydanticReadable, +) from activitysim.core.util import reindex logger = logging.getLogger(__name__) @@ -30,10 +34,13 @@ class CdapSettings(PydanticReadable, extra="forbid"): FIXED_RELATIVE_PROPORTIONS_SPEC: str = "cdap_fixed_relative_proportions.csv" ADD_JOINT_TOUR_UTILITY: bool = False JOINT_TOUR_COEFFICIENTS: str = "cdap_joint_tour_coefficients.csv" + JOINT_TOUR_USEFUL_COLUMNS: list[str] | None = None + """Columns to include from the persons table that will be needed to calculate household joint tour utility.""" annotate_persons: PreprocessorSettings | None = None annotate_households: PreprocessorSettings | None = None COEFFICIENTS: Path CONSTANTS: dict[str, Any] = {} + compute_settings: ComputeSettings | None = None @workflow.step @@ -202,6 +209,7 @@ def cdap_simulate( trace_hh_id=trace_hh_id, trace_label=trace_label, add_joint_tour_utility=add_joint_tour_utility, + compute_settings=model_settings.compute_settings, ) else: choices = cdap.run_cdap( @@ -215,6 +223,7 @@ def cdap_simulate( chunk_size=state.settings.chunk_size, trace_hh_id=trace_hh_id, trace_label=trace_label, + compute_settings=model_settings.compute_settings, ) if estimator: diff --git a/activitysim/abm/models/free_parking.py b/activitysim/abm/models/free_parking.py index 97b70ade3..9aa2800a6 100644 --- a/activitysim/abm/models/free_parking.py +++ b/activitysim/abm/models/free_parking.py @@ -118,6 +118,7 @@ def free_parking( trace_label=trace_label, trace_choice_name="free_parking_at_work", estimator=estimator, + compute_settings=model_settings.compute_settings, ) free_parking_alt = model_settings.FREE_PARKING_ALT diff --git a/activitysim/abm/models/initialize.py b/activitysim/abm/models/initialize.py index 8e0c758c0..af1ba079b 100644 --- a/activitysim/abm/models/initialize.py +++ b/activitysim/abm/models/initialize.py @@ -178,6 +178,10 @@ def initialize_households( suffixes = disaggregate_accessibility.disaggregate_suffixes(state) shadow_pricing.add_size_tables(state, suffixes) + # create 
disaggregate_accessibility table if the model was not run + if state.is_table("proto_disaggregate_accessibility"): + disaggregate_accessibility.disaggregate_accessibility(state) + # - preload person_windows person_windows = state.get_dataframe("person_windows") chunk_sizer.log_df(trace_label, "person_windows", person_windows) diff --git a/activitysim/abm/models/joint_tour_composition.py b/activitysim/abm/models/joint_tour_composition.py index ab57298e0..ee4ac3a69 100644 --- a/activitysim/abm/models/joint_tour_composition.py +++ b/activitysim/abm/models/joint_tour_composition.py @@ -123,6 +123,7 @@ def joint_tour_composition( trace_label=trace_label, trace_choice_name="composition", estimator=estimator, + compute_settings=model_settings.compute_settings, ) # convert indexes to alternative names diff --git a/activitysim/abm/models/joint_tour_frequency.py b/activitysim/abm/models/joint_tour_frequency.py index 93a8aa475..1700c143b 100644 --- a/activitysim/abm/models/joint_tour_frequency.py +++ b/activitysim/abm/models/joint_tour_frequency.py @@ -112,6 +112,7 @@ def joint_tour_frequency( trace_label=trace_label, trace_choice_name="joint_tour_frequency", estimator=estimator, + compute_settings=model_settings.compute_settings, ) # convert indexes to alternative names diff --git a/activitysim/abm/models/joint_tour_frequency_composition.py b/activitysim/abm/models/joint_tour_frequency_composition.py index be52c900a..27b8347ec 100644 --- a/activitysim/abm/models/joint_tour_frequency_composition.py +++ b/activitysim/abm/models/joint_tour_frequency_composition.py @@ -26,7 +26,7 @@ logger = logging.getLogger(__name__) -class JointTourFrequencyCompositionSettings(LogitComponentSettings): +class JointTourFrequencyCompositionSettings(LogitComponentSettings, extra="forbid"): """ Settings for the `joint_tour_frequency_composition` component. 
""" @@ -140,6 +140,7 @@ def joint_tour_frequency_composition( trace_choice_name=trace_label, estimator=estimator, explicit_chunk_size=0, + compute_settings=model_settings.compute_settings, ) if estimator: diff --git a/activitysim/abm/models/joint_tour_participation.py b/activitysim/abm/models/joint_tour_participation.py index ac8afdce0..98a8c70c7 100644 --- a/activitysim/abm/models/joint_tour_participation.py +++ b/activitysim/abm/models/joint_tour_participation.py @@ -418,6 +418,7 @@ def joint_tour_participation( trace_choice_name="participation", custom_chooser=participants_chooser, estimator=estimator, + compute_settings=model_settings.compute_settings, ) # choice is boolean (participate or not) diff --git a/activitysim/abm/models/joint_tour_scheduling.py b/activitysim/abm/models/joint_tour_scheduling.py index 260abedc7..9bdcbe146 100644 --- a/activitysim/abm/models/joint_tour_scheduling.py +++ b/activitysim/abm/models/joint_tour_scheduling.py @@ -104,7 +104,6 @@ def joint_tour_scheduling( estimator = estimation.manager.begin_estimation(state, "joint_tour_scheduling") model_spec = state.filesystem.read_model_spec(file_name=model_settings.SPEC) - sharrow_skip = model_settings.sharrow_skip coefficients_df = state.filesystem.read_model_coefficients(model_settings) model_spec = simulate.eval_coefficients( state, model_spec, coefficients_df, estimator @@ -128,7 +127,7 @@ def joint_tour_scheduling( estimator=estimator, chunk_size=state.settings.chunk_size, trace_label=trace_label, - sharrow_skip=sharrow_skip, + compute_settings=model_settings.compute_settings, ) if estimator: diff --git a/activitysim/abm/models/location_choice.py b/activitysim/abm/models/location_choice.py index 3017235f6..cb4de93b0 100644 --- a/activitysim/abm/models/location_choice.py +++ b/activitysim/abm/models/location_choice.py @@ -192,6 +192,9 @@ def _location_sample( chunk_tag=chunk_tag, trace_label=trace_label, zone_layer=zone_layer, + 
compute_settings=model_settings.compute_settings.subcomponent_settings( + "sample" + ), ) return choices @@ -696,6 +699,9 @@ def run_location_simulate( trace_choice_name=model_settings.DEST_CHOICE_COLUMN_NAME, estimator=estimator, skip_choice=skip_choice, + compute_settings=model_settings.compute_settings.subcomponent_settings( + "simulate" + ), ) if not want_logsums: diff --git a/activitysim/abm/models/mandatory_tour_frequency.py b/activitysim/abm/models/mandatory_tour_frequency.py index a80b82904..8ab69710f 100644 --- a/activitysim/abm/models/mandatory_tour_frequency.py +++ b/activitysim/abm/models/mandatory_tour_frequency.py @@ -53,7 +53,7 @@ def add_null_results(state, trace_label, mandatory_tour_frequency_settings): state.add_table("persons", persons) -class MandatoryTourFrequencySettings(LogitComponentSettings): +class MandatoryTourFrequencySettings(LogitComponentSettings, extra="forbid"): """ Settings for the `mandatory_tour_frequency` component. """ @@ -134,6 +134,7 @@ def mandatory_tour_frequency( trace_label=trace_label, trace_choice_name="mandatory_tour_frequency", estimator=estimator, + compute_settings=model_settings.compute_settings, ) # convert indexes to alternative names diff --git a/activitysim/abm/models/non_mandatory_tour_frequency.py b/activitysim/abm/models/non_mandatory_tour_frequency.py index 972c4b3dc..5ec2c9407 100644 --- a/activitysim/abm/models/non_mandatory_tour_frequency.py +++ b/activitysim/abm/models/non_mandatory_tour_frequency.py @@ -12,8 +12,8 @@ from activitysim.abm.models.util import annotate from activitysim.abm.models.util.overlap import ( - person_max_window, person_available_periods, + person_max_window, ) from activitysim.abm.models.util.school_escort_tours_trips import ( recompute_tour_count_statistics, @@ -161,7 +161,7 @@ class NonMandatoryTourSpecSegment(PydanticReadable): COEFFICIENTS: Path -class NonMandatoryTourFrequencySettings(LogitComponentSettings): +class NonMandatoryTourFrequencySettings(LogitComponentSettings, 
extra="forbid"): """ Settings for the `non_mandatory_tour_frequency` component. """ @@ -321,6 +321,7 @@ def non_mandatory_tour_frequency( trace_choice_name="non_mandatory_tour_frequency", estimator=estimator, explicit_chunk_size=model_settings.explicit_chunk, + compute_settings=model_settings.compute_settings, ) if estimator: @@ -333,6 +334,9 @@ def non_mandatory_tour_frequency( choices_list.append(choices) + # FIXME only want to keep actual purposes, adding cols in alts will mess this up + # this is complicated by canonical_ids calculated based on alts if not specified explicitly + # thus, adding column to input alts will change IDs and break estimation mode.... del alternatives["tot_tours"] # del tot_tours column we added above # The choice value 'non_mandatory_tour_frequency' assigned by interaction_simulate diff --git a/activitysim/abm/models/parking_location_choice.py b/activitysim/abm/models/parking_location_choice.py index 2c9b5097c..d594dcfa3 100644 --- a/activitysim/abm/models/parking_location_choice.py +++ b/activitysim/abm/models/parking_location_choice.py @@ -330,6 +330,10 @@ class ParkingLocationSettings(LogitComponentSettings, extra="forbid"): SEGMENTS: list[str] | None = None + AUTO_MODES: list[str] + """List of auto modes that use parking. 
AUTO_MODES are used in write_trip_matrices to make sure + parking locations are accurately represented in the output trip matrices.""" + @workflow.step def parking_location( diff --git a/activitysim/abm/models/school_escorting.py b/activitysim/abm/models/school_escorting.py index b3aaf2b60..0ade57dd5 100644 --- a/activitysim/abm/models/school_escorting.py +++ b/activitysim/abm/models/school_escorting.py @@ -3,7 +3,7 @@ from __future__ import annotations import logging -from typing import Any +from typing import Any, Literal import numpy as np import pandas as pd @@ -58,7 +58,14 @@ def determine_escorting_participants( & (persons.cdap_activity == "M") ] households_with_escortees = escortees["household_id"] - choosers = choosers[choosers.index.isin(households_with_escortees)] + if len(households_with_escortees) == 0: + logger.warning("No households with escortees found!") + else: + tot_households = len(choosers) + choosers = choosers[choosers.index.isin(households_with_escortees)] + logger.info( + f"Proceeding with {len(choosers)} households with escortees out of {tot_households} total households" + ) # can specify different weights to determine chaperones persontype_weight = model_settings.PERSON_WEIGHT @@ -140,7 +147,7 @@ def add_prev_choices_to_choosers( stage_alts, how="left", left_on=escorting_choice, - right_on=stage_alts.index.name, + right_index=True, ) .set_index("household_id") ) @@ -216,8 +223,12 @@ def create_school_escorting_bundles_table(choosers, tours, stage): bundles : pd.DataFrame one school escorting bundle per row """ - # making a table of bundles - choosers = choosers.reset_index() + # want to keep household_id in columns, which is already there if running in estimation mode + if "household_id" in choosers.columns: + choosers = choosers.reset_index(drop=True) + else: + choosers = choosers.reset_index() + # creating a row for every school escorting bundle choosers = choosers.loc[choosers.index.repeat(choosers["nbundles"])] bundles = pd.DataFrame() 
@@ -356,7 +367,7 @@ def create_school_escorting_bundles_table(choosers, tours, stage): return bundles -class SchoolEscortSettings(BaseLogitComponentSettings): +class SchoolEscortSettings(BaseLogitComponentSettings, extra="forbid"): """ Settings for the `telecommute_frequency` component. """ @@ -380,21 +391,6 @@ class SchoolEscortSettings(BaseLogitComponentSettings): GENDER_WEIGHT: float = 10.0 AGE_WEIGHT: float = 1.0 - sharrow_skip: bool | dict[str, bool] = False - """Setting to skip sharrow. - - Sharrow can be skipped (or not) for all school escorting stages by giving - simply true or false. Alternatively, it can be skipped only for particular - stages by giving a mapping of stage name to skipping. For example: - - ```yaml - sharrow_skip: - OUTBOUND: true - INBOUND: false - OUTBOUND_COND: true - ``` - """ - SIMULATE_CHOOSER_COLUMNS: list[str] | None = None SPEC: None = None @@ -417,6 +413,13 @@ class SchoolEscortSettings(BaseLogitComponentSettings): explicit_chunk: int = 0 """If > 0, use this chunk size instead of adaptive chunking.""" + LOGIT_TYPE: Literal["MNL"] = "MNL" + """Logit model mathematical form. + + * "MNL" + Multinomial logit model. + """ + @workflow.step def school_escorting( @@ -460,7 +463,11 @@ def school_escorting( trace_hh_id = state.settings.trace_hh_id - alts = simulate.read_model_alts(state, model_settings.ALTS, set_index="Alt") + # FIXME setting index as "Alt" causes crash in estimation mode... + # happens in joint_tour_frequency_composition too! 
+ # alts = simulate.read_model_alts(state, model_settings.ALTS, set_index="Alt") + alts = simulate.read_model_alts(state, model_settings.ALTS, set_index=None) + alts.index = alts["Alt"].values choosers, participant_columns = determine_escorting_participants( households_merged, persons, model_settings @@ -478,7 +485,9 @@ def school_escorting( for stage_num, stage in enumerate(school_escorting_stages): stage_trace_label = trace_label + "_" + stage estimator = estimation.manager.begin_estimation( - state, "school_escorting_" + stage + state, + model_name="school_escorting_" + stage, + bundle_name="school_escorting", ) model_spec_raw = state.filesystem.read_model_spec( @@ -491,19 +500,15 @@ def school_escorting( state, model_spec_raw, coefficients_df, estimator ) - # allow for skipping sharrow entirely in this model with `sharrow_skip: true` + # allow for skipping sharrow entirely in this model with `compute_settings.sharrow_skip: true` # or skipping stages selectively with a mapping of the stages to skip - sharrow_skip = model_settings.sharrow_skip - stage_sharrow_skip = False # default is false unless set below - if sharrow_skip: - if isinstance(sharrow_skip, dict): - stage_sharrow_skip = sharrow_skip.get(stage.upper(), False) - else: - stage_sharrow_skip = True - if stage_sharrow_skip: - locals_dict["_sharrow_skip"] = True - else: - locals_dict.pop("_sharrow_skip", None) + stage_compute_settings = model_settings.compute_settings.subcomponent_settings( + stage.upper() + ) + # if stage_sharrow_skip: + # locals_dict["_sharrow_skip"] = True + # else: + # locals_dict.pop("_sharrow_skip", None) # reduce memory by limiting columns if selected columns are supplied chooser_columns = model_settings.SIMULATE_CHOOSER_COLUMNS @@ -533,9 +538,26 @@ def school_escorting( if estimator: estimator.write_model_settings(model_settings, model_settings_file_name) - estimator.write_spec(model_settings) - estimator.write_coefficients(coefficients_df, model_settings) + 
estimator.write_spec(model_settings, tag=stage.upper() + "_SPEC") + estimator.write_coefficients( + coefficients_df, file_name=stage.upper() + "_COEFFICIENTS" + ) estimator.write_choosers(choosers) + estimator.write_alternatives(alts, bundle_directory=True) + + # FIXME #interaction_simulate_estimation_requires_chooser_id_in_df_column + # should we do it here or have interaction_simulate do it? + # chooser index must be duplicated in column or it will be omitted from interaction_dataset + # estimation requires that chooser_id is either in index or a column of interaction_dataset + # so it can be reformatted (melted) and indexed by chooser_id and alt_id + assert choosers.index.name == "household_id" + assert "household_id" not in choosers.columns + choosers["household_id"] = choosers.index + + # FIXME set_alt_id - do we need this for interaction_simulate estimation bundle tables? + estimator.set_alt_id("alt_id") + + estimator.set_chooser_id(choosers.index.name) log_alt_losers = state.settings.log_alt_losers @@ -550,6 +572,7 @@ def school_escorting( trace_choice_name="school_escorting_" + stage, estimator=estimator, explicit_chunk_size=model_settings.explicit_chunk, + compute_settings=stage_compute_settings, ) if estimator: @@ -580,47 +603,74 @@ def school_escorting( if stage_num >= 1: choosers["Alt"] = choices - choosers = choosers.join(alts, how="left", on="Alt") + choosers = choosers.join(alts.set_index("Alt"), how="left", on="Alt") bundles = create_school_escorting_bundles_table( choosers[choosers["Alt"] > 1], tours, stage ) escort_bundles.append(bundles) escort_bundles = pd.concat(escort_bundles) - escort_bundles["bundle_id"] = ( - escort_bundles["household_id"] * 10 - + escort_bundles.groupby("household_id").cumcount() - + 1 - ) - escort_bundles.sort_values( - by=["household_id", "school_escort_direction"], - ascending=[True, False], - inplace=True, - ) - school_escort_tours = school_escort_tours_trips.create_pure_school_escort_tours( - state, escort_bundles - ) 
- chauf_tour_id_map = { - v: k for k, v in school_escort_tours["bundle_id"].to_dict().items() - } - escort_bundles["chauf_tour_id"] = np.where( - escort_bundles["escort_type"] == "ride_share", - escort_bundles["first_mand_tour_id"], - escort_bundles["bundle_id"].map(chauf_tour_id_map), - ) - assert ( - escort_bundles["chauf_tour_id"].notnull().all() - ), f"chauf_tour_id is null for {escort_bundles[escort_bundles['chauf_tour_id'].isna()]}. Check availability conditions." + # Only want to create bundles and tours and trips if at least one household has school escorting + if len(escort_bundles) > 0: + escort_bundles["bundle_id"] = ( + escort_bundles["household_id"] * 10 + + escort_bundles.groupby("household_id").cumcount() + + 1 + ) + escort_bundles.sort_values( + by=["household_id", "school_escort_direction"], + ascending=[True, False], + inplace=True, + ) - tours = school_escort_tours_trips.add_pure_escort_tours(tours, school_escort_tours) - tours = school_escort_tours_trips.process_tours_after_escorting_model( - state, escort_bundles, tours - ) + school_escort_tours = school_escort_tours_trips.create_pure_school_escort_tours( + state, escort_bundles + ) + chauf_tour_id_map = { + v: k for k, v in school_escort_tours["bundle_id"].to_dict().items() + } + escort_bundles["chauf_tour_id"] = np.where( + escort_bundles["escort_type"] == "ride_share", + escort_bundles["first_mand_tour_id"], + escort_bundles["bundle_id"].map(chauf_tour_id_map), + ) - school_escort_trips = school_escort_tours_trips.create_school_escort_trips( - escort_bundles - ) + assert ( + escort_bundles["chauf_tour_id"].notnull().all() + ), f"chauf_tour_id is null for {escort_bundles[escort_bundles['chauf_tour_id'].isna()]}. Check availability conditions." 
+ + tours = school_escort_tours_trips.add_pure_escort_tours( + tours, school_escort_tours + ) + tours = school_escort_tours_trips.process_tours_after_escorting_model( + state, escort_bundles, tours + ) + school_escort_trips = school_escort_tours_trips.create_school_escort_trips( + escort_bundles + ) + + else: + # create empty school escort tours & trips tables to be used downstream + tours["school_esc_outbound"] = pd.NA + tours["school_esc_inbound"] = pd.NA + tours["school_escort_direction"] = pd.NA + tours["next_pure_escort_start"] = pd.NA + school_escort_tours = pd.DataFrame(columns=tours.columns) + trip_cols = [ + "household_id", + "person_id", + "tour_id", + "trip_id", + "outbound", + "depart", + "purpose", + "destination", + "escort_participants", + "chauf_tour_id", + "primary_purpose", + ] + school_escort_trips = pd.DataFrame(columns=trip_cols) school_escort_trips["primary_purpose"] = school_escort_trips[ "primary_purpose" diff --git a/activitysim/abm/models/stop_frequency.py b/activitysim/abm/models/stop_frequency.py index 4513c2477..2f0253f21 100644 --- a/activitysim/abm/models/stop_frequency.py +++ b/activitysim/abm/models/stop_frequency.py @@ -214,6 +214,7 @@ def stop_frequency( trace_label=tracing.extend_trace_label(trace_label, segment_name), trace_choice_name="stops", estimator=estimator, + compute_settings=model_settings.compute_settings, ) # convert indexes to alternative names diff --git a/activitysim/abm/models/telecommute_frequency.py b/activitysim/abm/models/telecommute_frequency.py index 19bd850f8..f98791a2a 100755 --- a/activitysim/abm/models/telecommute_frequency.py +++ b/activitysim/abm/models/telecommute_frequency.py @@ -20,7 +20,7 @@ logger = logging.getLogger("activitysim") -class TelecommuteFrequencySettings(LogitComponentSettings): +class TelecommuteFrequencySettings(LogitComponentSettings, extra="forbid"): """ Settings for the `telecommute_frequency` component. 
""" @@ -99,6 +99,7 @@ def telecommute_frequency( trace_label=trace_label, trace_choice_name="telecommute_frequency", estimator=estimator, + compute_settings=model_settings.compute_settings, ) choices = pd.Series(model_spec.columns[choices.values], index=choices.index) diff --git a/activitysim/abm/models/transit_pass_ownership.py b/activitysim/abm/models/transit_pass_ownership.py index 48e01c47d..9a34b7b0b 100644 --- a/activitysim/abm/models/transit_pass_ownership.py +++ b/activitysim/abm/models/transit_pass_ownership.py @@ -20,7 +20,7 @@ logger = logging.getLogger("activitysim") -class TransitPassOwnershipSettings(LogitComponentSettings): +class TransitPassOwnershipSettings(LogitComponentSettings, extra="forbid"): """ Settings for the `transit_pass_ownership` component. """ @@ -93,6 +93,7 @@ def transit_pass_ownership( trace_label=trace_label, trace_choice_name="transit_pass_ownership", estimator=estimator, + compute_settings=model_settings.compute_settings, ) if estimator: diff --git a/activitysim/abm/models/transit_pass_subsidy.py b/activitysim/abm/models/transit_pass_subsidy.py index 7d1f320e2..0f71279cd 100644 --- a/activitysim/abm/models/transit_pass_subsidy.py +++ b/activitysim/abm/models/transit_pass_subsidy.py @@ -28,6 +28,9 @@ class TransitPassSubsidySettings(LogitComponentSettings, extra="forbid"): preprocessor: PreprocessorSettings | None = None """Setting for the preprocessor.""" + CHOOSER_FILTER_COLUMN_NAME: str | None = None + """Column name which selects choosers. 
If None, all persons are choosers.""" + @workflow.step def transit_pass_subsidy( @@ -48,7 +51,6 @@ def transit_pass_subsidy( ) choosers = persons_merged - logger.info("Running %s with %d persons", trace_label, len(choosers)) estimator = estimation.manager.begin_estimation(state, "transit_pass_subsidy") @@ -69,6 +71,11 @@ def transit_pass_subsidy( trace_label=trace_label, ) + filter_col = model_settings.CHOOSER_FILTER_COLUMN_NAME + if filter_col is not None: + choosers = choosers[choosers[filter_col]] + logger.info("Running %s with %d persons", trace_label, len(choosers)) + model_spec = state.filesystem.read_model_spec(model_settings.SPEC) coefficients_df = state.filesystem.read_model_coefficients(model_settings) model_spec = simulate.eval_coefficients( @@ -92,6 +99,7 @@ def transit_pass_subsidy( trace_label=trace_label, trace_choice_name="transit_pass_subsidy", estimator=estimator, + compute_settings=model_settings.compute_settings, ) if estimator: @@ -102,7 +110,9 @@ def transit_pass_subsidy( estimator.write_override_choices(choices) estimator.end_estimation() - persons["transit_pass_subsidy"] = choices.reindex(persons.index) + persons["transit_pass_subsidy"] = ( + choices.reindex(persons.index).fillna(0).astype(int) + ) state.add_table("persons", persons) diff --git a/activitysim/abm/models/trip_departure_choice.py b/activitysim/abm/models/trip_departure_choice.py index 43f02df34..a0ddb363d 100644 --- a/activitysim/abm/models/trip_departure_choice.py +++ b/activitysim/abm/models/trip_departure_choice.py @@ -3,7 +3,6 @@ from __future__ import annotations import logging -from pathlib import Path from typing import Any import numpy as np @@ -20,7 +19,11 @@ tracing, workflow, ) -from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable +from activitysim.core.configuration.base import ( + ComputeSettings, + PreprocessorSettings, + PydanticCompute, +) from activitysim.core.skim_dataset import SkimDataset from 
activitysim.core.skim_dictionary import SkimDict from activitysim.core.util import reindex @@ -188,6 +191,7 @@ def choose_tour_leg_pattern( trace_label="trace_label", *, chunk_sizer: chunk.ChunkSizer, + compute_settings: ComputeSettings | None = None, ): alternatives = generate_alternatives(trip_segment, STOP_TIME_DURATION).sort_index() have_trace_targets = state.tracing.has_trace_targets(trip_segment) @@ -234,7 +238,14 @@ def choose_tour_leg_pattern( interaction_utilities, trace_eval_results, ) = interaction_simulate.eval_interaction_utilities( - state, spec, interaction_df, None, trace_label, trace_rows, estimator=None + state, + spec, + interaction_df, + None, + trace_label, + trace_rows, + estimator=None, + compute_settings=compute_settings, ) interaction_utilities = pd.concat( @@ -385,7 +396,14 @@ def choose_tour_leg_pattern( return choices -def apply_stage_two_model(state, omnibus_spec, trips, chunk_size, trace_label): +def apply_stage_two_model( + state: workflow.State, + omnibus_spec, + trips, + chunk_size, + trace_label: str, + compute_settings: ComputeSettings | None = None, +): if not trips.index.is_monotonic: trips = trips.sort_index() @@ -436,7 +454,7 @@ def apply_stage_two_model(state, omnibus_spec, trips, chunk_size, trace_label): trip_list = [] for ( - i, + _i, chooser_chunk, chunk_trace_label, chunk_sizer, @@ -444,7 +462,7 @@ def apply_stage_two_model(state, omnibus_spec, trips, chunk_size, trace_label): for is_outbound, trip_segment in chooser_chunk.groupby(OUTBOUND): direction = OUTBOUND if is_outbound else "inbound" spec = get_spec_for_segment(omnibus_spec, direction) - segment_trace_label = "{}_{}".format(direction, chunk_trace_label) + segment_trace_label = f"{direction}_{chunk_trace_label}" patterns = build_patterns(trip_segment, time_windows) @@ -455,6 +473,7 @@ def apply_stage_two_model(state, omnibus_spec, trips, chunk_size, trace_label): spec, trace_label=segment_trace_label, chunk_sizer=chunk_sizer, + compute_settings=compute_settings, ) 
choices = pd.merge( @@ -482,7 +501,7 @@ def apply_stage_two_model(state, omnibus_spec, trips, chunk_size, trace_label): return trips["depart"].astype(int) -class TripDepartureChoiceSettings(PydanticReadable, extra="forbid"): +class TripDepartureChoiceSettings(PydanticCompute, extra="forbid"): """ Settings for the `trip_departure_choice` component. """ @@ -506,7 +525,6 @@ def trip_departure_choice( model_settings_file_name: str = "trip_departure_choice.yaml", trace_label: str = "trip_departure_choice", ) -> None: - if model_settings is None: model_settings = TripDepartureChoiceSettings.read_settings_file( state.filesystem, @@ -557,7 +575,12 @@ def trip_departure_choice( ) choices = apply_stage_two_model( - state, spec, trips_merged_df, state.settings.chunk_size, trace_label + state, + spec, + trips_merged_df, + state.settings.chunk_size, + trace_label, + compute_settings=model_settings.compute_settings, ) trips_df = trips diff --git a/activitysim/abm/models/trip_destination.py b/activitysim/abm/models/trip_destination.py index 6dea50980..2e07220c6 100644 --- a/activitysim/abm/models/trip_destination.py +++ b/activitysim/abm/models/trip_destination.py @@ -215,6 +215,9 @@ def _destination_sample( chunk_tag=chunk_tag, trace_label=trace_label, zone_layer=zone_layer, + compute_settings=model_settings.compute_settings.subcomponent_settings( + "sample" + ), ) return choices diff --git a/activitysim/abm/models/trip_matrices.py b/activitysim/abm/models/trip_matrices.py index 12c2e4f6a..5552e3b00 100644 --- a/activitysim/abm/models/trip_matrices.py +++ b/activitysim/abm/models/trip_matrices.py @@ -13,6 +13,7 @@ from activitysim.core import config, expressions, los, workflow from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable from activitysim.core.configuration.logit import LogitComponentSettings +from activitysim.abm.models.parking_location_choice import ParkingLocationSettings logger = logging.getLogger(__name__) @@ -93,16 +94,35 @@ def 
write_trip_matrices( state.add_table("trips", trips_df) if "parking_location" in state.settings.models: - parking_settings = state.filesystem.read_model_settings( - "parking_location_choice.yaml" + parking_settings = ParkingLocationSettings.read_settings_file( + state.filesystem, + "parking_location_choice.yaml", ) - parking_taz_col_name = parking_settings["ALT_DEST_COL_NAME"] + parking_taz_col_name = parking_settings.ALT_DEST_COL_NAME + if ~(trips_df["trip_mode"].isin(parking_settings.AUTO_MODES)).any(): + logger.warning( + f"Parking location choice model is enabled, but none of {parking_settings.AUTO_MODES} auto modes found in trips table." + "See AUTO_MODES setting in parking_location_choice.yaml." + ) + if parking_taz_col_name in trips_df: - # TODO make parking zone negative, not zero, if not used + trips_df["true_origin"] = trips_df["origin"] + trips_df["true_destination"] = trips_df["destination"] + + # Get origin parking zone if vehicle not parked at origin + trips_df["origin_parking_zone"] = np.where( + (trips_df["tour_id"] == trips_df["tour_id"].shift(1)) + & trips_df["trip_mode"].isin(parking_settings.AUTO_MODES), + trips_df[parking_taz_col_name].shift(1), + -1, + ) + trips_df.loc[trips_df[parking_taz_col_name] > 0, "destination"] = trips_df[ parking_taz_col_name ] - # Also need address the return trip + trips_df.loc[trips_df["origin_parking_zone"] > 0, "origin"] = trips_df[ + "origin_parking_zone" + ] # write matrices by zone system type if network_los.zone_system == los.ONE_ZONE: # taz trips written to taz matrices @@ -269,6 +289,24 @@ def write_trip_matrices( True, ) + if "parking_location" in state.settings.models: + # Set trip origin and destination to be the actual location the person is and not where their vehicle is parked + trips_df["origin"] = trips_df["true_origin"] + trips_df["destination"] = trips_df["true_destination"] + del trips_df["true_origin"], trips_df["true_destination"] + if ( + network_los.zone_system == los.TWO_ZONE + or 
network_los.zone_system == los.THREE_ZONE + ): + trips_df["otaz"] = ( + state.get_table("land_use").reindex(trips_df["origin"]).TAZ.tolist() + ) + trips_df["dtaz"] = ( + state.get_table("land_use") + .reindex(trips_df["destination"]) + .TAZ.tolist() + ) + def annotate_trips( state: workflow.State, diff --git a/activitysim/abm/models/trip_mode_choice.py b/activitysim/abm/models/trip_mode_choice.py index b9091522c..e3e530fb7 100644 --- a/activitysim/abm/models/trip_mode_choice.py +++ b/activitysim/abm/models/trip_mode_choice.py @@ -277,6 +277,7 @@ def trip_mode_choice( trace_label=segment_trace_label, trace_choice_name="trip_mode_choice", estimator=estimator, + compute_settings=model_settings.compute_settings, ) if state.settings.trace_hh_id: diff --git a/activitysim/abm/models/util/annotate.py b/activitysim/abm/models/util/annotate.py index 69958b1ab..5c0f62821 100644 --- a/activitysim/abm/models/util/annotate.py +++ b/activitysim/abm/models/util/annotate.py @@ -36,6 +36,7 @@ def annotate_households( locals_dict = {} households = state.get_dataframe("households") expressions.assign_columns( + state, df=households, model_settings=model_settings.get("annotate_households"), locals_dict=locals_dict, @@ -64,6 +65,7 @@ def annotate_persons( locals_dict = {} persons = state.get_dataframe("persons") expressions.assign_columns( + state, df=persons, model_settings=model_settings.get("annotate_persons"), locals_dict=locals_dict, diff --git a/activitysim/abm/models/util/cdap.py b/activitysim/abm/models/util/cdap.py index eab26bc7b..f52713ebf 100644 --- a/activitysim/abm/models/util/cdap.py +++ b/activitysim/abm/models/util/cdap.py @@ -9,6 +9,7 @@ import pandas as pd from activitysim.core import chunk, logit, simulate, tracing, workflow +from activitysim.core.configuration.base import ComputeSettings logger = logging.getLogger(__name__) @@ -184,6 +185,7 @@ def individual_utilities( trace_label=None, *, chunk_sizer, + compute_settings: ComputeSettings | None = None, ): """ 
Calculate CDAP utilities for all individuals. @@ -211,6 +213,7 @@ def individual_utilities( locals_d, trace_label=trace_label, chunk_sizer=chunk_sizer, + compute_settings=compute_settings, ) # add columns from persons to facilitate building household interactions @@ -218,8 +221,10 @@ def individual_utilities( indiv_utils[useful_columns] = persons[useful_columns] # add attributes for joint tour utility - model_settings = state.filesystem.read_model_settings("cdap.yaml") - additional_useful_columns = model_settings.get("JOINT_TOUR_USEFUL_COLUMNS", None) + from activitysim.abm.models.cdap import CdapSettings + + model_settings = CdapSettings.read_settings_file(state.filesystem, "cdap.yaml") + additional_useful_columns = model_settings.JOINT_TOUR_USEFUL_COLUMNS if additional_useful_columns is not None: indiv_utils[additional_useful_columns] = persons[additional_useful_columns] @@ -847,8 +852,10 @@ def hh_choosers(state: workflow.State, indiv_utils, hhsize): merge_cols = [_hh_id_, _ptype_, "M", "N", "H"] # add attributes for joint tour utility - model_settings = state.filesystem.read_model_settings("cdap.yaml") - additional_merge_cols = model_settings.get("JOINT_TOUR_USEFUL_COLUMNS", None) + from activitysim.abm.models.cdap import CdapSettings + + model_settings = CdapSettings.read_settings_file(state.filesystem, "cdap.yaml") + additional_merge_cols = model_settings.JOINT_TOUR_USEFUL_COLUMNS if additional_merge_cols is not None: merge_cols.extend(additional_merge_cols) @@ -909,6 +916,7 @@ def household_activity_choices( add_joint_tour_utility=False, *, chunk_sizer, + compute_settings: ComputeSettings | None = None, ): """ Calculate household utilities for each activity pattern alternative for households of hhsize @@ -957,13 +965,17 @@ def household_activity_choices( ) utils = simulate.eval_utilities( - state, spec, choosers, trace_label=trace_label, chunk_sizer=chunk_sizer + state, + spec, + choosers, + trace_label=trace_label, + chunk_sizer=chunk_sizer, + 
compute_settings=compute_settings, ) if len(utils.index) == 0: return pd.Series(dtype="float64") - probs = logit.utils_to_probs(state, utils, trace_label=trace_label) # calculate joint tour utility if add_joint_tour_utility & (hhsize > 1): # calculate joint utils @@ -981,11 +993,14 @@ def household_activity_choices( choosers, trace_label=trace_label, chunk_sizer=chunk_sizer, + compute_settings=compute_settings, ) # add joint util to util utils = utils.add(joint_tour_utils) + probs = logit.utils_to_probs(state, utils, trace_label=trace_label) + # select an activity pattern alternative for each household based on probability # result is a series indexed on _hh_index_ with the (0 based) index of the column from probs idx_choices, rands = logit.make_choices(state, probs, trace_label=trace_label) @@ -1183,6 +1198,7 @@ def _run_cdap( add_joint_tour_utility, *, chunk_sizer, + compute_settings: ComputeSettings | None = None, ) -> pd.DataFrame | tuple: """ Implements core run_cdap functionality on persons df (or chunked subset thereof) @@ -1213,6 +1229,7 @@ def _run_cdap( trace_hh_id, trace_label, chunk_sizer=chunk_sizer, + compute_settings=compute_settings, ) chunk_sizer.log_df(trace_label, "indiv_utils", indiv_utils) @@ -1229,6 +1246,7 @@ def _run_cdap( trace_label=trace_label, add_joint_tour_utility=add_joint_tour_utility, chunk_sizer=chunk_sizer, + compute_settings=compute_settings, ) hh_choices_list.append(choices) @@ -1265,20 +1283,6 @@ def _run_cdap( persons["cdap_activity"] = person_choices chunk_sizer.log_df(trace_label, "persons", persons) - # return household joint tour flag - if add_joint_tour_utility: - hh_activity_choices = hh_activity_choices.to_frame(name="hh_choices") - hh_activity_choices["has_joint_tour"] = hh_activity_choices["hh_choices"].apply( - lambda x: 1 if "J" in x else 0 - ) - - # return household joint tour flag - if add_joint_tour_utility: - hh_activity_choices = hh_activity_choices.to_frame(name="hh_choices") - 
hh_activity_choices["has_joint_tour"] = hh_activity_choices["hh_choices"].apply( - lambda x: 1 if "J" in x else 0 - ) - # return household joint tour flag if add_joint_tour_utility: hh_activity_choices = hh_activity_choices.to_frame(name="hh_choices") @@ -1315,6 +1319,7 @@ def run_cdap( trace_hh_id=None, trace_label=None, add_joint_tour_utility=False, + compute_settings: ComputeSettings | None = None, ): """ Choose individual activity patterns for persons. @@ -1378,6 +1383,7 @@ def run_cdap( chunk_trace_label, add_joint_tour_utility, chunk_sizer=chunk_sizer, + compute_settings=compute_settings, ) else: cdap_results = _run_cdap( @@ -1392,6 +1398,7 @@ def run_cdap( chunk_trace_label, add_joint_tour_utility, chunk_sizer=chunk_sizer, + compute_settings=compute_settings, ) result_list.append(cdap_results) diff --git a/activitysim/abm/models/util/mode.py b/activitysim/abm/models/util/mode.py index b6e6bbb23..49776684a 100644 --- a/activitysim/abm/models/util/mode.py +++ b/activitysim/abm/models/util/mode.py @@ -9,6 +9,7 @@ import pandas as pd from activitysim.core import config, expressions, simulate, workflow +from activitysim.core.configuration.base import ComputeSettings from activitysim.core.configuration.logit import TourModeComponentSettings from activitysim.core.estimation import Estimator @@ -34,6 +35,7 @@ def mode_choice_simulate( trace_choice_name, trace_column_names=None, estimator: Optional[Estimator] = None, + compute_settings: ComputeSettings | None = None, ): """ common method for both tour_mode_choice and trip_mode_choice @@ -51,6 +53,7 @@ def mode_choice_simulate( trace_label trace_choice_name estimator + compute_settings : ComputeSettings Returns ------- @@ -70,6 +73,7 @@ def mode_choice_simulate( trace_choice_name=trace_choice_name, estimator=estimator, trace_column_names=trace_column_names, + compute_settings=compute_settings, ) # for consistency, always return dataframe, whether or not logsums were requested @@ -170,6 +174,7 @@ def 
run_tour_mode_choice_simulate( trace_choice_name=trace_choice_name, trace_column_names=trace_column_names, estimator=estimator, + compute_settings=model_settings.compute_settings, ) return choices diff --git a/activitysim/abm/models/util/school_escort_tours_trips.py b/activitysim/abm/models/util/school_escort_tours_trips.py index d8b5386eb..e22a7198f 100644 --- a/activitysim/abm/models/util/school_escort_tours_trips.py +++ b/activitysim/abm/models/util/school_escort_tours_trips.py @@ -405,6 +405,19 @@ def merge_school_escort_trips_into_pipeline(state: workflow.State): tours = state.get_dataframe("tours") trips = state.get_dataframe("trips") + # checking to see if there are school escort trips to merge in + if len(school_escort_trips) == 0: + # if no trips, fill escorting columns with NA + trips[ + [ + "escort_participants", + "school_escort_direction", + "school_escort_trip_id", + ] + ] = pd.NA + state.add_table("trips", trips) + return trips + # want to remove stops if school escorting takes place on that half tour so we can replace them with the actual stops out_se_tours = tours[ tours["school_esc_outbound"].isin(["pure_escort", "ride_share"]) @@ -643,6 +656,10 @@ def force_escortee_tour_modes_to_match_chauffeur(state: workflow.State, tours): # Does it even matter if trip modes are getting matched later? 
escort_bundles = state.get_dataframe("escort_bundles") + if len(escort_bundles) == 0: + # do not need to do anything if no escorting + return tours + # grabbing the school tour ids for each school escort bundle se_tours = escort_bundles[["school_tour_ids", "chauf_tour_id"]].copy() # merging in chauffeur tour mode diff --git a/activitysim/abm/models/util/test/configs/cdap.yaml b/activitysim/abm/models/util/test/configs/cdap.yaml index f20d2979a..e2efdfa88 100644 --- a/activitysim/abm/models/util/test/configs/cdap.yaml +++ b/activitysim/abm/models/util/test/configs/cdap.yaml @@ -6,3 +6,6 @@ PERSON_TYPE_MAP: - 6 - 7 - 8 + +INDIV_AND_HHSIZE1_SPEC: cdap_indiv_and_hhsize1.csv +COEFFICIENTS: cdap_coefficients.csv \ No newline at end of file diff --git a/activitysim/abm/models/util/tour_destination.py b/activitysim/abm/models/util/tour_destination.py index fbc8113e2..22b0744da 100644 --- a/activitysim/abm/models/util/tour_destination.py +++ b/activitysim/abm/models/util/tour_destination.py @@ -123,6 +123,9 @@ def _destination_sample( chunk_tag=chunk_tag, trace_label=trace_label, zone_layer=zone_layer, + compute_settings=model_settings.compute_settings.subcomponent_settings( + "sample" + ), ) # if special person id is passed diff --git a/activitysim/abm/models/util/tour_frequency.py b/activitysim/abm/models/util/tour_frequency.py index eff354965..2709fa4b8 100644 --- a/activitysim/abm/models/util/tour_frequency.py +++ b/activitysim/abm/models/util/tour_frequency.py @@ -637,7 +637,7 @@ class JointTourFreqCompAlts(PydanticReadable): COMPOSITION: JointTourFreqCompContent -class JointTourFreqCompSettings(LogitComponentSettings): +class JointTourFreqCompSettings(LogitComponentSettings, extra="forbid"): """ Settings for joint tour frequency and composition. 
""" diff --git a/activitysim/abm/models/util/tour_od.py b/activitysim/abm/models/util/tour_od.py index 22ea4a310..89dc7fdc3 100644 --- a/activitysim/abm/models/util/tour_od.py +++ b/activitysim/abm/models/util/tour_od.py @@ -216,6 +216,9 @@ def _od_sample( chunk_tag=chunk_tag, trace_label=trace_label, zone_layer="taz", + compute_settings=model_settings.compute_settings.subcomponent_settings( + "sample" + ), ) return choices diff --git a/activitysim/abm/models/util/tour_scheduling.py b/activitysim/abm/models/util/tour_scheduling.py index f52d0db44..db003786f 100644 --- a/activitysim/abm/models/util/tour_scheduling.py +++ b/activitysim/abm/models/util/tour_scheduling.py @@ -16,11 +16,11 @@ def run_tour_scheduling( state: workflow.State, - model_name, - chooser_tours, - persons_merged, - tdd_alts, - tour_segment_col, + model_name: str, + chooser_tours: pd.DataFrame, + persons_merged: pd.DataFrame, + tdd_alts: pd.DataFrame, + tour_segment_col: str, ): trace_label = model_name model_settings_file_name = f"{model_name}.yaml" @@ -70,7 +70,7 @@ def run_tour_scheduling( # load segmented specs spec_segment_settings = model_settings.SPEC_SEGMENTS specs = {} - sharrow_skips = {} + compute_settings = {} for spec_segment_name, spec_settings in spec_segment_settings.items(): bundle_name = f"{model_name}_{spec_segment_name}" @@ -85,7 +85,9 @@ def run_tour_scheduling( specs[spec_segment_name] = simulate.eval_coefficients( state, model_spec, coefficients_df, estimator ) - sharrow_skips[spec_segment_name] = spec_settings.sharrow_skip + compute_settings[ + spec_segment_name + ] = spec_settings.compute_settings.subcomponent_settings(spec_segment_name) if estimator: estimators[spec_segment_name] = estimator # add to local list @@ -100,7 +102,7 @@ def run_tour_scheduling( tour_segments[tour_segment_name] = {} tour_segments[tour_segment_name]["spec_segment_name"] = spec_segment_name tour_segments[tour_segment_name]["spec"] = specs[spec_segment_name] - 
tour_segments[tour_segment_name]["sharrow_skip"] = sharrow_skips[ + tour_segments[tour_segment_name]["compute_settings"] = compute_settings[ spec_segment_name ] tour_segments[tour_segment_name]["estimator"] = estimators.get( @@ -123,7 +125,6 @@ def run_tour_scheduling( spec_file_name = model_settings.SPEC model_spec = state.filesystem.read_model_spec(file_name=spec_file_name) - sharrow_skip = model_settings.sharrow_skip coefficients_df = state.filesystem.read_model_coefficients(model_settings) model_spec = simulate.eval_coefficients( state, model_spec, coefficients_df, estimator @@ -139,7 +140,7 @@ def run_tour_scheduling( tour_segments = { "spec": model_spec, "estimator": estimator, - "sharrow_skip": sharrow_skip, + "compute_settings": model_settings.compute_settings, } if estimators: diff --git a/activitysim/abm/models/util/vectorize_tour_scheduling.py b/activitysim/abm/models/util/vectorize_tour_scheduling.py index 17425d75b..6bdc907bd 100644 --- a/activitysim/abm/models/util/vectorize_tour_scheduling.py +++ b/activitysim/abm/models/util/vectorize_tour_scheduling.py @@ -14,7 +14,7 @@ from activitysim.core import chunk, config, expressions, los, simulate from activitysim.core import timetable as tt from activitysim.core import tracing, workflow -from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable +from activitysim.core.configuration.base import ComputeSettings, PreprocessorSettings from activitysim.core.configuration.logit import LogitComponentSettings from activitysim.core.interaction_sample_simulate import interaction_sample_simulate from activitysim.core.util import reindex @@ -224,6 +224,7 @@ def _compute_logsums( locals_d=locals_dict, chunk_size=0, trace_label=trace_label, + compute_settings=model_settings.compute_settings, ) return logsums @@ -705,8 +706,8 @@ def _schedule_tours( tour_owner_id_col, estimator, tour_trace_label, - sharrow_skip=False, *, + compute_settings: ComputeSettings | None = None, chunk_sizer: 
chunk.ChunkSizer, ): """ @@ -821,11 +822,6 @@ def _schedule_tours( if constants is not None: locals_d.update(constants) - if sharrow_skip: - locals_d["_sharrow_skip"] = True - else: - locals_d["_sharrow_skip"] = False - if not RUN_ALTS_PREPROCESSOR_BEFORE_MERGE: # Note: Clint was running alts_preprocessor here on tdd_interaction_dataset instead of on raw (unmerged) alts # and he was using logsum_tour_purpose as selector, although logically it should be the spec_segment @@ -861,6 +857,7 @@ def _schedule_tours( chunk_size=0, trace_label=tour_trace_label, estimator=estimator, + compute_settings=compute_settings, ) chunk_sizer.log_df(tour_trace_label, "choices", choices) @@ -891,7 +888,7 @@ def schedule_tours( chunk_size, tour_trace_label, tour_chunk_tag, - sharrow_skip=False, + compute_settings: ComputeSettings | None = None, ): """ chunking wrapper for _schedule_tours @@ -949,7 +946,7 @@ def schedule_tours( tour_owner_id_col, estimator, tour_trace_label=chunk_trace_label, - sharrow_skip=sharrow_skip, + compute_settings=compute_settings, chunk_sizer=chunk_sizer, ) @@ -1102,7 +1099,7 @@ def vectorize_tour_scheduling( chunk_size=chunk_size, tour_trace_label=segment_trace_label, tour_chunk_tag=segment_chunk_tag, - sharrow_skip=tour_segment_info.get("sharrow_skip"), + compute_settings=tour_segment_info.get("compute_settings"), ) choice_list.append(choices) @@ -1132,7 +1129,7 @@ def vectorize_tour_scheduling( chunk_size=chunk_size, tour_trace_label=tour_trace_label, tour_chunk_tag=tour_chunk_tag, - sharrow_skip=tour_segments.get("sharrow_skip"), + compute_settings=tour_segments.get("compute_settings"), ) choice_list.append(choices) @@ -1152,7 +1149,7 @@ def vectorize_subtour_scheduling( estimator, chunk_size=0, trace_label=None, - sharrow_skip=False, + compute_settings: ComputeSettings | None = None, ): """ Like vectorize_tour_scheduling but specifically for atwork subtours @@ -1251,7 +1248,7 @@ def vectorize_subtour_scheduling( state.settings.chunk_size, tour_trace_label, 
tour_chunk_tag, - sharrow_skip=sharrow_skip, + compute_settings=compute_settings, ) choice_list.append(choices) @@ -1306,7 +1303,7 @@ def vectorize_joint_tour_scheduling( estimator, chunk_size=0, trace_label=None, - sharrow_skip=False, + compute_settings: ComputeSettings | None = None, ): """ Like vectorize_tour_scheduling but specifically for joint tours @@ -1399,7 +1396,7 @@ def vectorize_joint_tour_scheduling( chunk_size, tour_trace_label, tour_chunk_tag, - sharrow_skip=sharrow_skip, + compute_settings=compute_settings, ) # - update timetables of all joint tour participants diff --git a/activitysim/abm/models/vehicle_allocation.py b/activitysim/abm/models/vehicle_allocation.py index a341493ff..9dcaf8c71 100644 --- a/activitysim/abm/models/vehicle_allocation.py +++ b/activitysim/abm/models/vehicle_allocation.py @@ -247,6 +247,7 @@ def vehicle_allocation( trace_label=trace_label, trace_choice_name="vehicle_allocation", estimator=estimator, + compute_settings=model_settings.compute_settings, ) # matching alt names to choices diff --git a/activitysim/abm/models/vehicle_type_choice.py b/activitysim/abm/models/vehicle_type_choice.py index 813652459..cfe6fc398 100644 --- a/activitysim/abm/models/vehicle_type_choice.py +++ b/activitysim/abm/models/vehicle_type_choice.py @@ -485,6 +485,7 @@ def iterate_vehicle_type_choice( trace_choice_name="vehicle_type", estimator=estimator, explicit_chunk_size=model_settings.explicit_chunk, + compute_settings=model_settings.compute_settings, ) # otherwise, "simple simulation" should suffice, with a model spec that enumerates @@ -500,6 +501,7 @@ def iterate_vehicle_type_choice( trace_label=trace_label, trace_choice_name="vehicle_type", estimator=estimator, + compute_settings=model_settings.compute_settings, ) else: raise NotImplementedError(simulation_type) @@ -554,7 +556,7 @@ def iterate_vehicle_type_choice( return all_choices, all_choosers -class VehicleTypeChoiceSettings(LogitComponentSettings): +class 
VehicleTypeChoiceSettings(LogitComponentSettings, extra="forbid"): """ Settings for the `vehicle_type_choice` component. """ diff --git a/activitysim/abm/models/work_from_home.py b/activitysim/abm/models/work_from_home.py index 234302b70..8b96dafa1 100755 --- a/activitysim/abm/models/work_from_home.py +++ b/activitysim/abm/models/work_from_home.py @@ -50,9 +50,6 @@ class WorkFromHomeSettings(LogitComponentSettings, extra="forbid"): WORK_FROM_HOME_TARGET_PERCENT_TOLERANCE: float = None """Setting to set work from home target percent tolerance.""" - sharrow_skip: bool = False - """Setting to skip sharrow.""" - DEST_CHOICE_COLUMN_NAME: str = "workplace_zone_id" """Column name in persons dataframe to specify the workplace zone id. """ @@ -140,9 +137,6 @@ def work_from_home( state, model_spec, coefficients_df, estimator ) - if model_settings.sharrow_skip: - constants["disable_sharrow"] = True - choices = simulate.simple_simulate( state, choosers=choosers, @@ -152,6 +146,7 @@ def work_from_home( trace_label=trace_label, trace_choice_name="work_from_home", estimator=estimator, + compute_settings=model_settings.compute_settings, ) if iterations_target_percent is not None: diff --git a/activitysim/abm/tables/persons.py b/activitysim/abm/tables/persons.py index 8825ebe84..d5ab67fb5 100644 --- a/activitysim/abm/tables/persons.py +++ b/activitysim/abm/tables/persons.py @@ -99,7 +99,8 @@ def persons_merged( households, left_on="household_id", ) - if disaggregate_accessibility is not None and not disaggregate_accessibility.empty: + if state.is_table("disaggregate_accessibility"): + disaggregate_accessibility = state.get_table("disaggregate_accessibility") persons = simple_table_join( persons, disaggregate_accessibility, diff --git a/activitysim/core/configuration/base.py b/activitysim/core/configuration/base.py index 754865dc1..556dd9916 100644 --- a/activitysim/core/configuration/base.py +++ b/activitysim/core/configuration/base.py @@ -1,8 +1,10 @@ from __future__ import 
annotations +from contextlib import contextmanager from pathlib import Path from typing import Any, Literal, TypeVar, Union # noqa: F401 +import pandas as pd from pydantic import BaseModel as PydanticBase from activitysim.core import configuration @@ -126,3 +128,115 @@ class PreprocessorSettings(PydanticBase): number of merged tables as the memory requirements for the preprocessor will increase with each table. """ + + +class ComputeSettings(PydanticBase): + """ + Sharrow settings for a component. + """ + + sharrow_skip: bool | dict[str, bool] = False + """Skip sharrow when evaluating this component. + + This overrides the global sharrow setting, and is useful if you want to skip + sharrow for particular components, either because their specifications are + not compatible with sharrow or if the sharrow performance is known to be + poor on this component. + + When a component has multiple subcomponents, the `sharrow_skip` setting can be + a dictionary that maps the names of the subcomponents to boolean values. + For example, to skip sharrow for an OUTBOUND and OUTBOUND_COND subcomponent + but not the INBOUND subcomponent, use the following setting: + + ```yaml + sharrow_skip: + OUTBOUND: true + INBOUND: false + OUTBOUND_COND: true + ``` + + Alternatively, even for components with multiple subcomponents, the `sharrow_skip` + value can be a single boolean true or false, which will be used for all + subcomponents. + + """ + + fastmath: bool = True + """Use fastmath when evaluating this component with sharrow. + + The fastmath option can be used to speed up the evaluation of expressions in + this component's spec files, but it does so by making some simplifying + assumptions about the math, e.g. that neither inputs nor outputs of any + computations are NaN or Inf. This can lead to errors when the assumptions + are violated. If running in sharrow test mode generates errors, try turning + this setting off. 
+ """ + + use_bottleneck: bool | None = None + """Use the bottleneck library with pandas.eval. + + Set to True or False to force the use of bottleneck or not. If set to None, + the current pandas option setting of `compute.use_bottleneck` will be used. + + See https://pandas.pydata.org/docs/reference/api/pandas.set_option.html + for more information.""" + + use_numexpr: bool | None = None + """Use the numexpr library with pandas.eval. + + Set to True or False to force the use of numexpr or not. If set to None, + the current pandas option setting of `compute.use_numexpr` will be used. + + See https://pandas.pydata.org/docs/reference/api/pandas.set_option.html + for more information. + """ + + use_numba: bool | None = None + """Use the numba library with pandas.eval. + + Set to True or False to force the use of numba or not. If set to None, + the current pandas option setting of `compute.use_numba` will be used. + + See https://pandas.pydata.org/docs/reference/api/pandas.set_option.html + for more information. 
+ """ + + def should_skip(self, subcomponent: str) -> bool: + """Check if sharrow should be skipped for a particular subcomponent.""" + if isinstance(self.sharrow_skip, dict): + return self.sharrow_skip.get(subcomponent, False) + else: + return bool(self.sharrow_skip) + + @contextmanager + def pandas_option_context(self): + """Context manager to set pandas options for compute settings.""" + args = () + if self.use_bottleneck is not None: + args += ("compute.use_bottleneck", self.use_bottleneck) + if self.use_numexpr is not None: + args += ("compute.use_numexpr", self.use_numexpr) + if self.use_numba is not None: + args += ("compute.use_numba", self.use_numba) + if args: + with pd.option_context(*args): + yield + else: + yield + + def subcomponent_settings(self, subcomponent: str) -> ComputeSettings: + """Get the sharrow settings for a particular subcomponent.""" + return ComputeSettings( + sharrow_skip=self.should_skip(subcomponent), + fastmath=self.fastmath, + use_bottleneck=self.use_bottleneck, + use_numexpr=self.use_numexpr, + use_numba=self.use_numba, + ) + + +class PydanticCompute(PydanticReadable): + """Base class for component settings that include optional sharrow controls.""" + + compute_settings: ComputeSettings = ComputeSettings() + """Sharrow settings for this component.""" diff --git a/activitysim/core/configuration/logit.py b/activitysim/core/configuration/logit.py index a7f507aeb..9c93d9f97 100644 --- a/activitysim/core/configuration/logit.py +++ b/activitysim/core/configuration/logit.py @@ -1,12 +1,14 @@ from __future__ import annotations +import warnings from pathlib import Path from typing import Any, Literal +import pydantic from pydantic import BaseModel as PydanticBase -from pydantic import validator +from pydantic import model_validator, validator -from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable +from activitysim.core.configuration.base import PreprocessorSettings, PydanticCompute class 
LogitNestSpec(PydanticBase): @@ -43,7 +45,7 @@ def prefer_float_to_str(cls, coefficient_value): return coefficient_value -class BaseLogitComponentSettings(PydanticReadable): +class BaseLogitComponentSettings(PydanticCompute): """ Base configuration class for components that are logit models. @@ -75,8 +77,35 @@ class BaseLogitComponentSettings(PydanticReadable): CONSTANTS: dict[str, Any] = {} """Named constants usable in the utility expressions.""" - sharrow_skip: bool = False - """Skip sharrow when evaluating this component.""" + # sharrow_skip is deprecated in favor of compute_settings.sharrow_skip + @model_validator(mode="before") + @classmethod + def update_sharrow_skip(cls, data: Any) -> Any: + if isinstance(data, dict): + if "sharrow_skip" in data: + if "compute_settings" not in data: + # move to new format + data["compute_settings"] = {"sharrow_skip": data["sharrow_skip"]} + del data["sharrow_skip"] + warnings.warn( + "sharrow_skip is deprecated in favor of compute_settings.sharrow_skip", + DeprecationWarning, + ) + elif ( + isinstance(data["compute_settings"], dict) + and "sharrow_skip" not in data["compute_settings"] + ): + data["compute_settings"]["sharrow_skip"] = data["sharrow_skip"] + del data["sharrow_skip"] + warnings.warn( + "sharrow_skip is deprecated in favor of compute_settings.sharrow_skip", + DeprecationWarning, + ) + elif "sharrow_skip" in data["compute_settings"]: + raise ValueError( + "sharrow_skip and compute_settings.sharrow_skip cannot both be defined" + ) + return data class LogitComponentSettings(BaseLogitComponentSettings): @@ -134,7 +163,7 @@ def nests_are_for_nl(cls, nests, values): return nests -class TemplatedLogitComponentSettings(LogitComponentSettings, extra="forbid"): +class TemplatedLogitComponentSettings(LogitComponentSettings, extra="forbid"): """ Base configuration for segmented logit models with a coefficient template.
""" @@ -183,7 +212,7 @@ class TourLocationComponentSettings(LocationComponentSettings, extra="forbid"): DEST_CHOICE_SAMPLE_TABLE_NAME: str | None = None CHOOSER_TABLE_NAME: str | None = None CHOOSER_SEGMENT_COLUMN_NAME: str | None = None - SEGMENT_IDS: dict[str, int] | None = None + SEGMENT_IDS: dict[str, int] | dict[str, str] | dict[str, bool] | None = None SHADOW_PRICE_TABLE: str | None = None MODELED_SIZE_TABLE: str | None = None annotate_persons: PreprocessorSettings | None = None @@ -191,7 +220,7 @@ class TourLocationComponentSettings(LocationComponentSettings, extra="forbid"): SIMULATE_CHOOSER_COLUMNS: list[str] | None = None ALT_DEST_COL_NAME: str LOGSUM_TOUR_PURPOSE: str | dict[str, str] | None = None - MODEL_SELECTOR: Literal["workplace", "school", None] = None + MODEL_SELECTOR: str | None = None SAVED_SHADOW_PRICE_TABLE_NAME: str | None = None CHOOSER_ID_COLUMN: str = "person_id" @@ -203,7 +232,7 @@ class TourLocationComponentSettings(LocationComponentSettings, extra="forbid"): The number of alternatives to sample for estimation mode. If zero, then all alternatives are used. Truth alternative will be included in the sample. - Larch does not yet support sampling alternatives for estimation, + Larch does not yet support sampling alternatives for estimation, but this setting is still helpful for estimation mode runtime. 
""" diff --git a/activitysim/core/flow.py b/activitysim/core/flow.py index 92429e7d4..39f706a6c 100644 --- a/activitysim/core/flow.py +++ b/activitysim/core/flow.py @@ -16,6 +16,7 @@ import activitysim.core.skim_dataset # noqa: F401 from activitysim import __version__ from activitysim.core import tracing, workflow +from activitysim.core.configuration.base import ComputeSettings from activitysim.core.simulate_consts import SPEC_EXPRESSION_NAME, SPEC_LABEL_NAME from activitysim.core.timetable import ( sharrow_tt_adjacent_window_after, @@ -142,6 +143,7 @@ def get_flow( choosers=None, interacts=None, zone_layer=None, + compute_settings: ComputeSettings | None = None, ): extra_vars = only_simple(local_d) orig_col_name = local_d.get("orig_col_name", None) @@ -184,6 +186,7 @@ def get_flow( zone_layer=zone_layer, aux_vars=aux_vars, primary_origin_col_name=primary_origin_col_name, + compute_settings=compute_settings, ) flow.tree.aux_vars = aux_vars return flow @@ -465,6 +468,7 @@ def new_flow( zone_layer=None, aux_vars=None, primary_origin_col_name=None, + compute_settings: ComputeSettings | None = None, ): """ Setup a new sharrow flow. @@ -516,12 +520,15 @@ def new_flow( aux_vars : Mapping Extra values that are available to expressions and which are written only by reference into compiled code (and thus can be changed later). + compute_settings : ComputeSettings, optional + Settings for the sharrow flow. Returns ------- sharrow.Flow """ - + if compute_settings is None: + compute_settings = ComputeSettings() with logtime(f"setting up flow {trace_label}"): if choosers is None: chooser_cols = [] @@ -700,6 +707,7 @@ def _apply_filter(_dataset, renames: list): extra_hash_data=extra_hash_data, hashing_level=0, boundscheck=False, + fastmath=compute_settings.fastmath, ) @@ -750,6 +758,7 @@ def apply_flow( required=False, interacts=None, zone_layer=None, + compute_settings: ComputeSettings | None = None, ): """ Apply a sharrow flow. 
@@ -779,6 +788,8 @@ def apply_flow( Specify which zone layer of the skims is to be used. You cannot use the 'maz' zone layer in a one-zone model, but you can use the 'taz' layer in a two- or three-zone model (e.g. for destination pre-sampling). + compute_settings : ComputeSettings, optional + Settings for the sharrow flow, including for skipping and fastmath. Returns ------- @@ -807,6 +818,7 @@ def apply_flow( choosers=choosers, interacts=interacts, zone_layer=zone_layer, + compute_settings=compute_settings, ) except ValueError as err: if "unable to rewrite" in str(err): diff --git a/activitysim/core/interaction_sample.py b/activitysim/core/interaction_sample.py index 33f498029..27ba5ca74 100644 --- a/activitysim/core/interaction_sample.py +++ b/activitysim/core/interaction_sample.py @@ -16,6 +16,7 @@ workflow, util, ) +from activitysim.core.configuration.base import ComputeSettings from activitysim.core.skim_dataset import DatasetWrapper from activitysim.core.skim_dictionary import SkimWrapper @@ -133,6 +134,7 @@ def _interaction_sample( trace_label=None, zone_layer=None, chunk_sizer=None, + compute_settings: ComputeSettings | None = None, ): """ Run a MNL simulation in the situation in which alternatives must @@ -179,6 +181,9 @@ def _interaction_sample( 'maz' zone layer in a one-zone model, but you can use the 'taz' layer in a two- or three-zone model (e.g. for destination pre-sampling). 
+ compute_settings : ComputeSettings, optional + Settings to use if compiling with sharrow + Returns ------- choices_df : pandas.DataFrame @@ -224,6 +229,10 @@ def _interaction_sample( chooser_index_id = interaction_simulate.ALT_CHOOSER_ID if log_alt_losers else None sharrow_enabled = state.settings.sharrow + if compute_settings is None: + compute_settings = ComputeSettings() + if compute_settings.sharrow_skip: + sharrow_enabled = False # - cross join choosers and alternatives (cartesian product) # for every chooser, there will be a row for each alternative @@ -262,6 +271,7 @@ def _interaction_sample( log_alt_losers=log_alt_losers, extra_data=alternatives, zone_layer=zone_layer, + compute_settings=compute_settings, ) chunk_sizer.log_df(trace_label, "interaction_utilities", interaction_utilities) if sharrow_enabled == "test" or True: @@ -318,6 +328,7 @@ def _interaction_sample( estimator=None, log_alt_losers=log_alt_losers, zone_layer=zone_layer, + compute_settings=ComputeSettings(sharrow_skip=True), ) chunk_sizer.log_df(trace_label, "interaction_utilities", interaction_utilities) @@ -537,6 +548,7 @@ def interaction_sample( chunk_tag: str | None = None, trace_label: str | None = None, zone_layer: str | None = None, + compute_settings: ComputeSettings | None = None, ): """ Run a simulation in the situation in which alternatives must @@ -632,6 +644,7 @@ def interaction_sample( trace_label=chunk_trace_label, zone_layer=zone_layer, chunk_sizer=chunk_sizer, + compute_settings=compute_settings, ) if choices.shape[0] > 0: diff --git a/activitysim/core/interaction_sample_simulate.py b/activitysim/core/interaction_sample_simulate.py index bee7c79a7..e27716dfb 100644 --- a/activitysim/core/interaction_sample_simulate.py +++ b/activitysim/core/interaction_sample_simulate.py @@ -7,7 +7,9 @@ import numpy as np import pandas as pd + from activitysim.core import chunk, interaction_simulate, logit, tracing, workflow, util +from activitysim.core.configuration.base import 
ComputeSettings from activitysim.core.simulate import set_skim_wrapper_targets logger = logging.getLogger(__name__) @@ -31,6 +33,7 @@ def _interaction_sample_simulate( skip_choice=False, *, chunk_sizer: chunk.ChunkSizer, + compute_settings: ComputeSettings | None = None, ): """ Run a MNL simulation in the situation in which alternatives must @@ -197,6 +200,7 @@ def _interaction_sample_simulate( trace_rows, estimator=estimator, log_alt_losers=log_alt_losers, + compute_settings=compute_settings, ) chunk_sizer.log_df(trace_label, "interaction_utilities", interaction_utilities) @@ -391,6 +395,8 @@ def interaction_sample_simulate( trace_choice_name=None, estimator=None, skip_choice=False, + *, + compute_settings: ComputeSettings | None = None, ): """ Run a simulation in the situation in which alternatives must @@ -480,6 +486,7 @@ def interaction_sample_simulate( estimator, skip_choice, chunk_sizer=chunk_sizer, + compute_settings=compute_settings, ) result_list.append(choices) diff --git a/activitysim/core/interaction_simulate.py b/activitysim/core/interaction_simulate.py index dedc6759f..fbd375de6 100644 --- a/activitysim/core/interaction_simulate.py +++ b/activitysim/core/interaction_simulate.py @@ -14,6 +14,7 @@ from . 
import chunk, config, logit, simulate, tracing, workflow from activitysim.core import util +from .configuration.base import ComputeSettings logger = logging.getLogger(__name__) @@ -33,6 +34,7 @@ def eval_interaction_utilities( log_alt_losers=False, extra_data=None, zone_layer=None, + compute_settings: ComputeSettings | None = None, ): """ Compute the utilities for a single-alternative spec evaluated in the context of df @@ -83,13 +85,11 @@ def eval_interaction_utilities( logger.info("Running eval_interaction_utilities on %s rows" % df.shape[0]) sharrow_enabled = state.settings.sharrow - - if locals_d is not None and locals_d.get("_sharrow_skip", False): + if compute_settings is None: + compute_settings = ComputeSettings() + if compute_settings.sharrow_skip: sharrow_enabled = False - # if trace_label.startswith("trip_destination"): - # sharrow_enabled = False - logger.info(f"{trace_label} sharrow_enabled is {sharrow_enabled}") trace_eval_results = None @@ -185,6 +185,7 @@ def replace_in_index_level(mi, level, *repls): trace_label, interacts=extra_data, zone_layer=zone_layer, + compute_settings=compute_settings, ) if sh_util is not None: chunk_sizer.log_df(trace_label, "sh_util", sh_util) @@ -262,107 +263,112 @@ def to_series(x): exprs = spec.index labels = spec.index - for expr, label, coefficient in zip(exprs, labels, spec.iloc[:, 0]): - try: - # - allow temps of form _od_DIST@od_skim['DIST'] - if expr.startswith("_"): - target = expr[: expr.index("@")] - rhs = expr[expr.index("@") + 1 :] - v = to_series(eval(rhs, globals(), locals_d)) - - # update locals to allows us to ref previously assigned targets - locals_d[target] = v - chunk_sizer.log_df( - trace_label, target, v - ) # track temps stored in locals - - if trace_eval_results is not None: - trace_eval_results[expr] = v[trace_rows] + with compute_settings.pandas_option_context(): + for expr, label, coefficient in zip(exprs, labels, spec.iloc[:, 0]): + try: + # - allow temps of form _od_DIST@od_skim['DIST'] + if 
expr.startswith("_"): + target = expr[: expr.index("@")] + rhs = expr[expr.index("@") + 1 :] + v = to_series(eval(rhs, globals(), locals_d)) - # don't add temps to utility sums - # they have a non-zero dummy coefficient to avoid being removed from spec as NOPs - continue + # update locals to allows us to ref previously assigned targets + locals_d[target] = v + chunk_sizer.log_df( + trace_label, target, v + ) # track temps stored in locals - if expr.startswith("@"): - v = to_series(eval(expr[1:], globals(), locals_d)) - else: - v = df.eval(expr, resolvers=[locals_d]) + if trace_eval_results is not None: + trace_eval_results[expr] = v[trace_rows] - if check_for_variability and v.std() == 0: - logger.info( - "%s: no variability (%s) in: %s" - % (trace_label, v.iloc[0], expr) - ) - no_variability += 1 - - # FIXME - how likely is this to happen? Not sure it is really a problem? - if ( - check_for_variability - and np.count_nonzero(v.isnull().values) > 0 - ): - logger.info("%s: missing values in: %s" % (trace_label, expr)) - has_missing_vals += 1 - - if estimator: - # in case we modified expression_values_df index - expression_values_df.insert( - loc=len(expression_values_df.columns), - column=label, - value=v.values if isinstance(v, pd.Series) else v, - ) + # don't add temps to utility sums + # they have a non-zero dummy coefficient to avoid being removed from spec as NOPs + continue - utility = (v * coefficient).astype("float") + if expr.startswith("@"): + v = to_series(eval(expr[1:], globals(), locals_d)) + else: + v = df.eval(expr, resolvers=[locals_d]) - if log_alt_losers: - assert ALT_CHOOSER_ID in df - max_utils_by_chooser = utility.groupby(df[ALT_CHOOSER_ID]).max() + if check_for_variability and v.std() == 0: + logger.info( + "%s: no variability (%s) in: %s" + % (trace_label, v.iloc[0], expr) + ) + no_variability += 1 - if (max_utils_by_chooser < simulate.ALT_LOSER_UTIL).any(): - losers = max_utils_by_chooser[ - max_utils_by_chooser < simulate.ALT_LOSER_UTIL - ] 
- logger.warning( - f"{trace_label} - {len(losers)} choosers of {len(max_utils_by_chooser)} " - f"with prohibitive utilities for all alternatives for expression: {expr}" + # FIXME - how likely is this to happen? Not sure it is really a problem? + if ( + check_for_variability + and np.count_nonzero(v.isnull().values) > 0 + ): + logger.info( + "%s: missing values in: %s" % (trace_label, expr) + ) + has_missing_vals += 1 + + if estimator: + # in case we modified expression_values_df index + expression_values_df.insert( + loc=len(expression_values_df.columns), + column=label, + value=v.values if isinstance(v, pd.Series) else v, ) - # loser_df = df[df[ALT_CHOOSER_ID].isin(losers.index)] - # print(f"\nloser_df\n{loser_df}\n") - # print(f"\nloser_max_utils_by_chooser\n{losers}\n") - # bug + utility = (v * coefficient).astype("float") - del max_utils_by_chooser + if log_alt_losers: + assert ALT_CHOOSER_ID in df + max_utils_by_chooser = utility.groupby( + df[ALT_CHOOSER_ID] + ).max() - utilities.utility.values[:] += utility + if (max_utils_by_chooser < simulate.ALT_LOSER_UTIL).any(): + losers = max_utils_by_chooser[ + max_utils_by_chooser < simulate.ALT_LOSER_UTIL + ] + logger.warning( + f"{trace_label} - {len(losers)} choosers of {len(max_utils_by_chooser)} " + f"with prohibitive utilities for all alternatives for expression: {expr}" + ) - if trace_eval_results is not None: - # expressions should have been uniquified when spec was read - # (though we could do it here if need be...) 
- # expr = assign.uniquify_key(trace_eval_results, expr, template="{} # ({})") - assert expr not in trace_eval_results + # loser_df = df[df[ALT_CHOOSER_ID].isin(losers.index)] + # print(f"\nloser_df\n{loser_df}\n") + # print(f"\nloser_max_utils_by_chooser\n{losers}\n") + # bug - trace_eval_results[expr] = v[trace_rows] - k = "partial utility (coefficient = %s) for %s" % ( - coefficient, - expr, - ) - trace_eval_results[k] = v[trace_rows] * coefficient + del max_utils_by_chooser - del v - # chunk_sizer.log_df(trace_label, 'v', None) + utilities.utility.values[:] += utility - except Exception as err: - logger.exception( - f"{trace_label} - {type(err).__name__} ({str(err)}) evaluating: {str(expr)}" - ) - if isinstance( - err, AssertionError - ) and "od pairs not in skim" in str(err): - logger.warning( - f"recode_pipeline_columns is set to {state.settings.recode_pipeline_columns}, " - f"you may want to check this" + if trace_eval_results is not None: + # expressions should have been uniquified when spec was read + # (though we could do it here if need be...) 
+ # expr = assign.uniquify_key(trace_eval_results, expr, template="{} # ({})") + assert expr not in trace_eval_results + + trace_eval_results[expr] = v[trace_rows] + k = "partial utility (coefficient = %s) for %s" % ( + coefficient, + expr, + ) + trace_eval_results[k] = v[trace_rows] * coefficient + + del v + # chunk_sizer.log_df(trace_label, 'v', None) + + except Exception as err: + logger.exception( + f"{trace_label} - {type(err).__name__} ({str(err)}) evaluating: {str(expr)}" ) - raise err + if isinstance( + err, AssertionError + ) and "od pairs not in skim" in str(err): + logger.warning( + f"recode_pipeline_columns is set to {state.settings.recode_pipeline_columns}, " + f"you may want to check this" + ) + raise err if estimator: estimator.log( @@ -566,11 +572,13 @@ def to_series(x): retrace_eval_data_ = pd.concat(retrace_eval_data, axis=1) retrace_eval_parts_ = pd.concat(retrace_eval_parts, axis=1) - re_sh_flow_load = sh_flow.load( - dtype=np.float32, - ) + re_sh_flow_load = sh_flow.load(sh_tree, dtype=np.float32) re_sh_flow_load_ = re_sh_flow_load[re_trace] + use_bottleneck = pd.get_option("compute.use_bottleneck") + use_numexpr = pd.get_option("compute.use_numexpr") + use_numba = pd.get_option("compute.use_numba") + look_for_problems_here = np.where( ~np.isclose( re_sh_flow_load_[ @@ -607,6 +615,7 @@ def _interaction_simulate( log_alt_losers=False, estimator=None, chunk_sizer=None, + compute_settings: ComputeSettings | None = None, ): """ Run a MNL simulation in the situation in which alternatives must @@ -694,9 +703,14 @@ def _interaction_simulate( alt_index_id = estimator.get_alt_id() if estimator else None chooser_index_id = ALT_CHOOSER_ID if log_alt_losers else None - sharrow_enabled = state.settings.sharrow + if compute_settings is None: + compute_settings = ComputeSettings() + if compute_settings.sharrow_skip: + sharrow_enabled = False + else: + sharrow_enabled = state.settings.sharrow interaction_utilities = None - + # drop variables before the 
interaction dataframe is created # check if tracing is enabled and if we have trace targets @@ -711,9 +725,6 @@ def _interaction_simulate( sharrow_enabled=sharrow_enabled, ) - if locals_d is not None and locals_d.get("_sharrow_skip", False): - sharrow_enabled = False - if ( sharrow_enabled and skims is None @@ -736,6 +747,7 @@ def _interaction_simulate( estimator=estimator, log_alt_losers=log_alt_losers, extra_data=alternatives, + compute_settings=compute_settings, ) # set this index here as this is how later code extracts the chosen alt id's @@ -800,6 +812,7 @@ def _interaction_simulate( trace_rows, estimator=estimator, log_alt_losers=log_alt_losers, + compute_settings=compute_settings, ) chunk_sizer.log_df(trace_label, "interaction_utilities", interaction_utilities) # mem.trace_memory_info(f"{trace_label}.init interaction_utilities", force_garbage_collect=True) @@ -907,6 +920,7 @@ def interaction_simulate( trace_choice_name=None, estimator=None, explicit_chunk_size=0, + compute_settings: ComputeSettings | None = None, ): """ Run a simulation in the situation in which alternatives must @@ -984,6 +998,7 @@ def interaction_simulate( log_alt_losers=log_alt_losers, estimator=estimator, chunk_sizer=chunk_sizer, + compute_settings=compute_settings, ) result_list.append(choices) diff --git a/activitysim/core/pathbuilder.py b/activitysim/core/pathbuilder.py index 266a99832..31393ceea 100644 --- a/activitysim/core/pathbuilder.py +++ b/activitysim/core/pathbuilder.py @@ -20,6 +20,7 @@ tracing, workflow, ) +from activitysim.core.configuration.base import ComputeSettings from activitysim.core.pathbuilder_cache import memo from activitysim.core.util import reindex @@ -56,7 +57,7 @@ def compute_utilities( f"{trace_label} Running compute_utilities with {choosers.shape[0]} choosers" ) - locals_dict = {"np": np, "los": network_los, "disable_sharrow": True} + locals_dict = {"np": np, "los": network_los} locals_dict.update(model_constants) # we don't grok coefficients, but allow 
them to use constants in spec alt columns @@ -88,6 +89,7 @@ def compute_utilities( trace_label=trace_label, trace_column_names=trace_column_names, chunk_sizer=chunk_sizer, + compute_settings=ComputeSettings(sharrow_skip=True), ) return utilities diff --git a/activitysim/core/simulate.py b/activitysim/core/simulate.py index db67687c6..0a3e598c3 100644 --- a/activitysim/core/simulate.py +++ b/activitysim/core/simulate.py @@ -25,7 +25,7 @@ util, workflow, ) -from activitysim.core.configuration.base import PydanticBase +from activitysim.core.configuration.base import ComputeSettings, PydanticBase from activitysim.core.configuration.logit import ( BaseLogitComponentSettings, LogitNestSpec, @@ -57,7 +57,7 @@ def random_rows(state: workflow.State, df, n): return df -def uniquify_spec_index(spec): +def uniquify_spec_index(spec: pd.DataFrame): # uniquify spec index inplace # ensure uniqueness of spec index by appending comment with dupe count # this allows us to use pandas dot to compute_utilities @@ -532,6 +532,7 @@ def eval_utilities( spec_sh=None, *, chunk_sizer, + compute_settings: ComputeSettings | None = None, ): """ Evaluate a utility function as defined in a spec file. @@ -571,6 +572,8 @@ def eval_utilities( This is meant to give the same result, but allows for some optimizations or preprocessing outside the sharrow framework (e.g. to run the Python based transit virtual path builder and cache relevant values). + compute_settings : ComputeSettings, optional + Settings for sharrow. If not given, the default settings are used. 
Returns ------- @@ -592,7 +595,9 @@ def eval_utilities( if spec_sh is None: spec_sh = spec - if locals_d is not None and "disable_sharrow" in locals_d: + if compute_settings is None: + compute_settings = ComputeSettings() + if compute_settings.sharrow_skip: sharrow_enabled = False if sharrow_enabled: @@ -610,6 +615,7 @@ def eval_utilities( trace_label, sharrow_enabled == "require", zone_layer=zone_layer, + compute_settings=compute_settings, ) utilities = sh_util timelogger.mark("sharrow flow", True, logger, trace_label) @@ -641,42 +647,43 @@ def eval_utilities( chunk_sizer.log_df(trace_label, "expression_values", expression_values) i = 0 - for expr, coefficients in zip(exprs, spec.values): - try: - with warnings.catch_warnings(record=True) as w: - # Cause all warnings to always be triggered. - warnings.simplefilter("always") - if expr.startswith("@"): - expression_value = eval(expr[1:], globals_dict, locals_dict) - else: - expression_value = choosers.eval(expr) - - if len(w) > 0: - for wrn in w: - logger.warning( - f"{trace_label} - {type(wrn).__name__} ({wrn.message}) evaluating: {str(expr)}" - ) - - except Exception as err: - logger.exception( - f"{trace_label} - {type(err).__name__} ({str(err)}) evaluating: {str(expr)}" - ) - raise err - - if log_alt_losers: - # utils for each alt for this expression - # FIXME if we always did tis, we cold uem these and skip np.dot below - utils = np.outer(expression_value, coefficients) - losers = np.amax(utils, axis=1) < ALT_LOSER_UTIL - - if losers.any(): - logger.warning( - f"{trace_label} - {sum(losers)} choosers of {len(losers)} " - f"with prohibitive utilities for all alternatives for expression: {expr}" + with compute_settings.pandas_option_context(): + for expr, coefficients in zip(exprs, spec.values): + try: + with warnings.catch_warnings(record=True) as w: + # Cause all warnings to always be triggered. 
+ warnings.simplefilter("always") + if expr.startswith("@"): + expression_value = eval(expr[1:], globals_dict, locals_dict) + else: + expression_value = choosers.eval(expr) + + if len(w) > 0: + for wrn in w: + logger.warning( + f"{trace_label} - {type(wrn).__name__} ({wrn.message}) evaluating: {str(expr)}" + ) + + except Exception as err: + logger.exception( + f"{trace_label} - {type(err).__name__} ({str(err)}) evaluating: {str(expr)}" ) + raise err + + if log_alt_losers: + # utils for each alt for this expression + # FIXME if we always did this, we could use these and skip np.dot below + utils = np.outer(expression_value, coefficients) + losers = np.amax(utils, axis=1) < ALT_LOSER_UTIL + + if losers.any(): + logger.warning( + f"{trace_label} - {sum(losers)} choosers of {len(losers)} " + f"with prohibitive utilities for all alternatives for expression: {expr}" + ) - expression_values[i] = expression_value - i += 1 + expression_values[i] = expression_value + i += 1 chunk_sizer.log_df(trace_label, "expression_values", expression_values) @@ -1157,6 +1164,7 @@ def eval_mnl( trace_column_names=None, *, chunk_sizer, + compute_settings: ComputeSettings | None = None, ): """ Run a simulation for when the model spec does not involve alternative @@ -1220,6 +1228,7 @@ def eval_mnl( estimator=estimator, trace_column_names=trace_column_names, chunk_sizer=chunk_sizer, + compute_settings=compute_settings, ) chunk_sizer.log_df(trace_label, "utilities", utilities) @@ -1278,6 +1287,7 @@ def eval_nl( trace_column_names=None, *, chunk_sizer: chunk.ChunkSizer, + compute_settings: ComputeSettings | None = None, ): """ Run a nested-logit simulation for when the model spec does not involve alternative @@ -1308,6 +1318,8 @@ def eval_nl( This is the column label to be used in trace file csv dump of choices trace_column_names: str or list of str chooser columns to include when tracing expression_values + compute_settings : ComputeSettings, optional + Settings for sharrow. If not given, the default settings are used. 
Returns ------- @@ -1339,6 +1351,7 @@ def eval_nl( trace_column_names=trace_column_names, spec_sh=spec_sh, chunk_sizer=chunk_sizer, + compute_settings=compute_settings, ) chunk_sizer.log_df(trace_label, "raw_utilities", raw_utilities) @@ -1465,6 +1478,7 @@ def _simple_simulate( trace_column_names=None, *, chunk_sizer, + compute_settings: ComputeSettings | None = None, ): """ Run an MNL or NL simulation for when the model spec does not involve alternative @@ -1545,6 +1559,7 @@ def _simple_simulate( trace_choice_name=trace_choice_name, trace_column_names=trace_column_names, chunk_sizer=chunk_sizer, + compute_settings=compute_settings, ) else: choices = eval_nl( @@ -1561,6 +1576,7 @@ def _simple_simulate( trace_choice_name=trace_choice_name, trace_column_names=trace_column_names, chunk_sizer=chunk_sizer, + compute_settings=compute_settings, ) return choices @@ -1599,6 +1615,7 @@ def simple_simulate( trace_label=None, trace_choice_name=None, trace_column_names=None, + compute_settings: ComputeSettings | None = None, ): """ Run an MNL or NL simulation for when the model spec does not involve alternative @@ -1633,6 +1650,7 @@ def simple_simulate( trace_choice_name=trace_choice_name, trace_column_names=trace_column_names, chunk_sizer=chunk_sizer, + compute_settings=compute_settings, ) result_list.append(choices) @@ -1660,6 +1678,7 @@ def simple_simulate_by_chunk_id( estimator=None, trace_label=None, trace_choice_name=None, + compute_settings: ComputeSettings | None = None, ): """ chunk_by_chunk_id wrapper for simple_simulate @@ -1686,6 +1705,7 @@ def simple_simulate_by_chunk_id( trace_label=chunk_trace_label, trace_choice_name=trace_choice_name, chunk_sizer=chunk_sizer, + compute_settings=compute_settings, ) result_list.append(choices) @@ -1699,7 +1719,14 @@ def simple_simulate_by_chunk_id( def eval_mnl_logsums( - state: workflow.State, choosers, spec, locals_d, trace_label=None, *, chunk_sizer + state: workflow.State, + choosers, + spec, + locals_d, + trace_label=None, + 
*, + chunk_sizer, + compute_settings: ComputeSettings | None = None, ): """ like eval_nl except return logsums instead of making choices @@ -1729,6 +1756,7 @@ def eval_mnl_logsums( trace_label, have_trace_targets, chunk_sizer=chunk_sizer, + compute_settings=compute_settings, ) chunk_sizer.log_df(trace_label, "utilities", utilities) @@ -1842,6 +1870,7 @@ def eval_nl_logsums( trace_label=None, *, chunk_sizer: chunk.ChunkSizer, + compute_settings: ComputeSettings | None = None, ): """ like eval_nl except return logsums instead of making choices @@ -1872,6 +1901,7 @@ def eval_nl_logsums( have_trace_targets=have_trace_targets, spec_sh=spec_sh, chunk_sizer=chunk_sizer, + compute_settings=compute_settings, ) chunk_sizer.log_df(trace_label, "raw_utilities", raw_utilities) @@ -1922,6 +1952,7 @@ def _simple_simulate_logsums( trace_label=None, *, chunk_sizer, + compute_settings: ComputeSettings | None = None, ): """ like simple_simulate except return logsums instead of making choices @@ -1957,6 +1988,7 @@ def _simple_simulate_logsums( locals_d, trace_label=trace_label, chunk_sizer=chunk_sizer, + compute_settings=compute_settings, ) else: logsums = eval_nl_logsums( @@ -1967,6 +1999,7 @@ def _simple_simulate_logsums( locals_d, trace_label=trace_label, chunk_sizer=chunk_sizer, + compute_settings=compute_settings, ) return logsums @@ -1983,6 +2016,7 @@ def simple_simulate_logsums( chunk_size=0, trace_label=None, chunk_tag=None, + compute_settings: ComputeSettings | None = None, ): """ like simple_simulate except return logsums instead of making choices @@ -2015,6 +2049,7 @@ def simple_simulate_logsums( locals_d, chunk_trace_label, chunk_sizer=chunk_sizer, + compute_settings=compute_settings, ) result_list.append(logsums) diff --git a/activitysim/examples/prototype_arc/configs/auto_ownership.yaml b/activitysim/examples/prototype_arc/configs/auto_ownership.yaml index 9e85945f9..09540f154 100644 --- a/activitysim/examples/prototype_arc/configs/auto_ownership.yaml +++ 
b/activitysim/examples/prototype_arc/configs/auto_ownership.yaml @@ -17,8 +17,3 @@ NESTS: SPEC: auto_ownership.csv COEFFICIENTS: auto_ownership_coeffs.csv - -LOGSUM_CHOOSER_COLUMNS: - - num_drivers - - num_workers - \ No newline at end of file diff --git a/activitysim/examples/prototype_mtc_extended/configs/school_escorting.yaml b/activitysim/examples/prototype_mtc_extended/configs/school_escorting.yaml index 31527cb1d..ff04d214e 100644 --- a/activitysim/examples/prototype_mtc_extended/configs/school_escorting.yaml +++ b/activitysim/examples/prototype_mtc_extended/configs/school_escorting.yaml @@ -1,10 +1,8 @@ -# The school escort model as written in this prototype is not -# compatible with sharrow, so "sharrow_skip" must be activated here. -# Currently the spec file has a few lines that evaluate differently in -# the sharrow implementation, resulting in failure that are flagged by -# the `test` mode. Once these are fixed (and string comparisons are -# minimized for performance) this `sharrow_skip` setting can be removed. -sharrow_skip: true +# Some data values in the spec file will refer to missing values stored +# as NaN in the data. This requires `compute_settings: fastmath` to +# be set to `false` to avoid errors in the sharrow implementation. 
+compute_settings: + fastmath: false OUTBOUND_SPEC: school_escorting_outbound.csv OUTBOUND_COEFFICIENTS: school_escorting_coefficients_outbound.csv diff --git a/activitysim/examples/prototype_mtc_extended/configs/vehicle_type_choice.yaml b/activitysim/examples/prototype_mtc_extended/configs/vehicle_type_choice.yaml index d0b9a91f6..38ceca185 100644 --- a/activitysim/examples/prototype_mtc_extended/configs/vehicle_type_choice.yaml +++ b/activitysim/examples/prototype_mtc_extended/configs/vehicle_type_choice.yaml @@ -2,7 +2,7 @@ SPEC: vehicle_type_choice_op4.csv COEFFICIENTS: vehicle_type_choice_op4_coefficients.csv -ALTS: vehicle_type_choice_op4_alternatives.csv +#ALTS: vehicle_type_choice_op4_alternatives.csv # SPEC: vehicle_type_choice_op2.csv # COEFFICIENTS: vehicle_type_choice_op2_coefficients.csv diff --git a/docs/dev-guide/using-sharrow.md b/docs/dev-guide/using-sharrow.md index 1feb8178a..e2b0093d4 100644 --- a/docs/dev-guide/using-sharrow.md +++ b/docs/dev-guide/using-sharrow.md @@ -213,8 +213,14 @@ as needed. For models with utility expressions that include a lot of string comparisons, (e.g. because they are built for the legacy `pandas.eval` interpreter and have not -been updated) sharrow can be disabled by setting `sharrow_skip: true` in the -component's configuration yaml file. +been updated) sharrow can be disabled by setting + +```yaml +compute_settings: + sharrow_skip: true +``` + +in the component's configuration yaml file. ### Multiprocessing Performance