Skip to content

Issue with pandas upgrade #46

Closed
@TianyuDu

Description

@TianyuDu

Description of the Issue

When running the following code against the main branch:

import warnings
warnings.filterwarnings("ignore")

import random
from time import time
import numpy as np
import pandas as pd
import torch
import torch_choice
from torch_choice import run
from tqdm import tqdm
from torch_choice.data import ChoiceDataset, JointDataset, utils, load_mode_canada_dataset, load_house_cooling_dataset_v1
from torch_choice.model import ConditionalLogitModel, NestedLogitModel

# set the random seed to enforce reproducibility.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.use_deterministic_algorithms(True)

car_choice = pd.read_csv("./tutorials/public_datasets/car_choice.csv")
car_choice.head()

user_observable_columns=["gender", "income"]
from torch_choice.utils.easy_data_wrapper import EasyDatasetWrapper
data_wrapper_from_columns = EasyDatasetWrapper(
    main_data=car_choice,
    purchase_record_column='record_id',
    choice_column='purchase',
    item_name_column='car',
    user_index_column='consumer_id',
    session_index_column='session_id',
    user_observable_columns=['gender', 'income'],
    item_observable_columns=['speed'],
    session_observable_columns=['discount'],
    itemsession_observable_columns=['price'])

data_wrapper_from_columns.summary()
dataset = data_wrapper_from_columns.choice_dataset
# ChoiceDataset(label=[], item_index=[885], provided_num_items=[], user_index=[885], session_index=[885], item_availability=[885, 4], item_speed=[4, 1], user_gender=[885, 1], user_income=[885, 1], session_discount=[885, 1], itemsession_price=[885, 4, 1], device=cpu)

Depending on the pandas version, one may encounter the following pandas error:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[1], line 27
     25 user_observable_columns=["gender", "income"]
     26 from torch_choice.utils.easy_data_wrapper import EasyDatasetWrapper
---> 27 data_wrapper_from_columns = EasyDatasetWrapper(
     28     main_data=car_choice,
     29     purchase_record_column='record_id',
     30     choice_column='purchase',
     31     item_name_column='car',
     32     user_index_column='consumer_id',
     33     session_index_column='session_id',
     34     user_observable_columns=['gender', 'income'],
     35     item_observable_columns=['speed'],
     36     session_observable_columns=['discount'],
     37     itemsession_observable_columns=['price'])
     39 data_wrapper_from_columns.summary()
     40 dataset = data_wrapper_from_columns.choice_dataset

File ~/Development/torch-choice/torch_choice/utils/easy_data_wrapper.py:142, in EasyDatasetWrapper.__init__(self, main_data, purchase_record_column, item_name_column, choice_column, user_index_column, session_index_column, user_observable_data, item_observable_data, useritem_observable_data, session_observable_data, price_observable_data, itemsession_observable_data, useritemsession_observable_data, user_observable_columns, item_observable_columns, useritem_observable_columns, session_observable_columns, price_observable_columns, itemsession_observable_columns, useritemsession_observable_columns, device)
    135 self.derive_observable_from_main_data(item_observable_columns,
    136                                       user_observable_columns,
    137                                       session_observable_columns,
    138                                       price_observable_columns)
    140 self.observable_data_to_observable_tensors()
--> 142 self.create_choice_dataset()
    143 print('Finished Creating Choice Dataset.')

File ~/Development/torch-choice/torch_choice/utils/easy_data_wrapper.py:303, in EasyDatasetWrapper.create_choice_dataset(self)
    301 if len(np.unique(choice_set_size)) > 1:
    302     print(f'Note: choice sets of different sizes found in different purchase records: {rep}')
--> 303     self.item_availability = self.get_item_availability_tensor()
    304 else:
    305     # None means all items are available.
    306     self.item_availability = None

File ~/Development/torch-choice/torch_choice/utils/easy_data_wrapper.py:349, in EasyDatasetWrapper.get_item_availability_tensor(self)
    347 if self.session_index_column is None:
    348     raise ValueError(f'Item availability cannot be constructed without session index column.')
--> 349 A = self.main_data.pivot(self.session_index_column, self.item_name_column, self.choice_column)
    350 return torch.BoolTensor(~np.isnan(A.values))

TypeError: pivot() takes 1 positional argument but 4 were given

Cause of the Issue

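After the pandas 2.0.0 upgrade, the signature of the pivot method changed (see this issue): its arguments are now keyword-only, so positional arguments are no longer accepted. Previously we could simply write df.pivot("A", "B", "C"), but now we need to write df.pivot(index="A", columns="B", values="C"). For the failing call in get_item_availability_tensor, this means writing self.main_data.pivot(index=self.session_index_column, columns=self.item_name_column, values=self.choice_column), which also works on older pandas versions.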

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions