"""Heart Transplant Data, Miller 1976""" __docformat__ = 'restructuredtext' COPYRIGHT = """???""" TITLE = """Transplant Survival Data""" SOURCE = """ Miller, R. (1976). Least squares regression with censored dara. Biometrica, 63 (3). 449-464. """ DESCRSHORT = """Survival times after receiving a heart transplant""" DESCRLONG = """This data contains the survival time after receiving a heart transplant, the age of the patient and whether or not the survival time was censored. """ NOTE = """:: Number of Observations - 69 Number of Variables - 3 Variable name definitions:: death - Days after surgery until death age - age at the time of surgery censored - indicates if an observation is censored. 1 is uncensored """ import numpy as np from statsmodels.datasets import utils as du from os.path import dirname, abspath def load(): """ Load the data and return a Dataset class instance. Returns ------- Dataset instance: See DATASET_PROPOSAL.txt for more information. """ data = _get_data() ##### SET THE INDICES ##### #NOTE: None for exog_idx is the complement of endog_idx dset = du.process_recarray(data, endog_idx=0, exog_idx=None, dtype=float) dset.censors = dset.exog[:,0] dset.exog = dset.exog[:,1] return dset def load_pandas(): data = _get_data() ##### SET THE INDICES ##### #NOTE: None for exog_idx is the complement of endog_idx return du.process_recarray_pandas(data, endog_idx=0, exog_idx=None, dtype=float) def _get_data(): filepath = dirname(abspath(__file__)) ##### EDIT THE FOLLOWING TO POINT TO DatasetName.csv ##### data = np.recfromtxt(open(filepath + '/heart.csv', 'rb'), delimiter=",", names = True, dtype=float) return data