Source code for labbookdb.report.selection

import pandas as pd
from labbookdb.db import query
from labbookdb.report.utilities import concurrent_cagetreatment

[docs]def animal_id(db_path, database, identifier, reverse=False):
	"""Return the main LabbookDB animal identifier given an external database identifier.

	Parameters
	----------

	db_path : string
		Path to the database file to query.
	database : string
		Valid `AnimalExternalIdentifier.database` value.
	identifier : string
		Valid `AnimalExternalIdentifier.identifier` value.
	reverse : bool, optional
		Whether to reverse the query.
		A reverse query means that a LabbookDB `Animal.id` filter is applied and an `AnimalExternalIdentifier.identifier` value is returned.

	Returns
	-------
	int
		LabbookDB animal identifier.
	"""

	col_entries=[
		("Animal","id"),
		("AnimalExternalIdentifier",),
		]
	join_entries=[
		("Animal.external_ids",),
		]

	my_filters = []
	if reverse:
		my_filter = ["Animal","id",identifier]
		my_filters.append(my_filter)
	else:
		my_filter = ["AnimalExternalIdentifier","identifier",identifier]
		my_filters.append(my_filter)
	my_filter = ["AnimalExternalIdentifier","database",database]
	my_filters.append(my_filter)

	df = query.get_df(db_path,col_entries=col_entries, join_entries=join_entries, filters=my_filters)
	try:
		if reverse:
			labbookdb_id = df['AnimalExternalIdentifier_identifier'].item()
		else:
			labbookdb_id = df['Animal_id'].item()
	except ValueError:
		labbookdb_id = 'FailedIDQuery'
		print('This may be happening because the identifier query value you have provided is of the wrong type (LabbookDB lookups are type-sensitive).')

	return labbookdb_id

[docs]def stimulation_protocol(db_path, code):
	"""Select a `pandas.DataFrame`object containing all events and associated measurement units for a specific stimulation protocol.

	Parameters
	----------

	db_path : string
		Path to the database file to query.

	code : string
		Code (valid `StimulationProtocol.code` value) which identifies the stimulation protocol to format.
	"""

	col_entries=[
		("StimulationProtocol",),
		("StimulationEvent",),
		("MeasurementUnit",),
		]
	join_entries=[
		("StimulationProtocol.events",),
		("StimulationEvent.unit",),
		]
	my_filters=[]
	my_filter = ["StimulationProtocol","code",code]
	my_filters.append(my_filter)

	df = query.get_df(db_path,col_entries=col_entries, join_entries=join_entries, filters=my_filters)

	return df

[docs]def animals_by_cage_treatment(db_path,
	codes=[],
	end_dates=[],
	start_dates=[],
	):
	"""Select a dataframe of animals and all related tables through to cage treatments based on cage_treatment filters.

	Parameters
	----------

	db_path : string
	Path to a LabbookDB formatted database.

	codes : list, optional
	Related TreatmentProtocol.code values based on which to filter dataframe

	end_dates : list, optional
	Related Treatment.end_date values based on which to filter dataframe

	start_dates : list, optional
	Related Treatment.start_date values based on which to filter dataframe
	"""

	col_entries=[
		("Animal","id"),
		("Cage","id"),
		("Treatment",),
		("TreatmentProtocol","code"),
		]
	join_entries=[
		("Animal.cage_stays",),
		("CageStay.cage",),
		("Cage.treatments",),
		("Treatment.protocol",),
		]
	my_filters=[]
	if codes:
		my_filter = ["TreatmentProtocol","code"]
		my_filter.extend(codes)
		my_filters.append(my_filter)
	if end_dates:
		my_filter = ["Treatment","end_date"]
		my_filter.extend(end_dates)
		my_filters.append(my_filter)
	if start_dates:
		my_filter = ["Treatment","start_date"]
		my_filter.extend(start_dates)
		my_filters.append(my_filter)
	if not my_filters:
		my_filters=[None]

	df = query.get_df(db_path,col_entries=col_entries, join_entries=join_entries, filters=my_filters)

	return df

[docs]def animals_by_treatment(db_path,
	codes=[],
	end_dates=[],
	start_dates=[],
	):
	"""Select a dataframe of animals and all related tables through to treatments based on treatment filters.

	Parameters
	----------

	db_path : string
	Path to a LabbookDB formatted database.

	codes : list, optional
	Related TreatmentProtocol.code values based on which to filter dataframe

	end_dates : list, optional
	Related Treatment.end_date values based on which to filter dataframe

	start_dates : list, optional
	Related Treatment.start_date values based on which to filter dataframe
	"""

	col_entries=[
		("Animal","id"),
		("Treatment",),
		("TreatmentProtocol","code"),
		]
	join_entries=[
		("Animal.treatments",),
		("Treatment.protocol",),
		]
	my_filters=[]
	if codes:
		my_filter = ["TreatmentProtocol","code"]
		my_filter.extend(codes)
		my_filters.append(my_filter)
	if end_dates:
		my_filter = ["Treatment","end_date"]
		my_filter.extend(end_dates)
		my_filters.append(my_filter)
	if start_dates:
		my_filter = ["Treatment","start_date"]
		my_filter.extend(start_dates)
		my_filters.append(my_filter)
	if not my_filters:
		my_filters=[None]

	df = query.get_df(db_path,col_entries=col_entries, join_entries=join_entries, filters=my_filters)

	return df

[docs]def animal_operations(db_path,
	animal_ids=[],
	implant_targets=[],
	virus_targets=[],
	):
	"""Select a dataframe of animals having been subjected to operative interventions with the given anatomical targets.

	Parameters
	----------

	db_path : str
	Path to a LabbookDB formatted database.

	animal_ids : list, optional
		A List of LabbookDB `Animal.id` values by which to filter the query.
		It is faster to filter using this mechanism than to return a dataframe for all animals and then filter that.

	implant_targets : list, optional
		A List of LabbookDB `OrthogonalStereotacticTarget.code` values which should be used to filter the query, while being joined to `Operation` objects via the `OpticFiberImplantProtocol` class.
		It is faster to filter using this mechanism than to return a dataframe for all animals and then filter that.

	virus_targets : list, optional
		A List of LabbookDB `OrthogonalStereotacticTarget.code` values which should be used to filter the query, while being joined to `Operation` objects via the `VirusInjectionProtocol` class.
		It is faster to filter using this mechanism than to return a dataframe for all animals and then filter that.

	Notes
	-----

	CAREFUL: Providing both `implant_targets` and `virus_targets` will return entries including only animals which have undergone an operation which has included protocols targeting both areas.
	If the areas were targeted by protocols included in different operations, the correspondence will not be detected.
	To obtain such a list please call the function twice and create a new dataframe from the intersection of the inputs on the `Animal_id` column.
	"""

	filters = []
	join_type = 'outer'
	col_entries=[
		("Animal","id"),
		("Operation",),
		("OpticFiberImplantProtocol",),
		("VirusInjectionProtocol",),
		("OrthogonalStereotacticTarget",),
		("Virus","OrthogonalStereotacticTarget",""),
		]
	join_entries=[
		("Animal.operations",),
		("OpticFiberImplantProtocol","Operation.protocols"),
		("VirusInjectionProtocol","Operation.protocols"),
		("OrthogonalStereotacticTarget","OpticFiberImplantProtocol.stereotactic_target"),
		("Virus_OrthogonalStereotacticTarget","VirusInjectionProtocol.stereotactic_target"),
		]
	my_filter=[]
	if animal_ids:
		my_filter = ["Animal","id"]
		my_filter.extend(animal_ids)
	if implant_targets:
		my_filter = ["OrthogonalStereotacticTarget","code"]
		my_filter.extend(implant_targets)
	if virus_targets:
		my_filter = ["Virus_OrthogonalStereotacticTarget","code"]
		my_filter.extend(virus_targets)
	filters.append(my_filter)

	df = query.get_df(db_path, col_entries=col_entries, join_entries=join_entries, filters=filters, default_join=join_type)

	return df


[docs]def animal_treatments(db_path,
	animal_ids=[],
	animal_treatments=[],
	cage_treatments=[],
	conjunctive=True,
	):
	"""Select a dataframe of animals and all treatments including animal-level or cage-level treatments.

	Parameters
	----------

	db_path : str
		Path to a LabbookDB formatted database.

	animal_ids : list, optional
		A List of LabbookDB `Animal.id` values by which to filter the query.
		It is faster to filter using this mechanism than to return a dataframe for all animals and then filter that.

	animal_treatments : list, optional
		A List of LabbookDB `Treatment.code` values which should be used to filter the query, while being joined to `Animal` objects.

	cage_treatments : list, optional
		A List of LabbookDB `Treatment.code` values which should be used to filter the query, while being joined to `Cage` objects - and further to `Animal` objects via `CageStay` objects.
		An onset check is also applied by the function, to ascertain that there is an overlap between the animal's presence in the cage and the cage treatment application.

	conjunctive : bool, optional
		Whether both `cage_treatments` and `animal_treatments` need to be satisfied (statements within each list are always disjunctive).

	Notes
	-----

	Currently `conjunctive=False` does not work; cage treatment and animal treatment filters are always conjunctive.
	"""

	filters = []
	join_type = 'outer'
	col_entries=[
		("Animal","id"),
		("Animal","death_date"),
		("Treatment",),
		("TreatmentProtocol","code"),
		("CageStay","start_date"),
		("Cage","id"),
		("Cage","Treatment",""),
		("Cage","TreatmentProtocol","code"),
		]
	join_entries=[
		("Animal.treatments",),
		("Treatment.protocol",),
		("Animal.cage_stays",),
		("CageStay.cage",),
		("Cage_Treatment","Cage.treatments"),
		("Cage_TreatmentProtocol","Cage_Treatment.protocol"),
		]
	my_filter=[]
	if animal_ids:
		my_filter = ["Animal","id"]
		my_filter.extend(animal_ids)
	if animal_treatments:
		my_filter = ["TreatmentProtocol","code"]
		my_filter.extend(animal_treatments)
	if cage_treatments:
		my_filter = ["Cage_TreatmentProtocol","code"]
		my_filter.extend(cage_treatments)
	filters.append(my_filter)

	df = query.get_df(db_path, col_entries=col_entries, join_entries=join_entries, filters=filters, default_join=join_type)

	# Generally dataframe operations should be performed in `labbookdb.report.tracking`, however, if animals are selected by cage treatment, we need to determine which animals actually received the treatment.
	# The following is therefore a selection issue.
	cage_treatment_columns = ['Cage_Treatment_id','Cage_Treatment_end_date','Cage_Treatment_start_date','Cage_TreatmentProtocol_code','Cage_Treatment_protocol_id']
	animals = list(df["Animal_id"].unique())
	cage_stays = cage_periods(db_path, animal_filter=animals)
	df = concurrent_cagetreatment(df, cage_stays)

	# The concurrent cage treatment function cannot delete the entire entry for animals which have only one entry in the dataframe.
	# This is because the row may still contain unique animal treatment information, which would otherwise be lost.
	# As the `concurrent_cagetreatment()` function is unaware of this context, we perform this filtering here.
	if cage_treatments:
		df = df.loc[df['Cage_TreatmentProtocol_code'].isin(cage_treatments)]

	return df


[docs]def by_animals(db_path, select, animals):
	"""Select a dataframe of animals and all related tables through to the "select" table based on animal filters.

	Parameters
	----------

	db_path : str
	Path to a LabbookDB formatted database.

	select : str
	For which kind of evaluation to select dataframe.

	animals : list of str
	Animal.id values based on which to filter dataframe
	"""

	accepted_select_values = ["sucrose preference","timetable"]

	if select == "sucrose preference":
		join_types = ["inner","inner","inner"]
		col_entries=[
			("Animal","id"),
			("Cage","id"),
			("SucrosePreferenceMeasurement",),
			]
		join_entries=[
			("Animal.cage_stays",),
			("CageStay.cage",),
			("SucrosePreferenceMeasurement",),
			]
	elif select == "timetable":
		col_entries=[
			("Animal","id"),
			("Treatment",),
			("FMRIMeasurement","date"),
			("OpenFieldTestMeasurement","date"),
			("ForcedSwimTestMeasurement","date"),
			("TreatmentProtocol","code"),
			("Cage","id"),
			("Cage","Treatment",""),
			("Cage","TreatmentProtocol","code"),
			("SucrosePreferenceMeasurement","date"),
			]
		join_entries=[
			("Animal.treatments",),
			("FMRIMeasurement",),
			("OpenFieldTestMeasurement","Animal.measurements"),
			("ForcedSwimTestMeasurement","Animal.measurements"),
			("Treatment.protocol",),
			("Animal.cage_stays",),
			("CageStay.cage",),
			("Cage_Treatment","Cage.treatments"),
			("Cage_TreatmentProtocol","Cage_Treatment.protocol"),
			("SucrosePreferenceMeasurement","Cage.measurements"),
			]
	else:
		raise ValueError("The value for select needs to be one of {}".format(accepted_select_values))

	animals = [str(i) for i in animals] #for some reason the Animal.id values need to be string :-/
	my_filter = ["Animal","id"]
	my_filter.extend(animals)

	df = query.get_df(db_path, col_entries=col_entries, join_entries=join_entries, filters=[my_filter], join_types=join_types)

	return df


[docs]def cage_drinking_measurements(db_path,
	treatments=[]
	):
	""""""
	filters = []
	join_type = 'inner'
	col_entries=[
		("Cage","id"),
		("Treatment",),
		("TreatmentProtocol",'code'),
		('DrinkingMeasurement',)
		]
	join_entries=[
		("Cage.treatments",),
		("Treatment.protocol",),
		('DrinkingMeasurement',)
		]
	my_filter = []
	if treatments:
		my_filter = ["TreatmentProtocol","code"]
		my_filter.extend(treatments)
	filters.append(my_filter)

	df = query.get_df(db_path, col_entries=col_entries, join_entries=join_entries, filters=filters, default_join=join_type)

	return df


[docs]def animals_by_genotype(db_path, genotypes,
	attribute='code',
	):
	"""Return `pandas.Dataframe` object containing ID and genotype table columns of animals as matched by selected values on a selected Genotype attribute field.

	Parameters
	----------
	db_path : string
		Path to database file to query.
	genotypes : list
		List of strings containing values to be matched for the selected Genotype attribute.
	attribute : str
		Genotype attribute to match on.
	"""
	filters = []
	join_type = 'inner'
	col_entries=[
		("Animal","id"),
		("Genotype",),
		]
	join_entries=[
		("Animal.genotypes",),
		]
	my_filter = ["Genotype", attribute]
	my_filter.extend(genotypes)
	filters.append(my_filter)

	df = query.get_df(db_path, col_entries=col_entries, join_entries=join_entries, filters=filters, default_join=join_type)

	return df


[docs]def cage_periods(db_path,
	animal_filter=[],
	cage_filter=[],
	):
	"""
	Return a `pandas.DataFrame` object containing the periods which animals spent in which cages.

	Parameters
	----------
	db_path : string
		Path to database file to query.
	animal_filter : list, optional
		A list of `Animal.id` attribute values for which to specifically filter the query.

	Notes
	-----
	Operations on `pandas.DataFrame` objects should be performed in `labbookdb.report.tracking`, however, the cagestay end date is not explicitly recordes, so to select it or select animals by it, we calculate it here.
	"""
	df = parameterized(db_path, animal_filter=animal_filter, cage_filter=cage_filter, data_type='cage list')
	df['CageStay_end_date'] = ''
	for subject in df['Animal_id'].unique():
		for start_date in df[df['Animal_id']==subject]['CageStay_start_date'].tolist():
			possible_end_dates = df[(df['Animal_id']==subject)&(df['CageStay_start_date']>start_date)]['CageStay_start_date'].tolist()
			try:
				end_date = min(possible_end_dates)
			except ValueError:
				end_date = None
			if not end_date:
				end_date = df[df['Animal_id']==subject]['Animal_death_date'].tolist()[0]
			df.loc[(df['Animal_id']==subject)&(df['CageStay_start_date']==start_date),'CageStay_end_date'] = end_date
	return df

[docs]def timetable(db_path, filters,
	default_join="outer",
	join_types=[],
	):
	"""Select a dataframe with animals as rown and all timetable-relevant events as columns.

	Parameters
	----------

	db_path : str
	Path to a LabbookDB formatted database.

	filters : list of list
	A list of lists giving filters for the query. It is passed to `..query.get_df()`.

	outerjoin_all : bool
	Pased as outerjoin_all to `..query.get_df()`
	"""

	col_entries=[
		("Animal","id"),
		("Animal","death_date"),
		("Treatment",),
		("FMRIMeasurement","date"),
		("OpenFieldTestMeasurement","date"),
		("ForcedSwimTestMeasurement","date"),
		("TreatmentProtocol","code"),
		("CageStay","start_date"),
		("Cage","id"),
		("Cage","Treatment",""),
		("Cage","TreatmentProtocol","code"),
		("SucrosePreferenceMeasurement","date"),
		]
	join_entries=[
		("Animal.treatments",),
		("FMRIMeasurement",),
		("OpenFieldTestMeasurement","Animal.measurements"),
		("ForcedSwimTestMeasurement","Animal.measurements"),
		("Treatment.protocol",),
		("Animal.cage_stays",),
		("CageStay.cage",),
		("Cage_Treatment","Cage.treatments"),
		("Cage_TreatmentProtocol","Cage_Treatment.protocol"),
		("SucrosePreferenceMeasurement","Cage.measurements"),
		]

	# setting outerjoin to true will indirectly include controls
	df = query.get_df(db_path, col_entries=col_entries, join_entries=join_entries, filters=filters, default_join=default_join, join_types=join_types)

	return df


[docs]def parameterized(db_path, data_type,
	animal_filter=[],
	cage_filter=[],
	treatment_start_dates=[],
	):
	"""Select dataframe from a LabbookDB style database.

	Parameters
	----------

	db_path : string
		Path to a LabbookDB formatted database.
	data_type : {"animals id", "animals info", "animals measurements", "animals measurements irregularities", "cage list", "forced swim"}
		What type of data should be selected values can be:
	animal_filter : list, optional
		A list of animal identifiers (`Animal.id` attributes) for which to limit the query.
	treatment_start_dates : list, optional
		A list containing the treatment start date or dates by which to filter the cages for the sucrose preference measurements.
		Items should be strings in datetime format, e.g. "2016,4,25,19,30".
	"""

	default_join = "inner"
	my_filter = []

	allowed_data_types = [
			"animals base",
			"animals id",
			"animals info",
			"animals measurements",
			"animals measurements irregularities",
			"cage list",
			"forced swim",
			]

	if data_type == "animals base":
		col_entries=[
			("Animal","id"),
			("Animal","birth_date"),
			("Animal","sex"),
			]
		join_entries=[]
	elif data_type == "animals id":
		col_entries=[
			("Animal","id"),
			("AnimalExternalIdentifier",),
			]
		join_entries=[
			("Animal.external_ids",),
			]
	elif data_type == "animals info":
		col_entries=[
			("Animal","death_date"),
			("Animal","birth_date"),
			("Animal","sex"),
			("AnimalExternalIdentifier",),
			("Genotype",),
			]
		join_entries=[
			("Animal.external_ids",),
			("Animal.genotypes",),
			]
		if animal_filter:
			my_filter = ['Animal','id']
			# for some reason this needs to be str
			my_filter.extend([str(i) for i in animal_filter])
	elif data_type == "animals measurements":
		col_entries=[
			("Animal","id"),
			("Measurement","id"),
			("StimulationProtocol","code"),
			]
		join_entries=[
			("Animal.measurements",),
			("FMRIMeasurement.stimulations",),
			]
	elif data_type == "animals measurements irregularities":
		col_entries=[
			("Animal","id"),
			("Measurement","id"),
			("Measurement","date"),
			("Irregularity","description"),
			]
		join_entries=[
			("Animal.measurements",),
			("FMRIMeasurement.irregularities",),
			]
		if animal_filter:
			my_filter = ['Animal','id']
			# for some reason this needs to be str
			my_filter.extend([str(i) for i in animal_filter])
	elif data_type == "animals weights":
		col_entries=[
			("Animal","id"),
			("AnimalExternalIdentifier",),
			("WeightMeasurement",),
			]
		join_entries=[
			("Animal.external_ids",),
			("WeightMeasurement",),
			]
		my_filter = ["Measurement","type","weight"]
	elif data_type == "cage list":
		col_entries=[
			("Animal",),
			("CageStay",),
			("Cage","id"),
			]
		join_entries=[
			("Animal.cage_stays",),
			("CageStay.cage",),
			]
	elif data_type == "forced swim":
		col_entries=[
			("Animal","id"),
			("Cage","id"),
			("Treatment",),
			("TreatmentProtocol","code"),
			("ForcedSwimTestMeasurement",),
			("Evaluation",),
			]
		join_entries=[
			("Animal.cage_stays",),
			("ForcedSwimTestMeasurement",),
			("Evaluation",),
			("CageStay.cage",),
			("Cage.treatments",),
			("Treatment.protocol",),
			]
	else:
		raise ValueError("The `data_type` value needs to be one of: {}. You specified \"{}\"".format(", ".join(allowed_data_types), data_type))

	if animal_filter:
		my_filter = ['Animal','id']
		# for some reason this needs to be str
		my_filter.extend([str(i) for i in animal_filter])
	if cage_filter:
		my_filter = ['Cage','id']
		# for some reason this needs to be str
		my_filter.extend([str(i) for i in cage_filter])
	if treatment_start_dates:
		my_filter = ["Treatment","start_date"]
		my_filter.extend(treatment_start_dates)
	df = query.get_df(db_path,col_entries=col_entries, join_entries=join_entries, filters=[my_filter], default_join=default_join)
	return df