Source code for labbookdb.report.formatting

import pandas as pd
from . import processing

def plottable_sums(reference_df, behaviour, identifier_column="Animal_id", periods={}, period_label="period", metadata_columns={"TreatmentProtocol_code":"Treatment"}):
	"""Return a plot-friendly dataframe with the ratio of one behaviour per identifier and period.

	For every unique value of `reference_df[identifier_column]` and every entry of `periods`, `processing.timedelta_sums()` is called on the identifier's evaluation path, and the value of the `behaviour` column is divided by the sum over all columns of the returned frame.

	Parameters
	----------
	reference_df : pandas.DataFrame
		Must contain `identifier_column`, an "Evaluation_path" column, and every key of `metadata_columns`.
		Only the first row per identifier is read for the evaluation path and the metadata values.
	behaviour : str
		Column to look up in the frame returned by `processing.timedelta_sums()`.
		If that column is absent, the ratio is recorded as 0.
	identifier_column : str, optional
		Column of `reference_df` whose unique values define the rows to evaluate.
	periods : dict, optional
		Maps a period name to a `(period_start, period_end)` pair, which is passed on to `processing.timedelta_sums()`.
		The default is never mutated by this function.
	period_label : str, optional
		Name of the output column holding the period name.
	metadata_columns : dict, optional
		Maps column names of `reference_df` to the column names they get in the output.
		The default is never mutated by this function.

	Returns
	-------
	pandas.DataFrame
		One row per identifier and period, indexed by identifier, with one column per value of `metadata_columns`, a "<Behaviour> Ratio" column, a `period_label` column and an "Identifier" column.
		Rows are ordered by the metadata columns (descending) and, within those, by `period_label` (ascending).
		An empty dataframe is returned if there is no identifier/period combination to evaluate.
	"""
	identifiers = list(set(reference_df[identifier_column]))
	ratio_label = behaviour.title()+" Ratio"
	metadata_labels = list(metadata_columns.values())
	period_slices = []
	for identifier in identifiers:
		identifier_df = reference_df[reference_df[identifier_column]==identifier]
		evaluation_path = identifier_df["Evaluation_path"].values[0]
		identifier_metadata = {label: identifier_df[column].values[0] for column, label in metadata_columns.items()}
		for period, (period_start, period_end) in periods.items():
			sums = processing.timedelta_sums(evaluation_path, index_name=identifier, period_start=period_start, period_end=period_end)
			# We need to calculate this explicitly since the start/end of the experiment may not align perfectly with the theoretical period
			real_period_duration = sums.sum(axis=1).values[0]
			# If the behaviour key is not found, there was none of that behaviour type in the period
			try:
				behaviour_sum = sums[behaviour].values[0]
			except KeyError:
				behaviour_ratio = 0
			else:
				behaviour_ratio = behaviour_sum/real_period_duration
			# Fresh dict per row; the insertion order fixes the column order of the output
			row = dict(identifier_metadata)
			row[ratio_label] = behaviour_ratio
			row[period_label] = period
			row["Identifier"] = identifier
			period_slices.append(pd.DataFrame(row, index=[identifier]))

	# Without rows there are no columns to sort by, so sorting would raise a KeyError
	if not period_slices:
		return pd.DataFrame({})

	# Concatenate once, rather than re-copying a growing dataframe on every iteration
	evaluation_df = pd.concat(period_slices)

	# Data is usually ordered as it comes, for nicer plots we sort it here.
	# A single multi-key sort is used, since two consecutive single-key sorts only give this order if the second one is stable, which the default sorting algorithm does not guarantee.
	sort_columns = metadata_labels + [period_label]
	sort_ascending = [False]*len(metadata_labels) + [True]
	return evaluation_df.sort_values(sort_columns, ascending=sort_ascending)

[docs]def plottable_sucrosepreference_df(reference_df):
	cage_ids = list(set(reference_df["Cage_id"]))
	preferences_df = pd.DataFrame({})
	for cage_id in cage_ids:
		cage_id_df = reference_df[reference_df["Cage_id"]==cage_id]
		reference_dates = list(set(cage_id_df["SucrosePreferenceMeasurement_reference_date"]))
		reference_dates.sort()
		measurement_dates = list(set(cage_id_df["SucrosePreferenceMeasurement_date"]))
		measurement_dates.sort()
		first_date = reference_dates[0]
		preferences={}
		for measurement_date in measurement_dates:
			cage_id_measurement_df = cage_id_df[cage_id_df["SucrosePreferenceMeasurement_date"] == measurement_date]
			start_date = cage_id_measurement_df["SucrosePreferenceMeasurement_reference_date"].tolist()[0]
			relative_start_day = start_date-first_date
			rounded_relative_start_day = processing.rounded_days(relative_start_day)
			relative_end_day = measurement_date-first_date
			rounded_relative_end_day = processing.rounded_days(relative_end_day)
			key = "{} to {}".format(rounded_relative_start_day, rounded_relative_end_day)
			water_start = cage_id_measurement_df["SucrosePreferenceMeasurement_water_start_amount"].tolist()[0]
			water_end = cage_id_measurement_df["SucrosePreferenceMeasurement_water_end_amount"].tolist()[0]
			sucrose_start = cage_id_measurement_df["SucrosePreferenceMeasurement_sucrose_start_amount"].tolist()[0]
			sucrose_end = cage_id_measurement_df["SucrosePreferenceMeasurement_sucrose_end_amount"].tolist()[0]
			water_consumption = water_end - water_start
			sucrose_consumption = sucrose_end - sucrose_start
			sucrose_prefernce = sucrose_consumption/(water_consumption + sucrose_consumption)
			preferences["Period [days]"] = key
			preferences["Sucrose Preference Ratio"] = sucrose_prefernce
			preferences["Sucrose Bottle Position"] = cage_id_measurement_df["SucrosePreferenceMeasurement_sucrose_bottle_position"].tolist()[0]
			preferences["Sucrose Concentration"] = cage_id_measurement_df["SucrosePreferenceMeasurement_sucrose_concentration"].tolist()[0]
			preferences["Treatment"] = cage_id_measurement_df["TreatmentProtocol_code"].tolist()[0]
			preferences["Cage ID"] = cage_id # this may not actually be needed, as the same info is contained in the index
			preferences_df_slice = pd.DataFrame(preferences, index=[cage_id])
			preferences_df = pd.concat([preferences_df, preferences_df_slice])

	return preferences_df