Source code for labbookdb.report.formatting
import pandas as pd
from . import processing
[docs]def plottable_sums(reference_df, behaviour, identifier_column="Animal_id", periods={}, period_label="period", metadata_columns={"TreatmentProtocol_code":"Treatment"}):
identifiers = list(set(reference_df[identifier_column]))
evaluation_df = pd.DataFrame({})
for identifier in identifiers:
identifier_df = reference_df[reference_df[identifier_column]==identifier]
evaluation_path = identifier_df["Evaluation_path"].values[0]
identifier_data = {}
for metadata_column in metadata_columns:
identifier_data[metadata_columns[metadata_column]] = identifier_df[metadata_column].values[0]
for period in periods:
period_start, period_end = periods[period]
sums = processing.timedelta_sums(evaluation_path, index_name=identifier, period_start=period_start, period_end=period_end)
#We need to calculate this explicitly since the start/end of th experiment may not align perfecty with the theoretical period
real_period_duration = sums.sum(axis=1).values[0]
#if the behaviour key is not found, there was none of that behaviour type in the period
try:
behaviour_ratio = sums[behaviour].values[0]/real_period_duration
except KeyError:
behaviour_ratio = 0
identifier_data[behaviour.title()+" Ratio"] = behaviour_ratio
identifier_data[period_label] = period
identifier_data["Identifier"] = identifier
period_df_slice = pd.DataFrame(identifier_data, index=[identifier])
evaluation_df = pd.concat([evaluation_df, period_df_slice])
#data is usually ordered as it comes, for nicer plots we sort it here
evaluation_df = evaluation_df.sort_values([period_label], ascending=True)
evaluation_df = evaluation_df.sort_values(list(metadata_columns.values()), ascending=False)
return evaluation_df
[docs]def plottable_sucrosepreference_df(reference_df):
cage_ids = list(set(reference_df["Cage_id"]))
preferences_df = pd.DataFrame({})
for cage_id in cage_ids:
cage_id_df = reference_df[reference_df["Cage_id"]==cage_id]
reference_dates = list(set(cage_id_df["SucrosePreferenceMeasurement_reference_date"]))
reference_dates.sort()
measurement_dates = list(set(cage_id_df["SucrosePreferenceMeasurement_date"]))
measurement_dates.sort()
first_date = reference_dates[0]
preferences={}
for measurement_date in measurement_dates:
cage_id_measurement_df = cage_id_df[cage_id_df["SucrosePreferenceMeasurement_date"] == measurement_date]
start_date = cage_id_measurement_df["SucrosePreferenceMeasurement_reference_date"].tolist()[0]
relative_start_day = start_date-first_date
rounded_relative_start_day = processing.rounded_days(relative_start_day)
relative_end_day = measurement_date-first_date
rounded_relative_end_day = processing.rounded_days(relative_end_day)
key = "{} to {}".format(rounded_relative_start_day, rounded_relative_end_day)
water_start = cage_id_measurement_df["SucrosePreferenceMeasurement_water_start_amount"].tolist()[0]
water_end = cage_id_measurement_df["SucrosePreferenceMeasurement_water_end_amount"].tolist()[0]
sucrose_start = cage_id_measurement_df["SucrosePreferenceMeasurement_sucrose_start_amount"].tolist()[0]
sucrose_end = cage_id_measurement_df["SucrosePreferenceMeasurement_sucrose_end_amount"].tolist()[0]
water_consumption = water_end - water_start
sucrose_consumption = sucrose_end - sucrose_start
sucrose_prefernce = sucrose_consumption/(water_consumption + sucrose_consumption)
preferences["Period [days]"] = key
preferences["Sucrose Preference Ratio"] = sucrose_prefernce
preferences["Sucrose Bottle Position"] = cage_id_measurement_df["SucrosePreferenceMeasurement_sucrose_bottle_position"].tolist()[0]
preferences["Sucrose Concentration"] = cage_id_measurement_df["SucrosePreferenceMeasurement_sucrose_concentration"].tolist()[0]
preferences["Treatment"] = cage_id_measurement_df["TreatmentProtocol_code"].tolist()[0]
preferences["Cage ID"] = cage_id # this may not actually be needed, as the same info is contained in the index
preferences_df_slice = pd.DataFrame(preferences, index=[cage_id])
preferences_df = pd.concat([preferences_df, preferences_df_slice])
return preferences_df