import pandas as pd
import numpy as np
import os

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)


# Raw interaction log exported from the study (JSON).
df_interaction = pd.read_json(
    path_or_buf="interaction-export_filtered.json",
    encoding="utf-8",
)
# Participation table (CSV); its first column is used as the index.
df_completed_participation = pd.read_csv(
    filepath_or_buffer="participation-export_filtered.csv",
    index_col=0,
)


# Preview the first rows of the raw interaction log.
df_interaction.head()


# List the distinct interaction types that occur in the log.
df_interaction.interaction_type.unique()

array(['loaded-page', 'changed-viewport', 'on-input', 'selected-item',
       'elicitation-ended', 'iteration-started', 'iteration-ended',
       'study-ended', 'deselected-item'], dtype=object)


# Preview the first rows of the participation table.
df_completed_participation.head()


# Highest iteration number considered in the analysis
# (presumably the number of iterations each participant went through -- TODO confirm).
N_ITERATIONS = 8


def get_iteration(x):
    """Return the ``"iteration"`` entry of the JSON document encoded in *x*.

    Raises whatever ``json.loads`` raises for malformed input, and
    ``KeyError`` when the document has no ``"iteration"`` key.
    """
    payload = json.loads(x)
    return payload["iteration"]


#This cell takes up to a minute or two to complete
import json
def set_iteration(row):
    """Add an ``iteration`` entry to *row* and return the row.

    For "iteration-started" and "iteration-ended" rows the value is read
    from the ``"iteration"`` key of the JSON string stored in ``row.data``;
    every other row gets ``None``. The row is modified in place.
    """
    marks_iteration = row["interaction_type"] in ("iteration-started", "iteration-ended")
    row['iteration'] = json.loads(row["data"])['iteration'] if marks_iteration else None
    return row

def set_result_layout(row):
    """Add a ``result_layout`` entry to *row* and return the row.

    Only "iteration-started" rows carry the layout: it is read from the
    ``"result_layout"`` key of the JSON string in ``row.data``. All other
    rows get ``None``. The row is modified in place.
    """
    layout = None
    if row["interaction_type"] == "iteration-started":
        layout = json.loads(row["data"])['result_layout']
    row['result_layout'] = layout
    return row

#'algorithm_assignment': {'0': {'algorithm': 'relevance_based',
#   'name': 'gamma',
#   'order': 1},
#  '1': {'algorithm': 'weighted_average', 'name': 'delta', 'order': 0}},

def set_mapping(row):
    """Add the per-algorithm display order to *row* and return the row.

    For "iteration-started" rows, every entry of the ``"algorithm_assignment"``
    mapping in the JSON payload contributes one field: the upper-cased
    algorithm ``name`` (e.g. ``GAMMA``) holding its ``order``. All other rows
    get ``GAMMA`` and ``DELTA`` set to ``None``. The row is modified in place.
    """
    if row["interaction_type"] != 'iteration-started':
        row['GAMMA'] = None
        row['DELTA'] = None
        return row

    assignment = json.loads(row["data"])['algorithm_assignment']
    for entry in assignment.values():
        row[entry['name'].upper()] = entry['order']
    return row




# Build the enriched interaction frame `d`, indexed by interaction id.
d = df_interaction.copy()
d = d.set_index("id")
# Row-wise extraction of iteration, result_layout and GAMMA/DELTA order from the JSON payload.
# Only the iteration boundary rows get values here; all other rows get None.
d = d.apply(set_iteration, axis=1).apply(set_result_layout, axis=1).apply(set_mapping, axis=1)
# Propagate the values forward within each participation so that every interaction
# inherits the context of the most recent iteration boundary.
# GroupBy.ffill() keeps the original index. The former
# `.apply(lambda x: x.ffill())` prepends the group key to the index on pandas >= 2.0,
# which makes the assignment back into `d` fail.
carried_columns = ['iteration', 'result_layout', 'GAMMA', 'DELTA']
d[carried_columns] = d.groupby('participation', sort=False)[carried_columns].ffill()
# Drop everything logged before a participation's first iteration boundary.
d = d[d.iteration.notna()]


# Fallback: in case of problems with the code above, load the (presumably pre-computed)
# enriched interactions instead.
# NOTE: when the file is run top to bottom, this line unconditionally replaces the `d` built above.
d = pd.read_json("interaction-export_filteredEnriched.json", encoding='utf-8')


# What does an "iteration-started" record look like?
d.loc[26009]

participation                                                     173
interaction_type                                    iteration-started
time                                       2023-01-20 10:33:24.402124
data                {"iteration": 3, "weights": [0.333333333333333...
iteration                                                         3.0
result_layout                                                    rows
GAMMA                                                             1.0
DELTA                                                             0.0
Name: 26009, dtype: object


# The GAMMA and DELTA fields denote whether the algorithm was shown at the advantaged (0) or disadvantaged (1) position.
#    - The advantaged position denotes the left column, the top row, and so on.


# The main payload resides in the "data" field.
# Among other things, it contains the list of movies shown for both the gamma and delta algorithms.
# The full history of shown items, including previous iterations, is available under "shown".
# movie_idx vs. movie_id: the position in our reduced dataset vs. the original ID from MovieLens.
json.loads(d.loc[26009]["data"])

{'iteration': 3,
 'weights': [0.33333333333333337, 0.33333333333333337, 0.33333333333333337],
 'movies': {'gamma': {'movies': [{'movie': 'Valerian and the City of a Thousand Planets (2017)',
     'url': '/assets/utils/ml-latest/img/173291.jpg',
     'movie_idx': '1462',
     'movie_id': 173291,
     'genres': ['Action', 'Adventure', 'Sci-Fi']},
    {'movie': 'Transcendence (2014)',
     'url': '/assets/utils/ml-latest/img/110730.jpg',
     'movie_idx': '1072',
     'movie_id': 110730,
     'genres': ['Drama', 'Sci-Fi', 'IMAX']},
    {'movie': 'Rampage (2018)',
     'url': '/assets/utils/ml-latest/img/186587.jpg',
     'movie_idx': '1665',
     'movie_id': 186587,
     'genres': ['Action', 'Adventure', 'Sci-Fi']},
    {'movie': 'Resident Evil: The Final Chapter (2017)',
     'url': '/assets/utils/ml-latest/img/168498.jpg',
     'movie_idx': '1429',
     'movie_id': 168498,
     'genres': ['Action', 'Horror', 'Sci-Fi']},
    {'movie': 'Project Almanac (2015)',
     'url': '/assets/utils/ml-latest/img/127096.jpg',
     'movie_idx': '1191',
     'movie_id': 127096,
     'genres': ['Sci-Fi', 'Thriller']},
    {'movie': 'Pixels (2015)',
     'url': '/assets/utils/ml-latest/img/135137.jpg',
     'movie_idx': '1229',
     'movie_id': 135137,
     'genres': ['Action', 'Comedy', 'Sci-Fi']},
    {'movie': 'Independence Day: Resurgence (2016)',
     'url': '/assets/utils/ml-latest/img/135567.jpg',
     'movie_idx': '1238',
     'movie_id': 135567,
     'genres': ['Action', 'Adventure', 'Sci-Fi']},
    {'movie': 'Transformers: The Last Knight (2017)',
     'url': '/assets/utils/ml-latest/img/174585.jpg',
     'movie_idx': '1468',
     'movie_id': 174585,
     'genres': ['Action', 'Adventure', 'Sci-Fi', 'Thriller']},
    {'movie': 'Hansel & Gretel: Witch Hunters (2013)',
     'url': '/assets/utils/ml-latest/img/100163.jpg',
     'movie_idx': '966',
     'movie_id': 100163,
     'genres': ['Action', 'Fantasy', 'Horror', 'IMAX']},
    {'movie': 'Seven Sisters (2017)',
     'url': '/assets/utils/ml-latest/img/173925.jpg',
     'movie_idx': '1464',
     'movie_id': 173925,
     'genres': ['Sci-Fi', 'Thriller']}],
   'order': 1},
  'delta': {'movies': [{'movie': 'In Time (2011)',
     'url': '/assets/utils/ml-latest/img/90405.jpg',
     'movie_idx': '871',
     'movie_id': 90405,
     'genres': ['Crime', 'Sci-Fi', 'Thriller']},
    {'movie': "Ender's Game (2013)",
     'url': '/assets/utils/ml-latest/img/106002.jpg',
     'movie_idx': '1034',
     'movie_id': 106002,
     'genres': ['Action', 'Adventure', 'Sci-Fi', 'IMAX']},
    {'movie': 'Transcendence (2014)',
     'url': '/assets/utils/ml-latest/img/110730.jpg',
     'movie_idx': '1072',
     'movie_id': 110730,
     'genres': ['Drama', 'Sci-Fi', 'IMAX']},
    {'movie': 'Lucy (2014)',
     'url': '/assets/utils/ml-latest/img/111360.jpg',
     'movie_idx': '1077',
     'movie_id': 111360,
     'genres': ['Action', 'Sci-Fi']},
    {'movie': 'Passengers (2016)',
     'url': '/assets/utils/ml-latest/img/166635.jpg',
     'movie_idx': '1399',
     'movie_id': 166635,
     'genres': ['Adventure', 'Drama', 'Romance', 'Sci-Fi']},
    {'movie': 'Riddick (2013)',
     'url': '/assets/utils/ml-latest/img/104243.jpg',
     'movie_idx': '1016',
     'movie_id': 104243,
     'genres': ['Action', 'Sci-Fi', 'Thriller', 'IMAX']},
    {'movie': 'John Carter (2012)',
     'url': '/assets/utils/ml-latest/img/93363.jpg',
     'movie_idx': '900',
     'movie_id': 93363,
     'genres': ['Action', 'Adventure', 'Sci-Fi', 'IMAX']},
    {'movie': 'Terminator Genisys (2015)',
     'url': '/assets/utils/ml-latest/img/120799.jpg',
     'movie_idx': '1170',
     'movie_id': 120799,
     'genres': ['Action', 'Adventure', 'Sci-Fi', 'Thriller']},
    {'movie': 'Total Recall (2012)',
     'url': '/assets/utils/ml-latest/img/95875.jpg',
     'movie_idx': '923',
     'movie_id': 95875,
     'genres': ['Action', 'Sci-Fi', 'Thriller']},
    {'movie': 'Elysium (2013)',
     'url': '/assets/utils/ml-latest/img/103253.jpg',
     'movie_idx': '998',
     'movie_id': 103253,
     'genres': ['Action', 'Drama', 'Sci-Fi', 'IMAX']}],
   'order': 0}},
 'algorithm_assignment': {'0': {'algorithm': 'relevance_based',
   'name': 'gamma',
   'order': 1},
  '1': {'algorithm': 'weighted_average', 'name': 'delta', 'order': 0}},
 'result_layout': 'rows',
 'refinement_layout': '3',
 'shown': {'relevance_based': [[1288,
    1108,
    1327,
    972,
    1084,
    1367,
    1326,
    1204,
    1377,
    1107],
   [1422, 1236, 1346, 1409, 1386, 1471, 1352, 1433, 1403, 1349],
   [1462, 1072, 1665, 1429, 1191, 1229, 1238, 1468, 966, 1464]],
  'weighted_average': [[1084,
    1204,
    1163,
    972,
    954,
    973,
    797,
    1206,
    1275,
    1089],
   [1054, 1230, 1205, 1213, 1154, 813, 733, 1117, 764, 1166],
   [871, 1034, 1072, 1077, 1399, 1016, 900, 1170, 923, 998]]}}


# What does an "iteration-ended" record look like?
d.loc[3377]

participation                                                      36
interaction_type                                      iteration-ended
time                                       2023-01-15 23:19:22.851985
data                {"iteration": 6, "selected": [[1040, 314, 355,...
iteration                                                         6.0
result_layout                                                 columns
GAMMA                                                             1.0
DELTA                                                             0.0
Name: 3377, dtype: object


json.loads(d.loc[3377]["data"])
# selected_variants: was each click made on the advantaged (0) or the disadvantaged (1) algorithm?
# selected: all selected items (movie_idx) in all iterations so far, one list per iteration

{'iteration': 6,
 'selected': [[1040, 314, 355, 956, 1231, 468],
  [1155, 1228, 1142, 1039, 883],
  [1140, 1054],
  [293, 392, 885, 1345],
  [1417, 329],
  [345, 437]],
 'new_weights': [0.33333333333333337,
  0.33333333333333337,
  0.33333333333333337],
 'selected_variants': [[0, 0, 0, 0, 1, 1],
  [0, 0, 0, 0, 0],
  [1, 0],
  [0, 0, 0, 0],
  [0, 1],
  [0, 0]],
 'dont_like_anything': [False, False, False, False, False, False],
 'algorithm_comparison': ['third',
  'third',
  'fourth',
  'second',
  'third',
  'first'],
 'ratings': [{'gamma': 4.0, 'delta': 4.0},
  {'gamma': 4.0, 'delta': 4.0},
  {'gamma': 2.0, 'delta': 3.0},
  {'gamma': 2.0, 'delta': 3.0},
  {'gamma': 2.0, 'delta': 2.0},
  {'gamma': 1.0, 'delta': 3.0}]}


# Add information on whether the selected item was displayed at an advantaged
# position (variant=0) or not (variant=1). Rows that are not "selected-item"
# interactions, or whose selected item carries no variant, keep the marker -1.
d["variant"] = -1
print(d.shape)
# Compute the selection mask once and parse each JSON payload in a single pass.
is_selection = d["interaction_type"] == "selected-item"
d.loc[is_selection, "variant"] = d.loc[is_selection, "data"].map(
    lambda payload: json.loads(payload)["selected_item"].get("variant", -1)
)
# Keep only iterations up to the shared N_ITERATIONS constant
# (previously a repeated literal 8).
d = d.loc[d.iteration <= N_ITERATIONS]
print(d.shape)

(30305, 9)
(30264, 9)


# Keep only the rows with a non-negative variant, i.e. the item selections for which
# the display position is known; copy so later column additions do not touch `d`.
has_variant = d["variant"] >= 0
selected_item_interactions = d.loc[has_variant].copy()
selected_item_interactions.shape

(6980, 9)


# Preview the first rows of the selection-only frame.
selected_item_interactions.head()


# Information available for the "selected-item" interaction_type:
#   selected_item: the current selection
#   selected_items: the selections made so far (in the sample output below the list
#                   also ends with the current selection)
json.loads(selected_item_interactions.loc[47768]["data"])

{'selected_item': {'genres': ['Action', 'Fantasy', 'Horror', 'Thriller'],
  'movie': 'Underworld: Vzpoura Lycanů (2009) Akční|Fantasy|Horor|Thriller',
  'movie_id': 65682,
  'movie_idx': '650',
  'url': '/assets/utils/ml-latest/img/65682.jpg',
  'variant': 1},
 'selected_items': [{'genres': ['Action', 'Comedy', 'IMAX'],
   'movie': 'Noc v muzeu 2 (2009) Akční|Komedie|IMAX',
   'movie_id': 68793,
   'movie_idx': '674',
   'url': '/assets/utils/ml-latest/img/68793.jpg',
   'variant': 0},
  {'genres': ['Action', 'Fantasy', 'Horror', 'IMAX'],
   'movie': 'Underworld: Probuzení (2012) Akční|Fantasy|Horor|IMAX',
   'movie_id': 91974,
   'movie_idx': '890',
   'url': '/assets/utils/ml-latest/img/91974.jpg',
   'variant': 0},
  {'genres': ['Action', 'Fantasy', 'Horror'],
   'movie': 'Underworld: Evolution (2006) Akční|Fantasy|Horor',
   'movie_id': 42738,
   'movie_idx': '472',
   'url': '/assets/utils/ml-latest/img/42738.jpg',
   'variant': 1},
  {'genres': ['Action', 'Fantasy', 'Horror', 'Thriller'],
   'movie': 'Underworld: Vzpoura Lycanů (2009) Akční|Fantasy|Horor|Thriller',
   'movie_id': 65682,
   'movie_idx': '650',
   'url': '/assets/utils/ml-latest/img/65682.jpg',
   'variant': 1}],
 'context': {'url': 'http://hmon.ms.mff.cuni.cz:5000/plugin1/compare-algorithms',
  'time': '2023-02-05T18:46:36.111Z',
  'viewport': {'left': 0,
   'top': -620,
   'width': 1249.3333740234375,
   'height': 1856.3958740234375},
  'extra': {'variant': 1}}}


# adding information on corresponding MovieID
def getSelectedMovieId(x):
    """Return the ``movie_id`` of the ``selected_item`` in the JSON string *x*.

    Raises ``KeyError`` when the payload has no ``selected_item`` or the
    selected item has no ``movie_id``.
    """
    payload = json.loads(x)
    selected = payload["selected_item"]
    return selected["movie_id"]

# Extract the MovieLens id of every selected item from its JSON payload.
# Bracket assignment creates the column directly, so no NaN placeholder is needed first.
selected_item_interactions["movieID"] = selected_item_interactions["data"].map(getSelectedMovieId)


# Add information on which algorithm is responsible for the selection:
# a click whose variant equals DELTA's display order is credited to DELTA,
# every other click to GAMMA.
credited_to_delta = selected_item_interactions["variant"] == selected_item_interactions["DELTA"]
selected_item_interactions["selected_algorithm"] = np.where(credited_to_delta, "DELTA", "GAMMA")


# Preview the selection frame again, now including the movieID and selected_algorithm columns.
selected_item_interactions.head()

	id	participation	interaction_type	time	data
2718	3219	36	loaded-page	2023-01-15 23:13:22.508734	{"page": "preference_elicitation", "context": ...
2719	3220	36	changed-viewport	2023-01-15 23:13:27.502013	{"viewport": {"left": 0, "top": 0, "width": 25...
2720	3221	36	on-input	2023-01-15 23:13:31.569834	{"search_text_box_value": "potter", "context":...
2721	3222	36	on-input	2023-01-15 23:13:31.580454	{"id": "", "text": "Search", "name": "search",...
2722	3223	36	selected-item	2023-01-15 23:13:32.643579	{"selected_item": {"movieName": "Harry Potter ...

	age_group	gender	education	ml_familiar	user_study_id	time_joined	time_finished	uuid	language
id
36	21.0	0.0	4.0	True	13	2023-01-15 23:13:22.047657	2023-01-15 23:20:18.022112	JQpLq2n0cE-86IMSaaIuig	en
37	29.0	0.0	5.0	True	13	2023-01-15 23:19:44.236601	2023-01-15 23:27:10.655670	H43BQ14jGPykvJK-BPMPlg	en
39	21.0	0.0	2.0	True	13	2023-01-16 17:04:24.403909	2023-01-16 17:20:09.564196	Bunsc02cUiXVrGE_1R7StQ	en
40	21.0	1.0	2.0	False	13	2023-01-16 18:59:21.828438	2023-01-16 19:12:04.004289	9fKA90WNfh5mzgD_v1Otgg	cs
42	21.0	0.0	4.0	False	13	2023-01-16 23:14:00.585090	2023-01-16 23:23:47.331900	pVVFqHS8_nWp0uISHV61qg	en

Working with study results in EasyStudy

Study details:

Your task:

Pre-process Participants data

filter-out test users

filter-out unfinished participations

remove sensitive data and save for further usage

Pre-process Interactions

Only get interactions of non-dummy-data participants who completed the study

Toy dataset for faster downloads (using only 20 participants)

Load data

Enrich the interactions data frame

Filter only the information about item selections

Task 1: which algorithm (GAMMA or DELTA) attracted more selections?

Task 1.1: were there some differences if the algorithm was displayed at (dis)advantaged position?

Task 1.2: were there some differences w.r.t. result_layout and (dis)advantaged position?

Task 2: what are average DCG scores for both GAMMA and DELTA?

Task 3: does GAMMA or DELTA substantially differ in the diversity, novelty, or popularity lift of provided recommendations?

	participation	interaction_type	time	data	iteration	result_layout	GAMMA	DELTA	variant
id
3271	36	selected-item	2023-01-15 23:15:59.742434	{"selected_item": {"genres": ["Adventure", "Fa...	1.0	row-single-scrollable	1.0	0.0	0
3272	36	selected-item	2023-01-15 23:16:00.293013	{"selected_item": {"genres": ["Adventure", "Fa...	1.0	row-single-scrollable	1.0	0.0	0
3273	36	selected-item	2023-01-15 23:16:00.636370	{"selected_item": {"genres": ["Action", "Adven...	1.0	row-single-scrollable	1.0	0.0	0
3274	36	selected-item	2023-01-15 23:16:02.500438	{"selected_item": {"genres": ["Adventure", "Fa...	1.0	row-single-scrollable	1.0	0.0	0
3277	36	selected-item	2023-01-15 23:16:09.472159	{"selected_item": {"genres": ["Fantasy"], "mov...	1.0	row-single-scrollable	1.0	0.0	1