Two main tables: participants and interactions
import pandas as pd
import numpy as np
import os
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
# Load the raw participant export; the first CSV column is used as the index.
df_participation = pd.read_csv("participation-export.csv", index_col=0)

# Keep only rows from index label 31 onwards (.loc slicing is label-based and
# inclusive). The explicit copy makes the result an independent frame, so the
# column assignment below does not write into a slice of the original frame
# (which pandas reports as SettingWithCopyWarning).
df_participation = df_participation.loc[31:].copy()

# Missing e-mails become "" so that str.contains() yields plain booleans and
# the negated mask below is well-defined.
df_participation["participant_email"] = df_participation["participant_email"].fillna("")
df_participation = df_participation[
    ~df_participation["participant_email"].str.contains("testuser")
]
df_participation.shape

# Drop participants without a recorded age group, then split the remainder by
# whether a finish time was recorded.
df_participation = df_participation[df_participation["age_group"].notna()]
df_completed_participation = df_participation[df_participation["time_finished"].notna()]
df_uncompleted_participation = df_participation[df_participation["time_finished"].isna()]

# Remove the participant_email and extra_data columns before exporting the
# completed participations.
df_completed_participation = df_completed_participation.drop(
    ["participant_email", "extra_data"], axis=1
)
df_completed_participation.to_csv("participation-export_filtered.csv")

# Load the raw interaction log.
df_interaction = pd.read_json("interaction-export.json", encoding='utf-8')
def get_participants_interaction(df_i, df_p):
    """Return the rows of *df_i* that belong to participants listed in *df_p*.

    A row is kept when its ``participation`` value occurs in the index of
    *df_p*. Row order and the index of *df_i* are preserved.
    """
    participant_ids = df_p.index
    belongs_to_participant = df_i["participation"].isin(participant_ids)
    return df_i.loc[belongs_to_participant]
# Restrict the interaction log to the completed participations and persist it.
df_interaction = get_participants_interaction(
    df_interaction,
    df_completed_participation,
)
df_interaction.to_json("interaction-export_filtered.json")

# Smaller sample: interactions of the first 20 completed participations only.
df_interactionTop20 = get_participants_interaction(
    df_interaction,
    df_completed_participation.iloc[0:20],
)
df_interactionTop20.to_json("interaction-export_filteredSmall.json")

# Reload both filtered tables from the files written above.
df_interaction = pd.read_json("interaction-export_filtered.json", encoding='utf-8')
df_completed_participation = pd.read_csv("participation-export_filtered.csv", index_col=0)
df_interaction.head()
id | participation | interaction_type | time | data | |
---|---|---|---|---|---|
2718 | 3219 | 36 | loaded-page | 2023-01-15 23:13:22.508734 | {"page": "preference_elicitation", "context": ... |
2719 | 3220 | 36 | changed-viewport | 2023-01-15 23:13:27.502013 | {"viewport": {"left": 0, "top": 0, "width": 25... |
2720 | 3221 | 36 | on-input | 2023-01-15 23:13:31.569834 | {"search_text_box_value": "potter", "context":... |
2721 | 3222 | 36 | on-input | 2023-01-15 23:13:31.580454 | {"id": "", "text": "Search", "name": "search",... |
2722 | 3223 | 36 | selected-item | 2023-01-15 23:13:32.643579 | {"selected_item": {"movieName": "Harry Potter ... |
df_interaction.interaction_type.unique()
array(['loaded-page', 'changed-viewport', 'on-input', 'selected-item', 'elicitation-ended', 'iteration-started', 'iteration-ended', 'study-ended', 'deselected-item'], dtype=object)
df_completed_participation.head()
age_group | gender | education | ml_familiar | user_study_id | time_joined | time_finished | uuid | language | |
---|---|---|---|---|---|---|---|---|---|
id | |||||||||
36 | 21.0 | 0.0 | 4.0 | True | 13 | 2023-01-15 23:13:22.047657 | 2023-01-15 23:20:18.022112 | JQpLq2n0cE-86IMSaaIuig | en |
37 | 29.0 | 0.0 | 5.0 | True | 13 | 2023-01-15 23:19:44.236601 | 2023-01-15 23:27:10.655670 | H43BQ14jGPykvJK-BPMPlg | en |
39 | 21.0 | 0.0 | 2.0 | True | 13 | 2023-01-16 17:04:24.403909 | 2023-01-16 17:20:09.564196 | Bunsc02cUiXVrGE_1R7StQ | en |
40 | 21.0 | 1.0 | 2.0 | False | 13 | 2023-01-16 18:59:21.828438 | 2023-01-16 19:12:04.004289 | 9fKA90WNfh5mzgD_v1Otgg | cs |
42 | 21.0 | 0.0 | 4.0 | False | 13 | 2023-01-16 23:14:00.585090 | 2023-01-16 23:23:47.331900 | pVVFqHS8_nWp0uISHV61qg | en |
# Number of iterations in the study — presumably the upper bound applied when
# rows are filtered by iteration further down; TODO confirm.
N_ITERATIONS = 8
def get_iteration(x):
    """Return the value stored under the ``"iteration"`` key of JSON string *x*.

    Raises ``KeyError`` when the decoded object has no ``"iteration"`` key.
    """
    payload = json.loads(x)
    return payload["iteration"]
#This cell takes up to a minute or two to complete
import json
def set_iteration(row):
    """Add an ``iteration`` entry to an interaction row and return the row.

    For ``iteration-started`` and ``iteration-ended`` rows the value is read
    from the ``"iteration"`` key of the JSON string in ``row["data"]``; every
    other row gets ``None``. The row is modified in place.

    Fields are read with bracket indexing rather than attribute access so a
    column name can never be shadowed by a Series attribute of the same name.
    """
    if row["interaction_type"] in ("iteration-started", "iteration-ended"):
        row["iteration"] = json.loads(row["data"])["iteration"]
    else:
        row["iteration"] = None
    return row
def set_result_layout(row):
    """Add a ``result_layout`` entry to an interaction row and return the row.

    For ``iteration-started`` rows the value is read from the
    ``"result_layout"`` key of the JSON string in ``row["data"]``; every other
    row gets ``None``. The row is modified in place.

    Fields are read with bracket indexing rather than attribute access so a
    column name can never be shadowed by a Series attribute of the same name.
    """
    if row["interaction_type"] == "iteration-started":
        row["result_layout"] = json.loads(row["data"])["result_layout"]
    else:
        row["result_layout"] = None
    return row
#'algorithm_assignment': {'0': {'algorithm': 'relevance_based',
# 'name': 'gamma',
# 'order': 1},
# '1': {'algorithm': 'weighted_average', 'name': 'delta', 'order': 0}},
def set_mapping(row):
    """Add the per-algorithm position entries to an interaction row.

    For ``iteration-started`` rows, every entry of ``algorithm_assignment`` in
    the JSON string in ``row["data"]`` is written to the row as
    ``row[<name upper-cased>] = <order>`` (e.g. ``GAMMA`` and ``DELTA``).
    Every other row gets ``GAMMA`` and ``DELTA`` set to ``None``. The row is
    modified in place and returned.

    Fields are read with bracket indexing rather than attribute access so a
    column name can never be shadowed by a Series attribute of the same name.
    """
    if row["interaction_type"] == "iteration-started":
        assignment = json.loads(row["data"])["algorithm_assignment"]
        for mapping in assignment.values():
            row[mapping["name"].upper()] = mapping["order"]
    else:
        row["GAMMA"] = None
        row["DELTA"] = None
    return row
# Enrich every interaction with the iteration number, the result layout and
# the GAMMA/DELTA position assignment.
d = df_interaction.copy()
d = d.set_index("id")

# Row-wise extraction from the JSON payload: only iteration events carry
# these values, every other row gets None at this point.
d = (
    d.apply(set_iteration, axis=1)
    .apply(set_result_layout, axis=1)
    .apply(set_mapping, axis=1)
)

# Forward-fill within each participation so that the rows following an
# iteration event inherit its values. GroupBy.ffill() returns a result on the
# original row index; groupby(...).apply(lambda x: x.ffill()) prepends the
# group key to the index on pandas >= 2.0, which breaks the assignment back
# into `d`.
for column in ("iteration", "result_layout", "GAMMA", "DELTA"):
    d[column] = d.groupby("participation", sort=False)[column].ffill()

# Rows that still have no iteration (nothing to inherit from) are dropped.
d = d[d.iteration.notna()]
# In case of problems with the code above, use the following pre-computed file.
# NOTE(review): when the file is executed top to bottom this line always runs
# and replaces the `d` built above — confirm that is intended.
d = pd.read_json("interaction-export_filteredEnriched.json", encoding='utf-8')
# What does an iteration-started record look like?
d.loc[26009]
participation 173 interaction_type iteration-started time 2023-01-20 10:33:24.402124 data {"iteration": 3, "weights": [0.333333333333333... iteration 3.0 result_layout rows GAMMA 1.0 DELTA 0.0 Name: 26009, dtype: object
# The GAMMA and DELTA fields denote whether the algorithm was shown at the advantaged (0) or the disadvantaged (1) position.
# - The advantaged position denotes the left column, the top row, and so on.
# The main payload resides in the "data" field.
# It contains (among other things) the list of movies as they appeared in the lists of both the gamma and delta algorithms.
# The full history of shown items, including those from previous iterations, is available under "shown".
# movie_idx vs. movie_id: the ordering in our reduced dataset vs. the original ID from MovieLens
json.loads(d.loc[26009]["data"])
{'iteration': 3, 'weights': [0.33333333333333337, 0.33333333333333337, 0.33333333333333337], 'movies': {'gamma': {'movies': [{'movie': 'Valerian and the City of a Thousand Planets (2017)', 'url': '/assets/utils/ml-latest/img/173291.jpg', 'movie_idx': '1462', 'movie_id': 173291, 'genres': ['Action', 'Adventure', 'Sci-Fi']}, {'movie': 'Transcendence (2014)', 'url': '/assets/utils/ml-latest/img/110730.jpg', 'movie_idx': '1072', 'movie_id': 110730, 'genres': ['Drama', 'Sci-Fi', 'IMAX']}, {'movie': 'Rampage (2018)', 'url': '/assets/utils/ml-latest/img/186587.jpg', 'movie_idx': '1665', 'movie_id': 186587, 'genres': ['Action', 'Adventure', 'Sci-Fi']}, {'movie': 'Resident Evil: The Final Chapter (2017)', 'url': '/assets/utils/ml-latest/img/168498.jpg', 'movie_idx': '1429', 'movie_id': 168498, 'genres': ['Action', 'Horror', 'Sci-Fi']}, {'movie': 'Project Almanac (2015)', 'url': '/assets/utils/ml-latest/img/127096.jpg', 'movie_idx': '1191', 'movie_id': 127096, 'genres': ['Sci-Fi', 'Thriller']}, {'movie': 'Pixels (2015)', 'url': '/assets/utils/ml-latest/img/135137.jpg', 'movie_idx': '1229', 'movie_id': 135137, 'genres': ['Action', 'Comedy', 'Sci-Fi']}, {'movie': 'Independence Day: Resurgence (2016)', 'url': '/assets/utils/ml-latest/img/135567.jpg', 'movie_idx': '1238', 'movie_id': 135567, 'genres': ['Action', 'Adventure', 'Sci-Fi']}, {'movie': 'Transformers: The Last Knight (2017)', 'url': '/assets/utils/ml-latest/img/174585.jpg', 'movie_idx': '1468', 'movie_id': 174585, 'genres': ['Action', 'Adventure', 'Sci-Fi', 'Thriller']}, {'movie': 'Hansel & Gretel: Witch Hunters (2013)', 'url': '/assets/utils/ml-latest/img/100163.jpg', 'movie_idx': '966', 'movie_id': 100163, 'genres': ['Action', 'Fantasy', 'Horror', 'IMAX']}, {'movie': 'Seven Sisters (2017)', 'url': '/assets/utils/ml-latest/img/173925.jpg', 'movie_idx': '1464', 'movie_id': 173925, 'genres': ['Sci-Fi', 'Thriller']}], 'order': 1}, 'delta': {'movies': [{'movie': 'In Time (2011)', 'url': 
'/assets/utils/ml-latest/img/90405.jpg', 'movie_idx': '871', 'movie_id': 90405, 'genres': ['Crime', 'Sci-Fi', 'Thriller']}, {'movie': "Ender's Game (2013)", 'url': '/assets/utils/ml-latest/img/106002.jpg', 'movie_idx': '1034', 'movie_id': 106002, 'genres': ['Action', 'Adventure', 'Sci-Fi', 'IMAX']}, {'movie': 'Transcendence (2014)', 'url': '/assets/utils/ml-latest/img/110730.jpg', 'movie_idx': '1072', 'movie_id': 110730, 'genres': ['Drama', 'Sci-Fi', 'IMAX']}, {'movie': 'Lucy (2014)', 'url': '/assets/utils/ml-latest/img/111360.jpg', 'movie_idx': '1077', 'movie_id': 111360, 'genres': ['Action', 'Sci-Fi']}, {'movie': 'Passengers (2016)', 'url': '/assets/utils/ml-latest/img/166635.jpg', 'movie_idx': '1399', 'movie_id': 166635, 'genres': ['Adventure', 'Drama', 'Romance', 'Sci-Fi']}, {'movie': 'Riddick (2013)', 'url': '/assets/utils/ml-latest/img/104243.jpg', 'movie_idx': '1016', 'movie_id': 104243, 'genres': ['Action', 'Sci-Fi', 'Thriller', 'IMAX']}, {'movie': 'John Carter (2012)', 'url': '/assets/utils/ml-latest/img/93363.jpg', 'movie_idx': '900', 'movie_id': 93363, 'genres': ['Action', 'Adventure', 'Sci-Fi', 'IMAX']}, {'movie': 'Terminator Genisys (2015)', 'url': '/assets/utils/ml-latest/img/120799.jpg', 'movie_idx': '1170', 'movie_id': 120799, 'genres': ['Action', 'Adventure', 'Sci-Fi', 'Thriller']}, {'movie': 'Total Recall (2012)', 'url': '/assets/utils/ml-latest/img/95875.jpg', 'movie_idx': '923', 'movie_id': 95875, 'genres': ['Action', 'Sci-Fi', 'Thriller']}, {'movie': 'Elysium (2013)', 'url': '/assets/utils/ml-latest/img/103253.jpg', 'movie_idx': '998', 'movie_id': 103253, 'genres': ['Action', 'Drama', 'Sci-Fi', 'IMAX']}], 'order': 0}}, 'algorithm_assignment': {'0': {'algorithm': 'relevance_based', 'name': 'gamma', 'order': 1}, '1': {'algorithm': 'weighted_average', 'name': 'delta', 'order': 0}}, 'result_layout': 'rows', 'refinement_layout': '3', 'shown': {'relevance_based': [[1288, 1108, 1327, 972, 1084, 1367, 1326, 1204, 1377, 1107], [1422, 1236, 1346, 1409, 
1386, 1471, 1352, 1433, 1403, 1349], [1462, 1072, 1665, 1429, 1191, 1229, 1238, 1468, 966, 1464]], 'weighted_average': [[1084, 1204, 1163, 972, 954, 973, 797, 1206, 1275, 1089], [1054, 1230, 1205, 1213, 1154, 813, 733, 1117, 764, 1166], [871, 1034, 1072, 1077, 1399, 1016, 900, 1170, 923, 998]]}}
# What does an iteration-ended record look like?
d.loc[3377]
participation 36 interaction_type iteration-ended time 2023-01-15 23:19:22.851985 data {"iteration": 6, "selected": [[1040, 314, 355,... iteration 6.0 result_layout columns GAMMA 1.0 DELTA 0.0 Name: 3377, dtype: object
json.loads(d.loc[3377]["data"])
#selected_variants: did the click land on the advantaged or the disadvantaged algorithm?
#selected: sequence of all selected items (movie_idx) in all iterations so far
{'iteration': 6, 'selected': [[1040, 314, 355, 956, 1231, 468], [1155, 1228, 1142, 1039, 883], [1140, 1054], [293, 392, 885, 1345], [1417, 329], [345, 437]], 'new_weights': [0.33333333333333337, 0.33333333333333337, 0.33333333333333337], 'selected_variants': [[0, 0, 0, 0, 1, 1], [0, 0, 0, 0, 0], [1, 0], [0, 0, 0, 0], [0, 1], [0, 0]], 'dont_like_anything': [False, False, False, False, False, False], 'algorithm_comparison': ['third', 'third', 'fourth', 'second', 'third', 'first'], 'ratings': [{'gamma': 4.0, 'delta': 4.0}, {'gamma': 4.0, 'delta': 4.0}, {'gamma': 2.0, 'delta': 3.0}, {'gamma': 2.0, 'delta': 3.0}, {'gamma': 2.0, 'delta': 2.0}, {'gamma': 1.0, 'delta': 3.0}]}
# Adding information on whether the selected item was displayed on an
# advantaged position (variant=0) or not (variant=1). Rows that are not
# selected-item interactions, or whose payload has no "variant", keep -1.
d["variant"] = -1
print(d.shape)

# Build the row mask once and decode each JSON payload in a single pass.
is_selected_item = d["interaction_type"] == "selected-item"
d.loc[is_selected_item, "variant"] = d.loc[is_selected_item, "data"].map(
    lambda raw: json.loads(raw)["selected_item"].get("variant", -1)
)

# Use the N_ITERATIONS constant defined earlier in this file instead of
# repeating the literal 8.
d = d.loc[d.iteration <= N_ITERATIONS]
print(d.shape)
(30305, 9) (30264, 9)
# Keep only the rows whose variant is non-negative, as an independent copy so
# that columns can be added to it without modifying `d`.
selected_item_interactions = d[d.variant >= 0].copy()
selected_item_interactions.shape
(6980, 9)
selected_item_interactions.head()
participation | interaction_type | time | data | iteration | result_layout | GAMMA | DELTA | variant | |
---|---|---|---|---|---|---|---|---|---|
id | |||||||||
3271 | 36 | selected-item | 2023-01-15 23:15:59.742434 | {"selected_item": {"genres": ["Adventure", "Fa... | 1.0 | row-single-scrollable | 1.0 | 0.0 | 0 |
3272 | 36 | selected-item | 2023-01-15 23:16:00.293013 | {"selected_item": {"genres": ["Adventure", "Fa... | 1.0 | row-single-scrollable | 1.0 | 0.0 | 0 |
3273 | 36 | selected-item | 2023-01-15 23:16:00.636370 | {"selected_item": {"genres": ["Action", "Adven... | 1.0 | row-single-scrollable | 1.0 | 0.0 | 0 |
3274 | 36 | selected-item | 2023-01-15 23:16:02.500438 | {"selected_item": {"genres": ["Adventure", "Fa... | 1.0 | row-single-scrollable | 1.0 | 0.0 | 0 |
3277 | 36 | selected-item | 2023-01-15 23:16:09.472159 | {"selected_item": {"genres": ["Fantasy"], "mov... | 1.0 | row-single-scrollable | 1.0 | 0.0 | 1 |
# information available for selected-item interaction_type
#selected_item: current selection
#selected_items: selections made so far (in the example below the list also contains the current selection)
json.loads(selected_item_interactions.loc[47768]["data"])
{'selected_item': {'genres': ['Action', 'Fantasy', 'Horror', 'Thriller'], 'movie': 'Underworld: Vzpoura Lycanů (2009) Akční|Fantasy|Horor|Thriller', 'movie_id': 65682, 'movie_idx': '650', 'url': '/assets/utils/ml-latest/img/65682.jpg', 'variant': 1}, 'selected_items': [{'genres': ['Action', 'Comedy', 'IMAX'], 'movie': 'Noc v muzeu 2 (2009) Akční|Komedie|IMAX', 'movie_id': 68793, 'movie_idx': '674', 'url': '/assets/utils/ml-latest/img/68793.jpg', 'variant': 0}, {'genres': ['Action', 'Fantasy', 'Horror', 'IMAX'], 'movie': 'Underworld: Probuzení (2012) Akční|Fantasy|Horor|IMAX', 'movie_id': 91974, 'movie_idx': '890', 'url': '/assets/utils/ml-latest/img/91974.jpg', 'variant': 0}, {'genres': ['Action', 'Fantasy', 'Horror'], 'movie': 'Underworld: Evolution (2006) Akční|Fantasy|Horor', 'movie_id': 42738, 'movie_idx': '472', 'url': '/assets/utils/ml-latest/img/42738.jpg', 'variant': 1}, {'genres': ['Action', 'Fantasy', 'Horror', 'Thriller'], 'movie': 'Underworld: Vzpoura Lycanů (2009) Akční|Fantasy|Horor|Thriller', 'movie_id': 65682, 'movie_idx': '650', 'url': '/assets/utils/ml-latest/img/65682.jpg', 'variant': 1}], 'context': {'url': 'http://hmon.ms.mff.cuni.cz:5000/plugin1/compare-algorithms', 'time': '2023-02-05T18:46:36.111Z', 'viewport': {'left': 0, 'top': -620, 'width': 1249.3333740234375, 'height': 1856.3958740234375}, 'extra': {'variant': 1}}}
# adding information on corresponding MovieID
def getSelectedMovieId(x):
    """Return the ``movie_id`` of the ``selected_item`` object in JSON string *x*.

    Raises ``KeyError`` when either key is missing from the decoded payload.
    """
    selected_item = json.loads(x)["selected_item"]
    return selected_item["movie_id"]
# Bracket assignment creates the movieID column directly, so no NaN
# placeholder column has to be created first, and the extractor is passed to
# map() without a wrapping lambda.
selected_item_interactions["movieID"] = selected_item_interactions["data"].map(getSelectedMovieId)

# Adding information on which algorithm is responsible for the selection:
# "DELTA" where the clicked variant equals the DELTA position, "GAMMA" for
# every other row (GAMMA is the default, as before).
selected_item_interactions["selected_algorithm"] = np.where(
    selected_item_interactions["variant"] == selected_item_interactions["DELTA"],
    "DELTA",
    "GAMMA",
)
selected_item_interactions.head()
participation | interaction_type | time | data | iteration | result_layout | GAMMA | DELTA | variant | movieID | selected_algorithm | |
---|---|---|---|---|---|---|---|---|---|---|---|
id | |||||||||||
3271 | 36 | selected-item | 2023-01-15 23:15:59.742434 | {"selected_item": {"genres": ["Adventure", "Fa... | 1.0 | row-single-scrollable | 1.0 | 0.0 | 0 | 106489 | DELTA |
3272 | 36 | selected-item | 2023-01-15 23:16:00.293013 | {"selected_item": {"genres": ["Adventure", "Fa... | 1.0 | row-single-scrollable | 1.0 | 0.0 | 0 | 5952 | DELTA |
3273 | 36 | selected-item | 2023-01-15 23:16:00.636370 | {"selected_item": {"genres": ["Action", "Adven... | 1.0 | row-single-scrollable | 1.0 | 0.0 | 0 | 7153 | DELTA |
3274 | 36 | selected-item | 2023-01-15 23:16:02.500438 | {"selected_item": {"genres": ["Adventure", "Fa... | 1.0 | row-single-scrollable | 1.0 | 0.0 | 0 | 98809 | DELTA |
3277 | 36 | selected-item | 2023-01-15 23:16:09.472159 | {"selected_item": {"genres": ["Fantasy"], "mov... | 1.0 | row-single-scrollable | 1.0 | 0.0 | 1 | 135143 | GAMMA |