Two main tables: participants and interactions
import pandas as pd
import numpy as np
import os
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
# Load the participation export; the first CSV column becomes the index.
df_participation = pd.read_csv("participation-export.csv", index_col=0)

# Keep the rows from index label 31 onward (label-based slice).
# NOTE(review): presumably the earlier rows are pilot/test sessions - confirm.
# The explicit copy makes the frame independent of the original, so the column
# assignment below cannot raise SettingWithCopyWarning.
df_participation = df_participation.loc[31:].copy()

# Missing e-mails become "" so that str.contains yields a clean boolean mask.
df_participation["participant_email"] = df_participation["participant_email"].fillna("")
df_participation = df_participation[
    ~df_participation["participant_email"].str.contains("testuser")
]
df_participation.shape

# Drop participants without an age group, then split the rest by whether a
# finish time was recorded.
df_participation = df_participation[df_participation["age_group"].notna()]
df_completed_participation = df_participation[df_participation["time_finished"].notna()]
df_uncompleted_participation = df_participation[df_participation["time_finished"].isna()]

# Remove the e-mail and extra_data columns before saving the completed subset.
df_completed_participation = df_completed_participation.drop(
    ["participant_email", "extra_data"], axis=1
)
df_completed_participation.to_csv("participation-export_filtered.csv")
# Load the raw interaction log exported as JSON.
df_interaction = pd.read_json(
    "interaction-export.json",
    encoding='utf-8',
)
def get_participants_interaction(df_i, df_p):
    """Return the rows of *df_i* that belong to the participants in *df_p*.

    A row is kept when its ``participation`` value appears in the index of
    *df_p*. The original row labels of *df_i* are preserved.
    """
    participant_ids = df_p.index
    belongs_to_participant = df_i["participation"].isin(participant_ids)
    return df_i.loc[belongs_to_participant]
# Keep only the interactions of participants who completed the study and
# persist that subset.
df_interaction = get_participants_interaction(df_interaction, df_completed_participation)
df_interaction.to_json("interaction-export_filtered.json")

# Smaller sample: interactions of the first 20 completed participants
# (first 20 by row position).
df_interactionTop20 = get_participants_interaction(
    df_interaction, df_completed_participation.iloc[0:20]
)
df_interactionTop20.to_json("interaction-export_filteredSmall.json")

# Reload both filtered exports from disk so the following steps work from the
# saved files.
df_interaction = pd.read_json("interaction-export_filtered.json", encoding='utf-8')
df_completed_participation = pd.read_csv("participation-export_filtered.csv", index_col=0)
df_interaction.head()
| id | participation | interaction_type | time | data | |
|---|---|---|---|---|---|
| 2718 | 3219 | 36 | loaded-page | 2023-01-15 23:13:22.508734 | {"page": "preference_elicitation", "context": ... |
| 2719 | 3220 | 36 | changed-viewport | 2023-01-15 23:13:27.502013 | {"viewport": {"left": 0, "top": 0, "width": 25... |
| 2720 | 3221 | 36 | on-input | 2023-01-15 23:13:31.569834 | {"search_text_box_value": "potter", "context":... |
| 2721 | 3222 | 36 | on-input | 2023-01-15 23:13:31.580454 | {"id": "", "text": "Search", "name": "search",... |
| 2722 | 3223 | 36 | selected-item | 2023-01-15 23:13:32.643579 | {"selected_item": {"movieName": "Harry Potter ... |
df_interaction.interaction_type.unique()
array(['loaded-page', 'changed-viewport', 'on-input', 'selected-item',
'elicitation-ended', 'iteration-started', 'iteration-ended',
'study-ended', 'deselected-item'], dtype=object)
df_completed_participation.head()
| age_group | gender | education | ml_familiar | user_study_id | time_joined | time_finished | uuid | language | |
|---|---|---|---|---|---|---|---|---|---|
| id | |||||||||
| 36 | 21.0 | 0.0 | 4.0 | True | 13 | 2023-01-15 23:13:22.047657 | 2023-01-15 23:20:18.022112 | JQpLq2n0cE-86IMSaaIuig | en |
| 37 | 29.0 | 0.0 | 5.0 | True | 13 | 2023-01-15 23:19:44.236601 | 2023-01-15 23:27:10.655670 | H43BQ14jGPykvJK-BPMPlg | en |
| 39 | 21.0 | 0.0 | 2.0 | True | 13 | 2023-01-16 17:04:24.403909 | 2023-01-16 17:20:09.564196 | Bunsc02cUiXVrGE_1R7StQ | en |
| 40 | 21.0 | 1.0 | 2.0 | False | 13 | 2023-01-16 18:59:21.828438 | 2023-01-16 19:12:04.004289 | 9fKA90WNfh5mzgD_v1Otgg | cs |
| 42 | 21.0 | 0.0 | 4.0 | False | 13 | 2023-01-16 23:14:00.585090 | 2023-01-16 23:23:47.331900 | pVVFqHS8_nWp0uISHV61qg | en |
# NOTE(review): presumably the number of iterations each participant went
# through in the study - confirm.
N_ITERATIONS = 8


def get_iteration(x):
    """Return the ``"iteration"`` field of the JSON document encoded in *x*.

    Args:
        x: JSON text (``str``/``bytes``) whose top level is an object.

    Raises:
        json.JSONDecodeError: if *x* is not valid JSON.
        KeyError: if the decoded object has no ``"iteration"`` key.
    """
    # Imported locally: the file-level ``import json`` only appears further
    # down, so this function would otherwise fail with NameError when called
    # before that import has run.
    import json

    return json.loads(x)["iteration"]
#This cell takes up to a minute or two to complete
import json
def set_iteration(row):
    """Return a copy of *row* with an added ``iteration`` entry.

    For ``iteration-started`` and ``iteration-ended`` rows the value is the
    ``"iteration"`` field of the JSON text stored under ``data``; for every
    other interaction type it is ``None``.

    Intended for ``DataFrame.apply(..., axis=1)``. The incoming row is copied
    first because pandas does not support functions that mutate the object
    passed in by ``apply``.
    """
    row = row.copy()
    # Bracket access avoids any clash between column names and Series
    # attributes (``row.data`` in particular).
    if row["interaction_type"] in ("iteration-started", "iteration-ended"):
        row["iteration"] = json.loads(row["data"])["iteration"]
    else:
        row["iteration"] = None
    return row
def set_result_layout(row):
    """Return a copy of *row* with an added ``result_layout`` entry.

    For ``iteration-started`` rows the value is the ``"result_layout"`` field
    of the JSON text stored under ``data``; for every other interaction type
    it is ``None``.

    Intended for ``DataFrame.apply(..., axis=1)``. The incoming row is copied
    first because pandas does not support functions that mutate the object
    passed in by ``apply``.
    """
    row = row.copy()
    if row["interaction_type"] == "iteration-started":
        row["result_layout"] = json.loads(row["data"])["result_layout"]
    else:
        row["result_layout"] = None
    return row
# Example of the payload section read by set_mapping:
# 'algorithm_assignment': {'0': {'algorithm': 'relevance_based',
#                                'name': 'gamma',
#                                'order': 1},
#                          '1': {'algorithm': 'weighted_average', 'name': 'delta', 'order': 0}},
def set_mapping(row):
    """Return a copy of *row* with one entry per algorithm name holding its order.

    For ``iteration-started`` rows, every entry of ``algorithm_assignment`` in
    the JSON text stored under ``data`` contributes a key (its ``name``,
    upper-cased) whose value is its ``order``. For every other interaction
    type, ``GAMMA`` and ``DELTA`` are set to ``None``.

    Intended for ``DataFrame.apply(..., axis=1)``. The incoming row is copied
    first because pandas does not support functions that mutate the object
    passed in by ``apply``.
    """
    row = row.copy()
    if row["interaction_type"] == 'iteration-started':
        assignments = json.loads(row["data"])['algorithm_assignment'].values()
        for mapping in assignments:
            row[mapping['name'].upper()] = mapping['order']
    else:
        row['GAMMA'] = None
        row['DELTA'] = None
    return row
# Work on a copy indexed by interaction id so df_interaction stays untouched.
d = df_interaction.copy()
d = d.set_index("id")

# Row-wise enrichment: pull iteration, result layout and the GAMMA/DELTA order
# out of the JSON payload of the iteration-started / iteration-ended rows.
d = (
    d.apply(set_iteration, axis=1)
    .apply(set_result_layout, axis=1)
    .apply(set_mapping, axis=1)
)

# Propagate those values forward to the following rows of the same
# participation. GroupBy.ffill keeps the original row index, unlike
# groupby(...).apply(lambda x: x.ffill()), whose result is indexed by
# (group key, id) on pandas >= 2.0 and cannot be assigned back to the frame.
# NOTE(review): relies on rows being in chronological order within each
# participation - confirm.
_context_columns = ["iteration", "result_layout", "GAMMA", "DELTA"]
d[_context_columns] = d.groupby("participation", sort=False)[_context_columns].ffill()

# Drop everything logged before a participation's first iteration marker.
d = d[d.iteration.notna()]
# Fallback: if the enrichment step above causes problems, load the
# pre-computed enriched export instead.
d = pd.read_json(
    "interaction-export_filteredEnriched.json",
    encoding='utf-8',
)
# What does a record with interaction_type "iteration-started" look like?
d.loc[26009]
participation 173
interaction_type iteration-started
time 2023-01-20 10:33:24.402124
data {"iteration": 3, "weights": [0.333333333333333...
iteration 3.0
result_layout rows
GAMMA 1.0
DELTA 0.0
Name: 26009, dtype: object
# The GAMMA and DELTA fields denote whether the algorithm was shown at the advantaged (0) or disadvantaged (1) position.
# - The advantaged position denotes the left column, the top row, and so on.
# The main payload resides in the "data" field.
# It contains (among other things) the lists of movies as they appeared for both the gamma and delta algorithms.
# The full history of shown items, including those from previous iterations, is available under "shown".
# movie_idx vs. movie_id: the ordering in our reduced dataset vs. the original ID from MovieLens
json.loads(d.loc[26009]["data"])
{'iteration': 3,
'weights': [0.33333333333333337, 0.33333333333333337, 0.33333333333333337],
'movies': {'gamma': {'movies': [{'movie': 'Valerian and the City of a Thousand Planets (2017)',
'url': '/assets/utils/ml-latest/img/173291.jpg',
'movie_idx': '1462',
'movie_id': 173291,
'genres': ['Action', 'Adventure', 'Sci-Fi']},
{'movie': 'Transcendence (2014)',
'url': '/assets/utils/ml-latest/img/110730.jpg',
'movie_idx': '1072',
'movie_id': 110730,
'genres': ['Drama', 'Sci-Fi', 'IMAX']},
{'movie': 'Rampage (2018)',
'url': '/assets/utils/ml-latest/img/186587.jpg',
'movie_idx': '1665',
'movie_id': 186587,
'genres': ['Action', 'Adventure', 'Sci-Fi']},
{'movie': 'Resident Evil: The Final Chapter (2017)',
'url': '/assets/utils/ml-latest/img/168498.jpg',
'movie_idx': '1429',
'movie_id': 168498,
'genres': ['Action', 'Horror', 'Sci-Fi']},
{'movie': 'Project Almanac (2015)',
'url': '/assets/utils/ml-latest/img/127096.jpg',
'movie_idx': '1191',
'movie_id': 127096,
'genres': ['Sci-Fi', 'Thriller']},
{'movie': 'Pixels (2015)',
'url': '/assets/utils/ml-latest/img/135137.jpg',
'movie_idx': '1229',
'movie_id': 135137,
'genres': ['Action', 'Comedy', 'Sci-Fi']},
{'movie': 'Independence Day: Resurgence (2016)',
'url': '/assets/utils/ml-latest/img/135567.jpg',
'movie_idx': '1238',
'movie_id': 135567,
'genres': ['Action', 'Adventure', 'Sci-Fi']},
{'movie': 'Transformers: The Last Knight (2017)',
'url': '/assets/utils/ml-latest/img/174585.jpg',
'movie_idx': '1468',
'movie_id': 174585,
'genres': ['Action', 'Adventure', 'Sci-Fi', 'Thriller']},
{'movie': 'Hansel & Gretel: Witch Hunters (2013)',
'url': '/assets/utils/ml-latest/img/100163.jpg',
'movie_idx': '966',
'movie_id': 100163,
'genres': ['Action', 'Fantasy', 'Horror', 'IMAX']},
{'movie': 'Seven Sisters (2017)',
'url': '/assets/utils/ml-latest/img/173925.jpg',
'movie_idx': '1464',
'movie_id': 173925,
'genres': ['Sci-Fi', 'Thriller']}],
'order': 1},
'delta': {'movies': [{'movie': 'In Time (2011)',
'url': '/assets/utils/ml-latest/img/90405.jpg',
'movie_idx': '871',
'movie_id': 90405,
'genres': ['Crime', 'Sci-Fi', 'Thriller']},
{'movie': "Ender's Game (2013)",
'url': '/assets/utils/ml-latest/img/106002.jpg',
'movie_idx': '1034',
'movie_id': 106002,
'genres': ['Action', 'Adventure', 'Sci-Fi', 'IMAX']},
{'movie': 'Transcendence (2014)',
'url': '/assets/utils/ml-latest/img/110730.jpg',
'movie_idx': '1072',
'movie_id': 110730,
'genres': ['Drama', 'Sci-Fi', 'IMAX']},
{'movie': 'Lucy (2014)',
'url': '/assets/utils/ml-latest/img/111360.jpg',
'movie_idx': '1077',
'movie_id': 111360,
'genres': ['Action', 'Sci-Fi']},
{'movie': 'Passengers (2016)',
'url': '/assets/utils/ml-latest/img/166635.jpg',
'movie_idx': '1399',
'movie_id': 166635,
'genres': ['Adventure', 'Drama', 'Romance', 'Sci-Fi']},
{'movie': 'Riddick (2013)',
'url': '/assets/utils/ml-latest/img/104243.jpg',
'movie_idx': '1016',
'movie_id': 104243,
'genres': ['Action', 'Sci-Fi', 'Thriller', 'IMAX']},
{'movie': 'John Carter (2012)',
'url': '/assets/utils/ml-latest/img/93363.jpg',
'movie_idx': '900',
'movie_id': 93363,
'genres': ['Action', 'Adventure', 'Sci-Fi', 'IMAX']},
{'movie': 'Terminator Genisys (2015)',
'url': '/assets/utils/ml-latest/img/120799.jpg',
'movie_idx': '1170',
'movie_id': 120799,
'genres': ['Action', 'Adventure', 'Sci-Fi', 'Thriller']},
{'movie': 'Total Recall (2012)',
'url': '/assets/utils/ml-latest/img/95875.jpg',
'movie_idx': '923',
'movie_id': 95875,
'genres': ['Action', 'Sci-Fi', 'Thriller']},
{'movie': 'Elysium (2013)',
'url': '/assets/utils/ml-latest/img/103253.jpg',
'movie_idx': '998',
'movie_id': 103253,
'genres': ['Action', 'Drama', 'Sci-Fi', 'IMAX']}],
'order': 0}},
'algorithm_assignment': {'0': {'algorithm': 'relevance_based',
'name': 'gamma',
'order': 1},
'1': {'algorithm': 'weighted_average', 'name': 'delta', 'order': 0}},
'result_layout': 'rows',
'refinement_layout': '3',
'shown': {'relevance_based': [[1288,
1108,
1327,
972,
1084,
1367,
1326,
1204,
1377,
1107],
[1422, 1236, 1346, 1409, 1386, 1471, 1352, 1433, 1403, 1349],
[1462, 1072, 1665, 1429, 1191, 1229, 1238, 1468, 966, 1464]],
'weighted_average': [[1084,
1204,
1163,
972,
954,
973,
797,
1206,
1275,
1089],
[1054, 1230, 1205, 1213, 1154, 813, 733, 1117, 764, 1166],
[871, 1034, 1072, 1077, 1399, 1016, 900, 1170, 923, 998]]}}
# What does a record with interaction_type "iteration-ended" look like?
d.loc[3377]
participation 36
interaction_type iteration-ended
time 2023-01-15 23:19:22.851985
data {"iteration": 6, "selected": [[1040, 314, 355,...
iteration 6.0
result_layout columns
GAMMA 1.0
DELTA 0.0
Name: 3377, dtype: object
json.loads(d.loc[3377]["data"])
# selected_variants: did the click land on the advantaged or the disadvantaged algorithm?
# selected: sequence of all selected items (movie_idx) in all iterations so far
{'iteration': 6,
'selected': [[1040, 314, 355, 956, 1231, 468],
[1155, 1228, 1142, 1039, 883],
[1140, 1054],
[293, 392, 885, 1345],
[1417, 329],
[345, 437]],
'new_weights': [0.33333333333333337,
0.33333333333333337,
0.33333333333333337],
'selected_variants': [[0, 0, 0, 0, 1, 1],
[0, 0, 0, 0, 0],
[1, 0],
[0, 0, 0, 0],
[0, 1],
[0, 0]],
'dont_like_anything': [False, False, False, False, False, False],
'algorithm_comparison': ['third',
'third',
'fourth',
'second',
'third',
'first'],
'ratings': [{'gamma': 4.0, 'delta': 4.0},
{'gamma': 4.0, 'delta': 4.0},
{'gamma': 2.0, 'delta': 3.0},
{'gamma': 2.0, 'delta': 3.0},
{'gamma': 2.0, 'delta': 2.0},
{'gamma': 1.0, 'delta': 3.0}]}
# Record whether each selected item was displayed at the advantaged position
# (variant == 0) or not (variant == 1). -1 marks rows that are not selections,
# or selections whose payload carries no "variant".
d["variant"] = -1
print(d.shape)

is_selection = d["interaction_type"] == "selected-item"
d.loc[is_selection, "variant"] = d.loc[is_selection, "data"].map(
    lambda payload: json.loads(payload)["selected_item"].get("variant", -1)
)

# Keep only rows up to the last regular iteration; the bound comes from
# N_ITERATIONS, so the constant is not duplicated as a literal here.
d = d.loc[d["iteration"] <= N_ITERATIONS]
print(d.shape)
(30305, 9) (30264, 9)
# Independent copy of the rows that carry a variant (variant is 0 or above),
# so that columns can be added without touching d.
selected_item_interactions = d.loc[d["variant"].ge(0)].copy()
selected_item_interactions.shape
(6980, 9)
selected_item_interactions.head()
| participation | interaction_type | time | data | iteration | result_layout | GAMMA | DELTA | variant | |
|---|---|---|---|---|---|---|---|---|---|
| id | |||||||||
| 3271 | 36 | selected-item | 2023-01-15 23:15:59.742434 | {"selected_item": {"genres": ["Adventure", "Fa... | 1.0 | row-single-scrollable | 1.0 | 0.0 | 0 |
| 3272 | 36 | selected-item | 2023-01-15 23:16:00.293013 | {"selected_item": {"genres": ["Adventure", "Fa... | 1.0 | row-single-scrollable | 1.0 | 0.0 | 0 |
| 3273 | 36 | selected-item | 2023-01-15 23:16:00.636370 | {"selected_item": {"genres": ["Action", "Adven... | 1.0 | row-single-scrollable | 1.0 | 0.0 | 0 |
| 3274 | 36 | selected-item | 2023-01-15 23:16:02.500438 | {"selected_item": {"genres": ["Adventure", "Fa... | 1.0 | row-single-scrollable | 1.0 | 0.0 | 0 |
| 3277 | 36 | selected-item | 2023-01-15 23:16:09.472159 | {"selected_item": {"genres": ["Fantasy"], "mov... | 1.0 | row-single-scrollable | 1.0 | 0.0 | 1 |
# Information available for the selected-item interaction_type:
# selected_item: the current selection
# selected_items: the previous selections
json.loads(selected_item_interactions.loc[47768]["data"])
{'selected_item': {'genres': ['Action', 'Fantasy', 'Horror', 'Thriller'],
'movie': 'Underworld: Vzpoura Lycanů (2009) Akční|Fantasy|Horor|Thriller',
'movie_id': 65682,
'movie_idx': '650',
'url': '/assets/utils/ml-latest/img/65682.jpg',
'variant': 1},
'selected_items': [{'genres': ['Action', 'Comedy', 'IMAX'],
'movie': 'Noc v muzeu 2 (2009) Akční|Komedie|IMAX',
'movie_id': 68793,
'movie_idx': '674',
'url': '/assets/utils/ml-latest/img/68793.jpg',
'variant': 0},
{'genres': ['Action', 'Fantasy', 'Horror', 'IMAX'],
'movie': 'Underworld: Probuzení (2012) Akční|Fantasy|Horor|IMAX',
'movie_id': 91974,
'movie_idx': '890',
'url': '/assets/utils/ml-latest/img/91974.jpg',
'variant': 0},
{'genres': ['Action', 'Fantasy', 'Horror'],
'movie': 'Underworld: Evolution (2006) Akční|Fantasy|Horor',
'movie_id': 42738,
'movie_idx': '472',
'url': '/assets/utils/ml-latest/img/42738.jpg',
'variant': 1},
{'genres': ['Action', 'Fantasy', 'Horror', 'Thriller'],
'movie': 'Underworld: Vzpoura Lycanů (2009) Akční|Fantasy|Horor|Thriller',
'movie_id': 65682,
'movie_idx': '650',
'url': '/assets/utils/ml-latest/img/65682.jpg',
'variant': 1}],
'context': {'url': 'http://hmon.ms.mff.cuni.cz:5000/plugin1/compare-algorithms',
'time': '2023-02-05T18:46:36.111Z',
'viewport': {'left': 0,
'top': -620,
'width': 1249.3333740234375,
'height': 1856.3958740234375},
'extra': {'variant': 1}}}
# adding information on corresponding MovieID
def getSelectedMovieId(x):
    """Return ``selected_item.movie_id`` from the JSON document encoded in *x*.

    Args:
        x: JSON text of a ``selected-item`` interaction payload.

    Raises:
        json.JSONDecodeError: if *x* is not valid JSON.
        KeyError: if ``selected_item`` or its ``movie_id`` is missing.
    """
    selected_item = json.loads(x)["selected_item"]
    return selected_item["movie_id"]
# Extract the movie id of every selection from its JSON payload. A single
# bracket assignment creates the column directly, so no NaN placeholder is
# needed (the placeholder only existed to make attribute-style assignment work).
selected_item_interactions["movieID"] = selected_item_interactions["data"].map(
    getSelectedMovieId
)
# Attribute each selection to an algorithm: DELTA when the clicked variant
# equals the position stored in the DELTA column, GAMMA in every other case.
selected_item_interactions["selected_algorithm"] = np.where(
    selected_item_interactions["variant"] == selected_item_interactions["DELTA"],
    "DELTA",
    "GAMMA",
)
selected_item_interactions.head()
| participation | interaction_type | time | data | iteration | result_layout | GAMMA | DELTA | variant | movieID | selected_algorithm | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| id | |||||||||||
| 3271 | 36 | selected-item | 2023-01-15 23:15:59.742434 | {"selected_item": {"genres": ["Adventure", "Fa... | 1.0 | row-single-scrollable | 1.0 | 0.0 | 0 | 106489 | DELTA |
| 3272 | 36 | selected-item | 2023-01-15 23:16:00.293013 | {"selected_item": {"genres": ["Adventure", "Fa... | 1.0 | row-single-scrollable | 1.0 | 0.0 | 0 | 5952 | DELTA |
| 3273 | 36 | selected-item | 2023-01-15 23:16:00.636370 | {"selected_item": {"genres": ["Action", "Adven... | 1.0 | row-single-scrollable | 1.0 | 0.0 | 0 | 7153 | DELTA |
| 3274 | 36 | selected-item | 2023-01-15 23:16:02.500438 | {"selected_item": {"genres": ["Adventure", "Fa... | 1.0 | row-single-scrollable | 1.0 | 0.0 | 0 | 98809 | DELTA |
| 3277 | 36 | selected-item | 2023-01-15 23:16:09.472159 | {"selected_item": {"genres": ["Fantasy"], "mov... | 1.0 | row-single-scrollable | 1.0 | 0.0 | 1 | 135143 | GAMMA |