derek-thomas HF staff committed on
Commit
94acd5b
1 Parent(s): ff9b905

Added web-links for images, and polished output

Browse files
Files changed (1) hide show
  1. notebooks/04_get_embeddings.ipynb +5 -424
notebooks/04_get_embeddings.ipynb CHANGED
@@ -892,428 +892,6 @@
892
  "!echo \"$folder_out_str\" && cat \"$folder_out_str\"/*.ndjson | wc -l"
893
  ]
894
  },
895
- {
896
- "cell_type": "code",
897
- "execution_count": 12,
898
- "id": "cdee2b1c-0493-4b3e-8ecb-9d79109c756e",
899
- "metadata": {
900
- "collapsed": true,
901
- "jupyter": {
902
- "outputs_hidden": true
903
- },
904
- "tags": []
905
- },
906
- "outputs": [
907
- {
908
- "data": {
909
- "text/plain": [
910
- "{'content': 'عشاء كارين هو من سلسلة مطاعم استرالية يهدف عمداً عن تجربة تناول طعام غير سارَة ويتم توجيه الموظفين لإهانة العملاء طوال وجباتهم.\\nاقتبس اسم المطعم من المصطلح العامي على الإنترنت (كارين) والذي يستخدم لوصف امرأة بيضاء مسنة وقحة بشكل نمطي.\\nتاريخ المطعم.\\nتم إنشاء السلسلة في أستراليا (سيدني) في عام 2021 من قبل إيدين ليفن وجيمس فاريل. المطعم ذو طابع خاص يعتمد على خدمة تجربة طعام غير سارة حيث يدفع العملاء للموظفين لإهانتهم وكان من المفترض ان يكون المطعم مطعماً منبثقاً لمدة ستة أشهر في وورلد سكوير.\\nاثارت فكرة المطعم في البداية ردات فعل متغايرة مما أثار الخوف بشأن ما إذا كانت الإهانات المتبادلة من الممكن ان تعرض الموظفين لسوء المعاملة من قبل العملاء.\\nاسم (كارين) هو إشارة إلى الإسم المستخدم في الميمات (النكت التي تنشهر بسرعة في مواقع التواصل) لوصف امرأة بيضاء في منتصف العمر ووقحة بشكل نمطي.\\nيطلب من الموظفين ارتداء شخصية وقحة والسخرية من العملاء بشكل هزلي اثناء تناول وجباتهم ومن المتوقع ان يعيد العملاء هذا السلوك من خلال التصرف بوقاحة تجاه الموظفين ومع ذلك يُحظر على العملاء والموظفين استخدام الإهانات العنصرية أو التحيز الجنسي أو رهاب المثلية الجنسية.\\nتتضمن العديد من هذه التبادلات لغة نابية ويجب ان يكون برفقة الاشخاص اللذين يقلون عن 16 عاماََ بالغين.\\nكما يمكن لمالكي بطاقة هوية تظهر ان اسمهم كارين الحصول على مشروب مجاني.\\n',\n",
911
- " 'content_type': 'text',\n",
912
- " 'score': None,\n",
913
- " 'meta': {'id': '8974231',\n",
914
- " 'revid': '593870',\n",
915
- " 'url': 'https://ar.wikipedia.org/wiki?curid=8974231',\n",
916
- " 'title': 'مطعم عشاء كارين',\n",
917
- " '_split_id': 0,\n",
918
- " '_split_overlap': [{'doc_id': '288196225044b53e6ff86f2485257a0a',\n",
919
- " 'range': [790, 1225]}]},\n",
920
- " 'id_hash_keys': ['content'],\n",
921
- " 'embedding': [0.053985596,\n",
922
- " -0.06933594,\n",
923
- " -0.046417236,\n",
924
- " 0.07788086,\n",
925
- " -0.06768799,\n",
926
- " -0.01789856,\n",
927
- " 0.03265381,\n",
928
- " -0.1060791,\n",
929
- " 0.046325684,\n",
930
- " 0.00022745132,\n",
931
- " 0.01524353,\n",
932
- " 0.021408081,\n",
933
- " -0.039398193,\n",
934
- " -0.076049805,\n",
935
- " -0.058380127,\n",
936
- " -0.027786255,\n",
937
- " 0.067993164,\n",
938
- " -0.00894928,\n",
939
- " 0.11645508,\n",
940
- " 0.09039307,\n",
941
- " 0.032684326,\n",
942
- " -0.023635864,\n",
943
- " 0.00970459,\n",
944
- " 0.08312988,\n",
945
- " 0.01638794,\n",
946
- " -0.058380127,\n",
947
- " 0.0501709,\n",
948
- " 0.035583496,\n",
949
- " -0.042297363,\n",
950
- " -0.011688232,\n",
951
- " 0.010314941,\n",
952
- " 0.026626587,\n",
953
- " 0.039978027,\n",
954
- " 0.04095459,\n",
955
- " 0.039398193,\n",
956
- " -0.029754639,\n",
957
- " 0.01360321,\n",
958
- " -0.0015621185,\n",
959
- " -0.009384155,\n",
960
- " 0.043151855,\n",
961
- " 0.09710693,\n",
962
- " -0.0044937134,\n",
963
- " 0.0059928894,\n",
964
- " -0.07293701,\n",
965
- " -0.012710571,\n",
966
- " 0.013046265,\n",
967
- " -0.08099365,\n",
968
- " 0.015457153,\n",
969
- " -0.037963867,\n",
970
- " -0.016601562,\n",
971
- " -0.09564209,\n",
972
- " 0.038238525,\n",
973
- " -0.020751953,\n",
974
- " -0.016494751,\n",
975
- " 0.05529785,\n",
976
- " -0.024490356,\n",
977
- " 0.011192322,\n",
978
- " 0.021087646,\n",
979
- " -0.1274414,\n",
980
- " 0.04840088,\n",
981
- " 0.068603516,\n",
982
- " -0.026535034,\n",
983
- " 0.010772705,\n",
984
- " -0.03778076,\n",
985
- " -0.046447754,\n",
986
- " 0.012870789,\n",
987
- " -0.008674622,\n",
988
- " 0.062561035,\n",
989
- " -0.06188965,\n",
990
- " -0.05831909,\n",
991
- " -0.05557251,\n",
992
- " 0.018417358,\n",
993
- " -0.028656006,\n",
994
- " 0.015487671,\n",
995
- " 0.0146865845,\n",
996
- " -0.010131836,\n",
997
- " 0.06652832,\n",
998
- " -0.09710693,\n",
999
- " -0.012542725,\n",
1000
- " 0.04815674,\n",
1001
- " 0.019470215,\n",
1002
- " 0.0017337799,\n",
1003
- " -0.0069770813,\n",
1004
- " -0.027648926,\n",
1005
- " -0.103149414,\n",
1006
- " -0.019058228,\n",
1007
- " -0.049926758,\n",
1008
- " -0.05307007,\n",
1009
- " 0.13562012,\n",
1010
- " 0.016311646,\n",
1011
- " -0.10913086,\n",
1012
- " -0.04837036,\n",
1013
- " 0.033996582,\n",
1014
- " -0.042144775,\n",
1015
- " 0.043060303,\n",
1016
- " -0.10797119,\n",
1017
- " -0.087402344,\n",
1018
- " -0.051452637,\n",
1019
- " 0.018203735,\n",
1020
- " 0.15771484,\n",
1021
- " -0.025131226,\n",
1022
- " 0.074401855,\n",
1023
- " -0.0033817291,\n",
1024
- " -0.026138306,\n",
1025
- " 0.032165527,\n",
1026
- " -0.010025024,\n",
1027
- " -0.016189575,\n",
1028
- " -0.001121521,\n",
1029
- " -0.049560547,\n",
1030
- " -0.034057617,\n",
1031
- " -0.04336548,\n",
1032
- " 0.11694336,\n",
1033
- " -0.035949707,\n",
1034
- " -0.061767578,\n",
1035
- " -0.101623535,\n",
1036
- " 0.0051116943,\n",
1037
- " 0.039245605,\n",
1038
- " 0.0053977966,\n",
1039
- " 0.016708374,\n",
1040
- " -0.012908936,\n",
1041
- " -0.022369385,\n",
1042
- " 0.03475952,\n",
1043
- " 0.035583496,\n",
1044
- " 0.046417236,\n",
1045
- " -0.06097412,\n",
1046
- " 0.07409668,\n",
1047
- " -0.07312012,\n",
1048
- " -0.019683838,\n",
1049
- " -0.07086182,\n",
1050
- " -0.03967285,\n",
1051
- " -0.016937256,\n",
1052
- " -0.02658081,\n",
1053
- " 0.105651855,\n",
1054
- " 0.05831909,\n",
1055
- " 0.03778076,\n",
1056
- " -0.07543945,\n",
1057
- " 0.055267334,\n",
1058
- " 0.012268066,\n",
1059
- " 0.07720947,\n",
1060
- " -0.068359375,\n",
1061
- " -0.011100769,\n",
1062
- " -0.0072898865,\n",
1063
- " 0.051635742,\n",
1064
- " 0.027954102,\n",
1065
- " 0.043121338,\n",
1066
- " 0.032440186,\n",
1067
- " 0.081604004,\n",
1068
- " -0.058807373,\n",
1069
- " -0.048706055,\n",
1070
- " 0.07867432,\n",
1071
- " 0.014404297,\n",
1072
- " -0.0024490356,\n",
1073
- " -0.008575439,\n",
1074
- " 0.010345459,\n",
1075
- " -0.013870239,\n",
1076
- " -0.019424438,\n",
1077
- " 0.020889282,\n",
1078
- " 0.013290405,\n",
1079
- " 0.012817383,\n",
1080
- " 0.015930176,\n",
1081
- " 0.03768921,\n",
1082
- " -0.0012264252,\n",
1083
- " 0.0010223389,\n",
1084
- " -0.023483276,\n",
1085
- " -0.005252838,\n",
1086
- " -0.051574707,\n",
1087
- " 0.034729004,\n",
1088
- " -0.004081726,\n",
1089
- " 0.0317688,\n",
1090
- " 0.0087890625,\n",
1091
- " 0.11047363,\n",
1092
- " 0.05291748,\n",
1093
- " -0.12841797,\n",
1094
- " 0.031799316,\n",
1095
- " -0.09881592,\n",
1096
- " 0.07299805,\n",
1097
- " 0.02859497,\n",
1098
- " 0.024780273,\n",
1099
- " 0.049316406,\n",
1100
- " -0.07122803,\n",
1101
- " 0.03930664,\n",
1102
- " 0.012702942,\n",
1103
- " 0.06915283,\n",
1104
- " -0.03967285,\n",
1105
- " 0.035949707,\n",
1106
- " -0.045166016,\n",
1107
- " 0.09625244,\n",
1108
- " 0.001203537,\n",
1109
- " 0.022750854,\n",
1110
- " -0.03665161,\n",
1111
- " -0.013633728,\n",
1112
- " -0.018112183,\n",
1113
- " 0.054107666,\n",
1114
- " -0.007106781,\n",
1115
- " 0.004924774,\n",
1116
- " -0.014953613,\n",
1117
- " 0.07147217,\n",
1118
- " -0.013092041,\n",
1119
- " 0.016845703,\n",
1120
- " -0.002910614,\n",
1121
- " -0.05593872,\n",
1122
- " 0.027404785,\n",
1123
- " 0.002696991,\n",
1124
- " 0.05822754,\n",
1125
- " 0.0066566467,\n",
1126
- " -0.09729004,\n",
1127
- " 0.040100098,\n",
1128
- " -0.00868988,\n",
1129
- " 0.10290527,\n",
1130
- " 0.04144287,\n",
1131
- " -0.012680054,\n",
1132
- " 0.039215088,\n",
1133
- " -0.14074707,\n",
1134
- " 0.08215332,\n",
1135
- " -0.05078125,\n",
1136
- " -0.028549194,\n",
1137
- " 0.011962891,\n",
1138
- " 0.028900146,\n",
1139
- " -0.02444458,\n",
1140
- " 0.004207611,\n",
1141
- " -0.00995636,\n",
1142
- " 0.028717041,\n",
1143
- " -0.08325195,\n",
1144
- " -0.047424316,\n",
1145
- " 0.032043457,\n",
1146
- " -0.04675293,\n",
1147
- " -0.064575195,\n",
1148
- " -0.03857422,\n",
1149
- " 0.0070266724,\n",
1150
- " -0.12634277,\n",
1151
- " -0.0803833,\n",
1152
- " -0.05419922,\n",
1153
- " 0.064331055,\n",
1154
- " -0.004421234,\n",
1155
- " -0.00844574,\n",
1156
- " -0.05923462,\n",
1157
- " 0.052490234,\n",
1158
- " 0.032592773,\n",
1159
- " 0.024230957,\n",
1160
- " 0.075683594,\n",
1161
- " 0.011390686,\n",
1162
- " 0.013252258,\n",
1163
- " -0.029403687,\n",
1164
- " -0.03338623,\n",
1165
- " -0.045928955,\n",
1166
- " 0.015022278,\n",
1167
- " -0.08343506,\n",
1168
- " 0.060180664,\n",
1169
- " 0.076171875,\n",
1170
- " 0.058898926,\n",
1171
- " 0.026184082,\n",
1172
- " -0.04031372,\n",
1173
- " -0.0847168,\n",
1174
- " -0.06628418,\n",
1175
- " -0.017974854,\n",
1176
- " -0.09967041,\n",
1177
- " 0.07952881,\n",
1178
- " 0.012413025,\n",
1179
- " 0.04006958,\n",
1180
- " 0.07788086,\n",
1181
- " 0.014640808,\n",
1182
- " -0.029281616,\n",
1183
- " -0.04949951,\n",
1184
- " 0.012565613,\n",
1185
- " 0.025466919,\n",
1186
- " -0.018478394,\n",
1187
- " -0.072753906,\n",
1188
- " 0.08905029,\n",
1189
- " 0.023849487,\n",
1190
- " 0.012306213,\n",
1191
- " -0.089538574,\n",
1192
- " -0.05657959,\n",
1193
- " 0.11804199,\n",
1194
- " -0.035827637,\n",
1195
- " 0.049194336,\n",
1196
- " -0.008911133,\n",
1197
- " 0.09680176,\n",
1198
- " -0.03616333,\n",
1199
- " -0.057525635,\n",
1200
- " 0.03375244,\n",
1201
- " -0.027435303,\n",
1202
- " 0.0035476685,\n",
1203
- " 0.010269165,\n",
1204
- " 0.031921387,\n",
1205
- " 0.0011024475,\n",
1206
- " 0.045715332,\n",
1207
- " -0.026885986,\n",
1208
- " 0.033935547,\n",
1209
- " 0.06341553,\n",
1210
- " 0.019958496,\n",
1211
- " 0.008239746,\n",
1212
- " 0.015174866,\n",
1213
- " -0.023071289,\n",
1214
- " 0.0056762695,\n",
1215
- " 0.064575195,\n",
1216
- " 0.0042533875,\n",
1217
- " -0.05718994,\n",
1218
- " 0.04486084,\n",
1219
- " 0.020614624,\n",
1220
- " 0.01461792,\n",
1221
- " -0.09283447,\n",
1222
- " 0.019592285,\n",
1223
- " -0.022644043,\n",
1224
- " 0.011512756,\n",
1225
- " -0.005874634,\n",
1226
- " -0.018569946,\n",
1227
- " 0.006614685,\n",
1228
- " 0.009269714,\n",
1229
- " -0.04296875,\n",
1230
- " -0.052856445,\n",
1231
- " 0.084106445,\n",
1232
- " 0.0043563843,\n",
1233
- " -0.020721436,\n",
1234
- " 0.029022217,\n",
1235
- " 0.03982544,\n",
1236
- " -0.109436035,\n",
1237
- " -0.036071777,\n",
1238
- " 0.03253174,\n",
1239
- " 0.011558533,\n",
1240
- " -0.10650635,\n",
1241
- " 0.034454346,\n",
1242
- " -0.06951904,\n",
1243
- " -0.025817871,\n",
1244
- " 0.10668945,\n",
1245
- " 0.010101318,\n",
1246
- " -0.070739746,\n",
1247
- " 0.049621582,\n",
1248
- " -0.09057617,\n",
1249
- " 0.037231445,\n",
1250
- " -0.03152466,\n",
1251
- " -0.043914795,\n",
1252
- " 0.07507324,\n",
1253
- " 0.061645508,\n",
1254
- " 0.0085372925,\n",
1255
- " 0.004142761,\n",
1256
- " -0.051971436,\n",
1257
- " -0.05480957,\n",
1258
- " 0.0030975342,\n",
1259
- " -0.046875,\n",
1260
- " -0.039398193,\n",
1261
- " 0.08782959,\n",
1262
- " -0.012550354,\n",
1263
- " -0.003955841,\n",
1264
- " -0.07775879,\n",
1265
- " -0.021133423,\n",
1266
- " 0.0062713623,\n",
1267
- " -0.02255249,\n",
1268
- " 0.017868042,\n",
1269
- " 0.049560547,\n",
1270
- " 0.028121948,\n",
1271
- " 0.031707764,\n",
1272
- " 0.041168213,\n",
1273
- " 0.009559631,\n",
1274
- " 0.036956787,\n",
1275
- " 0.008987427,\n",
1276
- " 0.0024776459,\n",
1277
- " -0.003440857,\n",
1278
- " -0.0067749023,\n",
1279
- " -0.06439209,\n",
1280
- " -0.010902405,\n",
1281
- " -0.07104492,\n",
1282
- " 0.006214142,\n",
1283
- " -0.06359863,\n",
1284
- " 0.062316895,\n",
1285
- " 0.005367279,\n",
1286
- " 0.015197754,\n",
1287
- " -0.043182373,\n",
1288
- " 0.050933838,\n",
1289
- " 0.0035800934,\n",
1290
- " 0.0032138824,\n",
1291
- " -0.017974854,\n",
1292
- " 0.08972168,\n",
1293
- " 0.011268616,\n",
1294
- " 0.020477295,\n",
1295
- " -0.05050659,\n",
1296
- " -0.07232666,\n",
1297
- " 0.07055664,\n",
1298
- " -0.010002136,\n",
1299
- " 0.11480713,\n",
1300
- " 0.02130127,\n",
1301
- " 0.039093018,\n",
1302
- " 0.009597778,\n",
1303
- " -0.0619812,\n",
1304
- " -0.016952515],\n",
1305
- " 'id': '1af84f3b4cc6a9f1018f2f80b4fd3ba7'}"
1306
- ]
1307
- },
1308
- "execution_count": 12,
1309
- "metadata": {},
1310
- "output_type": "execute_result"
1311
- }
1312
- ],
1313
- "source": [
1314
- "documents[0]"
1315
- ]
1316
- },
1317
  {
1318
  "cell_type": "markdown",
1319
  "id": "93d6ab01-bd3b-479d-918d-2bdb30b00fac",
@@ -1321,12 +899,15 @@
1321
  "source": [
1322
  "# Performance and Cost Analysis\n",
1323
  "You can see that we are quite cost effective!\n",
1324
- "![Cost](../media/arabic-rag-embeddings-cost.png)\n",
 
 
1325
  "Note that the performance is over just the last 30 min window.\n",
1326
  "Observations:\n",
1327
  "- We have a throughput of `~333/s`\n",
1328
  "- Our median latency per request is `~50ms`\n",
1329
- "![Metrics](../media/arabic-rag-embeddings-metrics.png)"
 
1330
  ]
1331
  },
1332
  {
 
892
  "!echo \"$folder_out_str\" && cat \"$folder_out_str\"/*.ndjson | wc -l"
893
  ]
894
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
895
  {
896
  "cell_type": "markdown",
897
  "id": "93d6ab01-bd3b-479d-918d-2bdb30b00fac",
 
899
  "source": [
900
  "# Performance and Cost Analysis\n",
901
  "You can see that we are quite cost effective!\n",
902
+ "\n",
903
+ "![Cost](https://huggingface.co/spaces/derek-thomas/arabic-RAG/resolve/main/media/arabic-rag-embeddings-cost.png)\n",
904
+ "\n",
905
  "Note that the performance is over just the last 30 min window.\n",
906
  "Observations:\n",
907
  "- We have a throughput of `~333/s`\n",
908
  "- Our median latency per request is `~50ms`\n",
909
+ "\n",
910
+ "![Metrics](https://huggingface.co/spaces/derek-thomas/arabic-RAG/resolve/main/media/arabic-rag-embeddings-metrics.png)"
911
  ]
912
  },
913
  {