% VisualWebArena conference paper (ACL 2024, Bangkok); `url` points to the arXiv abstract page.
@inproceedings{koh24acl,
  author    = {Koh, Jing Yu and Lo, Robert and Jang, Lawrence and Duvvur, Vikram and Lim, Ming Chong and Huang, Po-Yu and Neubig, Graham and Zhou, Shuyan and Salakhutdinov, Ruslan and Fried, Daniel},
  title     = {{VisualWebArena}: Evaluating Multimodal Agents on Realistic Visual Web Tasks},
  booktitle = {Annual Conference of the Association for Computational Linguistics ({ACL})},
  address   = {Bangkok, Thailand},
  month     = aug,
  year      = {2024},
  url       = {https://arxiv.org/abs/2401.13649},
}