Carl Doersch: Selected Projects

TAPNext: Tracking Any Point (TAP) as Next Token Prediction

Artem Zholus, Carl Doersch, Yi Yang, Skanda Koppula, Viorica Patraucean, Xu He, Ignacio Rocco, Mehdi S. M. Sajjadi, Sarath Chandar, Ross Goroshin in ICCV 2025

[webpage] [arXiv] [github] [Show BibTex]

@inproceedings{zholus2025tapnext,
  title={{TAPNext}: Tracking Any Point ({TAP}) as Next Token Prediction},
  author={Zholus, Artem and Doersch, Carl and Yang, Yi and Koppula, Skanda and Patraucean, Viorica and He, Xu and Rocco, Ignacio and Sajjadi, Mehdi S. M. and Chandar, Sarath and Goroshin, Ross},
  booktitle={International Conference on Computer Vision (ICCV)},
  year={2025}
}

Direct Motion Models for Assessing Generated Videos

Kelsey Allen, Carl Doersch, Guangyao Zhou, Mohammed Suhail, Danny Driess, Ignacio Rocco, Yulia Rubanova, Thomas Kipf, Mehdi S. M. Sajjadi, Kevin Murphy, Joao Carreira, Sjoerd van Steenkiste in ICML 2025

[webpage] [arXiv] [github] [Show BibTex]

@inproceedings{allen2025direct,
  author    = {Allen, Kelsey and Doersch, Carl and Zhou, Guangyao and Suhail, Mohammed and Driess, Danny and Rocco, Ignacio and Rubanova, Yulia and Kipf, Thomas and Sajjadi, Mehdi S. M. and Murphy, Kevin and Carreira, Joao and van Steenkiste, Sjoerd},
  title     = {Direct Motion Models for Assessing Generated Videos},
  booktitle = {International Conference on Machine Learning (ICML)},
  year      = {2025},
}

Motion Prompting: Controlling Video Generation with Motion Trajectories

Daniel Geng, Charles Herrmann, Junhwa Hur, Forrester Cole, Serena Zhang, Tobias Pfaff, Tatiana Lopez-Guevara, Carl Doersch, Yusuf Aytar, Michael Rubinstein, Chen Sun, Oliver Wang, Andrew Owens, Deqing Sun

[arXiv] [webpage] [Show BibTex]

@article{geng2024motion,
  author  = {Geng, Daniel and Herrmann, Charles and Hur, Junhwa and Cole, Forrester and Zhang, Serena and Pfaff, Tobias and Lopez-Guevara, Tatiana and Doersch, Carl and Aytar, Yusuf and Rubinstein, Michael and Sun, Chen and Wang, Oliver and Owens, Andrew and Sun, Deqing},
  title   = {Motion Prompting: Controlling Video Generation with Motion Trajectories},
  journal = {arXiv preprint arXiv:2412.02700},
  year    = {2024},
}

Gen2Act: Human Video Generation in Novel Scenarios enables Generalizable Robot Manipulation

Homanga Ballav, Suneel Belkhale, Philipp Kühenbühl, Kanika Madan, Carl Doersch, Igor Mordatch, Deepak Pathak in CoRL 2025

[arXiv] [webpage] [Show BibTex]

@inproceedings{ballav2025gen2act,
  title={{Gen2Act}: Human Video Generation in Novel Scenarios enables Generalizable Robot Manipulation},
  author={Ballav, Homanga and Belkhale, Suneel and K{\"u}henb{\"u}hl, Philipp and Madan, Kanika and Doersch, Carl and Mordatch, Igor and Pathak, Deepak},
  booktitle={Conference on Robot Learning (CoRL)},
  year={2025}
}

TAPVid-3D: A Benchmark for Tracking Any Point in 3D

Skanda Koppula, Ignacio Rocco, Yi Yang, Joe Heyward, Joao Carreira, Andrew Zisserman, Gabriel Brostow, Carl Doersch in NeurIPS 2024

[arXiv] [github] [webpage] [Show BibTex]

 @inproceedings{koppula2024tapvid,
  title={{TAPVid-3D}: A Benchmark for Tracking Any Point in {3D}},
  author={Koppula, Skanda and Rocco, Ignacio and Yang, Yi and Heyward, Joe and Carreira, Joao and Zisserman, Andrew and Brostow, Gabriel and Doersch, Carl},
  booktitle={Neural Information Processing Systems (NeurIPS)},
  year={2024}
}

BootsTAP: Bootstrapped Training for Tracking-Any-Point

Carl Doersch, Pauline Luc, Yi Yang, Dilara Gokay, Skanda Koppula, Ankush Gupta, Joseph Heyward, Ignacio Rocco, Ross Goroshin, João Carreira, Andrew Zisserman in ACCV 2024

[arXiv] [github] [webpage] [Show BibTex]

@inproceedings{doersch2024bootstap,
  title={{BootsTAP}: Bootstrapped Training for Tracking-Any-Point},
  author={Doersch, Carl and Luc, Pauline and Yang, Yi and Gokay, Dilara and Koppula, Skanda and Gupta, Ankush and Heyward, Joseph and Rocco, Ignacio and Goroshin, Ross and Carreira, Jo{\~a}o and Zisserman, Andrew},
  booktitle={Asian Conference on Computer Vision (ACCV)},
  year={2024}
}

RoboTAP: Tracking Arbitrary Points for Few-Shot Visual Imitation

Mel Vecerik, Carl Doersch, Yi Yang, Todor Davchev, Yusuf Aytar, Guangyao Zhou, Raia Hadsell, Lourdes Agapito, Jon Scholz in ICRA 2024

[arXiv] [Show BibTex]

@inproceedings{vecerik2024robotap,
  title={{RoboTAP}: Tracking Arbitrary Points for Few-Shot Visual Imitation},
  author={Vecerik, Mel and Doersch, Carl and Yang, Yi and Davchev, Todor and Aytar, Yusuf and Zhou, Guangyao and Hadsell, Raia and Agapito, Lourdes and Scholz, Jon},
  booktitle={International Conference on Robotics and Automation (ICRA)},
  year={2024}
}

TAPIR: Tracking Any Point with per-frame Initialization and temporal Refinement

Carl Doersch, Yi Yang, Mel Vecerik, Dilara Gokay, Ankush Gupta, Yusuf Aytar, João Carreira, Andrew Zisserman in ICCV 2023

[arXiv] [github] [Show BibTex]

@inproceedings{doersch2023tapir,
  title={{TAPIR}: Tracking Any Point with per-frame Initialization and temporal Refinement},
  author={Doersch, Carl and Yang, Yi and Vecerik, Mel and Gokay, Dilara and Gupta, Ankush and Aytar, Yusuf and Carreira, Jo{\~a}o and Zisserman, Andrew},
  booktitle={International Conference on Computer Vision (ICCV)},
  year={2023}
}

The Perception Test

Viorica Patraucean, Lucas Smaira, Ankush Gupta, Adria Recasens Continente, Larisa Markeeva, Dylan Banarse, Skanda Koppula, Joseph Heyward, Mateusz Malinowski, Yi Yang, Carl Doersch, Tatiana Matejovicova, Yury Sulsky, Antoine Miech, Alex Frechette, Hanna Klimczak, Raphael Koster, Junlin Zhang, Stephanie Winkler, Yusuf Aytar, Simon Osindero, Dima Damen, Andrew Zisserman, Joao Carreira in ECCV/ICCV Workshop Series

[v1] [Show BibTex]

@article{patraucean2023perception,
  title={The Perception Test: A Large-Scale Multimodal Benchmark for General Perception},
  author={Patraucean, Viorica and Smaira, Lucas and Gupta, Ankush and Recasens Continente, Adri{\`a} and Markeeva, Larisa and Banarse, Dylan and Koppula, Skanda and Heyward, Joseph and Malinowski, Mateusz and Yang, Yi and Doersch, Carl and Matejovicova, Tatiana and Sulsky, Yury and Miech, Antoine and Frechette, Alex and Klimczak, Hanna and Koster, Raphael and Zhang, Junlin and Winkler, Stephanie and Aytar, Yusuf and Osindero, Simon and Damen, Dima and Zisserman, Andrew and Carreira, Jo{\~a}o},
  journal={arXiv preprint arXiv:2305.13786},
  year={2023}
}

TAP-Vid: A Benchmark for Tracking Any Point in a Video

Carl Doersch, Ankush Gupta, Larisa Markeeva, Adrià Recasens, Lucas Smaira, Yusuf Aytar, João Carreira, Andrew Zisserman, Yi Yang in NeurIPS Datasets and Benchmarks 2022

[arXiv] [github] [Show BibTex]

@inproceedings{doersch2022tapvid,
  title={{TAP-Vid}: A Benchmark for Tracking Any Point in a Video},
  author={Doersch, Carl and Gupta, Ankush and Markeeva, Larisa and Recasens, Adri{\`a} and Smaira, Lucas and Aytar, Yusuf and Carreira, Jo{\~a}o and Zisserman, Andrew and Yang, Yi},
  booktitle={NeurIPS Datasets and Benchmarks},
  year={2022}
}

Input-level Inductive Biases for 3D Reconstruction

Wang Yifan, Carl Doersch, Relja Arandjelovic, Joao Carreira, Andrew Zisserman in CVPR 2022

[arXiv] [Show BibTex]

@inproceedings{yifan2022input,
  title={Input-level Inductive Biases for {3D} Reconstruction},
  author={Yifan, Wang and Doersch, Carl and Arandjelovic, Relja and Carreira, Joao and Zisserman, Andrew},
  booktitle={IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  year={2022}
}

Kubric: A Scalable Dataset Generator

Klaus Greff, Francois Belletti, Lucas Beyer, Carl Doersch, Yilun Du, Daniel Duckworth, David J. Fleet, Dan Gnanapragasam, Florian Golemo, Charles Herrmann, Thomas Kipf, Abhijit Kundu, Dmitry Lagun, Issam Laradji, Hsueh-Ti (Derek) Liu, Henning Meyer, Yishu Miao, Derek Nowrouzezahrai, Cengiz Oztireli, Etienne Pot, Noha Radwan, Daniel Rebain, Sara Sabour, Mehdi S. M. Sajjadi, Matan Sela, Vincent Sitzmann, Austin Stone, Deqing Sun, Suhani Vora, Ziyu Wang, Tianhao Wu, Kwang Moo Yi, Fangcheng Zhong, Andrea Tagliasacchi in CVPR 2022

[arXiv] [github] [Show BibTex]

@inproceedings{greff2022kubric,
  title={Kubric: A Scalable Dataset Generator},
  author={Greff, Klaus and Belletti, Francois and Beyer, Lucas and Doersch, Carl and Du, Yilun and Duckworth, Daniel and Fleet, David J. and Gnanapragasam, Dan and Golemo, Florian and Herrmann, Charles and Kipf, Thomas and Kundu, Abhijit and Lagun, Dmitry and Laradji, Issam and Liu, Hsueh-Ti Derek and Meyer, Henning and Miao, Yishu and Nowrouzezahrai, Derek and Oztireli, Cengiz and Pot, Etienne and Radwan, Noha and Rebain, Daniel and Sabour, Sara and Sajjadi, Mehdi S. M. and Sela, Matan and Sitzmann, Vincent and Stone, Austin and Sun, Deqing and Vora, Suhani and Wang, Ziyu and Wu, Tianhao and Yi, Kwang Moo and Zhong, Fangcheng and Tagliasacchi, Andrea},
  booktitle={IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  year={2022}
}

Perceiver IO: A General Architecture for Structured Inputs & Outputs

Andrew Jaegle, Sebastian Borgeaud, Jean-Baptiste Alayrac, Carl Doersch, Catalin Ionescu, David Ding, Skanda Koppula, Daniel Zoran, Andrew Brock, Evan Shelhamer, Olivier Henaff, Matthew M. Botvinick, Andrew Zisserman, Oriol Vinyals, Joao Carreira in ICLR 2022

[arXiv] [Show BibTex]

@inproceedings{jaegle2022perceiver,
  title={Perceiver {IO}: A General Architecture for Structured Inputs \& Outputs},
  author={Jaegle, Andrew and Borgeaud, Sebastian and Alayrac, Jean-Baptiste and Doersch, Carl and Ionescu, Catalin and Ding, David and Koppula, Skanda and Zoran, Daniel and Brock, Andrew and Shelhamer, Evan and Henaff, Olivier and Botvinick, Matthew M. and Zisserman, Andrew and Vinyals, Oriol and Carreira, Joao},
  booktitle={International Conference on Learning Representations (ICLR)},
  year={2022}
}

Inferring a Continuous Distribution of Atom Coordinates from Cryo-EM Images using VAEs

Dan Rosenbaum, Marta Garnelo, Michal Zielinski, Charlie Beattie, Ellen Clancy, Andrea Huber, Pushmeet Kohli, Andrew W. Senior, John Jumper, Carl Doersch, S. M. Ali Eslami, Olaf Ronneberger, Jonas Adler in NeurIPS 2021 workshop on Machine Learning in Structural Biology

[arXiv] [Show BibTex]

@inproceedings{rosenbaum2021inferring,
  title={Inferring a Continuous Distribution of Atom Coordinates from {Cryo-EM} Images using {VAEs}},
  author={Rosenbaum, Dan and Garnelo, Marta and Zielinski, Michal and Beattie, Charlie and Clancy, Ellen and Huber, Andrea and Kohli, Pushmeet and Senior, Andrew W. and Jumper, John and Doersch, Carl and Eslami, S. M. Ali and Ronneberger, Olaf and Adler, Jonas},
  booktitle={NeurIPS Workshop on Machine Learning in Structural Biology},
  year={2021}
}

CrossTransformers: spatially-aware few-shot transfer

Carl Doersch, Ankush Gupta, Andrew Zisserman in NeurIPS 2020

[arXiv] [Show BibTex]

@inproceedings{doersch2020crosstransformers,
  title={{CrossTransformers}: spatially-aware few-shot transfer},
  author={Doersch, Carl and Gupta, Ankush and Zisserman, Andrew},
  booktitle={Neural Information Processing Systems (NeurIPS)},
  year={2020}
}

Bootstrap Your Own Latent: A New Approach to Self-Supervised Learning

Jean-Bastien Grill, Florian Strub, Florent Altché, Corentin Tallec, Pierre H. Richemond, Elena Buchatskaya, Carl Doersch, Bernardo Avila Pires, Zhaohan Daniel Guo, Mohammad Gheshlaghi Azar, Bilal Piot, Koray Kavukcuoglu, Rémi Munos, Michal Valko in NeurIPS 2020 (Oral)

[arXiv] [Show BibTex]

@inproceedings{grill2020bootstrap,
  author    = {Grill, Jean-Bastien and Strub, Florian and Altch{\'e}, Florent and Tallec, Corentin and Richemond, Pierre H and Buchatskaya, Elena and Doersch, Carl and Avila Pires, Bernardo and Guo, Zhaohan Daniel and Gheshlaghi Azar, Mohammad and Piot, Bilal and Kavukcuoglu, Koray and Munos, R{\'e}mi and Valko, Michal},
  title     = {Bootstrap Your Own Latent: A New Approach to Self-Supervised Learning},
  booktitle = {Neural Information Processing Systems (NeurIPS)},
  year      = {2020},
}

Data-Efficient Image Recognition with Contrastive Predictive Coding

Olivier J. Hénaff, Aravind Srinivas, Jeffrey De Fauw, Ali Razavi, Carl Doersch, S. M. Ali Eslami, Aaron van den Oord in ICML 2020

[arXiv] [Show BibTex]

@inproceedings{henaff2020data,
  title={Data-Efficient Image Recognition with Contrastive Predictive Coding},
  author={H{\'e}naff, Olivier J. and Srinivas, Aravind and De Fauw, Jeffrey and Razavi, Ali and Doersch, Carl and Eslami, S. M. Ali and van den Oord, Aaron},
  booktitle={International Conference on Machine Learning (ICML)},
  year={2020}
}

Sim2real transfer learning for 3D human pose estimation: motion to the rescue

Carl Doersch, Andrew Zisserman in NeurIPS 2019

[arXiv] [Show BibTex]

@inproceedings{doersch2019sim2real,
  title={Sim2real transfer learning for {3D} human pose estimation: motion to the rescue},
  author={Doersch, Carl and Zisserman, Andrew},
  booktitle={Neural Information Processing Systems (NeurIPS)},
  year={2019}
}

Exploiting temporal context for 3D human pose estimation in the wild

Anurag Arnab, Carl Doersch, Andrew Zisserman in CVPR 2019

[arXiv] [Show BibTex]

@inproceedings{arnab2019exploiting,
  title={Exploiting temporal context for {3D} human pose estimation in the wild},
  author={Arnab, Anurag and Doersch, Carl and Zisserman, Andrew},
  booktitle={IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  year={2019}
}

Video Action Transformer Network

Rohit Girdhar, João Carreira, Carl Doersch, Andrew Zisserman in CVPR 2019

[arXiv] [Show BibTex]

@inproceedings{girdhar2019video,
  title={Video Action Transformer Network},
  author={Girdhar, Rohit and Carreira, Jo{\~a}o and Doersch, Carl and Zisserman, Andrew},
  booktitle={IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  year={2019}
}

A Better Baseline for AVA

Rohit Girdhar, João Carreira, Carl Doersch, Andrew Zisserman in CVPR 2018 ActivityNet Workshop

[arXiv] [Show BibTex]

@inproceedings{girdhar2018better,
  title={A Better Baseline for {AVA}},
  author={Girdhar, Rohit and Carreira, Jo{\~a}o and Doersch, Carl and Zisserman, Andrew},
  booktitle={CVPR ActivityNet Workshop},
  year={2018}
}

Kickstarting Deep Reinforcement Learning

Simon Schmitt, Jony Hudson, Augustin Zidek, Simon Osindero, Carl Doersch, Wojciech Czarnecki, Joel Leibo, Heinrich Kuttler, Andrew Zisserman, Karen Simonyan, Ali Eslami in NIPS 2018 Reinforcement Learning Workshop

[arXiv] [Show BibTex]

@article{schmitt2018kickstarting,
  author  = {Schmitt, Simon and Hudson, Jony and Zidek, Augustin and Osindero, Simon and Doersch, Carl and Czarnecki, Wojciech and Leibo, Joel and Kuttler, Heinrich and Zisserman, Andrew and Simonyan, Karen and Eslami, Ali},
  title   = {Kickstarting Deep Reinforcement Learning},
  journal = {arXiv preprint arXiv:1803.03835},
  year    = {2018},
}

Learning Visual Question Answering by Bootstrapping Hard Attention

Mateusz Malinowski, Carl Doersch, Adam Santoro, Peter Battaglia in ECCV 2018

[arXiv] [Show BibTex]

@inproceedings{malinowski2018learning,
  author    = {Malinowski, Mateusz and Doersch, Carl and Santoro, Adam and Battaglia, Peter},
  title     = {Learning Visual Question Answering by Bootstrapping Hard Attention},
  booktitle = {European Conference on Computer Vision (ECCV)},
  year      = {2018},
}

The Visual QA Devil in the Details: The Impact of Early Fusion and Batch Norm on CLEVR

Mateusz Malinowski, Carl Doersch in ECCV 2018 Workshop on Shortcomings in Vision and Language

[arXiv] [Show BibTex]

@inproceedings{malinowski2018visual,
  title={The Visual {QA} Devil in the Details: The Impact of Early Fusion and Batch Norm on {CLEVR}},
  author={Malinowski, Mateusz and Doersch, Carl},
  booktitle={ECCV Workshop on Shortcomings in Vision and Language},
  year={2018}
}

Multi-task Self-Supervised Visual Learning

Carl Doersch and Andrew Zisserman in ICCV 2017

[arXiv] [Show BibTex]

@inproceedings{doersch2017multitask,
  title = {Multi-task Self-Supervised Visual Learning},
  author = {Doersch, Carl and Zisserman, Andrew},
  booktitle = {International Conference on Computer Vision (ICCV)},
  year = {2017},
}

Supervision Beyond Manual Annotations for Learning Visual Representations

Carl Doersch.
Carnegie Mellon University PhD Dissertation, 2016

[pdf] [Show BibTex]

@phdthesis{doersch2016unsupervised,
  author={Doersch, Carl},
  title={Supervision Beyond Manual Annotations for Learning Visual Representations},
  school={Carnegie Mellon University},
  year={2016}
}

Tutorial on Variational Autoencoders

Carl Doersch.
arXiv Tech Report, June 2016

[arXiv] [Show BibTex]

@article{doersch2016tutorial,
  author={Doersch, Carl},
  title={Tutorial on Variational Autoencoders},
  journal={arXiv preprint arXiv:1606.05908},
  year={2016}
}

An Uncertain Future: Forecasting from Static Images using Variational Autoencoders

Jacob Walker, Carl Doersch, Abhinav Gupta, and Martial Hebert.
in ECCV 2016

[webpage] [arXiv] [Show BibTex]

@inproceedings{walker2016uncertain,
  title = {An Uncertain Future: Forecasting from Static Images using Variational Autoencoders},
  author = {Walker, Jacob and Doersch, Carl and Gupta, Abhinav and Hebert, Martial},
  booktitle = {European Conference on Computer Vision (ECCV)},
  year = {2016},
}

Data-dependent Initializations of Convolutional Neural Networks

Philipp Krähenbühl, Carl Doersch, Jeff Donahue, and Trevor Darrell.
ICLR, 2016

[arXiv] [Show BibTex]

@inproceedings{krahenbuhl2016data,
  author    = {Kr{\"a}henb{\"u}hl, Philipp and Doersch, Carl and Donahue, Jeff and Darrell, Trevor},
  title     = {Data-dependent Initializations of Convolutional Neural Networks},
  booktitle = {International Conference on Learning Representations (ICLR)},
  year      = {2016},
}

Unsupervised Visual Representation Learning by Context Prediction

Carl Doersch, Abhinav Gupta, and Alexei A. Efros.
in ICCV 2015 (oral)

[webpage] [arXiv] [Show BibTex]

@inproceedings{doersch2015unsupervised,
  title = {Unsupervised Visual Representation Learning by Context Prediction},
  author = {Doersch, Carl and Gupta, Abhinav and Efros, Alexei A.},
  booktitle = {International Conference on Computer Vision (ICCV)},
  year = {2015},
}

Context as Supervisory Signal: Discovering Objects with Predictable Context

Carl Doersch, Abhinav Gupta, and Alexei A. Efros.
In ECCV 2014

[Show BibTex]

@inproceedings{doersch2014context,
  title = {Context as Supervisory Signal: Discovering Objects with Predictable Context},
  author = {Doersch, Carl and Gupta, Abhinav and Efros, Alexei A.},
  booktitle = {European Conference on Computer Vision (ECCV)},
  year = {2014},
}

Mid-Level Visual Element Discovery as Discriminative Mode Seeking

Carl Doersch, Abhinav Gupta, and Alexei A. Efros.
In NIPS 2013

[Show BibTex]

@inproceedings{doersch2013mid,
  author={Doersch, Carl and Gupta, Abhinav and Efros, Alexei A.},
  title={Mid-Level Visual Element Discovery as Discriminative Mode Seeking},
  booktitle={Neural Information Processing Systems (NIPS)},
  year={2013}
}

What Makes Paris Look like Paris?

Carl Doersch, Saurabh Singh, Abhinav Gupta, Josef Sivic, and Alexei A. Efros.
In SIGGRAPH 2012 (oral)
Republished on the cover of the CACM magazine Dec. 2015

[Show BibTex]

@article{doersch2012what,
  title = {What Makes {Paris} Look like {Paris}?},
  author = {Doersch, Carl and Singh, Saurabh and Gupta, Abhinav and Sivic, Josef and Efros, Alexei A.},
  journal = {ACM Transactions on Graphics (SIGGRAPH)},
  volume = {31},
  number = {4},
  year = {2012},
}

@article{doersch2015makes,
  title={What makes {Paris} look like {Paris}?},
  author={Doersch, Carl and Singh, Saurabh and Gupta, Abhinav and Sivic, Josef and Efros, Alexei A.},
  journal={Communications of the ACM},
  volume={58},
  number={12},
  pages={103--110},
  year={2015},
  publisher={ACM}
}

Bounding the Probability of Error for High Precision Optical Character Recognition

Gary B. Huang, Andrew Kae, Carl Doersch, and Erik Learned-Miller.
In JMLR 2012

[pdf] [Show BibTex]

@article{huang2012bounding,
  title={Bounding the Probability of Error for High Precision Optical Character Recognition},
  author={Huang, Gary B. and Kae, Andrew and Doersch, Carl and Learned-Miller, Erik},
  journal={Journal of Machine Learning Research},
  volume={13},
  pages={363--387},
  year={2012}
}

Improving state-of-the-art OCR through high-precision document-specific modeling

Andrew Kae, Gary B. Huang, Carl Doersch, and Erik Learned-Miller.
In CVPR 2010

[pdf] [Show BibTex]

@inproceedings{kae10improving,
  author = {Kae, Andrew and Huang, Gary B. and Doersch, Carl and Learned-Miller, Erik},
  title = {Improving state-of-the-art {OCR} through high-precision document-specific modeling},
  booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  year = {2010},
  month = jun
}