manually constructed dataset are small due to expensive human intervention and automatically extracted dataset do not have high quality because the commits from version control systems contain bug-irrelevant changes
% Conference paper (ICSE 2021) on extracting concise bug-fixing patches
% from human-written patches in version control systems.
@inproceedings{jiang2021extracting,
  author       = {Jiang, Yanjie and Liu, Hui and Niu, Nan and Zhang, Lu and Hu, Yamin},
  title        = {Extracting Concise Bug-Fixing Patches from Human-Written Patches in Version Control Systems},
  booktitle    = {2021 IEEE/ACM 43rd International Conference on Software Engineering (ICSE)},
  pages        = {686--698},
  year         = {2021},
  organization = {IEEE}
}
% Journal article (Journal of Systems and Software, vol. 169, 2020) on an
% automatically created bug dataset and its validation in bug prediction.
% Accented letters in the author names are written as BibTeX special
% characters: a brace group whose first token is a single-backslash accent
% command, so that they typeset, sort and abbreviate correctly.
@article{ferenc2020automatically,
  author    = {Ferenc, Rudolf and Gyimesi, P{\'e}ter and Gyimesi, G{\'a}bor and T{\'o}th, Zolt{\'a}n and Gyim{\'o}thy, Tibor},
  title     = {An automatically created novel bug dataset and its validation in bug prediction},
  journal   = {Journal of Systems and Software},
  volume    = {169},
  pages     = {110691},
  year      = {2020},
  publisher = {Elsevier}
}