Manually constructed datasets are small due to expensive human intervention, and automatically extracted datasets do not have high quality because the commits from version control systems contain bug-irrelevant changes.
@inproceedings{jiang2021extracting,
  author       = {Jiang, Yanjie and Liu, Hui and Niu, Nan and Zhang, Lu and Hu, Yamin},
  title        = {Extracting Concise Bug-Fixing Patches from Human-Written Patches in Version Control Systems},
  booktitle    = {2021 IEEE/ACM 43rd International Conference on Software Engineering (ICSE)},
  pages        = {686--698},
  year         = {2021},
  organization = {IEEE},
}
@article{ferenc2020automatically,
  author    = {Ferenc, Rudolf and Gyimesi, P{\'e}ter and Gyimesi, G{\'a}bor and T{\'o}th, Zolt{\'a}n and Gyim{\'o}thy, Tibor},
  title     = {An automatically created novel bug dataset and its validation in bug prediction},
  journal   = {Journal of Systems and Software},
  volume    = {169},
  pages     = {110691},
  year      = {2020},
  publisher = {Elsevier},
}