@@ -313,3 +313,111 @@ def test_on_bad_lines_warn_correct_formatting(all_parsers):
313313 ):
314314 result = parser .read_csv (StringIO (data ), on_bad_lines = "warn" )
315315 tm .assert_frame_equal (result , expected )
316+
317+
@pytest.mark.parametrize(
    "on_bad_lines,should_warn",
    [
        ("skip", False),
        ("warn", True),
    ],
)
def test_on_bad_lines_dtype_conversion_skip(c_parser_only, on_bad_lines, should_warn):
    # GH#63168 - a row whose value cannot be converted to the requested dtype
    # is treated as a bad line: dropped for "skip", dropped with a
    # ParserWarning for "warn".
    csv_text = "col1,col2,col3\n 1,2,3\n a,4,5\n 4,5,6"
    int_dtypes = {"col1": int, "col2": int, "col3": int}

    def _parse():
        # Single place that performs the read so both branches issue the
        # exact same call.
        return c_parser_only.read_csv(
            StringIO(csv_text),
            dtype=int_dtypes,
            on_bad_lines=on_bad_lines,
        )

    if not should_warn:
        # "skip": just read; no warning assertion is made on this path.
        result = _parse()
    else:
        # "warn": the read must emit a ParserWarning matching either message.
        with tm.assert_produces_warning(
            ParserWarning,
            match="Could not convert column|Skipped .* line",
            check_stacklevel=False,
        ):
            result = _parse()

    # The row containing 'a' cannot be converted to int, so only the first
    # and last data rows remain.
    expected = DataFrame({"col1": [1, 4], "col2": [2, 5], "col3": [3, 6]})
    tm.assert_frame_equal(result, expected)
351+
352+
def test_on_bad_lines_dtype_conversion_error(c_parser_only):
    # GH#63168 - with on_bad_lines="error" a dtype conversion failure must
    # surface as a ValueError rather than being skipped.
    csv_text = "col1,col2\n 1,2\n a,3"
    read_kwargs = {
        "dtype": {"col1": int, "col2": int},
        "on_bad_lines": "error",
    }
    expected_msg = "invalid literal for int"

    with pytest.raises(ValueError, match=expected_msg):
        c_parser_only.read_csv(StringIO(csv_text), **read_kwargs)
364+
365+
def test_on_bad_lines_dtype_float_conversion(c_parser_only):
    # GH#63168 - float dtype requested, one row holds a non-numeric value
    csv_text = "a,b\n 1.5,2.5\n foo,3.5\n 4.5,5.5"
    float_dtypes = {"a": float, "b": float}

    buffer = StringIO(csv_text)
    result = c_parser_only.read_csv(buffer, dtype=float_dtypes, on_bad_lines="skip")

    # The "foo" row is expected to be dropped, leaving the two numeric rows.
    expected_columns = {"a": [1.5, 4.5], "b": [2.5, 5.5]}
    tm.assert_frame_equal(result, DataFrame(expected_columns))
379+
380+
def test_on_bad_lines_dtype_partial_columns(c_parser_only):
    # GH#63168 - dtype is given for columns "a" and "c" only; "b" is left
    # without an explicit dtype.
    csv_text = "a,b,c\n 1,hello,3\n foo,world,6\n 4,test,9"
    partial_dtypes = {"a": int, "c": int}

    buffer = StringIO(csv_text)
    result = c_parser_only.read_csv(
        buffer, dtype=partial_dtypes, on_bad_lines="skip"
    )

    # The row whose "a" value is "foo" is expected to be dropped.
    expected_columns = {"a": [1, 4], "b": ["hello", "test"], "c": [3, 9]}
    tm.assert_frame_equal(result, DataFrame(expected_columns))
394+
395+
def test_on_bad_lines_dtype_mixed_errors(c_parser_only):
    # GH#63168 - input mixes a structural error (wrong field count) with a
    # dtype conversion error; both offending rows should be skipped.
    csv_text = "a,b,c\n 1,2,3\n wrong_field_count\n foo,4,5\n 6,7,8"
    int_dtypes = {"a": int, "b": int, "c": int}

    buffer = StringIO(csv_text)
    result = c_parser_only.read_csv(buffer, dtype=int_dtypes, on_bad_lines="skip")

    # Only the first and last data rows are expected to survive.
    expected_columns = {"a": [1, 6], "b": [2, 7], "c": [3, 8]}
    tm.assert_frame_equal(result, DataFrame(expected_columns))
409+
410+
def test_on_bad_lines_dtype_all_bad_rows(c_parser_only):
    # GH#63168 - every data row fails int conversion, so the result is
    # expected to be an empty frame that still carries the int dtype.
    csv_text = "a,b\n foo,bar\n baz,qux"
    int_dtypes = {"a": int, "b": int}

    buffer = StringIO(csv_text)
    result = c_parser_only.read_csv(buffer, dtype=int_dtypes, on_bad_lines="skip")

    empty_frame = DataFrame({"a": [], "b": []})
    expected = empty_frame.astype(int)
    tm.assert_frame_equal(result, expected)
0 commit comments