Skip to content

Test Tools > Validators

TestValidateSublists

Source code in tests/tools/test_validators.py
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
class TestValidateSublists:
    """Group the validate_sublists tests for matching and mismatching input."""

    def test_equal_elements_and_order(
        self,
        list_with_equal_elements_equal_order,
    ):
        """Check validate_sublists on sublists that match element for element.

        Parameters
        ----------
        list_with_equal_elements_equal_order : list
            Fixture-supplied list of sublists.

        Asserts
        -------
        validate_sublists returns a truthy result for the fixture input.
        """
        outcome = validate_sublists(list_with_equal_elements_equal_order)
        assert outcome

    def test_equal_elements_and_different_order(
        self,
        list_with_equal_elements_different_order,
    ):
        """Check validate_sublists on sublists that differ only in ordering.

        Parameters
        ----------
        list_with_equal_elements_different_order : list
            Fixture-supplied list of sublists.

        Asserts
        -------
        validate_sublists returns a truthy result even though the sublists
        are ordered differently.
        """
        outcome = validate_sublists(list_with_equal_elements_different_order)
        assert outcome

    def test_unequal_elements(self, list_with_unequal_elements):
        """Check validate_sublists rejects sublists with differing elements.

        Parameters
        ----------
        list_with_unequal_elements : list
            Fixture-supplied list of sublists.

        Asserts
        -------
        A ValueError is raised and its message contains the expected text.
        """
        expected_message = "Sublists do not have the same elements."
        with pytest.raises(ValueError) as raised:
            validate_sublists(list_with_unequal_elements)
        # The message is checked outside the context manager so that the
        # exception info is fully populated.
        assert expected_message in str(raised.value)

    def test_unequal_elements_and_unbalance(
        self,
        list_with_unequal_elements_and_unbalance,
    ):
        """Check validate_sublists rejects sublists that differ in both
        elements and length.

        Parameters
        ----------
        list_with_unequal_elements_and_unbalance : list
            Fixture-supplied list of sublists.

        Asserts
        -------
        A ValueError is raised and its message contains the expected text.
        """
        expected_message = "Sublists do not have the same elements."
        with pytest.raises(ValueError) as raised:
            validate_sublists(list_with_unequal_elements_and_unbalance)
        # The message is checked outside the context manager so that the
        # exception info is fully populated.
        assert expected_message in str(raised.value)

test_equal_elements_and_different_order(list_with_equal_elements_different_order)

Verify validate_sublists function with lists having identical elements in different orders.

Parameters

list_with_equal_elements_different_order : list Input provided by a fixture.

Asserts

The function returns True for lists with identical elements regardless of their order.

Source code in tests/tools/test_validators.py
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
def test_equal_elements_and_different_order(
    self,
    list_with_equal_elements_different_order,
):
    """Check validate_sublists on sublists that differ only in ordering.

    Parameters
    ----------
    list_with_equal_elements_different_order : list
        Fixture-supplied list of sublists.

    Asserts
    -------
    validate_sublists returns a truthy result even though the sublists
    are ordered differently.
    """
    outcome = validate_sublists(list_with_equal_elements_different_order)
    assert outcome

test_equal_elements_and_order(list_with_equal_elements_equal_order)

Verify validate_sublists function with lists having identical elements in the same order.

Parameters

list_with_equal_elements_equal_order : list Input provided by a fixture.

Asserts

The function returns True for lists with identical elements and order.

Source code in tests/tools/test_validators.py
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
def test_equal_elements_and_order(
    self,
    list_with_equal_elements_equal_order,
):
    """Check validate_sublists on sublists that match element for element.

    Parameters
    ----------
    list_with_equal_elements_equal_order : list
        Fixture-supplied list of sublists.

    Asserts
    -------
    validate_sublists returns a truthy result for the fixture input.
    """
    outcome = validate_sublists(list_with_equal_elements_equal_order)
    assert outcome

test_unequal_elements(list_with_unequal_elements)

Verify validate_sublists function with lists having different elements.

Parameters

list_with_unequal_elements : list Input provided by a fixture.

Asserts

The function raises a ValueError for lists with differing elements.

Source code in tests/tools/test_validators.py
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
def test_unequal_elements(self, list_with_unequal_elements):
    """Check validate_sublists rejects sublists with differing elements.

    Parameters
    ----------
    list_with_unequal_elements : list
        Fixture-supplied list of sublists.

    Asserts
    -------
    A ValueError is raised and its message contains the expected text.
    """
    expected_message = "Sublists do not have the same elements."
    with pytest.raises(ValueError) as raised:
        validate_sublists(list_with_unequal_elements)
    # The message is checked outside the context manager so that the
    # exception info is fully populated.
    assert expected_message in str(raised.value)

test_unequal_elements_and_unbalance(list_with_unequal_elements_and_unbalance)

Verify validate_sublists function with lists having different elements and lengths.

Parameters

list_with_unequal_elements_and_unbalance : list Input provided by a fixture.

Asserts

The function raises a ValueError for lists with differing elements and lengths.

Source code in tests/tools/test_validators.py
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
def test_unequal_elements_and_unbalance(
    self,
    list_with_unequal_elements_and_unbalance,
):
    """Check validate_sublists rejects sublists that differ in both
    elements and length.

    Parameters
    ----------
    list_with_unequal_elements_and_unbalance : list
        Fixture-supplied list of sublists.

    Asserts
    -------
    A ValueError is raised and its message contains the expected text.
    """
    expected_message = "Sublists do not have the same elements."
    with pytest.raises(ValueError) as raised:
        validate_sublists(list_with_unequal_elements_and_unbalance)
    # The message is checked outside the context manager so that the
    # exception info is fully populated.
    assert expected_message in str(raised.value)

list_with_equal_elements_different_order()

Provide a list where sublists have identical elements in different orders.

Returns

list A list of sublists with equal elements in varying order.

Source code in tests/tools/test_validators.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
@pytest.fixture()
def list_with_equal_elements_different_order():
    """Supply two sublists holding the same elements in reversed order.

    Returns
    -------
    list
        ``[["A", "B"], ["B", "A"]]``.
    """
    original_order = ["A", "B"]
    swapped_order = ["B", "A"]
    return [original_order, swapped_order]

list_with_equal_elements_equal_order()

Provide a list where sublists have identical elements in the same order.

Returns

list A list of sublists with equal elements in identical order.

Source code in tests/tools/test_validators.py
15
16
17
18
19
20
21
22
23
24
25
26
27
@pytest.fixture()
def list_with_equal_elements_equal_order():
    """Supply two sublists holding the same elements in the same order.

    Returns
    -------
    list
        ``[["A", "B"], ["A", "B"]]``.
    """
    # Two separate literals keep the sublists distinct objects.
    first_sublist = ["A", "B"]
    second_sublist = ["A", "B"]
    return [first_sublist, second_sublist]

list_with_unequal_elements()

Provide a list where sublists have different elements.

Returns

list A list of sublists with unequal elements.

Source code in tests/tools/test_validators.py
46
47
48
49
50
51
52
53
54
55
56
57
58
@pytest.fixture()
def list_with_unequal_elements():
    """Supply two same-length sublists whose elements do not all match.

    Returns
    -------
    list
        ``[["A", "B"], ["C", "A"]]``.
    """
    first_sublist = ["A", "B"]
    mismatched_sublist = ["C", "A"]
    return [first_sublist, mismatched_sublist]

list_with_unequal_elements_and_unbalance()

Provide a list where sublists have different elements and lengths.

Returns

list A list of sublists with unequal elements and varying lengths.

Source code in tests/tools/test_validators.py
61
62
63
64
65
66
67
68
69
70
71
72
73
@pytest.fixture()
def list_with_unequal_elements_and_unbalance():
    """Supply two sublists of different lengths.

    Returns
    -------
    list
        ``[["A", "B", "C"], ["C", "A"]]``.
    """
    longer_sublist = ["A", "B", "C"]
    shorter_sublist = ["C", "A"]
    return [longer_sublist, shorter_sublist]

test_data_validation(tmp_path_factory, data_raw, file_name_log, data_schema, data_clean)

Validate the functionality of validate_data_quality.

This test iterates through a series of predefined datasets, including both valid and invalid data, to verify the data quality validation process. It checks whether the function correctly processes valid data, identifies invalid data, and logs errors as expected.

Parameters

- tmp_path_factory : _pytest.tmpdir.TempPathFactory — A fixture provided by pytest to create temporary directories.
- data_raw : dict — The raw data dictionary to be validated. Represents a single row of data intended for processing by the validate_data_quality function.
- file_name_log : str — The name of the log file used to record validation errors.
- data_schema : BaseModel — The Pydantic model that the raw data is validated against.
- data_clean : list — The expected processed data outcome from the validation function, for comparison with the actual result.

Asserts

Asserts that the processed data matches the expected data_clean list. Additionally, it checks if the log file's existence aligns with the presence of invalid data, ensuring that logs are created only when there are validation errors.

Source code in tests/tools/test_validators.py
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
@pytest.mark.parametrize(
    "data_raw, file_name_log, data_schema, data_clean",
    data_for_validation_parameters,
)
def test_data_validation(
    tmp_path_factory,
    data_raw,
    file_name_log,
    data_schema,
    data_clean,
):
    """Run validate_data_quality on a single-row frame and check the result.

    Each parametrized case builds a one-row, string-typed DataFrame from
    ``data_raw``, passes it through validate_data_quality, and compares the
    values of the first yielded record with ``data_clean``. An empty result
    is treated as "bad data", in which case an invalid-records log file is
    expected to exist in the temporary output directory.

    Parameters
    ----------
    tmp_path_factory : _pytest.tmpdir.TempPathFactory
        pytest fixture used to create the temporary output directory.
    data_raw : dict
        Raw values for the single row handed to validate_data_quality.
    file_name_log : str
        Log file name forwarded to validate_data_quality.
    data_schema : BaseModel
        Schema forwarded to validate_data_quality.
    data_clean : list
        Expected values of the first validated record, or an empty list
        when no record is expected.

    Asserts
    -------
    The values of the first validated record (or ``[]`` if none) equal
    ``data_clean``, and the log file exists exactly when no record was
    produced.
    """
    frame = pd.DataFrame(data_raw, index=[0], dtype=str)
    work_dir = tmp_path_factory.mktemp("data_validation")

    validated_rows = list(
        validate_data_quality(
            frame,
            str(work_dir),
            file_name_log,
            data_schema,
        ),
    )

    # No yielded record means the input row was rejected.
    is_bad_data = not validated_rows
    first_row_values = (
        [] if is_bad_data else list(validated_rows[0].values())
    )

    # NOTE(review): the log name is hard-coded rather than derived from
    # file_name_log -- confirm every parametrized case uses this name.
    log_path = work_dir / "test_invalid_records.log"

    assert first_row_values == data_clean
    assert os.path.exists(log_path) == is_bad_data