CSV Duplicates Identifier

JS
S
JavaScript

CSV Duplicates Identifier Python

"""Report identical lines that sit within a few lines of each other in a CSV file.

The file is treated as plain text: lines are compared as whole strings, not
parsed into CSV fields.
"""

import csv  # Kept from the original script; not used, lines are compared as raw text.

# File inspected when the script is run directly.
CSV_PATH = 'test-sch-222.27-02-2025.100759.csv'

# How many following lines each line is compared against.
LOOKAHEAD = 3


def find_nearby_duplicates(lines, lookahead=LOOKAHEAD):
    """Return pairs of identical lines that are at most *lookahead* lines apart.

    Args:
        lines: Sequence of text lines, with or without trailing line
            terminators (for example the result of ``file.readlines()``).
        lookahead: Number of subsequent lines each line is compared with.
            Values below 1 yield no pairs.

    Returns:
        A list of ``(i, j, content)`` tuples, ordered by ``i`` then ``j``,
        where ``i < j`` are 0-based line indexes of two matching lines and
        ``content`` is line ``i`` with surrounding whitespace stripped.
    """
    # Compare without the line terminator: the last line of a file often has
    # no trailing newline and would otherwise never equal an identical line
    # earlier in the file. Any other whitespace difference still counts.
    bodies = [line.rstrip("\r\n") for line in lines]
    total = len(bodies)

    duplicates = []
    for i, body in enumerate(bodies):
        # Only the next `lookahead` lines are candidates, clipped at the end.
        for j in range(i + 1, min(i + 1 + lookahead, total)):
            if body == bodies[j]:
                duplicates.append((i, j, lines[i].strip()))
    return duplicates


def main(path=CSV_PATH):
    """Read *path*, find nearby duplicate lines and print a report to stdout."""
    # Read the raw CSV file
    with open(path, 'r') as file:
        lines = file.readlines()

    # Find exact string matches
    duplicates = find_nearby_duplicates(lines)

    print(f"Total lines in file: {len(lines)}")
    print(f"Duplicate pairs found: {len(duplicates)}")
    print("\nDuplicate lines (0-based line numbers):")
    for i, j, line in duplicates:
        print(f"\nLines {i} and {j} are identical:")
        print(f"Content: {line}")


if __name__ == "__main__":
    main()

Created on 3/4/2025