"""Remove rows that duplicate an earlier row's value in one CSV column.

Usage:
    python <script> SOURCE_CSV COLUMN

The de-duplicated table is written next to the source file as
``<source-name>-without-dups.csv``. Rows whose COLUMN value is missing are
always kept; among rows with a value, only the first occurrence survives.
"""

import os
import sys

import pandas as pd


def drop_duplicates_keep_nulls(df, column):
    """Return ``df`` without rows repeating an earlier value of ``column``.

    Args:
        df: The DataFrame to filter. It is not modified.
        column: Label of the column whose values must be unique.

    Returns:
        A DataFrame holding every row whose ``column`` value is missing plus
        the first row for each distinct non-missing value, in original order.

    Raises:
        KeyError: If ``column`` is not a column of ``df``.

    Note:
        ``df.drop_duplicates(subset=[column])`` alone is not equivalent: it
        treats missing values as equal to each other and would keep only the
        first row with a missing value. ``df.drop_duplicates()`` without
        ``subset`` drops only rows that are identical in every column.
    """
    if column not in df.columns:
        raise KeyError(column)

    # duplicated() is computed over the whole frame so the mask shares df's
    # index; rows with a missing value are rescued by the isna() term.
    is_missing = df[column].isna()
    is_repeat = df.duplicated(subset=[column], keep="first")
    return df[is_missing | ~is_repeat]


def build_save_path(source_path):
    """Return the output path: same directory, ``-without-dups.csv`` suffix."""
    directory = os.path.dirname(os.path.abspath(source_path))
    file_name = os.path.splitext(os.path.basename(source_path))[0]
    return os.path.join(directory, file_name + "-without-dups.csv")


def main(argv):
    """Run the de-duplication for ``argv = [source_path, column, ...]``.

    Arguments beyond the first two are ignored.

    Returns:
        0 on success, 1 if the column does not exist in the CSV, 2 if too
        few arguments were supplied.
    """
    if len(argv) < 2:
        print("usage: SOURCE_CSV COLUMN", file=sys.stderr)
        return 2
    source_path, column = argv[0], argv[1]

    # 1. Read the CSV (low_memory=False reads the file in a single pass).
    df = pd.read_csv(source_path, low_memory=False)

    # 2. Drop the partial duplicates on the requested column.
    try:
        df = drop_duplicates_keep_nulls(df, column)
    except KeyError:
        print(
            "error: column %r not found in %s" % (column, source_path),
            file=sys.stderr,
        )
        return 1

    # 3. Save the result next to the source file.
    df.to_csv(build_save_path(source_path), index=False)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))