// NOTE: pasted file-viewer header (95 lines, 2.4 KiB, TypeScript); not part of the code.
/**
 * Minimum number of transactions a group must contain to be displayed.
 */
export const MIN_GROUP_SIZE = 1;

/**
 * Common French function words that are filtered out when suggesting
 * keywords. Entries are lowercase; callers are expected to lowercase a
 * word before looking it up.
 */
export const STOP_WORDS = [
  // Articles and contractions
  "de", "du", "la", "le", "les", "des", "un", "une",
  // Conjunctions and prepositions
  "et", "ou", "par", "pour", "avec", "sur", "dans", "en", "au", "aux",
  // Demonstratives
  "ce", "cette", "ces",
  // Possessives
  "mon", "ma", "mes",
  "ton", "ta", "tes",
  "son", "sa", "ses",
  "notre", "nos",
  "votre", "vos",
  "leur", "leurs",
];

/**
 * Normalizes a transaction description so that it can be used as a
 * grouping key.
 *
 * Steps, in order:
 * 1. lowercase the text;
 * 2. strip `dd/mm/yyyy` and `dd-mm-yyyy` dates;
 * 3. strip decimal amounts (digits, `.` or `,`, digits);
 * 4. replace masked card references (`carte ***1234`, `cb***1234`) with the
 *    placeholders `CARTE` / `CB`;
 * 5. replace every remaining character that is not a letter, combining
 *    mark, digit, underscore or whitespace with a space;
 * 6. collapse whitespace runs to a single space and trim.
 *
 * @param description - Raw transaction description.
 * @returns The normalized description, without leading/trailing whitespace
 *   or consecutive whitespace characters.
 */
export function normalizeDescription(description: string): string {
  return (
    description
      .toLowerCase()
      .replace(/\d{2}\/\d{2}\/\d{4}/g, "") // Remove dd/mm/yyyy dates
      .replace(/\d{2}-\d{2}-\d{4}/g, "") // Remove dd-mm-yyyy dates
      .replace(/\d+[.,]\d+/g, "") // Remove amounts
      .replace(/carte \*+\d+/gi, "CARTE") // Normalize card numbers
      .replace(/cb\*+\d+/gi, "CB") // Normalize CB numbers
      // Unicode-aware class: `\w` is ASCII-only and would strip accented
      // letters such as "é", splitting words like "crédit" in two.
      .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, " ") // Remove special chars
      // Collapse whitespace last: the previous step introduces new spaces.
      .replace(/\s+/g, " ")
      .trim()
  );
}

/**
 * Extracts candidate keywords from a transaction description.
 *
 * The description is first passed through `normalizeDescription` and split
 * on whitespace. A token is kept only if it is longer than two characters,
 * its lowercase form is not listed in `STOP_WORDS`, and it does not consist
 * solely of digits.
 *
 * @param description - Raw transaction description.
 * @returns The retained tokens, in their order of appearance (duplicates
 *   are preserved).
 */
export function extractKeywords(description: string): string[] {
  const tokens = normalizeDescription(description).split(/\s+/);
  const keywords: string[] = [];

  for (const token of tokens) {
    // Short tokens (including the empty string produced by an empty input).
    if (token.length <= 2) continue;
    // Lowercase before lookup: normalization can emit uppercase placeholders.
    if (STOP_WORDS.includes(token.toLowerCase())) continue;
    // Remove pure numbers.
    if (/^\d+$/.test(token)) continue;

    keywords.push(token);
  }

  return keywords;
}

/**
 * Suggests a keyword that characterizes a set of transaction descriptions.
 *
 * A keyword is a candidate when it occurs in at least half of the
 * descriptions (rounded up). Each description contributes at most once per
 * keyword, so a word repeated inside a single description is not
 * over-counted. Among the candidates the longest keyword wins; ties on
 * length go to the keyword found in more descriptions, then to the one
 * encountered first.
 *
 * When no keyword reaches the threshold, the first keyword of the first
 * description is returned, or the first 15 characters of that description
 * if it yields no keyword at all.
 *
 * @param descriptions - Raw transaction descriptions.
 * @returns The suggested keyword, or an empty string when `descriptions`
 *   is empty.
 */
export function suggestKeyword(descriptions: string[]): string {
  const firstDescription = descriptions[0];
  if (firstDescription === undefined) {
    // Nothing to analyze; avoid dereferencing a missing first element.
    return "";
  }

  // Number of descriptions containing each keyword. A Map is used instead
  // of a plain object so that words such as "constructor" cannot collide
  // with properties inherited from Object.prototype.
  const descriptionCounts = new Map<string, number>();
  for (const description of descriptions) {
    for (const keyword of new Set(extractKeywords(description))) {
      descriptionCounts.set(keyword, (descriptionCounts.get(keyword) ?? 0) + 1);
    }
  }

  const threshold = Math.ceil(descriptions.length * 0.5);
  let bestKeyword: string | undefined;
  let bestCount = 0;

  for (const [keyword, count] of descriptionCounts) {
    if (count < threshold) continue;

    const isLonger =
      bestKeyword === undefined || keyword.length > bestKeyword.length;
    const isSameLengthButMoreFrequent =
      bestKeyword !== undefined &&
      keyword.length === bestKeyword.length &&
      count > bestCount;

    if (isLonger || isSameLengthButMoreFrequent) {
      bestKeyword = keyword;
      bestCount = count;
    }
  }

  if (bestKeyword !== undefined) {
    return bestKeyword;
  }

  // Fallback: first meaningful word from the first description.
  const firstKeywords = extractKeywords(firstDescription);
  return firstKeywords[0] ?? firstDescription.slice(0, 15);
}