From 155db6157d4fa84f1a7af8da2a6261e022b72c80 Mon Sep 17 00:00:00 2001 From: Julio Ruiz Date: Tue, 3 Feb 2026 00:05:21 -0500 Subject: [PATCH] Updating indexer --- carpa_json_to_markdown/index.js | 245 +++++++++----------------- typesense/schema/activities_base.json | 13 +- 2 files changed, 95 insertions(+), 163 deletions(-) diff --git a/carpa_json_to_markdown/index.js b/carpa_json_to_markdown/index.js index e7325ff..c5ccc3b 100644 --- a/carpa_json_to_markdown/index.js +++ b/carpa_json_to_markdown/index.js @@ -60,28 +60,28 @@ function generateMarkdown(type) { fs.writeFileSync(`./${DATA_OUTPUT_FOLDER}/${type}/${startYear}/${month}/${conference.date}-${conference.activity}.md`, output) //Add month count for summary - const m = conference.date.substring(5,7); - if( !months.hasOwnProperty( m ) ){ - months[m] = [] - } - months[m].push( m ) + const m = conference.date.substring(5, 7); + if (!months.hasOwnProperty(m)) { + months[m] = [] + } + months[m].push(m) }) let monthsArray = []; - - for ( const month in months) { - let mObject = {}; - mObject.month = month; - mObject.count = months[month].length; - monthsArray.push( mObject ); + + for (const month in months) { + let mObject = {}; + mObject.month = month; + mObject.count = months[month].length; + monthsArray.push(mObject); } //Sort monthsarray - monthsArray.sort((a,b) => b.month - a.month); // b - a for reverse sort + monthsArray.sort((a, b) => b.month - a.month); // b - a for reverse sort yearObject.months = monthsArray; - summary.push( yearObject ); + summary.push(yearObject); }) @@ -93,48 +93,6 @@ async function generateJson( type ) { let options = { json: true }; let fields = []; - fields['conferences'] = [ - 'id', - 'title', - 'date', - 'activity', - 'place', - 'city', - 'state', - 'country', - 'duration', - 'translations.text_published', - 'public', - 'thumbnail.filename_disk', - 'translations.slug', - 'translations.youtube', - 'translations.video.filename_disk', - 'translations.audio.filename_disk', - 'translations.pdf.filename_disk', - 'translations.pdf_simple.filename_disk', - ] - - fields['activities'] = [ - 'id', - 'title', - 'date', - 'activity', - 'place', - 'city', - 'state', - 'country', - 'duration', - 'translations.interventions.text', - 'private', - 'thumbnail.filename_disk', - 'translations.slug', - 'translations.youtube', - 'translations.privateVideo.filename_disk', - 'translations.mp3.filename_disk', - 'translations.pdf_booklet.filename_disk', - 'translations.pdf.filename_disk', - ] - fields['activities_translations'] = [ 'activities_id.id', 'title', @@ -179,8 +137,6 @@ async function generateJson( type ) { let url = `http://directus.carpa.com/items/${type}?fields=${fields[type].join(",")}&filter[languages_code]=${LOCALE}&access_token=dUILDpE5gV224XqOB5xUTzE69sk8VSOL&limit=10000`; - console.log( url ) - request(url, options, (error, res, body) => { if (error) { return console.log(error) @@ -191,132 +147,102 @@ async function generateJson( type ) { const items = body.data; let nitems = items.map((item) => { - let nitem = {} + let nitem = {} - if( type == 'activities_translations' ){ - nitem.id = item.activities_id?.id - nitem.title = item.title - nitem.body = he.decode(striptags(item.interventions[0]?.text)) || '' - nitem.private = item.activities_id?.private == 1 ? true : false; - nitem.files = {} - nitem.files.youtube = item.youtube - nitem.files.video = item.privateVideo?.filename_disk - nitem.files.audio = item.mp3?.filename_disk - nitem.files.booklet = item.pdf_booklet?.filename_disk - nitem.files.simple = item.pdf?.filename_disk - nitem.slug = item.slug - nitem.place = item.activities_id?.place || null; - nitem.city = item.activities_id?.city || null; - nitem.state = item.activities_id?.state || null; - nitem.country = item.activities_id?.country || null; + if (type == 'activities_translations') { + nitem.id = item.activities_id?.id + nitem.type = 'activities' + nitem.title = item.title + //nitem.body = he.decode(striptags(item.interventions[0]?.text)) || '' + nitem.body = item.interventions[0]?.text + nitem.private = item.activities_id?.private == 1 ? true : false; + nitem.files = {} + nitem.files.youtube = item.youtube + nitem.files.video = item.privateVideo?.filename_disk + nitem.files.audio = item.mp3?.filename_disk + nitem.files.booklet = item.pdf_booklet?.filename_disk + nitem.files.simple = item.pdf?.filename_disk + nitem.slug = item.slug + nitem.place = item.activities_id?.place || null; + nitem.city = item.activities_id?.city || null; + nitem.state = item.activities_id?.state || null; + nitem.country = item.activities_id?.country || null; - nitem.duration = item.activities_id?.duration ?? 0; + nitem.duration = item.activities_id?.duration ?? 0; - nitem.year = dayjs(item.activities_id?.date).year().toString() - nitem.month = nitem.year + " > " + (dayjs(item.activities_id?.date).month()+1).toString().padStart(2,"0") + nitem.year = dayjs(item.activities_id?.date).year().toString() + nitem.month = nitem.year + " > " + (dayjs(item.activities_id?.date).month() + 1).toString().padStart(2, "0") - nitem.menu_lvl0 = nitem.country - nitem.menu_lvl1 = nitem.country + " > " + nitem.state - nitem.menu_lvl2 = nitem.country + " > " + nitem.state + " > " + nitem.city - nitem.menu_lvl3 = nitem.country + " > " + nitem.state + " > " + nitem.city + " > " + nitem.place + nitem.menu_country = nitem.country + nitem.menu_state = nitem.country + " > " + nitem.state + nitem.menu_city = nitem.country + " > " + nitem.state + " > " + nitem.city + nitem.menu_place = nitem.country + " > " + nitem.state + " > " + nitem.city + " > " + nitem.place - nitem.date = dayjs(item.activities_id?.date).unix() + nitem.date = dayjs(item.activities_id?.date).unix() - nitem.thumbnail = item.activities_id?.thumbnail?.filename_disk + nitem.thumbnail = item.activities_id?.thumbnail?.filename_disk + + //Filter out anything before 25/12/2021 + if( nitem.date < 1640408400 ) { + return false } - - if( type == 'conferences_translations' ){ - nitem.id = item.conferences_id?.id.toString() - nitem.title = item.title - nitem.body = he.decode(striptags(item.text_published)) || '' - nitem.private = item.conferences_id?.public == 0 ? true : false; - nitem.files = {} - nitem.files.youtube = item.youtube - nitem.files.video = item.privateVideo?.filename_disk - nitem.files.audio = item.mp3?.filename_disk - nitem.files.booklet = item.pdf_booklet?.filename_disk - nitem.files.simple = item.pdf?.filename_disk - nitem.slug = item.slug - nitem.place = item.conferences_id?.place || null; - nitem.city = item.conferences_id?.city || null; - nitem.state = item.conferences_id?.state || null; - nitem.country = item.conferences_id?.country || null; + } - nitem.duration = item.conferences_id?.duration ?? 0; + if (type == 'conferences_translations') { + nitem.id = item.conferences_id?.id.toString() + nitem.type = 'conferences' + nitem.title = item.title + //nitem.body = he.decode(striptags(item.text_published)) || '' + nitem.body = item.text_published + nitem.private = item.conferences_id?.public == 0 ? true : false; + nitem.files = {} + nitem.files.youtube = item.youtube + nitem.files.video = item.privateVideo?.filename_disk + nitem.files.audio = item.mp3?.filename_disk + nitem.files.booklet = item.pdf_booklet?.filename_disk + nitem.files.simple = item.pdf?.filename_disk + nitem.slug = item.slug + nitem.place = item.conferences_id?.place || null; + nitem.city = item.conferences_id?.city || null; + nitem.state = item.conferences_id?.state || null; + nitem.country = item.conferences_id?.country || null; - nitem.year = dayjs(item.conferences_id?.date).year().toString() - nitem.month = nitem.year + " > " + (dayjs(item.conferences_id?.date).month()+1).toString().padStart(2,"0") + nitem.duration = item.conferences_id?.duration ?? 0; - nitem.menu_lvl0 = nitem.country - nitem.menu_lvl1 = nitem.country + " > " + nitem.state - nitem.menu_lvl2 = nitem.country + " > " + nitem.state + " > " + nitem.city - nitem.menu_lvl3 = nitem.country + " > " + nitem.state + " > " + nitem.city + " > " + nitem.place + nitem.year = dayjs(item.conferences_id?.date).year().toString() + nitem.month = nitem.year + " > " + (dayjs(item.conferences_id?.date).month() + 1).toString().padStart(2, "0") - nitem.date = dayjs(item.conferences_id?.date).unix() + nitem.menu_country = nitem.country + nitem.menu_state = nitem.country + " > " + nitem.state + nitem.menu_city = nitem.country + " > " + nitem.state + " > " + nitem.city + nitem.menu_place = nitem.country + " > " + nitem.state + " > " + nitem.city + " > " + nitem.place - nitem.thumbnail = item.conferences_id?.thumbnail?.filename_disk - } + nitem.date = dayjs(item.conferences_id?.date).unix() - // if(type=='conferences'){ - // item.id = item.id.toString(); - // item.body = he.decode(striptags(item.translations[0]?.text_published)) || '' - // item.private = item.public == 0 ? true : false; - // item.files = {} - // item.files.youtube = item.translations[0]?.youtube - // item.files.video = item.translations[0]?.video?.filename_disk - // item.files.audio = item.translations[0]?.audio?.filename_disk - // item.files.booklet = item.translations[0]?.pdf?.filename_disk - // item.files.simple = item.translations[0]?.pdf_simple?.filename_disk - // delete item.public - // } - - // item.slug = item.translations[0]?.slug - // item.place = item.place || null; - // item.city = item.city || null; - // item.state = item.state || null; - // item.country = item.country || null; - - // item.duration = item.duration ?? 0; - - // item.year = dayjs(item.date).year().toString() - // item.month = item.year + " > " + (dayjs(item.date).month()+1).toString().padStart(2,"0") - - // item.menu_lvl0 = item.country - // item.menu_lvl1 = item.country + " > " + item.state - // item.menu_lvl2 = item.country + " > " + item.state + " > " + item.city - // item.menu_lvl3 = item.country + " > " + item.state + " > " + item.city + " > " + item.place - - // item.date = dayjs(item.date).unix() - - // item.thumbnail = item.thumbnail?.filename_disk - - // delete item.translations; - if( nitem.title && nitem.id ){ + nitem.thumbnail = item.conferences_id?.thumbnail?.filename_disk + } + if (nitem.title && nitem.id) { return nitem; } - - }) - // if (!fs.existsSync(`./${DATA_INPUT_FOLDER}/${type}`)) { - // fs.mkdirSync(`./${DATA_INPUT_FOLDER}/${type}`); - // } + }) jsonlData += nitems.filter(item => item).map(JSON.stringify).join('\n') - writeFile( jsonlData , type ) + writeFile(jsonlData, type) }; }); } -function writeFile( jsonlData, type ){ +function writeFile(jsonlData, type) { fs.writeFile(`./${DATA_INPUT_FOLDER}/${type}_${LOCALE.toLocaleUpperCase()}.json`, jsonlData, (err) => { - if (err) { - console.error("Error writing file:", err); - return; - } - console.log("File has been written successfully! : " + type); - }); + if (err) { + console.error("Error writing file:", err); + return; + } + console.log("File has been written successfully! : " + type); + }); } @@ -334,5 +260,6 @@ function writeFile( jsonlData, type ){ // } //for( let year = 2021; year < 2027; year++){ - generateJson( 'activities_translations' ); +//generateJson( 'activities_translations' ); +generateJson( 'conferences_translations' ); //} \ No newline at end of file diff --git a/typesense/schema/activities_base.json b/typesense/schema/activities_base.json index eab3fbf..aa5e2e8 100644 --- a/typesense/schema/activities_base.json +++ b/typesense/schema/activities_base.json @@ -6,6 +6,11 @@ "type": "string", "facet": false }, + { + "name": "type", + "type": "string", + "facet": false + }, { "name": "title", "type": "string", @@ -75,25 +80,25 @@ "facet":true }, { - "name": "menu_lvl0", + "name": "menu_country", "type": "string", "facet":true, "optional": true }, { - "name": "menu_lvl1", + "name": "menu_state", "type": "string", "facet":true, "optional": true }, { - "name": "menu_lvl2", + "name": "menu_city", "type": "string", "facet":true, "optional": true }, { - "name": "menu_lvl3", + "name": "menu_place", "type": "string", "facet":true, "optional": true