From a3a16eb006a4345b62e6b73f6f3cf3a9497dd48e Mon Sep 17 00:00:00 2001 From: Julio Ruiz Date: Thu, 29 Jan 2026 05:50:58 +0000 Subject: [PATCH] Update indexer Modified indexer to query _translation tables directly so that we can pull only translated versions of activities and conferences --- carpa_json_to_markdown/index.js | 209 +++++++++++++++++++++++--------- 1 file changed, 155 insertions(+), 54 deletions(-) diff --git a/carpa_json_to_markdown/index.js b/carpa_json_to_markdown/index.js index 8c289f5..fd8e18e 100644 --- a/carpa_json_to_markdown/index.js +++ b/carpa_json_to_markdown/index.js @@ -18,6 +18,7 @@ let jsonlData = ''; const DATA_INPUT_FOLDER = './input'; const DATA_OUTPUT_FOLDER = './output'; const TEMPLATES_FOLDER = './templates'; +const LOCALE = 'es'; function generateMarkdown(type) { const folderPath = `${DATA_INPUT_FOLDER}/${type}`; @@ -88,7 +89,7 @@ function generateMarkdown(type) { console.log(`Summary created successfully.`); } -async function generateJson( type, year ) { +async function generateJson( type ) { let options = { json: true }; let fields = []; @@ -134,7 +135,49 @@ async function generateJson( type, year ) { 'translations.pdf.filename_disk', ] - let url = `http://directus.carpa.com/items/${type}?fields=${fields[type].join(",")}&deep[translations][_filter][languages_code][_eq]=es&filter[year(date)]=${year}&access_token=dUILDpE5gV224XqOB5xUTzE69sk8VSOL&limit=1000&sort=-date`; + fields['activities_translations'] = [ + 'activities_id.id', + 'title', + 'activities_id.date', + 'activities_id.activity', + 'activities_id.place', + 'activities_id.city', + 'activities_id.state', + 'activities_id.country', + 'activities_id.duration', + 'interventions.text', + 'activities_id.private', + 'activities_id.thumbnail.filename_disk', + 'slug', + 'youtube', + 'privateVideo.filename_disk', + 'mp3.filename_disk', + 'pdf_booklet.filename_disk', + 'pdf.filename_disk' + ] + + fields['conferences_translations'] = [ + 'conferences_id.id', + 'title', + 'conferences_id.date', + 'conferences_id.activity', + 'conferences_id.place', + 'conferences_id.city', + 'conferences_id.state', + 'conferences_id.country', + 'conferences_id.duration', + 'text_published', + 'conferences_id.public', + 'conferences_id.thumbnail.filename_disk', + 'slug', + 'youtube', + 'video.filename_disk', + 'audio.filename_disk', + 'pdf.filename_disk', + 'pdf_simple.filename_disk', + ] + + let url = `http://directus.carpa.com/items/${type}?fields=${fields[type].join(",")}&filter[languages_code]=${LOCALE}&access_token=dUILDpE5gV224XqOB5xUTzE69sk8VSOL&limit=10000`; console.log( url ) @@ -147,60 +190,118 @@ async function generateJson( type, year ) { // do something with JSON, using the 'body' variable const items = body.data; - items.map((item) => { - if(type=='activities'){ - item.body = he.decode(striptags(item.translations[0]?.interventions[0]?.text)) || '' - item.private = item.private == 1 ? true : false; - item.files = {} - item.files.youtube = item.translations[0]?.youtube - item.files.video = item.translations[0]?.privateVideo?.filename_disk - item.files.audio = item.translations[0]?.mp3?.filename_disk - item.files.booklet = item.translations[0]?.pdf_booklet?.filename_disk - item.files.simple = item.translations[0]?.pdf?.filename_disk - } + let nitems = items.map((item) => { + let nitem = {} - if(type=='conferences'){ - item.id = item.id.toString(); - item.body = he.decode(striptags(item.translations[0]?.text_published)) || '' - item.private = item.public == 0 ? true : false; - item.files = {} - item.files.youtube = item.translations[0]?.youtube - item.files.video = item.translations[0]?.video?.filename_disk - item.files.audio = item.translations[0]?.audio?.filename_disk - item.files.booklet = item.translations[0]?.pdf?.filename_disk - item.files.simple = item.translations[0]?.pdf_simple?.filename_disk - delete item.public + if( type == 'activities_translations' ){ + nitem.id = item.activities_id?.id + nitem.title = item.title + nitem.body = he.decode(striptags(item.interventions[0]?.text)) || '' + nitem.private = item.activities_id?.private == 1 ? true : false; + nitem.files = {} + nitem.files.youtube = item.youtube + nitem.files.video = item.privateVideo?.filename_disk + nitem.files.audio = item.mp3?.filename_disk + nitem.files.booklet = item.pdf_booklet?.filename_disk + nitem.files.simple = item.pdf?.filename_disk + nitem.slug = item.slug + nitem.place = item.activities_id?.place || null; + nitem.city = item.activities_id?.city || null; + nitem.state = item.activities_id?.state || null; + nitem.country = item.activities_id?.country || null; + + nitem.duration = item.activities_id?.duration ?? 0; + + nitem.year = dayjs(item.activities_id?.date).year().toString() + nitem.month = item.year + " > " + (dayjs(item.activities_id?.date).month()+1).toString().padStart(2,"0") + + nitem.menu_lvl0 = item.country + nitem.menu_lvl1 = item.country + " > " + item.state + nitem.menu_lvl2 = item.country + " > " + item.state + " > " + item.city + nitem.menu_lvl3 = item.country + " > " + item.state + " > " + item.city + " > " + item.place + + nitem.date = dayjs(item.activities_id?.date).unix() + + nitem.thumbnail = item.activities_id?.thumbnail?.filename_disk + } + + if( type == 'conferences_translations' ){ + nitem.id = item.conferences_id?.id + nitem.title = item.title + nitem.body = he.decode(striptags(item.text_published)) || '' + nitem.private = item.conferences_id?.public == 0 ? true : false; + nitem.files = {} + nitem.files.youtube = item.youtube + nitem.files.video = item.privateVideo?.filename_disk + nitem.files.audio = item.mp3?.filename_disk + nitem.files.booklet = item.pdf_booklet?.filename_disk + nitem.files.simple = item.pdf?.filename_disk + nitem.slug = item.slug + nitem.place = item.conferences_id?.place || null; + nitem.city = item.conferences_id?.city || null; + nitem.state = item.conferences_id?.state || null; + nitem.country = item.conferences_id?.country || null; + + nitem.duration = item.conferences_id?.duration ?? 0; + + nitem.year = dayjs(item.conferences_id?.date).year().toString() + nitem.month = item.year + " > " + (dayjs(item.conferences_id?.date).month()+1).toString().padStart(2,"0") + + nitem.menu_lvl0 = item.country + nitem.menu_lvl1 = item.country + " > " + item.state + nitem.menu_lvl2 = item.country + " > " + item.state + " > " + item.city + nitem.menu_lvl3 = item.country + " > " + item.state + " > " + item.city + " > " + item.place + + nitem.date = dayjs(item.conferences_id?.date).unix() + + nitem.thumbnail = item.conferences_id?.thumbnail?.filename_disk + } + + // if(type=='conferences'){ + // item.id = item.id.toString(); + // item.body = he.decode(striptags(item.translations[0]?.text_published)) || '' + // item.private = item.public == 0 ? true : false; + // item.files = {} + // item.files.youtube = item.translations[0]?.youtube + // item.files.video = item.translations[0]?.video?.filename_disk + // item.files.audio = item.translations[0]?.audio?.filename_disk + // item.files.booklet = item.translations[0]?.pdf?.filename_disk + // item.files.simple = item.translations[0]?.pdf_simple?.filename_disk + // delete item.public + // } + + // item.slug = item.translations[0]?.slug + // item.place = item.place || null; + // item.city = item.city || null; + // item.state = item.state || null; + // item.country = item.country || null; + + // item.duration = item.duration ?? 0; + + // item.year = dayjs(item.date).year().toString() + // item.month = item.year + " > " + (dayjs(item.date).month()+1).toString().padStart(2,"0") + + // item.menu_lvl0 = item.country + // item.menu_lvl1 = item.country + " > " + item.state + // item.menu_lvl2 = item.country + " > " + item.state + " > " + item.city + // item.menu_lvl3 = item.country + " > " + item.state + " > " + item.city + " > " + item.place + + // item.date = dayjs(item.date).unix() + + // item.thumbnail = item.thumbnail?.filename_disk + + // delete item.translations; + if( nitem.title && nitem.id ){ + return nitem; } - item.slug = item.translations[0]?.slug - item.place = item.place || null; - item.city = item.city || null; - item.state = item.state || null; - item.country = item.country || null; - - item.duration = item.duration ?? 0; - - item.year = dayjs(item.date).year().toString() - item.month = item.year + " > " + (dayjs(item.date).month()+1).toString().padStart(2,"0") - - item.menu_lvl0 = item.country - item.menu_lvl1 = item.country + " > " + item.state - item.menu_lvl2 = item.country + " > " + item.state + " > " + item.city - item.menu_lvl3 = item.country + " > " + item.state + " > " + item.city + " > " + item.place - - item.date = dayjs(item.date).unix() - - item.thumbnail = item.thumbnail?.filename_disk - - delete item.translations; - return item; }) - if (!fs.existsSync(`./${DATA_INPUT_FOLDER}/${type}`)) { - fs.mkdirSync(`./${DATA_INPUT_FOLDER}/${type}`); - } + // if (!fs.existsSync(`./${DATA_INPUT_FOLDER}/${type}`)) { + // fs.mkdirSync(`./${DATA_INPUT_FOLDER}/${type}`); + // } - jsonlData += items.map(JSON.stringify).join('\n') + jsonlData += nitems.filter(item => item).map(JSON.stringify).join('\n') writeFile( jsonlData , type ) }; @@ -209,7 +310,7 @@ async function generateJson( type, year ) { } function writeFile( jsonlData, type ){ - fs.writeFile(`./${DATA_INPUT_FOLDER}/${type}.json`, jsonlData, (err) => { + fs.writeFile(`./${DATA_INPUT_FOLDER}/${type}_${LOCALE.toLocaleUpperCase()}.json`, jsonlData, (err) => { if (err) { console.error("Error writing file:", err); return; @@ -232,6 +333,6 @@ function writeFile( jsonlData, type ){ // generateJson( 'conferences', year ); // } -for( let year = 2021; year < 2027; year++){ - generateJson( 'activities', year ); -} \ No newline at end of file +//for( let year = 2021; year < 2027; year++){ + generateJson( 'conferences_translations' ); +//} \ No newline at end of file