// api/carpa_json_to_markdown/index.js
//
// Repository viewer metadata captured with this file:
// 265 lines, 8.6 KiB, JavaScript

//import fetch from 'node-fetch'; // For ES Modules
const fs = require('fs');
const path = require('path');
const striptags = require('striptags');
const dayjs = require('dayjs');
const he = require('he');
const jsonTojsonl = require('json-to-jsonl');
const fetch = require('node-fetch'); // For CommonJS
const { start } = require('repl');
const request = require('request');
let { render } = require("mustache");
// Module-level string buffer. It is mutable and lives for the whole process,
// so anything appended to it persists across function calls.
let jsonlData = '';
// Base folder for source data, relative to the current working directory.
const DATA_INPUT_FOLDER = './input';
// Base folder for generated Markdown files and summaries.
const DATA_OUTPUT_FOLDER = './output';
// Folder holding one Mustache template per content type (`<type>.md`).
const TEMPLATES_FOLDER = './templates';
// Language code used to filter translations; also uppercased into export file names.
const LOCALE = 'pt';
/**
 * Renders one Markdown file per record of the given type and writes a
 * per-year / per-month summary next to them.
 *
 * Every `*.json` file found in `${DATA_INPUT_FOLDER}/${type}` is parsed as an
 * array of records; the file name without its extension is used as the year.
 * Each record is rendered with the Mustache template
 * `${TEMPLATES_FOLDER}/${type}.md` and written to
 * `${DATA_OUTPUT_FOLDER}/${type}/<year>/<month>/<date>-<activity>.md`, where
 * `<month>` is characters 5-6 of `record.date`.
 *
 * The summary is written to `${DATA_OUTPUT_FOLDER}/${type}/${type}.json` as an
 * array of `{ year, count, months: [{ month, count }] }`, with months sorted
 * in descending order and the year list reversed relative to directory
 * listing order.
 *
 * @param {string} type - Content type; names the input sub-folder, the
 *   template file and the output sub-folder.
 * @returns {void}
 * @throws {Error} If the input folder or template cannot be read, a file is
 *   not valid JSON, or a record has no string `date`.
 */
function generateMarkdown(type) {
  const inputDir = `${DATA_INPUT_FOLDER}/${type}`;
  const outputDir = `./${DATA_OUTPUT_FOLDER}/${type}`;
  const jsonFiles = fs.readdirSync(inputDir).filter((file) => file.endsWith('.json'));

  // The template does not depend on the input file, so read it only once.
  const template = fs.readFileSync(`./${TEMPLATES_FOLDER}/${type}.md`).toString();

  // Create the whole output path up front so the summary can be written even
  // when there are no input files or the output root does not exist yet.
  fs.mkdirSync(outputDir, { recursive: true });

  const summary = [];
  for (const file of jsonFiles) {
    const startYear = path.basename(file, '.json');
    const records = JSON.parse(fs.readFileSync(path.join(inputDir, file), 'utf8'));

    // month ("01".."12") -> number of records rendered for that month
    const monthCounts = new Map();
    for (const conference of records) {
      const month = conference.date.substring(5, 7);
      const monthDir = `${outputDir}/${startYear}/${month}`;
      fs.mkdirSync(monthDir, { recursive: true });
      fs.writeFileSync(
        `${monthDir}/${conference.date}-${conference.activity}.md`,
        render(template, conference),
      );
      monthCounts.set(month, (monthCounts.get(month) ?? 0) + 1);
    }

    // Latest month first.
    const months = [...monthCounts]
      .map(([month, count]) => ({ month, count }))
      .sort((a, b) => Number(b.month) - Number(a.month));

    summary.push({ year: startYear, count: records.length, months });
  }

  fs.writeFileSync(`${outputDir}/${type}.json`, JSON.stringify(summary.reverse(), null, 2));
  console.log(`Summary created successfully.`);
}
/**
 * Directus fields requested for each supported translations collection.
 * The keys are the only values accepted as `type` by generateJson().
 */
const EXPORT_FIELDS = Object.freeze({
  activities_translations: [
    'activities_id.id',
    'title',
    'activities_id.date',
    'activities_id.activity',
    'activities_id.place',
    'activities_id.city',
    'activities_id.state',
    'activities_id.country',
    'activities_id.duration',
    'interventions.text',
    'activities_id.private',
    'activities_id.thumbnail.filename_disk',
    'slug',
    'youtube',
    'privateVideo.filename_disk',
    'mp3.filename_disk',
    'pdf_booklet.filename_disk',
    'pdf.filename_disk',
  ],
  conferences_translations: [
    'conferences_id.id',
    'title',
    'conferences_id.date',
    'conferences_id.activity',
    'conferences_id.place',
    'conferences_id.city',
    'conferences_id.state',
    'conferences_id.country',
    'conferences_id.duration',
    'text_published',
    'conferences_id.public',
    'conferences_id.thumbnail.filename_disk',
    'slug',
    'youtube',
    'video.filename_disk',
    'audio.filename_disk',
    'pdf.filename_disk',
    'pdf_simple.filename_disk',
  ],
});

/**
 * Activities dated before this Unix timestamp are left out of the export.
 * 1640408400 is 2021-12-25T05:00:00Z.
 */
const ACTIVITIES_CUTOFF_UNIX = 1640408400;

/** Extracts the activity-specific parts of an `activities_translations` row. */
function readActivityDetails(item) {
  const parent = item.activities_id;
  return {
    parent,
    id: parent?.id,
    type: 'activities',
    // `interventions` may be absent; the export then simply has no body.
    body: item.interventions?.[0]?.text,
    // Loose equality kept on purpose so 1, "1" and true are all treated alike.
    isPrivate: parent?.private == 1,
    files: {
      youtube: item.youtube,
      video: item.privateVideo?.filename_disk,
      audio: item.mp3?.filename_disk,
      booklet: item.pdf_booklet?.filename_disk,
      simple: item.pdf?.filename_disk,
    },
  };
}

/** Extracts the conference-specific parts of a `conferences_translations` row. */
function readConferenceDetails(item) {
  const parent = item.conferences_id;
  return {
    parent,
    id: parent?.id?.toString(),
    type: 'conferences',
    body: item.text_published,
    // Loose equality kept on purpose so 0, "0" and false are all treated alike.
    isPrivate: parent?.public == 0,
    files: {
      youtube: item.youtube,
      // Read the fields that are actually requested for conferences
      // (`video`, `audio`, `pdf`, `pdf_simple`), not the activity field names.
      video: item.video?.filename_disk,
      audio: item.audio?.filename_disk,
      // NOTE(review): `pdf` -> booklet and `pdf_simple` -> simple is inferred
      // from the field names; confirm against the Directus schema.
      booklet: item.pdf?.filename_disk,
      simple: item.pdf_simple?.filename_disk,
    },
  };
}

/**
 * Converts one Directus row into the flat export record.
 * Returns null when the row must be skipped: no title, no id, or an activity
 * dated before ACTIVITIES_CUTOFF_UNIX.
 */
function buildExportItem(type, item) {
  const isActivity = type === 'activities_translations';
  const details = isActivity ? readActivityDetails(item) : readConferenceDetails(item);
  const { parent } = details;

  const when = dayjs(parent?.date);
  const year = when.year().toString();
  const place = parent?.place || null;
  const city = parent?.city || null;
  const state = parent?.state || null;
  const country = parent?.country || null;

  const exported = {
    id: details.id,
    type: details.type,
    title: item.title,
    body: details.body,
    private: details.isPrivate,
    files: details.files,
    slug: item.slug,
    place,
    city,
    state,
    country,
    duration: parent?.duration ?? 0,
    year,
    month: `${year} > ${(when.month() + 1).toString().padStart(2, '0')}`,
    // Hierarchical menu paths; missing levels are rendered as the text "null".
    menu_country: country,
    menu_state: `${country} > ${state}`,
    menu_city: `${country} > ${state} > ${city}`,
    menu_place: `${country} > ${state} > ${city} > ${place}`,
    date: when.unix(),
    thumbnail: parent?.thumbnail?.filename_disk,
  };

  if (isActivity && exported.date < ACTIVITIES_CUTOFF_UNIX) {
    return null;
  }
  return exported.title && exported.id ? exported : null;
}

/**
 * Handles the HTTP response for one export: logs transport/HTTP problems,
 * otherwise serializes the rows as JSONL and hands them to writeFile().
 */
function handleExportResponse(type, error, res, body) {
  if (error) {
    console.error(error);
    return;
  }
  if (res.statusCode !== 200) {
    console.error(`generateJson: unexpected HTTP status ${res.statusCode} for "${type}"`);
    return;
  }
  const items = body?.data;
  if (!Array.isArray(items)) {
    console.error(`generateJson: response for "${type}" has no "data" array`);
    return;
  }

  // Build the output locally so separate exports never leak into each other.
  const lines = items
    .map((item) => buildExportItem(type, item))
    .filter(Boolean)
    .map((exported) => JSON.stringify(exported));
  writeFile(lines.join('\n'), type);
}

/**
 * Downloads all translations of the given collection for LOCALE from Directus
 * and passes them, one JSON object per line, to writeFile().
 *
 * Request and HTTP errors are logged and do not reject. The returned promise
 * settles after the response has been processed and writeFile() has been
 * called; writeFile() itself completes asynchronously.
 *
 * @param {string} type - 'activities_translations' or 'conferences_translations'.
 * @returns {Promise<void>}
 * @throws {Error} (as a rejection) If `type` is not a supported collection or
 *   a row cannot be converted.
 */
async function generateJson(type) {
  const fields = EXPORT_FIELDS[type];
  if (!Array.isArray(fields)) {
    throw new Error(`generateJson: unsupported type "${type}"`);
  }

  // SECURITY(review): the access token is hard-coded and sent over plain HTTP.
  // Move it to configuration / an environment variable and rotate it.
  const url = `http://directus.carpa.com/items/${type}?fields=${fields.join(",")}&filter[languages_code]=${LOCALE}&access_token=dUILDpE5gV224XqOB5xUTzE69sk8VSOL&limit=10000`;

  await new Promise((resolve, reject) => {
    request(url, { json: true }, (error, res, body) => {
      try {
        handleExportResponse(type, error, res, body);
        resolve();
      } catch (failure) {
        reject(failure);
      }
    });
  });
}
/**
 * Saves an export string under the input folder.
 *
 * The target is `./${DATA_INPUT_FOLDER}/${type}_<LOCALE uppercased>.json`.
 * The write is asynchronous: the function returns immediately and the outcome
 * is only reported on the console.
 *
 * @param {string} jsonlData - Content to write.
 * @param {string} type - Name used as the file name prefix.
 * @returns {void}
 */
function writeFile(jsonlData, type) {
  const localeTag = LOCALE.toLocaleUpperCase();
  const destination = `./${DATA_INPUT_FOLDER}/${type}_${localeTag}.json`;

  // Reports the result of the write once Node has finished with the file.
  const reportOutcome = (failure) => {
    if (!failure) {
      console.log("File has been written successfully! : " + type);
      return;
    }
    console.error("Error writing file:", failure);
  };

  fs.writeFile(destination, jsonlData, reportOutcome);
}
// --- Script entry point -----------------------------------------------------
// Only the conferences export is enabled. Other steps that can be run from
// here (uncomment the one you need):
//
//   generateJson('activities_translations');
//   generateMarkdown('activities');
//   generateMarkdown('conferences');
//   setTimeout(() => generateMarkdown('conferences'), 5000);
//
// The calls that used to be commented out here passed a second `year`
// argument in loops (1974-2018 for 'conferences', 2021-2026 for 'activities').
// generateJson() takes a single collection name and has no field list for
// 'conferences' or 'activities', so those calls are not listed above.
//
// generateJson() is async; handle a rejection explicitly instead of leaving
// the returned promise floating.
generateJson('conferences_translations').catch((err) => {
  console.error('Export failed:', err);
  process.exitCode = 1;
});