From 84428c144e54170ec629b379e1daf86857da2be8 Mon Sep 17 00:00:00 2001
From: Maël Gassmann <mael.gassmann@students.bfh.ch>
Date: Thu, 17 Mar 2022 16:14:17 +0100
Subject: [~] Refactoring of the FileAssembler, Extraction of methods in
 PParser

---
 src/CMakeLists.txt    |   1 +
 src/FileAssembler.cpp | 165 ++++-------------------------------
 src/PParser.cpp       | 236 ++++++++++++++++++++++++++++++++++++++++++++++++++
 src/main.cpp          |  53 +++++-------
 4 files changed, 274 insertions(+), 181 deletions(-)
 create mode 100644 src/PParser.cpp

(limited to 'src')
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 81f1e7d..eac87bf 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -4,6 +4,7 @@ add_executable(
     ${EXE}
     main.cpp
     FileAssembler.cpp
+    PParser.cpp
     )
 add_custom_target(run
     COMMAND ${EXE}
diff --git a/src/FileAssembler.cpp b/src/FileAssembler.cpp
index 7a64846..3949ca7 100644
--- a/src/FileAssembler.cpp
+++ b/src/FileAssembler.cpp
@@ -1,12 +1,9 @@
 #include "../inc/FileAssembler.h"
+#include "../inc/PParser.h"
 #include "../inc/maddy/parser.h"
 #include <filesystem>
-#include <iostream>
-#include <string.h>
 #include <fstream>
 #include <sstream>
-#include <list>
-#include <map>
 
 using namespace std;
 namespace fs = filesystem;
@@ -40,7 +37,6 @@ FileAssembler::FileAssembler(string path): path(path){
 			}
 		}
 	}
-	ordered_posts_indexes = get_ordered_posts_indexes(); // Only used when a listing of posts in a page is necessary
 }
 
 void FileAssembler::parse_variables(){
@@ -77,154 +73,25 @@ string FileAssembler::get_file_content(string path){
 	return content;
 }
 
-map<string, string> FileAssembler::get_pages(){
-	return assemble_from_iterator(pages.begin(), pages.end(), false);
+// Hands the four maps to a PParser and returns what PParser::parse() builds from them.
+map<string, string>* FileAssembler::get_website(){
+	return PParser(&variables, &templates, &pages, &posts).parse();
 }
 
-map<string, string> FileAssembler::get_posts(){
-	return assemble_from_iterator(posts.begin(), posts.end(), true);
-}
 
-map<string, string> FileAssembler::assemble_from_iterator(map<string, string>::iterator it, map<string, string>::iterator end, bool is_post){
-	if(templates.find("header.html") == templates.end()){
-		cerr << "Error: swg: header.html is not present in the sourced folder." << endl;
-		exit(2);
-	} else if(templates.find("footer.html") == templates.end()){
-		cerr << "Error: swg: footer.html is not present in the sourced folder." << endl;
-		exit(2);
-	}
-	map<string, string> p_it;
-	while (it != end){
-		if(it->first.substr(0, 5) != "link_"){ // Ignoring link pages
-			p_it[it->first] = parse(it->first, templates["header.html"] + it->second + templates["footer.html"], is_post);
-		}
-		it ++;
-	}
-	return p_it;
+// First "<word>.<word>" run of the "website" variable; exits with status 7 if there is none.
+string FileAssembler::get_target(){
+	static std::regex rgx("\\w+\\.\\w+");
+	std::smatch found;
+	string site = variables["website"];
+	if (std::regex_search(site, found, rgx)) return found[0];
+	cerr << "Error: swg: website attribute is badly configured!" << endl;
+	exit(7);
 }
 
-string FileAssembler::parse(string title, string to_parse, bool is_post){
-	string parsed = to_parse;
-	string url = variables["website"];
-	
-	if(is_post)
-		variables["link"] = url + "posts/" + lowercase(title) + ".html";
-	else{
-		if(title == variables["index"]){
-			variables["link"] = url + "index.html";
-		}else if(title.substr(0, 5) == "link_"){ // Link
-			variables["link"] = pages[title];
-		}else{
-			variables["link"] = url + lowercase(title) + ".html";
-		}
-		variables.erase("date"); // we are not doing it if it's a post, because it could be a listing
-	}
-	// Parsing variables and functions	
-	size_t pos_first = 0;
-	size_t pos_second = 0;
-  	static std::regex r_expression("\\$([^\\$]*)\\$");
-	std::smatch m_expression;
-	while (regex_search(parsed, m_expression, r_expression)){
-		string input = m_expression[1]; //Group 1 ie. without the dollars
-		string output = "";
-		
-		if(input == "title") //TITLE
-			if(title.substr(0, 5) == "link_") // Link
-				output = title.substr(5,title.length()-5);
-			else
-				output = title;
-
-		else if(input.substr(0, 5) == "date("){
-			variables["date"] = parse_arg(input);
-			output = variables["date"];
-		}else if(variables.find(input) != variables.end()) //VARIABLES
-			output = variables[input];
-
-		else if(input.substr(0,4) == "res("){ //RESOURCES
-			string full_path = input.substr(4,input.length()-1-4);
-			output = url + full_path;
-			
-			if(full_path.find("/") != string::npos){
-				size_t s;
-
-				string tok;
-				string last_tok;
-
-				while ((s = full_path.find("/")) != string::npos){ // Adding the path progressively
-					tok = full_path.substr(0,s-1);
-					if(find(cached_res.begin(), cached_res.end(), last_tok+tok) == cached_res.end())
-						cached_res.push_back(last_tok+tok); // Not found, adding it
-					last_tok += tok+"/";
-					full_path.erase(0,s);
-				}
-			}
-			if(find(cached_res.begin(), cached_res.end(), input.substr(4,input.length()-1-4)) == cached_res.end())
-				cached_res.push_back(input.substr(4,input.length()-1-4)); // Not found, adding it
-
-
-		} else if(input.substr(0,5) == "list_"){ //LISTINGS
-			string name = "";
-			int arg = -1;
-  			static std::regex r_listing("\\_([a-z]*)(\\((\\d*)\\))?");
-			std::smatch m_listing;
-			if (std::regex_search(input, m_listing, r_listing)){
-				name = m_listing[1];
-				if(m_listing[3] != "")
-					arg = stoi(m_listing[3]);
-			}
-			string list_template = name + "_listing.html";
-
-			if(templates.find(list_template) == templates.end()){
-				cerr << "Error: swg: Listing template '" << list_template << "' does not exist." << endl;
-				exit(2);
-			}
-
-			if(name == "menu"){
-				string current_link = variables["link"];
-				map<string, string>::iterator it = pages.begin();
-				while (it != pages.end()){
-					output += parse(it->first,templates[list_template], false);
-					it ++;
-				}
-				variables["link"] = current_link;
-			}else if(name == "post"){
-				string current_date = "";
-				string current_link = variables["link"];				
-				if(variables.find("date") != templates.end())
-					current_date = variables["date"];
-
-				int i = 0;
-				vector<string>::iterator it = ordered_posts_indexes.begin();
-				while (it != ordered_posts_indexes.end() && 
-					(arg == -1 || i < arg) // Limiting the number of posts shown if arg is set
-					){
-					string date = parse_arg(posts[*it]);
-					if(date != "")
-						variables["date"] = date;
-					else{
-						cerr << "Error: swg: Variable 'date' of post '" << *it << "' is not defined." << endl;
-						exit(5);
-					}
-					output += parse(*it,templates[list_template], true);
-					it ++;
-					i ++;
-				}
-				if(current_date != "")
-					variables["date"] = current_date;
-				else
-					variables.erase("date");
-				variables["link"] = current_link;
-			}
-		}
-		if(output.length() == 0){
-			cerr << "Error: swg: Invalid swg text section: \"" << input << "\"." << endl;
-			exit(4);
-		}
-		//cout << "BEFORE:" << endl << parsed << endl << endl;
-		parsed = regex_replace(parsed, r_expression, output, regex_constants::format_first_only);
-		//cout << "AFTER:" << endl << parsed << endl ;
-		//cout << endl << "----------------------------------------------------" << endl << endl;
+string FileAssembler::get_index(){ // Value of the "index" variable, or "" when it is not set
+	if(variables.find("index") == variables.end()){
+		return ""; // find() first: operator[] below would otherwise insert an empty "index" key
 	}
-	return parsed;
+	return variables["index"];
 }
-
diff --git a/src/PParser.cpp b/src/PParser.cpp
new file mode 100644
index 0000000..b4f478c
--- /dev/null
+++ b/src/PParser.cpp
@@ -0,0 +1,236 @@
+#include "../inc/PParser.h"
+
+// Keeps the four map pointers supplied by the caller and precomputes the
+// date-ordered list of post titles (get_ordered_posts_indexes()).
+PParser::PParser(
+	map<string, string> *variables,
+	map<string, string> *templates,
+	map<string, string> *pages,
+	map<string, string> *posts)
+	: variables(variables), templates(templates), pages(pages), posts(posts)
+{
+	// Only used when a listing of posts in a page is necessary
+	ordered_posts_indexes = get_ordered_posts_indexes();
+}
+
+// Renders every page and post between header.html and footer.html. Returns an array of two
+// maps allocated with new[] (not freed here): [0] pages (link_ entries skipped), [1] posts.
+map<string, string>* PParser::parse(){
+	const char* required[] = {"header.html", "footer.html"};
+	for (const char* name : required){ // operator[] would silently insert an empty template
+		if(templates->find(name) == templates->end()){
+			cerr << "Error: swg: " << name << " is not present in the sourced folder." << endl;
+			exit(2);
+		}
+	}
+	const string header = (*templates)["header.html"], footer = (*templates)["footer.html"];
+	map<string, string>* website = new map<string, string>[2];
+	for (const auto& page : *pages)
+		if(page.first.substr(0, 5) != "link_") // Ignoring link pages
+			website[0][page.first] = parse_text(page.first, header + page.second + footer, false);
+	for (const auto& post : *posts)
+		website[1][post.first] = parse_text(post.first, header + post.second + footer, true);
+	return website;
+}
+
+// Finds the first `name(arg)` expression in to_parse, where arg contains neither
+// ')' nor '$'. Returns a new[]-allocated array {name, arg}, or NULL when the text
+// holds no such expression.
+string* PParser::parse_function(string to_parse){
+	static std::regex rgx("(\\w+)\\(([^\\)\\$]*)\\)");
+	std::smatch call;
+	if (!std::regex_search(to_parse, call, rgx))
+		return NULL;
+	string* name_and_arg = new string[2]{call[1].str(), call[2].str()};
+	return name_and_arg;
+}
+
+// Splits the first `<word><separator><word>` run found in to_parse. Returns a new[]-allocated
+// array {left, right}, or NULL without a match. `separator` is spliced into the regex unescaped.
+string* PParser::parse_separator(string to_parse, string separator){
+	// Compiled per call: a function-local static is initialised once only, so it would keep
+	// matching the separator of the very first call whatever is passed later.
+	const std::regex rgx("(\\w+)" + separator + "(\\w+)");
+	std::smatch match;
+	if (!std::regex_search(to_parse, match, rgx))
+		return NULL;
+	return new string[2]{match[1].str(), match[2].str()};
+}
+
+// Sort predicate: true when post `a` is dated strictly after post `b` (newest first).
+// Each pair is (title, post text); the date is the argument of the first `name(arg)`
+// found by parse_function(), as dd.mm.yyyy. Exits with status 5 if missing or malformed.
+bool PParser::cmp_posts(pair<string, string>& a, pair<string, string>& b){
+	pair<string, string>* side[2] = {&a, &b};
+	string date[2];
+	for (int i = 0; i < 2; i++){
+		// parse_function() yields NULL on no match, else a heap array that must be freed
+		string* function = parse_function(side[i]->second);
+		if (function != NULL)
+			date[i] = function[1];
+		delete[] function;
+		if(date[i] == ""){
+			cerr << "Error: swg: Variable 'date' of post '" << side[i]->first << "' is not defined." << endl;
+			exit(5);
+		}
+	}
+	static std::regex rgx("(\\d{1,2})\\.(\\d{1,2})\\.(\\d{4})");
+	std::smatch match[2];
+	for (int i = 0; i < 2; i++){
+		if (!std::regex_search(date[i], match[i], rgx)){ // For now only european swiss format handled
+			cerr << "Error: swg: Variable 'date' of post '" << side[i]->first << "' is not following the format dd.mm.yyyy." << endl;
+			exit(5);
+		}
+	}
+	for (int group = 3; group >= 1; group--){ // Differentiate by year, then month, then day
+		if(match[0][group] != match[1][group])
+			return stoi(match[0][group]) > stoi(match[1][group]);
+	}
+	return false; // or if equal return false
+}
+
+// Returns the titles of all posts, ordered by cmp_posts (most recent date first).
+vector<string> PParser::get_ordered_posts_indexes(){
+	// Copy the (title, text) entries out of the map so they can be sorted
+	vector<pair<string,string>> by_date;
+	for (map<string, string>::iterator it = posts->begin(); it != posts->end(); ++it)
+		by_date.push_back(*it);
+	sort(by_date.begin(), by_date.end(), cmp_posts);
+	vector<string> titles;
+	for (const pair<string, string>& entry : by_date)
+		titles.push_back(entry.first);
+	return titles;
+}
+
+
+
+// Expands every `$...$` expression found in to_parse and returns the resulting text.
+//   title   - name of the page or post being rendered; also what `$title$` expands to
+//   is_post - true: "link" points below posts/; false: page link, and "date" is cleared
+// Handles `$list_<name>$` / `$list_<name>(<max>)$`, `$date(..)$`, `$res(..)$`, `$title$`
+// and any key of `variables`; an expression producing no text exits with status 4.
+string PParser::parse_text(string title, string to_parse, bool is_post){
+	string parsed = to_parse;
+	string url = (*variables)["website"];
+
+	if(is_post)
+		(*variables)["link"] = url + "posts/" + lowercase(title) + ".html";
+	else{
+		if(title == (*variables)["index"]){
+			(*variables)["link"] = url + "index.html";
+		}else if(title.substr(0, 5) == "link_"){ // Link
+			(*variables)["link"] = (*pages)[title];
+		}else{
+			(*variables)["link"] = url + lowercase(title) + ".html";
+		}
+		(*variables).erase("date"); // we are not doing it if it's a post, because it could be a listing
+	}
+	// Parsing variables and functions
+	static std::regex swg_expression("\\$([^\\$]*)\\$");
+	std::smatch m_swg_expression;
+	while (regex_search(parsed, m_swg_expression, swg_expression)){
+		string input = m_swg_expression[1]; //Group 1 ie. without the dollars
+		string output = "";
+		// Both helpers return NULL or a new[] array; freed at the end of this iteration.
+		string *separator = parse_separator(input, "_");
+		string *function = parse_function(input);
+		// Only a leading "list_" makes a listing. Any other underscore (a res() path, a
+		// variable name) has to fall through to the function / keyword branches.
+		if(separator != NULL && separator[0] == "list" && input.compare(0, 5, "list_") == 0) // LISTINGS
+		{							// Can have 0 or 1 argument
+			cout << "Type: " << separator[0] << ", Variation: " << separator[1] << endl;
+			int arg = -1;
+			// stoi() throws on "" or non-digits, so only convert a purely numeric argument
+			if (function != NULL && !function[1].empty() && function[1].find_first_not_of("0123456789") == string::npos)
+				arg = stoi(function[1]);
+			string list_template = separator[1]+ "_listing.html";
+			if((*templates).find(list_template) == (*templates).end())
+			{
+				cerr << "Error: swg: Listing template '" << list_template << "' does not exist." << endl;
+				exit(2);
+			}
+			if(separator[1] == "menu")
+			{
+				string current_link = (*variables)["link"];
+				map<string, string>::iterator it = (*pages).begin();
+				while (it != (*pages).end())
+				{
+					output += parse_text(it->first,(*templates)[list_template], false);
+					it ++;
+				}
+				(*variables)["link"] = current_link;
+			}
+			else if(separator[1] == "post")
+			{
+				string current_date = "";
+				string current_link = (*variables)["link"];
+				// Compare against the end() of the map that was actually searched
+				if((*variables).find("date") != (*variables).end())
+					current_date = (*variables)["date"];
+
+				int i = 0;
+				vector<string>::iterator it = ordered_posts_indexes.begin();
+				while (it != ordered_posts_indexes.end() && 
+					(arg == -1 || i < arg)) // Limiting the number of posts shown if arg is set
+				{
+					// NULL means the post holds no `name(arg)` at all: report it, do not index it
+					string *post_function = parse_function((*posts)[*it]);
+					string date = (post_function != NULL) ? post_function[1] : "";
+					delete[] post_function;
+					if(date != "")
+						(*variables)["date"] = date;
+					else
+					{
+						cerr << "Error: swg: Variable 'date' of post '" << *it << "' is not defined." << endl;
+						exit(5);
+					}
+					output += parse_text(*it,(*templates)[list_template], true);
+					it ++;
+					i ++;
+				}
+				if(current_date != "")
+					(*variables)["date"] = current_date;
+				else
+					(*variables).erase("date");
+				(*variables)["link"] = current_link;
+			}
+		}
+		else if(function != NULL) // FUNCTIONS
+		{						  // *always* have *ONE* argument
+			cout << "Function: " << function[0] << ", Arg: " << function[1] << endl;
+			if(function[0] == "date") // date
+			{
+				(*variables)["date"] = function[1];
+				output = function[1];
+			}
+			else if(function[0] == "res") // resources
+			{
+				string full_path = function[1];
+				output = url + full_path;
+			}
+		}
+		else  // KEYWORDS
+		{
+			cout << "Input: " << input << endl;
+			if(input == "title") // Title
+			{
+				// Same "link_" prefix test as for the link: keeps titles with spaces intact
+				if(title.substr(0, 5) == "link_") // Link
+					output = title.substr(5);
+				else
+					output = title;
+			}
+			else if((*variables).find(input) != (*variables).end()) //Variables
+				output = (*variables)[input];
+		}
+		delete[] separator; // delete[] of NULL is a no-op
+		delete[] function;
+
+		if(output.length() == 0){
+			cerr << "Error: swg: Invalid swg text section: \"" << input << "\"." << endl;
+			exit(4);
+		}
+		parsed = regex_replace(parsed, swg_expression, output, regex_constants::format_first_only);
+	}
+	return parsed;
+}
+
diff --git a/src/main.cpp b/src/main.cpp
index 634d6fc..cb5ad2f 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1,9 +1,7 @@
 #include "../inc/FileAssembler.h"
 #include <filesystem>
-#include <iostream>
 #include <fstream>
-#include <cstring>
-#include <string>
+#include <string.h>
 
 
 using namespace std;
@@ -21,6 +19,14 @@ void write_file(string path, string content){
 	}
 }
 
+// Lower-cases `s` byte by byte. tolower() is undefined for negative char values
+// (e.g. the UTF-8 bytes of an accented title), hence the unsigned char round-trip.
+static string lowercase(string s){
+	for (char& c : s)
+		c = static_cast<char>(tolower(static_cast<unsigned char>(c)));
+	return s;
+}
+
 int generateWebsite(string arg, string config){
 	// cout << arg << " " << config << endl;
 	if(config[config.length()-1] != '/')
@@ -28,8 +34,7 @@ int generateWebsite(string arg, string config){
 	FileAssembler *fa = new FileAssembler(config);
 	/* TODO 
 	 * clean or don't clean*/
-	map<string, string> pages = fa->get_pages();
-	map<string, string> posts = fa->get_posts();
+	map<string, string>* website = fa->get_website();
 	string target = fa->get_target();
 	string index = fa->get_index();
 
@@ -41,43 +46,27 @@ int generateWebsite(string arg, string config){
 		cerr << "Error: swg: Directory '" << target << "/posts' could not be created!" << endl;
 		exit(6);
 	}
-	map<string, string>::iterator it = pages.begin();
-	while(it != pages.end()){
+	map<string, string>::iterator it = website[0].begin();
+	while(it != website[0].end()){
 		if(it->first == index){
 			write_file(target+"/index.html", it->second);
 		}else{
-			write_file(target+"/"+FileAssembler::lowercase(it->first)+".html", it->second);
+			write_file(target+"/"+lowercase(it->first)+".html", it->second);
 		}
 		it++;
 	}
 
-	it = posts.begin();
-	while(it != posts.end()){
-		write_file(target+"/posts/"+FileAssembler::lowercase(it->first)+".html", it->second);
+	it = website[1].begin();
+	while(it != website[1].end()){
+		write_file(target+"/posts/"+lowercase(it->first)+".html", it->second);
 		it++;
 	}
 
-	if (false){ // Only copying the cached resources vs all of them
-	/**	list<string> resources = fa->get_cached_resources();
-
-		list<string>::iterator res = resources.begin();
-		while(res != resources.end()){
-			error_code ec;
-			fs::copy(config+"resources/"+*res, target+"/"+*res,ec);
-			if(ec.value() != 0){
-				cerr << "Error: swg: Resource file '" << *res << "' could not be copied!" << endl;
-				exit(6);
-			}
-			res ++;
-		}
-		**/
-	}else{	
-		// Recursively copies all files and folders from src to target and overwrites existing files in target.
-		try{
-        		fs::copy(config+"resources/", target, fs::copy_options::overwrite_existing | fs::copy_options::recursive);
-		}catch (std::exception& e){
-			std::cout << e.what(); // TODO better print
-		}
+	// Recursively copies all files and folders from src to target and overwrites existing files in target.
+	try{
+        	fs::copy(config+"resources/", target, fs::copy_options::overwrite_existing | fs::copy_options::recursive);
+	}catch (std::exception& e){
+		std::cout << e.what(); // TODO better print
 	}
 
 	return 0;
-- 
cgit v1.2.3