mirror of
				https://github.com/Nioux/AideDeJeu.git
				synced 2025-10-31 07:26:09 +00:00 
			
		
		
		
	Parsing
This commit is contained in:
		
							parent
							
								
									b27142c0fe
								
							
						
					
					
						commit
						c3216a5dc3
					
				
					 1 changed files with 102 additions and 26 deletions
				
			
		|  | @ -384,69 +384,145 @@ namespace AideDeJeuCmd | ||||||
|             var parser = new HtmlParser(); |             var parser = new HtmlParser(); | ||||||
|             var doc = new HtmlAgilityPack.HtmlDocument(); |             var doc = new HtmlAgilityPack.HtmlDocument(); | ||||||
|             doc.Load(@"..\..\..\..\..\Ignore\tome_of_beasts\page30.html"); |             doc.Load(@"..\..\..\..\..\Ignore\tome_of_beasts\page30.html"); | ||||||
|             parser.Parse(doc); |             parser.OutputMarkdown(parser.Parse(doc)); | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         class HtmlParser |         class HtmlParser | ||||||
|         { |         { | ||||||
|             string key = ""; |             string key = ""; | ||||||
|             string value = ""; |             string value = ""; | ||||||
|             public void Parse(HtmlAgilityPack.HtmlDocument doc) |             enum State | ||||||
|  |             { | ||||||
|  |                 Before, | ||||||
|  |                 Name, | ||||||
|  |                 Type, | ||||||
|  |                 TopKeyValues, | ||||||
|  |                 Abilities, | ||||||
|  |                 BottomKeyValues, | ||||||
|  |                 Competencies, | ||||||
|  |                 Actions, | ||||||
|  |                 Reactions, | ||||||
|  | 
 | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             public class ParsedSpan | ||||||
|  |             { | ||||||
|  |                 public string Text; | ||||||
|  |                 public string Style; | ||||||
|  |                 public string IdStyle; | ||||||
|  |             } | ||||||
|  |             public class FullLine : List<ParsedSpan> | ||||||
|  |             { | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             public class FullText : List<FullLine> | ||||||
|  |             { | ||||||
|  | 
 | ||||||
|  |             } | ||||||
|  |             public FullText Parse(HtmlAgilityPack.HtmlDocument doc) | ||||||
|             { |             { | ||||||
|                 var styles = doc.DocumentNode.SelectSingleNode("/html/head/style").InnerText.Split('\n'); |                 var styles = doc.DocumentNode.SelectSingleNode("/html/head/style").InnerText.Split('\n'); | ||||||
|                 var txtDivs = doc.DocumentNode.SelectNodes("//div[@class='txt']"); |                 var txtDivs = doc.DocumentNode.SelectNodes("//div[@class='txt']"); | ||||||
|                 bool started = false; |                 var fullText = new FullText(); | ||||||
|  |                 var fullLine = new FullLine(); | ||||||
|                 foreach (var txtDiv in txtDivs) |                 foreach (var txtDiv in txtDivs) | ||||||
|                 { |                 { | ||||||
|                     var spans = txtDiv.Elements("span"); |                     var spans = txtDiv.Elements("span"); | ||||||
|                     foreach (var span in spans) |                     for (var i = 0; i < spans.Count(); i++) | ||||||
|                     { |                     { | ||||||
|  |                         var span = spans.ToArray()[i]; | ||||||
|                         var spanId = span.GetAttributeValue("id", ""); |                         var spanId = span.GetAttributeValue("id", ""); | ||||||
|                         var spanStyle = span.GetAttributeValue("style", ""); |                         var spanStyle = span.GetAttributeValue("style", ""); | ||||||
|                         var spanIdStyle = styles.SingleOrDefault(s => s.StartsWith($"#{spanId} ")); |                         var spanIdStyle = new string(styles.SingleOrDefault(s => s.StartsWith($"#{spanId} ")).SkipWhile(c => c != '{').ToArray()); | ||||||
|  |                         var parsedSpan = new ParsedSpan() | ||||||
|  |                         { | ||||||
|  |                             Text = span.InnerText, | ||||||
|  |                             Style = spanStyle, | ||||||
|  |                             IdStyle = spanIdStyle, | ||||||
|  |                         }; | ||||||
|  |                         if (span.InnerText.Contains("Forme immuable")) | ||||||
|  |                         { | ||||||
|  |                             Debug.WriteLine(""); | ||||||
|  |                         } | ||||||
|  |                         if (i == 0) | ||||||
|  |                         { | ||||||
|  |                             var previousParsedSpan = fullLine.LastOrDefault(); | ||||||
|  |                             if (previousParsedSpan == null) | ||||||
|  |                             { | ||||||
|  |                                 var previousFullLine = fullText.LastOrDefault(); | ||||||
|  |                                 if (previousFullLine != null) | ||||||
|  |                                 { | ||||||
|  |                                     previousParsedSpan = previousFullLine.LastOrDefault(); | ||||||
|  |                                 } | ||||||
|  |                             } | ||||||
| 
 | 
 | ||||||
|                         if (spanStyle.Contains("font-size:11px")) |                             if (previousParsedSpan != null) | ||||||
|  |                             { | ||||||
|  |                                 if (previousParsedSpan.Style != parsedSpan.Style || previousParsedSpan.IdStyle != parsedSpan.IdStyle) | ||||||
|  |                                 { | ||||||
|  |                                     fullText.Add(fullLine); | ||||||
|  |                                     fullLine = new FullLine(); | ||||||
|  |                                 } | ||||||
|  |                             } | ||||||
|  |                         } | ||||||
|  |                         fullLine.Add(parsedSpan); | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |                 fullText.Add(fullLine); | ||||||
|  | 
 | ||||||
|  |                 return fullText; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             public void OutputMarkdown(FullText fullText) | ||||||
|  |             { | ||||||
|  |                 bool started = false; | ||||||
|  |                 foreach (var line in fullText) | ||||||
|  |                 { | ||||||
|  |                     foreach(var spa in line) | ||||||
|  |                     { | ||||||
|  |                         if (spa.Style.Contains("font-size:11px")) | ||||||
|                         {   // nom (démarrage) |                         {   // nom (démarrage) | ||||||
|                             started = true; |                             started = true; | ||||||
|                         } |                         } | ||||||
|                         if (started) |                         if (started) | ||||||
|                         { |                         { | ||||||
|                             if (spanStyle.Contains("font-size:11px")) |                             if (!spa.IdStyle.Contains("font-family:sans-serif; font-weight:normal; font-style:normal;") && CloseKeyValue()) | ||||||
|                             {   // nom |                             { | ||||||
|                                 Console.WriteLine($"# <!--Name-->{span.InnerText}<!--/Name-->"); |  | ||||||
|                             } |                             } | ||||||
|                             else if (spanIdStyle.Contains("font-family:sans-serif; font-weight:normal; font-style:italic;") && span.InnerText.Contains("taille")) |                             else if (spa.Style.Contains("font-size:11px")) | ||||||
|  |                             {   // nom | ||||||
|  |                                 Console.WriteLine($"# <!--Name-->{spa.Text}<!--/Name-->"); | ||||||
|  |                             } | ||||||
|  |                             else if (spa.IdStyle.Contains("font-family:sans-serif; font-weight:normal; font-style:italic;") && spa.Text.Contains("taille")) | ||||||
|                             {   // type / size / alignment |                             {   // type / size / alignment | ||||||
|                                 // todo : découper type / size / alignment |                                 // todo : découper type / size / alignment | ||||||
|                                 Console.WriteLine($"-  <!--Type-->{span.InnerText}<!--/Alignment-->"); |                                 Console.WriteLine($"-  <!--Type-->{spa.Text}<!--/Alignment-->"); | ||||||
|                             } |                             } | ||||||
|                             else if (spanStyle.Contains("rgba(121,27,16,1)")) |                             else if (spa.Style.Contains("rgba(121,27,16,1)")) | ||||||
|                             {   // key / value |                             {   // key / value | ||||||
|                                 if (CloseKeyValue()) |                                 key = spa.Text; | ||||||
|                                 { |  | ||||||
|                                 } |  | ||||||
|                                 key = span.InnerText; |  | ||||||
|                                 //Console.WriteLine($"-  <!--Type-->{span.InnerText}<!--/Alignment-->"); |                                 //Console.WriteLine($"-  <!--Type-->{span.InnerText}<!--/Alignment-->"); | ||||||
|                             } |                             } | ||||||
|                             else if (spanIdStyle.Contains("font-family:sans-serif; font-weight:normal; font-style:normal;")) |                             else if (spa.IdStyle.Contains("font-family:sans-serif; font-weight:normal; font-style:normal;")) | ||||||
|                             { |                             { | ||||||
|                                 value += (value.Length == 0 ? " " : "") + span.InnerText; |                                 value += (value.Length == 0 ? " " : "") + spa.Text; | ||||||
|                             } |                             } | ||||||
|                             else |                             else | ||||||
|                             { |                             { | ||||||
|                                 if (CloseKeyValue()) |  | ||||||
|                                 { |  | ||||||
|                                 } |  | ||||||
|                                 //Console.Write($"{spanStyle} => {span.InnerText} "); |                                 //Console.Write($"{spanStyle} => {span.InnerText} "); | ||||||
|                                 Console.Write($"{span.InnerText}"); |                                 Console.Write($"{spa.Text}"); | ||||||
|                             } |                             } | ||||||
|                         } |                         } | ||||||
|  |                         //Console.Write(spa.Text); | ||||||
|                     } |                     } | ||||||
|                     if (started) |                     Console.WriteLine(); | ||||||
|                     { |  | ||||||
|                     //Console.WriteLine(); |                     //Console.WriteLine(); | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|  | 
 | ||||||
|  |             void StripLine() | ||||||
|  |             { | ||||||
|  | 
 | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             Dictionary<string, string> KeyTags = new Dictionary<string, string>() |             Dictionary<string, string> KeyTags = new Dictionary<string, string>() | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Yan Maniez
						Yan Maniez