add linetracking to marked.js

2026-06-21 21:52:25 -06:00 · 2020-04-29 20:29:05 +02:00 · 2020-04-29 20:29:05 +02:00 · 334c07cc0c
parent ee284dd282
commit 334c07cc0c
5 changed files with 343 additions and 4 deletions
--- a/copyparty/web/md.css
+++ b/copyparty/web/md.css
@ -373,3 +373,12 @@ blink {
 		display: none;
 	}
 }
 /*
 *[data-ln]:before {
 	content: attr(data-ln);
 	font-size: .8em;
 	margin: 0 .4em;
 	color: #f0c;
 }
 */
--- a/copyparty/web/md.js
+++ b/copyparty/web/md.js
@ -126,8 +126,14 @@ function init_toc() {
            };
        }
        else if (is_precode) {
-            elm.innerHTML = elm.innerHTML.replace(
+            // not actually toc-related (sorry),
-                /\r?\n<\/code>$/i, '</code>').split(/\r?\n/g).join('</code>\n<code>');
+            // split <pre><code /></pre> into one <code> per line
            var nline = parseInt(elm.getAttribute('data-ln')) + 1;
            var lines = elm.innerHTML.replace(/\r?\n<\/code>$/i, '</code>').split(/\r?\n/g);
            for (var b = 0; b < lines.length - 1; b++)
                lines[b] += '</code>\n<code data-ln="' + (nline + b) + '">';
            elm.innerHTML = lines.join('');
        }
        if (!is_header && anchor)
@ -161,7 +167,7 @@ function init_toc() {
            return;
        var ptop = window.pageYOffset || document.documentElement.scrollTop;
-        var hit = -1;
+        var hit = anchors.length - 1;
        for (var a = 0; a < anchors.length; a++) {
            if (anchors[a].y >= ptop - 8) {  //???
                hit = a;
--- a/scripts/deps-docker/Dockerfile
+++ b/scripts/deps-docker/Dockerfile
@ -68,7 +68,9 @@ RUN     cd ogvjs-$ver_ogvjs \
 # build marked
 COPY    marked.patch /z/
 COPY    marked-ln.patch /z/
 RUN     cd marked-$ver_marked \
        && patch -p1 < /z/marked-ln.patch \
        && patch -p1 < /z/marked.patch \
        && npm run build \
        && cp -pv marked.min.js /z/dist/marked.js \
@ -152,3 +154,7 @@ RUN     cd /z/dist \
 # f=../../copyparty/web/deps/marked.js.gz; (cd ~ed/src/ && diff -NarU1 marked-1.0.0-orig/ marked-1.0.0-edit/) >marked.patch; make && printf '%d ' $(wc -c <$f) $(gzip -d <$f | wc -c); echo
 # d=/home/ed/dev/copyparty/scripts/deps-docker/; scp Dockerfile marked-ln.patch root@$bip:$d && ssh root@$bip "cd $d && make" && ssh root@$bip 'tar -cC /home/ed/dev/copyparty/copyparty/web deps' | (cd ../../copyparty/web/; cat > the.tgz; tar -xvf the.tgz)
 # gzip -dkf ../dev/copyparty/copyparty/web/deps/deps/marked.full.js.gz && diff -NarU2 ../dev/copyparty/copyparty/web/deps/{,deps/}marked.full.js
--- a/scripts/deps-docker/marked-ln.patch
+++ b/scripts/deps-docker/marked-ln.patch
@ -0,0 +1,300 @@
 diff --git a/src/Lexer.js b/src/Lexer.js
 adds linetracking to marked.js v1.0.0;
 add data-ln="%d" to most tags, %d is the source markdown line
 --- a/src/Lexer.js
 +++ b/src/Lexer.js
@@ -49,4 +49,5 @@ function mangle(text) {
 module.exports = class Lexer {
   constructor(options) {
 +    this.ln = 1;  // like most editors, start counting from 1
     this.tokens = [];
     this.tokens.links = Object.create(null);
@@ -108,4 +109,15 @@ module.exports = class Lexer {
   }
 +  set_ln(token, ln = this.ln) {
 +    // assigns ln (the current line number) to the token,
 +    // then bump this.ln by the number of newlines in the contents
 +    //
 +    // if ln is set, also assigns the line counter to a new value
 +    // (usually a backup value from before a call into a subparser
 +    //  which bumped the linecounter by a subset of the newlines)
 +    token.ln = ln;
 +    this.ln = ln + (token.raw.match(/\n/g) || []).length;
 +  }
 +
   /**
    * Lexing
@@ -113,10 +125,15 @@ module.exports = class Lexer {
   blockTokens(src, tokens = [], top = true) {
     src = src.replace(/^ +$/gm, '');
 -    let token, i, l;
 +    let token, i, l, ln;
     while (src) {
 +      // this.ln will be bumped by recursive calls into this func;
 +      // reset the count and rely on the outermost token's raw only
 +      ln = this.ln;
 +      
       // newline
       if (token = this.tokenizer.space(src)) {
         src = src.substring(token.raw.length);
 +        this.set_ln(token); // token.raw is "\n" when the token has no type
         if (token.type) {
           tokens.push(token);
@@ -128,4 +145,5 @@ module.exports = class Lexer {
       if (token = this.tokenizer.code(src, tokens)) {
         src = src.substring(token.raw.length);
 +        this.set_ln(token);
         tokens.push(token);
         continue;
@@ -135,4 +153,5 @@ module.exports = class Lexer {
       if (token = this.tokenizer.fences(src)) {
         src = src.substring(token.raw.length);
 +        this.set_ln(token);
         tokens.push(token);
         continue;
@@ -142,4 +161,5 @@ module.exports = class Lexer {
       if (token = this.tokenizer.heading(src)) {
         src = src.substring(token.raw.length);
 +        this.set_ln(token);
         tokens.push(token);
         continue;
@@ -149,4 +169,5 @@ module.exports = class Lexer {
       if (token = this.tokenizer.nptable(src)) {
         src = src.substring(token.raw.length);
 +        this.set_ln(token);
         tokens.push(token);
         continue;
@@ -156,4 +177,5 @@ module.exports = class Lexer {
       if (token = this.tokenizer.hr(src)) {
         src = src.substring(token.raw.length);
 +        this.set_ln(token);
         tokens.push(token);
         continue;
@@ -164,4 +186,7 @@ module.exports = class Lexer {
         src = src.substring(token.raw.length);
         token.tokens = this.blockTokens(token.text, [], top);
 +        // recursive call to blockTokens probably bumped this.ln,
 +        // token.raw is more reliable so reset this.ln and use that
 +        this.set_ln(token, ln);
         tokens.push(token);
         continue;
@@ -174,5 +199,9 @@ module.exports = class Lexer {
         for (i = 0; i < l; i++) {
           token.items[i].tokens = this.blockTokens(token.items[i].text, [], false);
 +          // list entries don't bump the linecounter, so let's
 +          this.ln++;
         }
 +        // then reset like blockquote
 +        this.set_ln(token, ln);
         tokens.push(token);
         continue;
@@ -182,4 +211,5 @@ module.exports = class Lexer {
       if (token = this.tokenizer.html(src)) {
         src = src.substring(token.raw.length);
 +        this.set_ln(token);
         tokens.push(token);
         continue;
@@ -189,4 +219,5 @@ module.exports = class Lexer {
       if (top && (token = this.tokenizer.def(src))) {
         src = src.substring(token.raw.length);
 +        this.set_ln(token);
         if (!this.tokens.links[token.tag]) {
           this.tokens.links[token.tag] = {
@@ -201,4 +232,5 @@ module.exports = class Lexer {
       if (token = this.tokenizer.table(src)) {
         src = src.substring(token.raw.length);
 +        this.set_ln(token);
         tokens.push(token);
         continue;
@@ -208,4 +240,5 @@ module.exports = class Lexer {
       if (token = this.tokenizer.lheading(src)) {
         src = src.substring(token.raw.length);
 +        this.set_ln(token);
         tokens.push(token);
         continue;
@@ -215,4 +248,5 @@ module.exports = class Lexer {
       if (top && (token = this.tokenizer.paragraph(src))) {
         src = src.substring(token.raw.length);
 +        this.set_ln(token);
         tokens.push(token);
         continue;
@@ -222,4 +256,5 @@ module.exports = class Lexer {
       if (token = this.tokenizer.text(src)) {
         src = src.substring(token.raw.length);
 +        this.set_ln(token);
         tokens.push(token);
         continue;
@@ -251,4 +286,7 @@ module.exports = class Lexer {
     for (i = 0; i < l; i++) {
       token = tokens[i];
 +      // this.ln is at EOF when inline() is invoked;
 +      // all this affects <br> tags only so no biggie if it breaks
 +      this.ln = token.ln || this.ln;
       switch (token.type) {
         case 'paragraph':
@@ -374,4 +412,6 @@ module.exports = class Lexer {
       if (token = this.tokenizer.br(src)) {
         src = src.substring(token.raw.length);
 +        // no need to reset (no more blockTokens anyways)
 +        token.ln = this.ln++;
         tokens.push(token);
         continue;
 diff --git a/src/Parser.js b/src/Parser.js
 index bad3ac7..882da47 100644
 --- a/src/Parser.js
 +++ b/src/Parser.js
@@ -18,4 +18,5 @@ module.exports = class Parser {
     this.textRenderer = new TextRenderer();
     this.slugger = new Slugger();
 +    this.ln = 0; // error indicator; should always be set >=1 from tokens
   }
@@ -55,4 +56,9 @@ module.exports = class Parser {
     for (i = 0; i < l; i++) {
       token = tokens[i];
 +      // take line-numbers from tokens whenever possible
 +      // and update the renderer's html attribute with the new value
 +      this.ln = token.ln || this.ln;
 +      this.renderer.tag_ln(this.ln);
 +
       switch (token.type) {
         case 'space': {
@@ -105,7 +111,10 @@ module.exports = class Parser {
             }
 -            body += this.renderer.tablerow(cell);
 +            // the +2 is to skip the table header
 +            body += this.renderer.tag_ln(token.ln + j + 2).tablerow(cell);
           }
 -          out += this.renderer.table(header, body);
 +          // the html attribute is now at the end of the table,
 +          // reset it before writing the <table> tag now
 +          out += this.renderer.tag_ln(token.ln).table(header, body);
           continue;
         }
@@ -148,8 +157,12 @@ module.exports = class Parser {
             itemBody += this.parse(item.tokens, loose);
 -            body += this.renderer.listitem(itemBody, task, checked);
 +            // similar to tables, writing contents before the <ul> tag
 +            // so update the tag attribute as we go
 +            // (assuming all list entries got tagged with a source-line, which is probably safe)
 +            body += this.renderer.tag_ln(item.tokens[0].ln).listitem(itemBody, task, checked);
           }
 -          out += this.renderer.list(body, ordered, start);
 +          // then reset to the <ul>'s correct line number and write it
 +          out += this.renderer.tag_ln(token.ln).list(body, ordered, start);
           continue;
         }
@@ -160,5 +173,6 @@ module.exports = class Parser {
         }
         case 'paragraph': {
 -          out += this.renderer.paragraph(this.parseInline(token.tokens));
 +          let t = this.parseInline(token.tokens);
 +          out += this.renderer.tag_ln(token.ln).paragraph(t);
           continue;
         }
@@ -199,4 +213,6 @@ module.exports = class Parser {
     for (i = 0; i < l; i++) {
       token = tokens[i];
 +      // another thing that only affects <br/> and other inlines
 +      this.ln = token.ln || this.ln;
       switch (token.type) {
         case 'escape': {
@@ -229,5 +245,7 @@ module.exports = class Parser {
         }
         case 'br': {
 -          out += renderer.br();
 +          // update the html attribute before writing each <br/>,
 +          // don't care about the others
 +          out += renderer.tag_ln(this.ln).br();
           break;
         }
 diff --git a/src/Renderer.js b/src/Renderer.js
 index a86732c..7ed907b 100644
 --- a/src/Renderer.js
 +++ b/src/Renderer.js
@@ -11,6 +11,12 @@ module.exports = class Renderer {
   constructor(options) {
     this.options = options || defaults;
 +    this.ln = "";
   }
 +  tag_ln(n) {
 +    this.ln = ' data-ln="' + n + '"';
 +    return this;
 +  };
 +  
   code(code, infostring, escaped) {
     const lang = (infostring || '').match(/\S*/)[0];
@@ -24,10 +30,10 @@ module.exports = class Renderer {
     if (!lang) {
 -      return '<pre><code>'
 +      return '<pre' + this.ln + '><code>'
         + (escaped ? code : escape(code, true))
         + '</code></pre>';
     }
 -    return '<pre><code class="'
 +    return '<pre' + this.ln + '><code class="'
       + this.options.langPrefix
       + escape(lang, true)
@@ -38,5 +44,5 @@ module.exports = class Renderer {
   blockquote(quote) {
 -    return '<blockquote>\n' + quote + '</blockquote>\n';
 +    return '<blockquote' + this.ln + '>\n' + quote + '</blockquote>\n';
   }
@@ -49,4 +55,5 @@ module.exports = class Renderer {
       return '<h'
         + level
 +        + this.ln
         + ' id="'
         + this.options.headerPrefix
@@ -59,5 +66,5 @@ module.exports = class Renderer {
     }
     // ignore IDs
 -    return '<h' + level + '>' + text + '</h' + level + '>\n';
 +    return '<h' + level + this.ln + '>' + text + '</h' + level + '>\n';
   }
@@ -73,5 +80,5 @@ module.exports = class Renderer {
   listitem(text) {
 -    return '<li>' + text + '</li>\n';
 +    return '<li' + this.ln + '>' + text + '</li>\n';
   }
@@ -85,5 +92,5 @@ module.exports = class Renderer {
   paragraph(text) {
 -    return '<p>' + text + '</p>\n';
 +    return '<p' + this.ln + '>' + text + '</p>\n';
   }
@@ -100,5 +107,5 @@ module.exports = class Renderer {
   tablerow(content) {
 -    return '<tr>\n' + content + '</tr>\n';
 +    return '<tr' + this.ln + '>\n' + content + '</tr>\n';
   }
@@ -125,5 +132,5 @@ module.exports = class Renderer {
   br() {
 -    return this.options.xhtml ? '<br/>' : '<br>';
 +    return this.options.xhtml ? '<br' + this.ln + '/>' : '<br' + this.ln + '>';
   }
@@ -151,5 +158,5 @@ module.exports = class Renderer {
     }
 -    let out = '<img src="' + href + '" alt="' + text + '"';
 +    let out = '<img' + this.ln + ' src="' + href + '" alt="' + text + '"';
     if (title) {
       out += ' title="' + title + '"';
--- a/srv/test.md
+++ b/srv/test.md
@ -99,6 +99,7 @@ a newline toplevel
 ## ep2
 #######################################################################
@ -142,7 +143,18 @@ https://github.com/markdown-it/markdown-it
 almost-all-pass:
 https://github.com/Ionaru/easy-markdown-editor
  https://easymde.tk/
  simplemde fork (the most active)
 https://github.com/Inscryb/inscryb-markdown-editor
  simplemde fork
 other simplemde forks:
  pulkitmittal
 https://simplemde.com/
  (dead)
 https://github.com/nhn/tui.editor
  https://nhn.github.io/tui.editor/latest/tutorial-example01-editor-basic
@ -153,3 +165,9 @@ https://github.com/nhn/tui.editor
 unrelated neat stuff:
  https://github.com/gnab/remark
 ```sh
 awk '/./ {printf "%s %d\n", $0, NR; next} 1' <test.md >ln.md
 gawk '{print gensub(/([a-zA-Z\.])/,NR" \\1","1")}' <test.md >ln.md
 ```