Sunday, January 08, 2012

[link] LOCALE settings and regexp classes

Spent some time editing a German text (actually an FB2 book) in VIM. Hell of a job, because as it turned out, I wasn't blind: VIM really doesn't support locale in the regular expressions.

A workaround is to use \k (the also-suggested \i doesn't match ß). But that's only half a workaround, since \k is case-insensitive and case matters in German (e.g. nouns are capitalized).

Another workaround is to use Perl or Python integration for regular expressions, since both provide locale support in the regular expressions. But I haven't gone that far yet.

Edit 1: Do NOT use Perl for this purpose. Its locale/UTF-8 support is totally and utterly messed up.

P.S. Here are some of my FB2 editing helper functions.



" some Fiction Book functions

"
" merge adjacent paragraphs
"
" Merge the <p>...</p> paragraphs in the given line range into a single
" paragraph written to the first line of the range.
"
" Usage:  :{range}call ParaMany_ToSingle()
"
" Empty lines inside the range are ignored.  The indentation of the first
" non-empty line is reused for the merged line; the opening <p> of every
" line but the first and the closing </p> of every line but the last are
" removed, the pieces are joined with a single space and runs of blanks are
" collapsed.  All lines of the range after the first one are deleted.
function! ParaMany_ToSingle() range
        let lines = getline( a:firstline, a:lastline )
        call filter( lines, 'v:val !~ "^$"' )

        " nothing but empty lines in the range: lines[0] would fail below
        if empty( lines )
                return
        endif

        " capture the indentation of the first non-empty line
        let xind = substitute( lines[0], '^\(\s*\).*', '\1', '' )

        let last = len(lines) - 1
        for i in range( 0, last )
                if i != 0
                        " not the first piece: drop the opening tag
                        let lines[i] = substitute( lines[i], '^\s*<p>[ \t]*', '', '' )
                endif
                if i != last
                        " not the last piece: drop the closing tag
                        let lines[i] = substitute( lines[i], '[ \t]*</p>\s*$', '', '' )
                endif
        endfor

        " remove every line of the range except the first, which is
        " overwritten with the merged text below
        if a:lastline > a:firstline
                exec ':'.(a:firstline+1).",".a:lastline."d"
        endif

        let text = join( lines, " " )
        " collapse runs of blanks, then strip the leading indentation so
        " that the captured indent can be put back exactly once
        let text = substitute( text, '[ \t]\{2,}', ' ', 'g' )
        let text = substitute( text, '^\s\+', '', '' )
        call setline( a:firstline, xind.text )
endfunction

" gvim helper keyboard shortcuts (c-up/-down do not work in terminal)
" Install the Ctrl-Up / Ctrl-Down shortcuts for merging paragraphs.
"
"   <C-Up>    merges the previous line and the current line
"   <C-Down>  merges the current line and the next line, then sends <Up>
"
" Both are defined with plain :map, calling ParaMany_ToSingle() over a
" two-line range.
function! Keyboard_ParaCUpDown()
        let bindings = [
\               [ '<C-Up>',   ':-1,.call ParaMany_ToSingle()<CR>' ],
\               [ '<C-Down>', ':.,+1call ParaMany_ToSingle()<CR><Up>' ] ]
        for [key, action] in bindings
                execute 'map' key action
        endfor
endfunction

"
"  insert section break, using the line's text as the section title
"
" Replace the current line with a section break whose title is the text of
" that line.
"
" The line is replaced by six lines:
"       </section>
"       (empty line)
"       <section>
"       <title>
"       <p id="ID">TEXT</p>
"       </title>
" TEXT is the line with its tags and surrounding blanks removed; ID is TEXT
" with every character outside [a-zA-Z0-9_-] turned into '_'.  Afterwards
" the cursor is moved to the line following the inserted block.
function! Para_ToSectionBreak()
        let t = getline(".")

        " capture the line indentation
        let xind = substitute( t, '^\(\s*\).*', '\1', '' )
        " the section tags are indented one whitespace character less
        let xindS = substitute( xind, '^\(\s*\)\s$', '\1', '' )

        " clean up: remove opening/closing tags, then trailing and
        " leading blanks
        let t = substitute( t, '\v\</{0,1}[a-z]+[^>]*\>', '', 'g' )
        let t = substitute( t, '[ \t]\+$', '', '' )
        let t = substitute( t, '^[ \t]\+', '', '' )

        " generate id, ensure it starts with a letter or _
        let id = substitute( t, '[^a-zA-Z0-9_-]', '_', 'g' )
        let id = substitute( id, '^\([^a-zA-Z_]\)', '_\1', 'g' )

        let l = [xindS.'</section>',
\               '',
\               xindS.'<section>',
\               xind.'<title>',
\               xind.'<p id="'.id.'">'.t.'</p>',
\               xind.'</title>' ]
        " the first generated line overwrites the current one, the rest
        " is appended below it
        call setline( ".", l[0] )
        call  append( ".", l[1:] )

        " column 0 keeps the cursor in its current column
        call cursor( line(".")+len(l), 0 )
endfunction

"
"  convert line's text to subtitle
"
" Turn the current line into a <subtitle> element.
"
" The line's indentation is kept, all opening/closing tags and surrounding
" blanks are removed from its text, and the result is wrapped in
" <subtitle>...</subtitle>.
function! Para_ToSubtitle()
        let t = getline(".")
        " capture the line indentation
        let xi = substitute( t, '^\(\s*\).*', '\1', '' )
        " remove tags, then trailing and leading blanks
        let t = substitute( t, '\v\</{0,1}[a-z]+[^>]*\>', '', 'g' )
        let t = substitute( t, '[ \t]\+$', '', '' )
        let t = substitute( t, '^[ \t]\+', '', '' )
        call setline( ".", xi.'<subtitle>'.t.'</subtitle>' )
endfunction

No comments: