1
0
Fork 0
mirror of https://git.sr.ht/~rjarry/aerc synced 2025-12-18 08:51:15 +01:00
aerc/lib/inlineimages.go
Robin Jarry 8fee1477b7 viewer: add html-inline-images option
Add a new html-inline-images option in the [viewer] section that enables
inlining of images referenced by <img> tags with cid: URLs in HTML emails.

When enabled, aerc will parse HTML content to find <img src="cid:...">
references, fetch the corresponding image parts using their Content-ID
and use base64 encoding to embed images directly in HTML using data:
URLs.

This allows HTML emails with embedded images to be properly viewed in
w3m and other browsers that support data: URLs. The implementation uses
asynchronous callbacks to fetch all images in parallel without blocking.

The feature works with all aerc commands that fetch message parts (:save,
:open, :pipe, and viewing). Updated the filters/html script to enable
w3m image support with sixel graphics when img2sixel is available.

Add documentation for the new html-inline-images viewer option in both
the default aerc.conf and the aerc-config(5) man page.

Implements: https://todo.sr.ht/~rjarry/aerc/252
Changelog-added: New `[viewer].html-inline-images` option to replace
 `<img src="cid:...">` tags in `text/html` parts with their related
 `image/*` part data encoded in base64. For this to work with sixel
 compatible terminals, you need to update your filters with `text/html =
 ! html -sixel` and install `img2sixel`.
Signed-off-by: Robin Jarry <robin@jarry.cc>
Tested-by: Bence Ferdinandy <bence@ferdinandy.com>
Tested-by: Inwit <inwit@sindominio.net>
Tested-by: Matthew Phillips <matthew@matthewphillips.info>
2025-12-04 13:52:37 +01:00

196 lines
5 KiB
Go

package lib
import (
"bytes"
"encoding/base64"
"io"
"maps"
"strings"
"sync"
"git.sr.ht/~rjarry/aerc/lib/log"
"git.sr.ht/~rjarry/aerc/models"
"golang.org/x/net/html"
)
// InlineHTMLImages transforms an HTML email by replacing <img> tags with cid:
// URLs with base64-encoded data: URLs. This allows HTML emails with embedded
// images to be viewed in browsers that support data: URLs, including w3m when
// used with the -sixel option and img2sixel installed.
//
// This function uses callbacks and will call the provided callback
// asynchronously once all images have been fetched and inlined.
func InlineHTMLImages(
htmlReader io.Reader,
msg MessageView,
callback func(io.Reader),
) {
// Read the HTML content
htmlBytes, err := io.ReadAll(htmlReader)
if err != nil {
log.Errorf("Failed to read HTML: %v", err)
callback(bytes.NewReader(htmlBytes))
return
}
// Parse the HTML
doc, err := html.Parse(bytes.NewReader(htmlBytes))
if err != nil {
log.Errorf("Failed to parse HTML: %v", err)
callback(bytes.NewReader(htmlBytes))
return
}
// Build a map of Content-ID to part index
cidMap := buildContentIDMap(msg.BodyStructure(), []int{})
// If no Content-IDs found, return the original HTML
if len(cidMap) == 0 {
log.Tracef("No Content-IDs found in message, skipping image inlining")
callback(bytes.NewReader(htmlBytes))
return
}
// Find all cid: references in img tags
cidReferences := make(map[string]bool)
var findCids func(*html.Node)
findCids = func(n *html.Node) {
if n.Type == html.ElementNode && n.Data == "img" {
for _, attr := range n.Attr {
if attr.Key == "src" && strings.HasPrefix(attr.Val, "cid:") {
cid := strings.Trim(strings.TrimPrefix(attr.Val, "cid:"), "<>")
if _, ok := cidMap[cid]; ok {
cidReferences[cid] = true
}
}
}
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
findCids(c)
}
}
findCids(doc)
if len(cidReferences) == 0 {
log.Tracef("No cid: references found in HTML, returning original")
callback(bytes.NewReader(htmlBytes))
return
}
log.Tracef("Found %d cid: references to inline", len(cidReferences))
// Fetch all images asynchronously and encode as data: URLs
var wg sync.WaitGroup
var mu sync.Mutex
imageURLs := make(map[string]string)
for cid := range cidReferences {
info := cidMap[cid]
wg.Add(1)
msg.FetchBodyPart(info.index, func(reader io.Reader) {
defer wg.Done()
data, err := io.ReadAll(reader)
if err != nil {
log.Errorf("Failed to read image part for CID %s: %v", cid, err)
return
}
// Encode as base64
encoded := base64.StdEncoding.EncodeToString(data)
// Create data: URL
dataURL := "data:" + info.mimeType + ";base64," + encoded
mu.Lock()
imageURLs[cid] = dataURL
mu.Unlock()
log.Tracef("Encoded image with Content-ID %s as data: URL (%d bytes)", cid, len(data))
})
}
// Wait for all images to be fetched, then transform HTML
go func() {
defer log.PanicHandler()
wg.Wait()
// Replace all cid: references with data: URLs
modified := false
var processNode func(*html.Node)
processNode = func(n *html.Node) {
if n.Type == html.ElementNode && n.Data == "img" {
for i, attr := range n.Attr {
if attr.Key == "src" && strings.HasPrefix(attr.Val, "cid:") {
cid := strings.Trim(strings.TrimPrefix(attr.Val, "cid:"), "<>")
if dataURL, ok := imageURLs[cid]; ok {
n.Attr[i].Val = dataURL
modified = true
log.Tracef("Replaced cid:%s with data: URL", cid)
}
}
}
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
processNode(c)
}
}
processNode(doc)
if !modified {
log.Warnf("No images were inlined despite finding CID references")
callback(bytes.NewReader(htmlBytes))
return
}
// Render the modified HTML back to bytes
var buf bytes.Buffer
err = html.Render(&buf, doc)
if err != nil {
log.Errorf("Failed to render HTML: %v", err)
callback(bytes.NewReader(htmlBytes))
return
}
callback(&buf)
}()
}
// partInfo stores the index and MIME type of a part
type partInfo struct {
index []int
mimeType string
}
// buildContentIDMap recursively builds a map from Content-ID to part information
func buildContentIDMap(bs *models.BodyStructure, index []int) map[string]partInfo {
result := make(map[string]partInfo)
if bs == nil {
log.Tracef("buildContentIDMap: body structure is nil")
return result
}
// Log the current part being examined
log.Tracef("buildContentIDMap: examining part index=%v mime=%s contentid=%q numParts=%d",
index, bs.FullMIMEType(), bs.ContentID, len(bs.Parts))
// Add this part if it has a Content-ID
if bs.ContentID != "" {
result[strings.Trim(bs.ContentID, "<>")] = partInfo{
index: append([]int{}, index...),
mimeType: bs.FullMIMEType(),
}
log.Tracef("Found Content-ID: %s at index %v (%s)",
bs.ContentID, index, bs.FullMIMEType())
}
// Recursively process child parts
for i, part := range bs.Parts {
childIndex := append(append([]int{}, index...), i+1)
maps.Copy(result, buildContentIDMap(part, childIndex))
}
return result
}