从Google工作表中提取内联图像并重新插入为链接



我有一个接近100个内联图像的谷歌表。这张纸载入很慢。我读到,如果图像作为链接从Google Drive文件加载,而不是作为嵌入图像在电子表格中,那么工作表的性能会更好。

所以我想写一个谷歌应用程序脚本…

  1. 查看每个工作表上的每个单元格;
  2. 决定单元格是否包含图像;
  3. 保存图片到Google drive;
  4. 获取图像文件的链接;
  5. 删除inline-image;
  6. 重新插入图片作为链接

到目前为止我有这个…

function extract_images() {
var spreadsheet_id = "1NGtkBj7HOqf-rvV7zdRb7lrj3MRFoJH9Ulnx9YIO7Hs" // MARKS COPY OF "RAW Exam Question Reference AQA GCSE PE 9-1"
var images_folder = DriveApp.getFolderById("1nEF_E7rZmTpTif3uDS8B0Jnio1x6IaSm");
var ss = SpreadsheetApp.openById(spreadsheet_id);
var sheets = ss.getSheets();
sheets.forEach (sh => {
Logger.log("Looking through '"+sh.getName()+"'");
var w = sh.getLastColumn();
var h = sh.getLastRow();
for (i=1; i<h; i++){
for (j=1; j<w; j++){
var value = sh.getRange(i,j).getValue();
var range = sh.getRange(i,j);
if (value.valueOf().toString().startsWith("com")){
Logger.log(`Image at (${i},${j})`);
}
}
}
});
}

…它通过单元格值查找工作表中的图像,对于图像,似乎具有格式com.google.apps.maestro.server.beans.trix.impl.CellImageApiAdapter@ID,其中ID是8个字符的十六进制代码。

我试着使用Kanshi TANAIKE写的这个惊人的库…

https://github.com/tanaikech/DocsServiceApp

…但我找不到用它从一个细胞中提取一张图像的方法。我只能看到一次提取所有图像的方法,当我尝试这样做&;exportsizelimitexceeded &;时,我得到了一个错误。

我觉得我很接近,这是令人沮丧的。请帮助!

感谢@Tanaike建议的帖子,我想出的解决方案如下。它很脏,写得不好,但它有效。

工作流…

  1. 获取电子表格ID和文件夹ID;
  2. 打开电子表格副本;
  3. 导出为XLSX;
  4. 将文件类型改为zip;
  5. 解压成blob;
  6. 创建对象来保存xlsx工作表id/工作表名称,图像位置和文件名,以及新创建图像的驱动器图像id;
  7. 遍历blob中的每个blob;
  8. 查找xl/workbook.xml文件并检索工作表id和名称;
  9. 查找每个xl/图纸/图纸#.xml文件并检索表,位置和图像文件名;
  10. 查找所有图像,保存到驱动器和检索驱动器id映射到文件名;
  11. 浏览Google工作表中的每个工作表;
  12. 遍历每个cell;
  13. 当你发现一个以'com'开头的单元格不是公式,从表id,行和列生成图像id,找到相应的图像名称,然后对应的谷歌驱动器图像id。
  14. 清除单元格内容并重新插入为=IMAGE()公式;
  15. 在单元格中添加带有图像名称的注释。
var spreadsheet_id = "SHEET_ID"; // Spreadsheet ID
var images_folder = DriveApp.getFolderById("FOLDER_ID"); // ID of folder to store images in
function extract_images() {

// Let's get started
var ss = SpreadsheetApp.openById(spreadsheet_id); // Open a copy of the spreadsheet
const url = "https://docs.google.com/spreadsheets/export?exportFormat=xlsx&id=" + spreadsheet_id; // Export as XLSX
const blob = UrlFetchApp.fetch(url, {headers: {authorization: `Bearer ${ScriptApp.getOAuthToken()}`}}).getBlob().setContentType(MimeType.ZIP); // Change filetype to ZIP
const xlsx = Utilities.unzip(blob); // Unzip folder into collection of binary files
const xlsx_workbook_filename = "xl/workbook.xml"; // This is the filename of the worksheet list!
var worksheets = new Object(); // Create object to maps sheet names to XLSX sheet ids
const xlsx_drawing_filename_stub = "xl/drawings/drawing"; // add '#.xml' where # is sheetid.
var images = new Object(); // Create object to maps cell references of images to image files
var image_ids = new Object(); // Create object to map ID of newly created image to image name
// Let's go through every item in the xlsx blob! There may be a better way!
xlsx.forEach(b =>{
// Check to see if the blob name is the same as the worksheets.xml file
if (b.getName() == xlsx_workbook_filename){
var xlsx_workbook_file = b.getAs("text/xml"); // convert to text/xml
Logger.log(`Found '${xlsx_workbook_filename}'`);
var document = XmlService.parse(xlsx_workbook_file.getDataAsString("UTF-8")); // parse the xml file so I can access the nodes
var namespace = XmlService.getNamespace("http://schemas.openxmlformats.org/spreadsheetml/2006/main"); // This is the namespace
var root = document.getRootElement(); // Get the root element so we know where to start
var sheets = root.getChild("sheets",namespace).getChildren("sheet",namespace); // Get a collection of sheet nodes
// Iterate through the sheet nodes
sheets.forEach(s => {
worksheets[s.getAttribute("name").getValue()] = s.getAttribute("sheetId").getValue(); // Create objects to map sheet name to xlsx sheet id
});
}
// Check to see if the blob name starts with the drawing file stub
if (b.getName().startsWith(xlsx_drawing_filename_stub)){
var this_sheet_id = b.getName().substr(19,1);
var xlsx_drawing_file = b.getAs("text/xml"); // Convert to text/xml
Logger.log(`Found drawing file '${b.getName()}'`);
var document = XmlService.parse(xlsx_drawing_file.getDataAsString("UTF-8")); // Parse the xml file!
var namespace = XmlService.getNamespace("http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing"); // This is the namespace
var root = document.getRootElement(); // Get root element so we know where to start
var children = root.getChildren(); // Get all the children - each one is an embedded image
Logger.log(`${children.length} images in worksheet ${this_sheet_id}`); // Nice!
// Iterate through each embedded image
children.forEach(t =>{
var col = t.getChild("from",namespace).getChild("col",namespace).getText(); // Get the column reference
var row = t.getChild("from",namespace).getChild("row",namespace).getText(); // Get the row reference
var image_filename = t.getChild("pic",namespace).getChild("nvPicPr",namespace).getChild("cNvPr",namespace).getAttribute("name").getValue(); // Dig down!
Logger.log(`Image in sheet ${this_sheet_id} at ${col},${row} called '${image_filename}'`);
images[`${this_sheet_id},${col},${row}`] = image_filename; // Create object to hold location and filename!
});
}
// Get the actual images!
if (b.getContentType() == 'image/png'){
var the_name = b.getName().substr(9); // The image name will start with 'xl/media/' otherwise!
Logger.log(`Saving image '${the_name}'...`);
var id = images_folder.createFile(b).setName(the_name).getId(); // Create the image file in the image folder
Logger.log(`This is image id '${id}'`);
image_ids[the_name] = id; // Create object to map image name to google drive image id
}
});;
// Now, lets replace the images!
var sheets = ss.getSheets();
// Go through each worksheet in the Google sheet
sheets.forEach (sh => {
var sheet_name = sh.getName(); // Get the sheet name
Logger.log(`Looking through '${sheet_name}'`);
var sheet_id = worksheets[sheet_name]; // This is the corresponding xlsx sheet id
Logger.log(`This is worksheet id '${sheet_id}'. Looking for images...`);
var w = sh.getLastColumn()+1; // Last occupied column
var h = sh.getLastRow()+1; // Last occupied row
// Iterate through all cells!
for (i=1; i<h; i++){
for (j=1; j<w; j++){
var range = sh.getRange(i,j); // Get the range object
var value = range.getValue(); // Get the value
Logger.log(`The cell contains '${value.valueOf().toString()}'`);
// Only replace the image if there is an image in the cell AND it's NOT a formula!
if (value.valueOf().toString().startsWith("com") && !range.getFormula()){
var img_ref = `${sheet_id},${j-1},${i-1}`; // Generate the image reference
var img_name = images[img_ref]; // Get the image name
var img_id = image_ids[img_name]; // Get the Google drive image id
Logger.log(`Image at ${img_ref}`);
Logger.log(`This is called ${img_name}`);
Logger.log(`It's ID is ${img_id}`);
range.setValue(`=IMAGE("https://docs.google.com/uc?export=view&id=${img_id}",1)`); // Use 'IMAGE()' to embed the image
range.setNote(`${img_name}`); // Add a note with the image name, cause I can.
}
}
}
});
}

最新更新