feat: update data analysis agent prompt and output content

2025-03-31 17:34:33 +08:00
parent 722d5c787d
commit d3313a6b39
9 changed files with 176 additions and 120 deletions
@@ -1,8 +1,8 @@
 SYSTEM_PROMPT = (
-    "You are an AI agent designed to data analysis and data visualization task. You have various tools at your disposal that you can call upon to efficiently complete complex requests."
-    "The initial directory is: {directory}"
+    "You are an AI agent designed to data analysis / visualization task. You have various tools at your disposal that you can call upon to efficiently complete complex requests."
+    "The workspace directory is: {directory}"
 )

 NEXT_STEP_PROMPT = """
-Based on user needs, proactively select the most appropriate tool or combination of tools. For complex tasks, you can break down the problem and use different tools step by step to solve it. After using each tool, clearly explain the execution results and suggest the next steps.
+Based on user needs, break down the problem and use different tools step by step to solve it. Each step select the most appropriate tool proactively(ONLY ONE). After using each tool, clearly explain the execution results and suggest the next steps.
 """
@@ -1,12 +1,12 @@
-from app.tool.chart_visualization.normal_python_execute import NormalPythonExecute
+from app.tool.python_execute import PythonExecute


-class VisualizationPrepare(NormalPythonExecute):
+class VisualizationPrepare(PythonExecute):
    """A tool for Chart Generation Preparation"""

    name: str = "visualization_preparation"
    description: str = (
-        "Using Python code to Generates structured visualization datasets with metadata. Outputs: 1) Cleaned CSV data files 2) JSON info with csv path and visualization description."
+        "Using Python code to Generates metadata of data_visualization tool. Outputs: 1) Cleaned CSV data files 2) JSON info with csv path and visualization description."
    )
    parameters: dict = {
        "type": "object",
@@ -18,26 +18,14 @@ class VisualizationPrepare(NormalPythonExecute):
 2. Csv Data and chart description generate
 2.1 Csv data (The data you want to visulazation, cleaning / transform from origin data, saved in .csv)
 2.2 Chart description of csv data (The chart title or description should be concise and clear. Examples: 'Product sales distribution', 'Monthly revenue trend'.)
-3. Save information in json file.( format: {"csvFilePath": string, "chartTitle": string}[] encoding='utf-8')
-3. Json file saving with path print: print(json_path)
+3. Save information in json file.( format: {"csvFilePath": string, "chartTitle": string}[])
+4. Json file saving with path print: print(json_path)
 # Note
-You can generate one or multiple csv data with different visualization needs.
+1. You can generate one or multiple csv data with different visualization needs.
+2. Make each chart data esay, clean and different.
+3. save/read in utf-8
 """,
            },
        },
        "required": ["code"],
    }
-
-    async def execute(self, code: str, timeout=5):
-        """
-        Executes the provided Python code with a timeout.
-
-        Args:
-            code (str): The Python code to execute.
-            analysis_content (str): The analysis content of current task.
-            timeout (int): Execution timeout in seconds.
-
-        Returns:
-            Dict: Contains 'output' with execution output or error message and 'success' status.
-        """
-        return await super().execute(code, timeout)
@@ -12,10 +12,10 @@ from app.config import config


 class ChartVisualization(BaseTool):
-    name: str = "data_visualization_with_insight"
-    description: str = """Visualize statistical chart with JSON info from visualization_preparation tool. Outputs: 1) Charts (png/html) 2) Charts Insights (.md).
-Note: Each tool call generates only one single chart.
-"""
+    name: str = "data_visualization"
+    description: str = (
+        """Visualize statistical chart with JSON info from visualization_preparation tool. Outputs: 1) Charts (png/html) 2) Charts Insights (.md)(Optional)."""
+    )
    parameters: dict = {
        "type": "object",
        "properties": {
@@ -41,16 +41,29 @@ Note: Each tool call generates only one single chart.
            self.llm = LLM(config_name=self.name.lower())
        return self

+    def get_csv_path(self, json_info: list[dict[str, str]]) -> list[str]:
+        res = []
+        for item in json_info:
+            if os.path.exists(item["csvFilePath"]):
+                res.append(item["csvFilePath"])
+            elif os.path.exists(
+                os.path.join(f"{config.workspace_root}", item["csvFilePath"])
+            ):
+                res.append(
+                    os.path.join(f"{config.workspace_root}", item["csvFilePath"])
+                )
+            else:
+                raise Exception(f"No such file or directory: {item["csvFilePath"]}")
+        return res
+
    def success_output_template(self, result: list[dict[str, str]]) -> str:
        content = ""
+        if len(result) == 0:
+            return "Is EMPTY!"
        for item in result:
-            content += f"""## {item["title"]}
-Chart saved in: {item["savedPath"]}"""
-            if len(item["insightsText"]) > 0:
-                insight_content = ""
-                for index, text in enumerate(item["insightsText"]):
-                    insight_content += f"{index}. {text}\n"
-                content += f"""\n### Insights of Chart\n{insight_content}"""
+            content += f"""## {item["title"]}\nChart saved in: {item["chart_path"]}"""
+            if "insight_path" in item and item["insight_path"]:
+                content += f"""\nChart insights saved in {item["insight_path"]}\n"""
            else:
                content += "\n"
        return f"Chart Generated Successful! Detail is below:\n{content}"
@@ -61,25 +74,26 @@ Chart saved in: {item["savedPath"]}"""
            with open(json_path, "r", encoding="utf-8") as file:
                json_info = json.load(file)
            data_list = []
-            for item in json_info:
-                df = pd.read_csv(item["csvFilePath"])
+            csv_file_path = self.get_csv_path(json_info)
+            for index, item in enumerate(json_info):
+                df = pd.read_csv(csv_file_path[index], encoding="utf-8")
                df = df.astype(object)
                df = df.where(pd.notnull(df), None)
                data_dict_list = df.to_json(orient="records", force_ascii=False)

                data_list.append(
                    {
-                        "file_name": os.path.basename(item["csvFilePath"]).replace(
+                        "file_name": os.path.basename(csv_file_path[index]).replace(
                            ".csv", ""
                        ),
                        "dict_data": data_dict_list,
-                        "chart_description": item["chartTitle"],
+                        "chartTitle": item["chartTitle"],
                    }
                )
            tasks = [
                self.invoke_vmind(
                    item["dict_data"],
-                    item["chart_description"],
+                    item["chartTitle"],
                    item["file_name"],
                    output_type,
                )
@@ -90,25 +104,23 @@ Chart saved in: {item["savedPath"]}"""
            error_list = []
            success_list = []
            for index, result in enumerate(results):
-                csv_path = json_info[index]["csvFilePath"]
-                if "error" in result:
+                csv_path = csv_file_path[index]
+                if "error" in result and "chart_path" not in result:
                    error_list.append(f"Error in {csv_path}: {result["error"]}")
                else:
                    success_list.append(
                        {
                            **result,
-                            "title": json_info[index]["chart_description"],
+                            "title": json_info[index]["chartTitle"],
                        }
                    )
            if len(error_list) > 0:
                return {
-                    "observation": f"# Error chart generated{'\n'.join(error_list)}\nCharts saved successful are below: \n{self.success_output_template(success_list)}",
+                    "observation": f"# Error chart generated{'\n'.join(error_list)}\n{self.success_output_template(success_list)}",
                    "success": False,
                }
            else:
-                return {
-                    "observation": f"All charts saved successful!\n{self.success_output_template(success_list)}"
-                }
+                return {"observation": f"{self.success_output_template(success_list)}"}
        except Exception as e:
            return {
                "observation": f"Error: {e}",
@@ -135,7 +147,6 @@ Chart saved in: {item["savedPath"]}"""
            "file_name": file_name,
            "directory": str(config.workspace_root),
        }
-        print(vmind_params)
        # build async sub process
        process = await asyncio.create_subprocess_exec(
            "npx",
@@ -146,13 +157,14 @@ Chart saved in: {item["savedPath"]}"""
            stderr=asyncio.subprocess.PIPE,
            cwd=os.path.dirname(__file__),
        )
-
-        input_json = json.dumps(vmind_params).encode("utf-8")
+        input_json = json.dumps(vmind_params, ensure_ascii=False).encode("utf-8")
        try:
            stdout, stderr = await process.communicate(input_json)
+            stdout_str = stdout.decode("utf-8")
+            stderr_str = stderr.decode("utf-8")
            if process.returncode == 0:
-                return json.loads(stdout)
+                return json.loads(stdout_str)
            else:
-                return {"error": f"Node.js Error: {stderr}"}
+                return {"error": f"Node.js Error: {stderr_str}"}
        except Exception as e:
            return {"error": f"Subprocess Error: {str(e)}"}
@@ -1,10 +1,4 @@
-import sys
-from io import StringIO
-
 from app.tool.python_execute import PythonExecute
-from app.tool.chart_visualization.utils import (
-    extract_executable_code,
-)


 class NormalPythonExecute(PythonExecute):
@@ -12,35 +6,27 @@ class NormalPythonExecute(PythonExecute):

    name: str = "common_python_execute"
    description: str = (
-        """Executes Python code strings to do data analysis. Note:
-1. Only outputs from print() are visible; function return values are not captured. Use print() statements to display results
-2. Do data analysis (cleaning / transform) saved in *.csv
-3. Generate a data analysis report in *.md"""
+        """Executes Python code strings to tasks such as data process and data report"""
    )
    parameters: dict = {
        "type": "object",
        "properties": {
            "code": {
                "type": "string",
-                "description": "The Python code to execute.",
+                "description": """The Python code to execute. Note:
+1. Only outputs from print() are visible; function return values are not captured. Use print() statements to display results
+2. Do data process (cleaning / transform) saved in *.csv
+3. Generate a data analysis report in html""",
+            },
+            "code_type": {
+                "description": "code type",
+                "type": "string",
+                "default": "html",
+                "enum": ["process", "report", "others"],
            },
        },
        "required": ["code"],
    }

-    def _run_code(self, code: str, result_dict: dict, safe_globals: dict) -> None:
-        original_stdout = sys.stdout
-        be_extracted_code = extract_executable_code(code)  # ignore_security_alert RCE
-        try:
-            output_buffer = StringIO()
-            sys.stdout = output_buffer
-            exec(  # ignore_security_alert RCE
-                be_extracted_code, safe_globals, safe_globals
-            )  # ignore_security_alert RCE
-            result_dict["observation"] = output_buffer.getvalue()
-            result_dict["success"] = True
-        except Exception as e:
-            result_dict["observation"] = str(e)
-            result_dict["success"] = False
-        finally:
-            sys.stdout = original_stdout
+    async def execute(self, code: str, code_type: str, timeout=5):
+        return await super().execute(code, timeout)
@@ -11,9 +11,11 @@
      "dependencies": {
        "@visactor/vchart": "^1.13.7",
        "@visactor/vmind": "^2.0.4",
-        "canvas": "^2.11.2"
+        "canvas": "^2.11.2",
+        "get-stdin": "^9.0.0"
      },
      "devDependencies": {
+        "@types/get-stdin": "^7.0.0",
        "@types/node": "^22.10.1",
        "ts-node": "^10.9.2",
        "typescript": "^5.7.2"
@@ -6213,6 +6215,16 @@
        "url": "https://opencollective.com/turf"
      }
    },
+    "node_modules/@types/get-stdin": {
+      "version": "7.0.0",
+      "resolved": "https://bnpm.byted.org/@types/get-stdin/-/get-stdin-7.0.0.tgz",
+      "integrity": "sha512-kiDwIsKQvsLRvtBOnasij+6eChbCzcUT7OyVvrC5BEOE4QSKbpnwejEp0xND/9sIdOTfiu+BBl3zsB16MJ3Fww==",
+      "deprecated": "This is a stub types definition. get-stdin provides its own type definitions, so you do not need this installed.",
+      "dev": true,
+      "dependencies": {
+        "get-stdin": "*"
+      }
+    },
    "node_modules/@types/node": {
      "version": "22.13.10",
      "resolved": "https://registry.npmjs.org/@types/node/-/node-22.13.10.tgz",
@@ -7131,6 +7143,14 @@
        "geojson-flatten": "geojson-flatten"
      }
    },
+    "node_modules/geojson-flatten/node_modules/get-stdin": {
+      "version": "6.0.0",
+      "resolved": "https://bnpm.byted.org/get-stdin/-/get-stdin-6.0.0.tgz",
+      "integrity": "sha512-jp4tHawyV7+fkkSKyvjuLZswblUtz+SQKzSWnBbii16BuZksJlU1wuBYXY75r+duh/llF1ur6oNwi+2ZzjKZ7g==",
+      "engines": {
+        "node": ">=4"
+      }
+    },
    "node_modules/geojson-linestring-dissolve": {
      "version": "0.0.1",
      "resolved": "https://bnpm.byted.org/geojson-linestring-dissolve/-/geojson-linestring-dissolve-0.0.1.tgz",
@@ -7180,11 +7200,14 @@
      }
    },
    "node_modules/get-stdin": {
-      "version": "6.0.0",
-      "resolved": "https://bnpm.byted.org/get-stdin/-/get-stdin-6.0.0.tgz",
-      "integrity": "sha512-jp4tHawyV7+fkkSKyvjuLZswblUtz+SQKzSWnBbii16BuZksJlU1wuBYXY75r+duh/llF1ur6oNwi+2ZzjKZ7g==",
+      "version": "9.0.0",
+      "resolved": "https://bnpm.byted.org/get-stdin/-/get-stdin-9.0.0.tgz",
+      "integrity": "sha512-dVKBjfWisLAicarI2Sf+JuBE/DghV4UzNAVe9yhEJuzeREd3JhOTE9cUaJTeSa77fsbQUK3pcOpJfM59+VKZaA==",
      "engines": {
-        "node": ">=4"
+        "node": ">=12"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/sindresorhus"
      }
    },
    "node_modules/gifuct-js": {
@@ -3,6 +3,7 @@
  "version": "1.0.0",
  "main": "src/index.ts",
  "devDependencies": {
+    "@types/get-stdin": "^7.0.0",
    "@types/node": "^22.10.1",
    "ts-node": "^10.9.2",
    "typescript": "^5.7.2"
@@ -10,7 +11,8 @@
  "dependencies": {
    "@visactor/vchart": "^1.13.7",
    "@visactor/vmind": "^2.0.4",
-    "canvas": "^2.11.2"
+    "canvas": "^2.11.2",
+    "get-stdin": "^9.0.0"
  },
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
@@ -1,12 +1,11 @@
 import Canvas from "canvas";
 import path from "path";
 import fs from "fs";
-import { readFileSync } from "fs";
 import VMind, { ChartType } from "@visactor/vmind";
 import VChart from "@visactor/vchart";
 import { isString } from "@visactor/vutils";

-declare enum AlgorithmType {
+enum AlgorithmType {
  OverallTrending = "overallTrend",
  AbnormalTrend = "abnormalTrend",
  PearsonCorrelation = "pearsonCorrelation",
@@ -54,7 +53,7 @@ const serializeSpec = (spec: any) => {
  });
 };

-async function getHtmlVChart(spec: any, width: number, height: number) {
+async function getHtmlVChart(spec: any, width?: number, height?: number) {
  return `<!DOCTYPE html>
 <html>
 <head>
@@ -98,7 +97,7 @@ async function getHtmlVChart(spec: any, width: number, height: number) {
 function getSavedPathName(
  directory: string,
  fileName: string,
-  outputType: "html" | "png" | "json"
+  outputType: "html" | "png" | "json" | "md"
 ) {
  let newFileName = fileName;
  while (
@@ -111,8 +110,39 @@ function getSavedPathName(
  return path.join(directory, "visualization", `${newFileName}.${outputType}`);
 }

+const readStdin = (): Promise<string> => {
+  return new Promise((resolve) => {
+    let input = "";
+    process.stdin.setEncoding("utf-8"); // Ensure the encoding matches the Python side
+    process.stdin.on("data", (chunk) => (input += chunk));
+    process.stdin.on("end", () => resolve(input));
+  });
+};
+
+const setInsightTemplate = (
+  path: string,
+  title: string,
+  insights: string[]
+) => {
+  let res = "";
+  if (insights.length) {
+    res += `## ${title} Insights`;
+    insights.forEach((insight, index) => {
+      res += `\n${index + 1}. ${insight}`;
+    });
+  }
+  if (res) {
+    fs.writeFileSync(path, res, "utf-8");
+    return path;
+  }
+  return "";
+};
+
 async function generateChart() {
-  const inputData = JSON.parse(readFileSync(process.stdin.fd, "utf-8"));
+  const input = await readStdin();
+  const inputData = JSON.parse(input);
+  const res: { chart_path?: string; error?: string; insight_path?: string } =
+    {};
  try {
    const {
      llm_config,
@@ -133,6 +163,7 @@ async function generateChart() {
        Authorization: `Bearer ${apiKey}`,
      },
    });
+    // Get the chart spec and save it to a local file
    const jsonDataset = isString(dataset) ? JSON.parse(dataset) : dataset;
    const { spec, error, chartType } = await vmind.generateChart(
      userPrompt,
@@ -143,9 +174,34 @@ async function generateChart() {
        theme: "light",
      }
    );
+    if (error || !spec) {
+      console.log(
+        JSON.stringify({
+          error: error || "Spec of Chart was Empty!",
+        })
+      );
+      return;
+    }
+
    spec.title = {
      text: userPrompt,
    };
+    if (!fs.existsSync(path.join(directory, "visualization"))) {
+      fs.mkdirSync(path.join(directory, "visualization"));
+    }
+    const specPath = getSavedPathName(directory, fileName, "json");
+    fs.writeFileSync(specPath, JSON.stringify(spec, null, 2));
+    const savedPath = getSavedPathName(directory, fileName, outputType);
+    if (outputType === "png") {
+      const base64 = await getBase64(spec, width, height);
+      fs.writeFileSync(savedPath, base64);
+    } else {
+      const html = await getHtmlVChart(spec, width, height);
+      fs.writeFileSync(savedPath, html, "utf-8");
+    }
+    res.chart_path = savedPath;
+
+    // Get the chart insights and save them to a local file
    const insights = [];
    if (
      chartType &&
@@ -177,36 +233,21 @@ async function generateChart() {
      });
      insights.push(...vmindInsights);
    }
-    const insightsText = insights.map(
-      (insight) => insight.textContent?.plainText
-    );
-    if (error || !spec) {
-      console.log(
-        JSON.stringify({
-          error: error || "Spec of Chart was Empty!",
-        })
-      );
-      return;
-    }
+    const insightsText = insights
+      .map((insight) => insight.textContent?.plainText)
+      .filter((insight) => !!insight) as string[];
    spec.insights = insights;
-    if (!fs.existsSync(path.join(directory, "visualization"))) {
-      fs.mkdirSync(path.join(directory, "visualization"));
-    }
-    fs.writeFileSync(
-      getSavedPathName(directory, fileName, "json"),
-      JSON.stringify(spec, null, 2)
+    fs.writeFileSync(specPath, JSON.stringify(spec, null, 2));
+    const insightRes = setInsightTemplate(
+      getSavedPathName(directory, fileName, "md"),
+      userPrompt,
+      insightsText
    );
-    const savedPath = getSavedPathName(directory, fileName, outputType);
-    if (outputType === "png") {
-      const base64 = await getBase64(spec, width, height);
-      fs.writeFileSync(savedPath, base64);
-    } else {
-      const html = await getHtmlVChart(spec, width, height);
-      fs.writeFileSync(savedPath, html, "utf-8");
-    }
-    console.log(JSON.stringify({ savedPath, insightsText }));
-  } catch (error) {
-    console.log(JSON.stringify({ error }));
+    res.insight_path = insightRes;
+  } catch (error: any) {
+    res.error = error.toString();
+  } finally {
+    console.log(JSON.stringify(res));
  }
 }

@@ -10,12 +10,14 @@ from app.logger import logger

 async def run_flow():
    agents = {
-        "manus": Manus(),
+        # "manus": Manus(),
        "visactor": DataAnalysis(),
    }

    try:
-        prompt = """Here's last month's sales data from my Amazon store in './data/amazon_sales_jan2025.csv'. Could you analyze it thoroughly with visualizations and recommend specific, data-driven strategies to boost next month's sales by 10%?"""
+        prompt = """Here's last month's sales data from my Amazon store. Could you analyze it thoroughly with visualizations and recommend specific, data-driven strategies to boost next month's sales by 10%?
+File Path: workspace/amazon_sales_jan2025.csv
+"""

        flow = FlowFactory.create_flow(
            flow_type=FlowType.PLANNING,
@@ -10,7 +10,9 @@ class PythonExecute(BaseTool):
    """A tool for executing Python code with timeout and safety restrictions."""

    name: str = "python_execute"
-    description: str = "Executes Python code string. Note: Only print outputs are visible, function return values are not captured. Use print statements to see results."
+    description: str = (
+        "Executes Python code string. Note: Only print outputs are visible, function return values are not captured. Use print statements to see results."
+    )
    parameters: dict = {
        "type": "object",
        "properties": {